From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 19 Dec 2016 23:05:39 +0100 Subject: Reorganize source tree to use single autotools instance Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion --- src/vnet/dhcp/client.c | 1031 ++++++++++++++++++++++++++++++++++++++ src/vnet/dhcp/client.h | 118 +++++ src/vnet/dhcp/packet.h | 61 +++ src/vnet/dhcp/proxy.h | 92 ++++ src/vnet/dhcp/proxy_error.def | 30 ++ src/vnet/dhcp/proxy_node.c | 1114 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 2446 insertions(+) create mode 100644 src/vnet/dhcp/client.c create mode 100644 src/vnet/dhcp/client.h create mode 100644 src/vnet/dhcp/packet.h create mode 100644 src/vnet/dhcp/proxy.h create mode 100644 src/vnet/dhcp/proxy_error.def create mode 100644 src/vnet/dhcp/proxy_node.c (limited to 'src/vnet/dhcp') diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c new file mode 100644 index 00000000..c352e310 --- /dev/null +++ b/src/vnet/dhcp/client.c @@ -0,0 +1,1031 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include
+#include
+#include
+
+dhcp_client_main_t dhcp_client_main;
+static u8 *format_dhcp_client_state (u8 * s, va_list * va);
+static vlib_node_registration_t dhcp_client_process_node;
+
+/* Install the leased address / mask on the client's interface. */
+static void
+dhcp_client_acquire_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
+{
+  /*
+   * Install any/all info gleaned from dhcp, right here
+   */
+  ip4_add_del_interface_address (dcm->vlib_main, c->sw_if_index,
+                                 (void *) &c->leased_address,
+                                 c->subnet_mask_width, 0 /*is_del */ );
+}
+
+/* Remove the leased address / mask from the client's interface. */
+static void
+dhcp_client_release_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
+{
+  /*
+   * Remove any/all info gleaned from dhcp, right here. Caller(s)
+   * have not wiped out the info yet.
+   */
+  ip4_add_del_interface_address (dcm->vlib_main, c->sw_if_index,
+                                 (void *) &c->leased_address,
+                                 c->subnet_mask_width, 1 /*is_del */ );
+}
+
+static void
+set_l2_rewrite (dhcp_client_main_t * dcm, dhcp_client_t * c)
+{
+  /* Acquire the L2 rewrite string for the indicated sw_if_index */
+  c->l2_rewrite = vnet_build_rewrite_for_sw_interface (dcm->vnet_main,
+                                                       c->sw_if_index,
+                                                       VNET_LINK_IP4,
+                                                       0 /* broadcast */ );
+}
+
+/*
+ * dhcp_client_for_us - server-to-client callback.
+ * Called from proxy_node.c:dhcp_proxy_to_client_input().
+ * This function first decides that the packet in question is
+ * actually for the dhcp client code in case we're also acting as
+ * a dhcp proxy.
+ *
+ * Returns 0 if the packet is not for the dhcp client (the caller keeps
+ * ownership of the buffer), or 1 if the packet was consumed; in that
+ * case the buffer has been freed here.
+ */
+int
+dhcp_client_for_us (u32 bi, vlib_buffer_t * b,
+                    ip4_header_t * ip,
+                    udp_header_t * udp, dhcp_header_t * dhcp)
+{
+  dhcp_client_main_t *dcm = &dhcp_client_main;
+  vlib_main_t *vm = dcm->vlib_main;
+  dhcp_client_t *c;
+  uword *p;
+  f64 now = vlib_time_now (dcm->vlib_main);
+  u8 dhcp_message_type = 0;
+  dhcp_option_t *o;
+  u8 *options_end;
+  int renewal_time_seen = 0;
+
+  /*
+   * Doing dhcp client on this interface?
+   * Presumably we will always receive dhcp clnt for-us pkts on
+   * the interface that's asking for an address.
+   */
+  p = hash_get (dcm->client_by_sw_if_index,
+                vnet_buffer (b)->sw_if_index[VLIB_RX]);
+  if (p == 0)
+    return 0;                   /* no */
+
+  c = pool_elt_at_index (dcm->clients, p[0]);
+
+  /* Mixing dhcp relay and dhcp proxy? DGMS... */
+  if (c->state == DHCP_BOUND && c->retry_count == 0)
+    return 0;
+
+  /* parse through the packet, learn what we can */
+  if (dhcp->your_ip_address.as_u32)
+    c->leased_address.as_u32 = dhcp->your_ip_address.as_u32;
+
+  o = (dhcp_option_t *) dhcp->options;
+  options_end = b->data + b->current_data + b->current_length;
+
+  /* Check the bound first so we never read an option code past the pkt */
+  while ((u8 *) o < options_end && o->option != 0xFF /* end of options */ )
+    {
+      /* Pad option: a single byte, it carries no length field */
+      if (o->option == 0)
+        {
+          o = (dhcp_option_t *) (((uword) o) + 1);
+          continue;
+        }
+
+      /* Malformed packet: length byte or payload runs off the end */
+      if ((u8 *) o + 2 > options_end
+          || o->length > (uword) (options_end - ((u8 *) o + 2)))
+        break;
+
+      switch (o->option)
+        {
+        case 53:               /* dhcp message type */
+          if (o->length >= 1)
+            dhcp_message_type = o->data[0];
+          break;
+
+        case 51:               /* lease time */
+          if (o->length >= 4)
+            {
+              u32 lease_time_in_seconds =
+                clib_net_to_host_u32 (o->data_as_u32[0]);
+              c->lease_expires = now + (f64) lease_time_in_seconds;
+              c->lease_lifetime = lease_time_in_seconds;
+              /*
+               * Set a sensible default, in case we don't get opt 58.
+               * Options may arrive in any order: don't clobber opt 58.
+               */
+              if (renewal_time_seen == 0)
+                c->lease_renewal_interval = lease_time_in_seconds / 2;
+            }
+          break;
+
+        case 58:               /* lease renew time in seconds */
+          if (o->length >= 4)
+            {
+              u32 lease_renew_time_in_seconds =
+                clib_net_to_host_u32 (o->data_as_u32[0]);
+              c->lease_renewal_interval = lease_renew_time_in_seconds;
+              renewal_time_seen = 1;
+            }
+          break;
+
+        case 54:               /* dhcp server address */
+          if (o->length >= 4)
+            c->dhcp_server.as_u32 = o->data_as_u32[0];
+          break;
+
+        case 1:                /* subnet mask */
+          if (o->length >= 4)
+            {
+              u32 subnet_mask = clib_net_to_host_u32 (o->data_as_u32[0]);
+              c->subnet_mask_width = count_set_bits (subnet_mask);
+            }
+          break;
+
+        case 3:                /* router address */
+          if (o->length >= 4)
+            {
+              u32 router_address = o->data_as_u32[0];
+              c->router_address.as_u32 = router_address;
+            }
+          break;
+
+        case 12:               /* hostname */
+          /* A zero-length name would ask vec_validate for index -1 */
+          if (o->length > 0)
+            {
+              /* Replace the existing hostname if necessary */
+              vec_free (c->hostname);
+              vec_validate (c->hostname, o->length - 1);
+              clib_memcpy (c->hostname, o->data, o->length);
+            }
+          break;
+
+          /* $$$$ Your message in this space, parse more options */
+        default:
+          break;
+        }
+
+      o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+    }
+
+  switch (c->state)
+    {
+    case DHCP_DISCOVER:
+      if (dhcp_message_type != DHCP_PACKET_OFFER)
+        {
+          clib_warning ("sw_if_index %d state %U message type %d",
+                        c->sw_if_index, format_dhcp_client_state,
+                        c->state, dhcp_message_type);
+          c->next_transmit = now + 5.0;
+          break;
+        }
+      /* Received an offer, go send a request */
+      c->state = DHCP_REQUEST;
+      c->retry_count = 0;
+      c->next_transmit = 0;     /* send right now... */
+      /* Poke the client process, which will send the request */
+      vlib_process_signal_event (vm, dhcp_client_process_node.index,
+                                 EVENT_DHCP_CLIENT_WAKEUP, c - dcm->clients);
+      break;
+
+    case DHCP_BOUND:
+    case DHCP_REQUEST:
+      if (dhcp_message_type != DHCP_PACKET_ACK)
+        {
+          clib_warning ("sw_if_index %d state %U message type %d",
+                        c->sw_if_index, format_dhcp_client_state,
+                        c->state, dhcp_message_type);
+          c->next_transmit = now + 5.0;
+          break;
+        }
+      /* OK, we own the address (etc), add to the routing table(s) */
+      if (c->state == DHCP_REQUEST)
+        {
+          void (*fp) (u32, u32, u8 *, u8, u8 *, u8 *, u8 *) =
+            c->event_callback;
+
+          dhcp_client_acquire_address (dcm, c);
+
+          /*
+           * Configure default IP route:
+           */
+          if (c->router_address.as_u32)
+            {
+              fib_prefix_t all_0s = {
+                .fp_len = 0,
+                .fp_addr.ip4.as_u32 = 0x0,
+                .fp_proto = FIB_PROTOCOL_IP4,
+              };
+              ip46_address_t nh = {
+                .ip4 = c->router_address,
+              };
+
+              fib_table_entry_path_add (fib_table_get_index_for_sw_if_index
+                                        (FIB_PROTOCOL_IP4, c->sw_if_index),
+                                        &all_0s,
+                                        FIB_SOURCE_DHCP,
+                                        FIB_ENTRY_FLAG_NONE,
+                                        FIB_PROTOCOL_IP4,
+                                        &nh, c->sw_if_index, ~0, 1,
+                                        NULL,   // no label stack
+                                        FIB_ROUTE_PATH_FLAG_NONE);
+            }
+
+          /*
+           * Call the user's event callback to report DHCP information
+           */
+          if (fp)
+            (*fp) (c->client_index,     /* client index */
+                   c->pid, c->hostname, 0,      /* is_ipv6 */
+                   (u8 *) & c->leased_address,  /* host IP address */
+                   (u8 *) & c->router_address,  /* router IP address */
+                   (u8 *) (c->l2_rewrite + 6)); /* host MAC address */
+        }
+
+      c->state = DHCP_BOUND;
+      c->retry_count = 0;
+      c->next_transmit = now + (f64) c->lease_renewal_interval;
+      c->lease_expires = now + (f64) c->lease_lifetime;
+      break;
+
+    default:
+      /* pool index is a pointer difference, %d wants an int */
+      clib_warning ("client %d bogus state %d",
+                    (int) (c - dcm->clients), c->state);
+      break;
+    }
+
+  /* drop the pkt, return 1 */
+  vlib_buffer_free (vm, &bi, 1);
+  return 1;
+}
+
+/*
+ * Build and enqueue one DHCP client packet of the given message type.
+ * is_broadcast != 0: L2 rewrite is prepended and the packet goes straight
+ * to the interface output node; otherwise it is handed to ip4-lookup and
+ * addressed from the leased address to the known dhcp server.
+ * Silently does nothing if the interface is down.
+ */
+static void
+send_dhcp_pkt (dhcp_client_main_t * dcm, dhcp_client_t * c,
+               dhcp_packet_type_t type, int is_broadcast)
+{
+  vlib_main_t *vm = dcm->vlib_main;
+  vnet_main_t *vnm = dcm->vnet_main;
+  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, c->sw_if_index);
+  vnet_sw_interface_t *sup_sw
+    = vnet_get_sup_sw_interface (vnm, c->sw_if_index);
+  vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, c->sw_if_index);
+  vlib_buffer_t *b;
+  u32 bi;
+  ip4_header_t *ip;
+  udp_header_t *udp;
+  dhcp_header_t *dhcp;
+  u32 *to_next;
+  vlib_frame_t *f;
+  dhcp_option_t *o;
+  u16 udp_length, ip_length;
+
+  /* Interface(s) down? */
+  if ((hw->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) == 0)
+    return;
+  if ((sup_sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0)
+    return;
+  if ((sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0)
+    return;
+
+  if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+    {
+      clib_warning ("buffer allocation failure");
+      c->next_transmit = 0;
+      return;
+    }
+
+  /* Build a dhcpv4 pkt from whole cloth */
+  b = vlib_get_buffer (vm, bi);
+
+  ASSERT (b->current_data == 0);
+
+  vnet_buffer (b)->sw_if_index[VLIB_RX] = c->sw_if_index;
+  if (is_broadcast)
+    {
+      f = vlib_get_frame_to_node (vm, hw->output_node_index);
+      vnet_buffer (b)->sw_if_index[VLIB_TX] = c->sw_if_index;
+      clib_memcpy (b->data, c->l2_rewrite, vec_len (c->l2_rewrite));
+      ip = (void *)
+        (((u8 *) vlib_buffer_get_current (b)) + vec_len (c->l2_rewrite));
+    }
+  else
+    {
+      f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+      vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;       /* use interface VRF */
+      ip = vlib_buffer_get_current (b);
+    }
+
+  /* Enqueue the packet right now */
+  to_next = vlib_frame_vector_args (f);
+  to_next[0] = bi;
+  f->n_vectors = 1;
+
+  if (is_broadcast)
+    vlib_put_frame_to_node (vm, hw->output_node_index, f);
+  else
+    vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+
+  udp = (udp_header_t *) (ip + 1);
+  dhcp = (dhcp_header_t *) (udp + 1);
+
+  /* $$$ optimize, maybe */
+  memset (ip, 0, sizeof (*ip) + sizeof (*udp) + sizeof (*dhcp));
+
+  ip->ip_version_and_header_length = 0x45;
+  ip->ttl = 128;
+  ip->protocol = IP_PROTOCOL_UDP;
+
+  if (is_broadcast)
+    {
+      /* src = 0.0.0.0, dst = 255.255.255.255 */
+      ip->dst_address.as_u32 = ~0;
+    }
+  else
+    {
+      /* Renewing an active lease, plain old ip4 src/dst */
+      ip->src_address.as_u32 = c->leased_address.as_u32;
+      ip->dst_address.as_u32 = c->dhcp_server.as_u32;
+    }
+
+  udp->src_port = clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client);
+  udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server);
+
+  /* Send the interface MAC address */
+  clib_memcpy (dhcp->client_hardware_address, c->l2_rewrite + 6, 6);
+
+  /* Lease renewal, set up client_ip_address */
+  if (is_broadcast == 0)
+    dhcp->client_ip_address.as_u32 = c->leased_address.as_u32;
+
+  dhcp->opcode = 1;             /* request, all we send */
+  dhcp->hardware_type = 1;      /* ethernet */
+  dhcp->hardware_address_length = 6;
+  dhcp->transaction_identifier = c->transaction_id;
+  dhcp->flags = clib_host_to_net_u16 (is_broadcast ? DHCP_FLAG_BROADCAST : 0);
+  dhcp->magic_cookie.as_u32 = DHCP_MAGIC;
+
+  o = (dhcp_option_t *) dhcp->options;
+
+  /* Send option 53, the DHCP message type */
+  o->option = 53;
+  o->length = 1;
+  o->data[0] = type;
+  o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+
+  /* Send option 57, max msg length */
+  if (0 /* not needed, apparently */ )
+    {
+      o->option = 57;
+      o->length = 2;
+      {
+        u16 *o2 = (u16 *) o->data;
+        *o2 = clib_host_to_net_u16 (1152);
+        o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+      }
+    }
+
+  /*
+   * If server ip address is available with non-zero value,
+   * option 54 (DHCP Server Identifier) is sent.
+   */
+  if (c->dhcp_server.as_u32)
+    {
+      o->option = 54;
+      o->length = 4;
+      clib_memcpy (o->data, &c->dhcp_server.as_u32, 4);
+      o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+    }
+
+  /* send option 50, requested IP address */
+  if (c->leased_address.as_u32)
+    {
+      o->option = 50;
+      o->length = 4;
+      clib_memcpy (o->data, &c->leased_address.as_u32, 4);
+      o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+    }
+
+  /* send option 12, host name */
+  if (vec_len (c->hostname))
+    {
+      o->option = 12;
+      o->length = vec_len (c->hostname);
+      clib_memcpy (o->data, c->hostname, vec_len (c->hostname));
+      o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+    }
+
+  /* $$ maybe send the client s/w version if anyone cares */
+
+  /*
+   * send option 55, parameter request list
+   * The current list - see below, matches the Linux dhcp client's list
+   * Any specific dhcp server config and/or dhcp server may or may
+   * not yield specific options.
+   */
+  o->option = 55;
+  o->length = vec_len (c->option_55_data);
+  clib_memcpy (o->data, c->option_55_data, vec_len (c->option_55_data));
+  o = (dhcp_option_t *) (((uword) o) + (o->length + 2));
+
+  /* End of list; o++ steps over sizeof (*o), i.e. the end marker header */
+  o->option = 0xff;
+  o->length = 0;
+  o++;
+
+  b->current_length = ((u8 *) o) - b->data;
+
+  /* fix ip length, checksum and udp length */
+  ip_length = vlib_buffer_length_in_chain (vm, b);
+  if (is_broadcast)
+    ip_length -= vec_len (c->l2_rewrite);
+
+  ip->length = clib_host_to_net_u16 (ip_length);
+  ip->checksum = ip4_header_checksum (ip);
+
+  udp_length = ip_length - (sizeof (*ip));
+  udp->length = clib_host_to_net_u16 (udp_length);
+}
+
+static int
+dhcp_discover_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now)
+{
+  /*
+   * State machine "DISCOVER" state. Send a dhcp discover packet,
+   * eventually back off the retry rate.
+   */
+  send_dhcp_pkt (dcm, c, DHCP_PACKET_DISCOVER, 1 /* is_broadcast */ );
+
+  c->retry_count++;
+  if (c->retry_count > 10)
+    c->next_transmit = now + 5.0;
+  else
+    c->next_transmit = now + 1.0;
+  return 0;
+}
+
+static int
+dhcp_request_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now)
+{
+  /*
+   * State machine "REQUEST" state. Send a dhcp request packet,
+   * eventually drop back to the discover state.
+   */
+  send_dhcp_pkt (dcm, c, DHCP_PACKET_REQUEST, 1 /* is_broadcast */ );
+
+  c->retry_count++;
+  if (c->retry_count > 7 /* lucky you */ )
+    {
+      c->state = DHCP_DISCOVER;
+      c->next_transmit = now;
+      c->retry_count = 0;
+      return 1;
+    }
+  c->next_transmit = now + 1.0;
+  return 0;
+}
+
+static int
+dhcp_bound_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now)
+{
+  /*
+   * State machine "BOUND" state. Send a dhcp request packet,
+   * eventually, when the lease expires, forget the dhcp data
+   * and go back to the stone age.
+   */
+  send_dhcp_pkt (dcm, c, DHCP_PACKET_REQUEST, 0 /* is_broadcast */ );
+
+  c->retry_count++;
+  if (c->retry_count > 10)
+    c->next_transmit = now + 5.0;
+  else
+    c->next_transmit = now + 1.0;
+
+  if (now > c->lease_expires)
+    {
+      if (c->router_address.as_u32)
+        {
+          fib_prefix_t all_0s = {
+            .fp_len = 0,
+            .fp_addr.ip4.as_u32 = 0x0,
+            .fp_proto = FIB_PROTOCOL_IP4,
+          };
+          ip46_address_t nh = {
+            .ip4 = c->router_address,
+          };
+
+          fib_table_entry_path_remove (fib_table_get_index_for_sw_if_index
+                                       (FIB_PROTOCOL_IP4, c->sw_if_index),
+                                       &all_0s,
+                                       FIB_SOURCE_DHCP,
+                                       FIB_PROTOCOL_IP4,
+                                       &nh,
+                                       c->sw_if_index,
+                                       ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+        }
+
+      dhcp_client_release_address (dcm, c);
+      c->state = DHCP_DISCOVER;
+      c->next_transmit = now;
+      c->retry_count = 0;
+      /* Wipe out any memory of the address we had... */
+      c->leased_address.as_u32 = 0;
+      c->subnet_mask_width = 0;
+      c->router_address.as_u32 = 0;
+      c->lease_renewal_interval = 0;
+      c->dhcp_server.as_u32 = 0;
+      return 1;
+    }
+  return 0;
+}
+
+/*
+ * Run the state machine for one client (by pool index). Returns the
+ * process timeout to use: the passed-in timeout, or the time until this
+ * client's next transmit if that is sooner.
+ */
+static f64
+dhcp_client_sm (f64 now, f64 timeout, uword pool_index)
+{
+  dhcp_client_main_t *dcm = &dhcp_client_main;
+  dhcp_client_t *c;
+
+  /* deleted, pooched, yadda yadda yadda */
+  if (pool_is_free_index (dcm->clients, pool_index))
+    return timeout;
+
+  c = pool_elt_at_index (dcm->clients, pool_index);
+
+  /* Time for us to do something with this client? */
+  if (now < c->next_transmit)
+    return timeout;
+
+again:
+  switch (c->state)
+    {
+    case DHCP_DISCOVER:        /* send a discover */
+      if (dhcp_discover_state (dcm, c, now))
+        goto again;
+      break;
+
+    case DHCP_REQUEST:         /* send a request */
+      if (dhcp_request_state (dcm, c, now))
+        goto again;
+      break;
+
+    case DHCP_BOUND:           /* bound, renew needed? */
+      if (dhcp_bound_state (dcm, c, now))
+        goto again;
+      break;
+
+    default:
+      clib_warning ("dhcp client %d bogus state %d",
+                    (int) (c - dcm->clients), c->state);
+      break;
+    }
+
+  if (c->next_transmit < now + timeout)
+    return c->next_transmit - now;
+
+  return timeout;
+}
+
+static uword
+dhcp_client_process (vlib_main_t * vm,
+                     vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  f64 timeout = 100.0;
+  f64 now;
+  uword event_type;
+  uword *event_data = 0;
+  dhcp_client_main_t *dcm = &dhcp_client_main;
+  dhcp_client_t *c;
+  int i;
+
+  while (1)
+    {
+      vlib_process_wait_for_event_or_clock (vm, timeout);
+
+      event_type = vlib_process_get_events (vm, &event_data);
+
+      now = vlib_time_now (vm);
+
+      switch (event_type)
+        {
+        case EVENT_DHCP_CLIENT_WAKEUP:
+          /* event data carries the pool indices of the clients to run */
+          for (i = 0; i < vec_len (event_data); i++)
+            timeout = dhcp_client_sm (now, timeout, event_data[i]);
+          break;
+
+        case ~0:
+          /* no event, i.e. the clock expired: walk every client */
+          /* *INDENT-OFF* */
+          pool_foreach (c, dcm->clients,
+          ({
+            timeout = dhcp_client_sm (now, timeout,
+                                      (uword) (c - dcm->clients));
+          }));
+          /* *INDENT-ON* */
+          if (pool_elts (dcm->clients) == 0)
+            timeout = 100.0;
+          break;
+        }
+
+      vec_reset_length (event_data);
+    }
+
+  /* NOTREACHED */
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dhcp_client_process_node,static) = {
+    .function = dhcp_client_process,
+    .type = VLIB_NODE_TYPE_PROCESS,
+    .name = "dhcp-client-process",
+    .process_log2_n_stack_bytes = 16,
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_dhcp_client_state (u8 * s, va_list * va)
+{
+  dhcp_client_state_t state = va_arg (*va, dhcp_client_state_t);
+  char *str = "BOGUS!";
+
+  switch (state)
+    {
+#define _(a)                                    \
+    case a:                                     \
+      str = #a;                                 \
+        break;
+      foreach_dhcp_client_state;
+#undef _
+    default:
+      break;
+    }
+
+  s = format (s, "%s", str);
+  return s;
+}
+
+static u8 *
+format_dhcp_client (u8 * s, va_list * va)
+{
+  dhcp_client_main_t *dcm = va_arg (*va, dhcp_client_main_t *);
+  dhcp_client_t *c = va_arg (*va, dhcp_client_t *);
+  int verbose = va_arg (*va, int);
+
+  s = format (s, "[%d] %U state %U ", (int) (c - dcm->clients),
+              format_vnet_sw_if_index_name, dcm->vnet_main, c->sw_if_index,
+              format_dhcp_client_state, c->state);
+
+  if (c->leased_address.as_u32)
+    s = format (s, "addr %U/%d gw %U\n",
+                format_ip4_address, &c->leased_address,
+                c->subnet_mask_width, format_ip4_address, &c->router_address);
+  else
+    s = format (s, "no address\n");
+
+  if (verbose)
+    {
+      s = format (s, "retry count %d, next xmt %.2f",
+                  c->retry_count, c->next_transmit);
+    }
+  return s;
+}
+
+static clib_error_t *
+show_dhcp_client_command_fn (vlib_main_t * vm,
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
+{
+  dhcp_client_main_t *dcm = &dhcp_client_main;
+  dhcp_client_t *c;
+  int verbose = 0;
+  u32 sw_if_index = ~0;
+  uword *p;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "intfc %U",
+                    unformat_vnet_sw_interface, dcm->vnet_main,
+                    &sw_if_index))
+        ;
+      else if (unformat (input, "verbose"))
+        verbose = 1;
+      else
+        break;
+    }
+
+  if (sw_if_index != ~0)
+    {
+      p = hash_get (dcm->client_by_sw_if_index, sw_if_index);
+      if (p == 0)
+        return clib_error_return (0, "dhcp client not configured");
+      c = pool_elt_at_index (dcm->clients, p[0]);
+      vlib_cli_output (vm, "%U", format_dhcp_client, dcm, c, verbose);
+      return 0;
+    }
+
+  /* *INDENT-OFF* */
+  pool_foreach (c, dcm->clients,
+  ({
+    vlib_cli_output (vm, "%U", format_dhcp_client, dcm, c, verbose);
+  }));
+  /* *INDENT-ON* */
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_dhcp_client_command, static) = {
+  .path = "show dhcp client",
+  .short_help = "show dhcp client [intfc ][verbose]",
+  .function = show_dhcp_client_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Add or delete the dhcp client on a->sw_if_index.
+ * On a successful add, the hostname, client_identifier and option_55_data
+ * vectors in 'a' are taken over by the client. In every other case
+ * (delete, or any non-zero return) they are not touched and remain the
+ * caller's to free.
+ * Returns 0 on success, VNET_API_ERROR_INVALID_VALUE when adding a client
+ * that exists or deleting one that does not.
+ */
+int
+dhcp_client_add_del (dhcp_client_add_del_args_t * a)
+{
+  dhcp_client_main_t *dcm = &dhcp_client_main;
+  vlib_main_t *vm = dcm->vlib_main;
+  dhcp_client_t *c;
+  uword *p;
+  fib_prefix_t all_1s = {
+    .fp_len = 32,
+    .fp_addr.ip4.as_u32 = 0xffffffff,
+    .fp_proto = FIB_PROTOCOL_IP4,
+  };
+  fib_prefix_t all_0s = {
+    .fp_len = 0,
+    .fp_addr.ip4.as_u32 = 0x0,
+    .fp_proto = FIB_PROTOCOL_IP4,
+  };
+
+  p = hash_get (dcm->client_by_sw_if_index, a->sw_if_index);
+
+  if ((p && a->is_add) || (!p && a->is_add == 0))
+    return VNET_API_ERROR_INVALID_VALUE;
+
+  if (a->is_add)
+    {
+      pool_get (dcm->clients, c);
+      memset (c, 0, sizeof (*c));
+      c->state = DHCP_DISCOVER;
+      c->sw_if_index = a->sw_if_index;
+      c->client_index = a->client_index;
+      c->pid = a->pid;
+      c->event_callback = a->event_callback;
+      c->option_55_data = a->option_55_data;
+      c->hostname = a->hostname;
+      c->client_identifier = a->client_identifier;
+      do
+        {
+          c->transaction_id = random_u32 (&dcm->seed);
+        }
+      while (c->transaction_id == 0);
+      set_l2_rewrite (dcm, c);
+      hash_set (dcm->client_by_sw_if_index, a->sw_if_index, c - dcm->clients);
+
+      /* this add is ref counted by FIB so we can add for each itf */
+      fib_table_entry_special_add (fib_table_get_index_for_sw_if_index
+                                   (FIB_PROTOCOL_IP4, c->sw_if_index),
+                                   &all_1s,
+                                   FIB_SOURCE_DHCP,
+                                   FIB_ENTRY_FLAG_LOCAL, ADJ_INDEX_INVALID);
+
+      /*
+       * enable the interface to RX IPv4 packets
+       * this is also ref counted
+       */
+      ip4_sw_interface_enable_disable (c->sw_if_index, 1);
+
+      vlib_process_signal_event (vm, dhcp_client_process_node.index,
+                                 EVENT_DHCP_CLIENT_WAKEUP, c - dcm->clients);
+    }
+  else
+    {
+      c = pool_elt_at_index (dcm->clients, p[0]);
+
+      fib_table_entry_special_remove (fib_table_get_index_for_sw_if_index
+                                      (FIB_PROTOCOL_IP4, c->sw_if_index),
+                                      &all_1s, FIB_SOURCE_DHCP);
+
+      if (c->router_address.as_u32)
+        {
+          ip46_address_t nh = {
+            .ip4 = c->router_address,
+          };
+
+          fib_table_entry_path_remove (fib_table_get_index_for_sw_if_index
+                                       (FIB_PROTOCOL_IP4, c->sw_if_index),
+                                       &all_0s,
+                                       FIB_SOURCE_DHCP,
+                                       FIB_PROTOCOL_IP4,
+                                       &nh,
+                                       c->sw_if_index,
+                                       ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+        }
+      ip4_sw_interface_enable_disable (c->sw_if_index, 0);
+
+      vec_free (c->option_55_data);
+      vec_free (c->hostname);
+      vec_free (c->client_identifier);
+      vec_free (c->l2_rewrite);
+      hash_unset (dcm->client_by_sw_if_index, c->sw_if_index);
+      pool_put (dcm->clients, c);
+    }
+  return 0;
+}
+
+/*
+ * Build the default option 55 (parameter request) list as a new vector.
+ * These data precisely match the Ubuntu dhcp client. YMMV.
+ */
+static u8 *
+dhcp_client_default_option_55 (void)
+{
+  u8 *option_55_data = 0;
+
+  /* Subnet Mask */
+  vec_add1 (option_55_data, 1);
+  /* Broadcast address */
+  vec_add1 (option_55_data, 28);
+  /* time offset */
+  vec_add1 (option_55_data, 2);
+  /* Router */
+  vec_add1 (option_55_data, 3);
+  /* Domain Name */
+  vec_add1 (option_55_data, 15);
+  /* DNS */
+  vec_add1 (option_55_data, 6);
+  /* Domain search */
+  vec_add1 (option_55_data, 119);
+  /* Host name */
+  vec_add1 (option_55_data, 12);
+  /* NetBIOS name server */
+  vec_add1 (option_55_data, 44);
+  /* NetBIOS Scope */
+  vec_add1 (option_55_data, 47);
+  /* MTU */
+  vec_add1 (option_55_data, 26);
+  /* Classless static route */
+  vec_add1 (option_55_data, 121);
+  /* NTP servers */
+  vec_add1 (option_55_data, 42);
+
+  return option_55_data;
+}
+
+/* Free the vectors in 'a' that dhcp_client_add_del did not take over. */
+static void
+dhcp_client_free_args (dhcp_client_add_del_args_t * a)
+{
+  vec_free (a->hostname);
+  vec_free (a->client_identifier);
+  vec_free (a->option_55_data);
+}
+
+/*
+ * API-facing wrapper around dhcp_client_add_del.
+ * hostname is a C string owned by the caller (it is copied); it may be
+ * NULL or empty, in which case no hostname is configured.
+ * Returns the dhcp_client_add_del return value.
+ */
+int
+dhcp_client_config (vlib_main_t * vm,
+                    u32 sw_if_index,
+                    u8 * hostname,
+                    u32 is_add,
+                    u32 client_index, void *event_callback, u32 pid)
+{
+  dhcp_client_add_del_args_t _a, *a = &_a;
+  int rv;
+
+  memset (a, 0, sizeof (*a));
+  a->is_add = is_add;
+  a->sw_if_index = sw_if_index;
+  a->client_index = client_index;
+  a->pid = pid;
+  a->event_callback = event_callback;
+
+  /* The vectors are only consumed by an add: don't build them for a del */
+  if (is_add)
+    {
+      uword hostname_len = hostname ? strlen ((char *) hostname) : 0;
+
+      /* strlen - 1 on an empty name would wrap around */
+      if (hostname_len)
+        {
+          vec_validate (a->hostname, hostname_len - 1);
+          clib_memcpy (a->hostname, hostname, hostname_len);
+        }
+      a->client_identifier = format (0, "vpe 1.0%c", 0);
+      a->option_55_data = dhcp_client_default_option_55 ();
+    }
+
+  rv = dhcp_client_add_del (a);
+
+  /* Only a successful add takes ownership of the vectors */
+  if (rv != 0 || is_add == 0)
+    dhcp_client_free_args (a);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_INVALID_VALUE:
+      if (is_add)
+        clib_warning ("dhcp client already enabled on intf_idx %d",
+                      sw_if_index);
+      else
+        clib_warning ("dhcp client not enabled on on intf_idx %d",
+                      sw_if_index);
+      break;
+
+    default:
+      clib_warning ("dhcp_client_add_del returned %d", rv);
+    }
+
+  return rv;
+}
+
+static clib_error_t *
+dhcp_client_set_command_fn (vlib_main_t * vm,
+                            unformat_input_t * input,
+                            vlib_cli_command_t * cmd)
+{
+
+  dhcp_client_main_t *dcm = &dhcp_client_main;
+  u32 sw_if_index;
+  u8 *hostname = 0;
+  u8 sw_if_index_set = 0;
+  int is_add = 1;
+  dhcp_client_add_del_args_t _a, *a = &_a;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "intfc %U",
+                    unformat_vnet_sw_interface, dcm->vnet_main,
+                    &sw_if_index))
+        sw_if_index_set = 1;
+      else if (unformat (input, "hostname %v", &hostname))
+        ;
+      else if (unformat (input, "del"))
+        is_add = 0;
+      else
+        break;
+    }
+
+  if (sw_if_index_set == 0)
+    {
+      /* a hostname may already have been parsed into a vector */
+      vec_free (hostname);
+      return clib_error_return (0, "interface not specified");
+    }
+
+  memset (a, 0, sizeof (*a));
+  a->is_add = is_add;
+  a->sw_if_index = sw_if_index;
+  a->hostname = hostname;
+
+  /* The vectors are only consumed by an add: don't build them for a del */
+  if (is_add)
+    {
+      a->client_identifier = format (0, "vpe 1.0%c", 0);
+      a->option_55_data = dhcp_client_default_option_55 ();
+    }
+
+  rv = dhcp_client_add_del (a);
+
+  /* Only a successful add takes ownership of the vectors */
+  if (rv != 0 || is_add == 0)
+    dhcp_client_free_args (a);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_INVALID_VALUE:
+      if (is_add)
+        return clib_error_return (0, "dhcp client already enabled on %U",
+                                  format_vnet_sw_if_index_name,
+                                  dcm->vnet_main, sw_if_index);
+      else
+        return clib_error_return (0, "dhcp client not enabled on %U",
+                                  format_vnet_sw_if_index_name,
+                                  dcm->vnet_main, sw_if_index);
+
+    default:
+      vlib_cli_output (vm, "dhcp_client_add_del returned %d", rv);
+    }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (dhcp_client_set_command, static) = {
+  .path = "set dhcp client",
+  .short_help = "set dhcp client [del] intfc [hostname ]",
+  .function = dhcp_client_set_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+dhcp_client_init (vlib_main_t * vm)
+{
+  dhcp_client_main_t *dcm = &dhcp_client_main;
+
+  dcm->vlib_main = vm;
+  dcm->vnet_main = vnet_get_main ();
+  dcm->seed = 0xdeaddabe;
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (dhcp_client_init);
diff --git a/src/vnet/dhcp/client.h b/src/vnet/dhcp/client.h
new file mode 100644
index 00000000..d15e686b
--- /dev/null
+++ b/src/vnet/dhcp/client.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * client.h: dhcp client
+ */
+
+#ifndef included_dhcp_client_h
+#define included_dhcp_client_h
+
+/* X-macro list of client states; expanded into the enum below */
+#define foreach_dhcp_client_state               \
+_(DHCP_DISCOVER)                                \
+_(DHCP_REQUEST)                                 \
+_(DHCP_BOUND)
+
+typedef enum {
+#define _(a) a,
+  foreach_dhcp_client_state
+#undef _
+} dhcp_client_state_t;
+
+/* Per-interface dhcp client state */
+typedef struct {
+  dhcp_client_state_t state;
+
+  /* the interface in question */
+  u32 sw_if_index;
+
+  /* State machine retry counter */
+  u32 retry_count;
+
+  /* Send next pkt at this time */
+  f64 next_transmit;
+  f64 lease_expires;
+
+  /* DHCP transaction ID, a random number */
+  u32 transaction_id;
+
+  /* leased address, other learned info DHCP */
+  ip4_address_t leased_address; /* from your_ip_address field */
+  ip4_address_t dhcp_server;    /* option 54 */
+  u32 subnet_mask_width;        /* option 1 */
+  ip4_address_t router_address; /* option 3 */
+  u32 lease_renewal_interval;   /* option 58, else half of option 51 */
+  u32 lease_lifetime;           /* option 51 */
+
+  /* Requested data (option 55) */
+  u8 * option_55_data;
+
+  u8 * l2_rewrite;
+
+  /* hostname and software client identifiers */
+  u8 * hostname;
+  u8 * client_identifier;       /* software version, e.g. vpe 1.0*/
+
+  /* Information used for event callback */
+  u32 client_index;
+  u32 pid;
+  void * event_callback;
+} dhcp_client_t;
+
+typedef struct {
+  /* DHCP client pool */
+  dhcp_client_t * clients;
+  /* sw_if_index -> index into the clients pool */
+  uword * client_by_sw_if_index;
+  /* seed for the transaction id generator */
+  u32 seed;
+
+  /* convenience */
+  vlib_main_t * vlib_main;
+  vnet_main_t * vnet_main;
+} dhcp_client_main_t;
+
+typedef struct {
+  int is_add;
+  u32 sw_if_index;
+
+  /* vectors, consumed by dhcp client code */
+  u8 * hostname;
+  u8 * client_identifier;
+
+  /* Bytes containing requested option numbers */
+  u8 * option_55_data;
+
+  /* Information used for event callback */
+  u32 client_index;
+  u32 pid;
+  void * event_callback;
+} dhcp_client_add_del_args_t;
+
+/*
+ * Declaration only: the object itself is defined in client.c. Defining
+ * it here would put a definition in every file that includes this header.
+ */
+extern dhcp_client_main_t dhcp_client_main;
+
+#define EVENT_DHCP_CLIENT_WAKEUP        1
+
+int dhcp_client_for_us (u32 bi0,
+                        vlib_buffer_t * b0,
+                        ip4_header_t * ip0,
+                        udp_header_t * u0,
+                        dhcp_header_t * dh0);
+
+int dhcp_client_config (vlib_main_t * vm,
+                        u32 sw_if_index,
+                        u8 * hostname,
+                        u32 is_add,
+                        u32 client_index,
+                        void *event_callback,
+                        u32 pid);
+
+#endif /* included_dhcp_client_h */
diff --git a/src/vnet/dhcp/packet.h b/src/vnet/dhcp/packet.h
new file mode 100644
index 00000000..267a8eaf
--- /dev/null
+++ b/src/vnet/dhcp/packet.h
@@ -0,0 +1,61 @@
+#ifndef included_vnet_dhcp_packet_h
+#define included_vnet_dhcp_packet_h
+
+/*
+ * DHCP packet format
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include
+
+/*
+ * Fixed part of a DHCP message. The variable-length option list
+ * starts at 'options', directly after the magic cookie.
+ */
+typedef struct {
+  u8 opcode; /* 1 = request, 2 = reply */
+  u8 hardware_type; /* 1 = ethernet */
+  u8 hardware_address_length;
+  u8 hops;
+  u32 transaction_identifier;
+  u16 seconds;
+  u16 flags;
+  /* host-order value of the broadcast bit in 'flags' */
+#define DHCP_FLAG_BROADCAST (1<<15)
+  ip4_address_t client_ip_address;
+  ip4_address_t your_ip_address; /* use this one */
+  ip4_address_t server_ip_address;
+  ip4_address_t gateway_ip_address; /* use option 3, not this one */
+  u8 client_hardware_address[16];
+  u8 server_name[64];
+  u8 boot_filename[128];
+  ip4_address_t magic_cookie; /* see DHCP_MAGIC below */
+  u8 options[0];
+} dhcp_header_t;
+
+/*
+ * One entry of the option list: option code, payload length in bytes,
+ * then the payload, viewable as bytes or as 32-bit words.
+ */
+typedef struct {
+  u8 option;
+  u8 length;
+  union {
+    u8 data[0];
+    u32 data_as_u32[0];
+  };
+} __attribute__((packed)) dhcp_option_t;
+
+/* DHCP message types (values carried in option 53) */
+typedef enum {
+  DHCP_PACKET_DISCOVER=1,
+  DHCP_PACKET_OFFER,
+  DHCP_PACKET_REQUEST,
+  DHCP_PACKET_ACK=5,
+} dhcp_packet_type_t;
+
+/* charming antique: 99.130.83.99 is the dhcp magic cookie */
+#define DHCP_MAGIC (clib_host_to_net_u32(0x63825363))
+
+#endif /* included_vnet_dhcp_packet_h */
diff --git a/src/vnet/dhcp/proxy.h b/src/vnet/dhcp/proxy.h
new file mode 100644
index 00000000..e12c0d00
--- /dev/null
+++ b/src/vnet/dhcp/proxy.h
@@ -0,0 +1,92 @@
+/*
+ * proxy.h: dhcp proxy
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef included_dhcp_proxy_h +#define included_dhcp_proxy_h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef enum { +#define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n, +#include +#undef dhcp_proxy_error + DHCP_PROXY_N_ERROR, +} dhcp_proxy_error_t; + +typedef struct { + u32 oui; + u32 fib_id; +} vss_id; + +typedef union { + u8 as_u8[8]; + vss_id vpn_id; +} vss_info; + +typedef struct { + ip4_address_t dhcp_server; + ip4_address_t dhcp_src_address; + u32 insert_option_82; + u32 server_fib_index; + u32 valid; +} dhcp_server_t; + +typedef struct { + /* Pool of DHCP servers */ + dhcp_server_t * dhcp_servers; + + /* Pool of selected DHCP server. Zero is the default server */ + u32 * dhcp_server_index_by_rx_fib_index; + + /* to drop pkts in server-to-client direction */ + u32 error_drop_node_index; + + vss_info *opt82vss; + + /* hash lookup specific vrf_id -> option 82 vss suboption */ + uword * opt82vss_index_by_vrf_id; + + /* convenience */ + dhcp_client_main_t * dhcp_client_main; + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} dhcp_proxy_main_t; + +dhcp_proxy_main_t dhcp_proxy_main; + +int dhcp_proxy_set_server (ip4_address_t *addr, ip4_address_t *src_address, + u32 fib_id, int insert_option_82, int is_del); + +int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, + u32 rx_fib_id, + u32 server_fib_id, + int insert_option_82, int is_del); + +int dhcp_proxy_set_option82_vss(u32 vrf_id, + u32 oui, + u32 fib_id, + int is_del); +#endif /* included_dhcp_proxy_h */ diff --git a/src/vnet/dhcp/proxy_error.def b/src/vnet/dhcp/proxy_error.def new file mode 100644 index 00000000..6aa06eb5 --- /dev/null +++ b/src/vnet/dhcp/proxy_error.def @@ -0,0 +1,30 @@ +/* + * dhcp_proxy_error.def: dhcp proxy errors + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +dhcp_proxy_error (NONE, "no error") +dhcp_proxy_error (NO_SERVER, "no dhcp server configured") +dhcp_proxy_error (RELAY_TO_SERVER, "DHCP packets relayed to the server") +dhcp_proxy_error (RELAY_TO_CLIENT, "DHCP packets relayed to clients") +dhcp_proxy_error (OPTION_82_ERROR, "DHCP failed to insert option 82") +dhcp_proxy_error (NO_OPTION_82, "DHCP option 82 missing") +dhcp_proxy_error (BAD_OPTION_82, "Bad DHCP option 82 value") +dhcp_proxy_error (BAD_FIB_ID, "DHCP option 82 fib-id to fib-index map failure") +dhcp_proxy_error (NO_INTERFACE_ADDRESS, "DHCP no interface address") +dhcp_proxy_error (OPTION_82_VSS_NOT_PROCESSED, "DHCP VSS not processed by DHCP server") +dhcp_proxy_error (BAD_YIADDR, "DHCP packets with bad your_ip_address fields") +dhcp_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCP packets not from DHCP server or server FIB.") +dhcp_proxy_error (PKT_TOO_BIG, "DHCP packets which are too big.") diff --git a/src/vnet/dhcp/proxy_node.c b/src/vnet/dhcp/proxy_node.c new file mode 100644 index 00000000..d0d99d7e --- /dev/null +++ b/src/vnet/dhcp/proxy_node.c @@ -0,0 +1,1114 @@ +/* + * proxy_node.c: dhcp proxy node processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +static char * dhcp_proxy_error_strings[] = { +#define dhcp_proxy_error(n,s) s, +#include "proxy_error.def" +#undef dhcp_proxy_error +}; + +#define foreach_dhcp_proxy_to_server_input_next \ + _ (DROP, "error-drop") \ + _ (LOOKUP, "ip4-lookup") \ + _ (SEND_TO_CLIENT, "dhcp-proxy-to-client") + +typedef enum { +#define _(s,n) DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s, + foreach_dhcp_proxy_to_server_input_next +#undef _ + DHCP_PROXY_TO_SERVER_INPUT_N_NEXT, +} dhcp_proxy_to_server_input_next_t; + +typedef struct { + /* 0 => to server, 1 => to client */ + int which; + ip4_address_t trace_ip4_address; + u32 error; + u32 sw_if_index; + u32 original_sw_if_index; +} dhcp_proxy_trace_t; + +#define VPP_DHCP_OPTION82_SUB1_SIZE 6 +#define VPP_DHCP_OPTION82_SUB5_SIZE 6 +#define VPP_DHCP_OPTION82_VSS_SIZE 12 +#define VPP_DHCP_OPTION82_SIZE (VPP_DHCP_OPTION82_SUB1_SIZE + \ + VPP_DHCP_OPTION82_SUB5_SIZE + \ + VPP_DHCP_OPTION82_VSS_SIZE +3) + +vlib_node_registration_t dhcp_proxy_to_server_node; +vlib_node_registration_t dhcp_proxy_to_client_node; + +u8 * format_dhcp_proxy_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + dhcp_proxy_trace_t * t = va_arg (*args, dhcp_proxy_trace_t *); + + if (t->which == 0) + s = format (s, "DHCP proxy: sent to server %U\n", + format_ip4_address, &t->trace_ip4_address, t->error); + else + s = format (s, "DHCP proxy: broadcast to client from %U\n", + format_ip4_address, 
&t->trace_ip4_address); + + if (t->error != (u32)~0) + s = format (s, " error: %s\n", dhcp_proxy_error_strings[t->error]); + + s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n", + t->original_sw_if_index, t->sw_if_index); + + return s; +} + +u8 * format_dhcp_proxy_header_with_length (u8 * s, va_list * args) +{ + dhcp_header_t * h = va_arg (*args, dhcp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "dhcp header truncated"); + + s = format (s, "DHCP Proxy"); + + return s; +} + +static uword +dhcp_proxy_to_server_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + u32 pkts_to_server=0, pkts_to_client=0, pkts_no_server=0; + u32 pkts_no_interface_address=0; + u32 pkts_too_big=0; + ip4_main_t * im = &ip4_main; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0; + dhcp_header_t * h0; + ip4_header_t * ip0; + u32 next0; + u32 old0, new0; + ip_csum_t sum0; + u32 error0 = (u32) ~0; + u32 sw_if_index = 0; + u32 original_sw_if_index = 0; + u8 *end = NULL; + u32 fib_index, server_index; + dhcp_server_t * server; + u32 rx_sw_if_index; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = vlib_buffer_get_current (b0); + + /* + * udp_local hands us the DHCP header, need udp hdr, + * ip hdr to relay to server + */ + vlib_buffer_advance (b0, -(sizeof(*u0))); + u0 = vlib_buffer_get_current 
(b0); + + /* This blows. Return traffic has src_port = 67, dst_port = 67 */ + if (u0->src_port == clib_net_to_host_u16(UDP_DST_PORT_dhcp_to_server)) + { + vlib_buffer_advance (b0, sizeof(*u0)); + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT; + error0 = 0; + pkts_to_client++; + goto do_enqueue; + } + + rx_sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + fib_index = im->fib_index_by_sw_if_index [rx_sw_if_index]; + + if (fib_index < vec_len(dpm->dhcp_server_index_by_rx_fib_index)) + server_index = dpm->dhcp_server_index_by_rx_fib_index[fib_index]; + else + server_index = 0; + + if (PREDICT_FALSE (pool_is_free_index (dpm->dhcp_servers, + server_index))) + { + no_server: + error0 = DHCP_PROXY_ERROR_NO_SERVER; + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_server++; + goto do_trace; + } + + server = pool_elt_at_index (dpm->dhcp_servers, server_index); + if (server->valid == 0) + goto no_server; + + vlib_buffer_advance (b0, -(sizeof(*ip0))); + ip0 = vlib_buffer_get_current (b0); + + /* disable UDP checksum */ + u0->checksum = 0; + sum0 = ip0->checksum; + old0 = ip0->dst_address.as_u32; + new0 = server->dhcp_server.as_u32; + ip0->dst_address.as_u32 = server->dhcp_server.as_u32; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + sum0 = ip0->checksum; + old0 = ip0->src_address.as_u32; + new0 = server->dhcp_src_address.as_u32; + ip0->src_address.as_u32 = new0; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + /* Send to DHCP server via the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + server->server_fib_index; + + h0->gateway_ip_address.as_u32 = server->dhcp_src_address.as_u32; + pkts_to_server++; + + if (server->insert_option_82) + { + u32 fib_index, fib_id, opt82_fib_id=0, opt82_oui=0; + ip4_fib_t * fib; + dhcp_option_t *o = 
(dhcp_option_t *) h0->options; + u32 len = 0; + vlib_buffer_free_list_t *fl; + + fib_index = im->fib_index_by_sw_if_index + [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; + fib = ip4_fib_get (fib_index); + fib_id = fib->table_id; + + end = b0->data + b0->current_data + b0->current_length; + /* TLVs are not performance-friendly... */ + while (o->option != 0xFF /* end of options */ && (u8 *)o < end) + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + + fl = vlib_buffer_get_free_list (vm, b0->free_list_index); + // start write at (option*)o, some packets have padding + if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes) + { + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_too_big++; + goto do_trace; + } + + if ((o->option == 0xFF) && ((u8 *)o <= end)) + { + vnet_main_t *vnm = vnet_get_main(); + u16 old_l0, new_l0; + ip4_address_t _ia0, * ia0 = &_ia0; + uword *p_vss; + vss_info *vss; + vnet_sw_interface_t *swif; + sw_if_index = 0; + original_sw_if_index = 0; + + original_sw_if_index = sw_if_index = + vnet_buffer(b0)->sw_if_index[VLIB_RX]; + swif = vnet_get_sw_interface (vnm, sw_if_index); + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + p_vss = hash_get (dpm->opt82vss_index_by_vrf_id, + fib_id); + if (p_vss) + { + vss = pool_elt_at_index (dpm->opt82vss, p_vss[0]); + opt82_oui = vss->vpn_id.oui; + opt82_fib_id = vss->vpn_id.fib_id; + } + /* + * Get the first ip4 address on the [client-side] + * RX interface, if not unnumbered. otherwise use + * the loopback interface's ip address. 
+ */ + ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0); + + if (ia0 == 0) + { + error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_interface_address++; + goto do_trace; + } + + /* Add option 82 */ + o->option = 82; /* option 82 */ + o->length = 12; /* 12 octets to follow */ + o->data[0] = 1; /* suboption 1, circuit ID (=FIB id) */ + o->data[1] = 4; /* length of suboption */ + o->data[2] = (original_sw_if_index >> 24) & 0xFF; + o->data[3] = (original_sw_if_index >> 16) & 0xFF; + o->data[4] = (original_sw_if_index >> 8) & 0xFF; + o->data[5] = (original_sw_if_index >> 0) & 0xFF; + o->data[6] = 5; /* suboption 5 (client RX intfc address) */ + o->data[7] = 4; /* length 4 */ + o->data[8] = ia0->as_u8[0]; + o->data[9] = ia0->as_u8[1]; + o->data[10] = ia0->as_u8[2]; + o->data[11] = ia0->as_u8[3]; + o->data[12] = 0xFF; + if (opt82_oui !=0 || opt82_fib_id != 0) + { + o->data[12] = 151; /* vss suboption */ + if (255 == opt82_fib_id) { + o->data[13] = 1; /* length */ + o->data[14] = 255; /* vss option type */ + o->data[15] = 152; /* vss control suboption */ + o->data[16] = 0; /* length */ + /* and a new "end-of-options" option (0xff) */ + o->data[17] = 0xFF; + o->length += 5; + } else { + o->data[13] = 8; /* length */ + o->data[14] = 1; /* vss option type */ + o->data[15] = (opt82_oui >> 16) & 0xff; + o->data[16] = (opt82_oui >> 8) & 0xff; + o->data[17] = (opt82_oui ) & 0xff; + o->data[18] = (opt82_fib_id >> 24) & 0xff; + o->data[19] = (opt82_fib_id >> 16) & 0xff; + o->data[20] = (opt82_fib_id >> 8) & 0xff; + o->data[21] = (opt82_fib_id) & 0xff; + o->data[22] = 152; /* vss control suboption */ + o->data[23] = 0; /* length */ + + /* and a new "end-of-options" option (0xff) */ + o->data[24] = 0xFF; + o->length += 12; + } + } + + len = o->length + 3; + b0->current_length += len; + /* Fix IP header length and checksum */ + old_l0 = ip0->length; + new_l0 = clib_net_to_host_u16 (old_l0); + new_l0 += len; + new_l0 = 
clib_host_to_net_u16 (new_l0); + ip0->length = new_l0; + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + /* Fix UDP length */ + new_l0 = clib_net_to_host_u16 (u0->length); + new_l0 += len; + u0->length = clib_host_to_net_u16 (new_l0); + } else { + vlib_node_increment_counter + (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_OPTION_82_ERROR, 1); + } + } + + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 0; /* to server */ + tr->error = error0; + tr->original_sw_if_index = original_sw_if_index; + tr->sw_if_index = sw_if_index; + if (next0 == DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP) + tr->trace_ip4_address.as_u32 = server->dhcp_server.as_u32; + } + + do_enqueue: + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_RELAY_TO_CLIENT, + pkts_to_client); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_RELAY_TO_SERVER, + pkts_to_server); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_NO_SERVER, + pkts_no_server); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS, + pkts_no_interface_address); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_PKT_TOO_BIG, + pkts_too_big); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dhcp_proxy_to_server_node) = { + .function = dhcp_proxy_to_server_input, + .name = "dhcp-proxy-to-server", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .n_errors = DHCP_PROXY_N_ERROR, + .error_strings = dhcp_proxy_error_strings, + + .n_next_nodes = DHCP_PROXY_TO_SERVER_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s] = n, + foreach_dhcp_proxy_to_server_input_next +#undef _ + }, + + .format_buffer = format_dhcp_proxy_header_with_length, + .format_trace = format_dhcp_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcp_proxy_header, +#endif +}; + +static uword +dhcp_proxy_to_client_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, * from; + ethernet_main_t *em = ethernet_get_main (vm); + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im = &ip4_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0; + dhcp_header_t * h0; + ip4_header_t * ip0 = 0; + ip4_address_t * ia0 = 0; + u32 old0, new0; + ip_csum_t sum0; + ethernet_interface_t *ei0; + ethernet_header_t *mac0; + vnet_hw_interface_t *hi0; + vlib_frame_t *f0; + u32 * to_next0; + u32 sw_if_index = ~0; + vnet_sw_interface_t *si0; + u32 error0 = (u32)~0; + vnet_sw_interface_t *swif; + u32 server_index; + u32 fib_index; + dhcp_server_t * server; + u32 original_sw_if_index = (u32) ~0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + /* + * udp_local hands us the DHCP header, need udp hdr, + * ip hdr to relay to client + */ + vlib_buffer_advance (b0, -(sizeof(*u0))); + u0 = vlib_buffer_get_current (b0); + + vlib_buffer_advance (b0, -(sizeof(*ip0))); + ip0 = vlib_buffer_get_current (b0); + + /* Consumed by dhcp client code? 
*/ + if (dhcp_client_for_us (bi0, b0, ip0, u0, h0)) + continue; + + if (1 /* dpm->insert_option_82 */) + { + dhcp_option_t *o = (dhcp_option_t *) h0->options; + dhcp_option_t *sub; + + /* Parse through TLVs looking for option 82. + The circuit-ID is the FIB number we need + to track down the client-facing interface */ + + while (o->option != 0xFF /* end of options */ && + (u8 *) o < (b0->data + b0->current_data + b0->current_length)) + { + if (o->option == 82) + { + u32 vss_exist = 0; + u32 vss_ctrl = 0; + sub = (dhcp_option_t *) &o->data[0]; + while (sub->option != 0xFF /* end of options */ && + (u8 *) sub < (u8 *)(o + o->length)) { + /* If this is one of ours, it will have + total length 12, circuit-id suboption type, + and the sw_if_index */ + if (sub->option == 1 && sub->length == 4) + { + sw_if_index = (o->data[2] << 24) + | (o->data[3] << 16) + | (o->data[4] << 8) + | (o->data[5]); + } else if (sub->option == 151 && + sub->length == 7 && + sub->data[0] == 1) + vss_exist = 1; + else if (sub->option == 152 && sub->length == 0) + vss_ctrl = 1; + sub = (dhcp_option_t *) + (((uword) sub) + (sub->length + 2)); + } + if (vss_ctrl && vss_exist) + vlib_node_increment_counter + (vm, dhcp_proxy_to_client_node.index, + DHCP_PROXY_ERROR_OPTION_82_VSS_NOT_PROCESSED, 1); + + } + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + } + } + + if (sw_if_index == (u32)~0) + { + error0 = DHCP_PROXY_ERROR_NO_OPTION_82; + + drop_packet: + vlib_node_increment_counter (vm, dhcp_proxy_to_client_node.index, + error0, 1); + f0 = vlib_get_frame_to_node (vm, dpm->error_drop_node_index); + to_next0 = vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, dpm->error_drop_node_index, f0); + goto do_trace; + } + + + if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index)) + { + error0 = DHCP_PROXY_ERROR_BAD_OPTION_82; + goto drop_packet; + } + + fib_index = im->fib_index_by_sw_if_index [sw_if_index]; + + if (fib_index < 
vec_len(dpm->dhcp_server_index_by_rx_fib_index)) + server_index = dpm->dhcp_server_index_by_rx_fib_index[fib_index]; + else + server_index = 0; + + if (PREDICT_FALSE (pool_is_free_index (dpm->dhcp_servers, + server_index))) + { + error0 = DHCP_PROXY_ERROR_BAD_OPTION_82; + goto drop_packet; + } + + server = pool_elt_at_index (dpm->dhcp_servers, server_index); + if (server->valid == 0) + { + error0 = DHCP_PROXY_ERROR_NO_SERVER; + goto drop_packet; + } + + if (ip0->src_address.as_u32 != server->dhcp_server.as_u32) + { + error0 = DHCP_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; + goto drop_packet; + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index; + + swif = vnet_get_sw_interface (vnm, sw_if_index); + original_sw_if_index = sw_if_index; + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + ia0 = ip4_interface_first_address (&ip4_main, sw_if_index, 0); + if (ia0 == 0) + { + error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; + goto drop_packet; + } + + u0->checksum = 0; + u0->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcp_to_client); + sum0 = ip0->checksum; + old0 = ip0->dst_address.as_u32; + new0 = 0xFFFFFFFF; + ip0->dst_address.as_u32 = new0; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + dst_address /* offset of changed member */); + ip0->checksum = ip_csum_fold (sum0); + + sum0 = ip0->checksum; + old0 = ip0->src_address.as_u32; + new0 = ia0->as_u32; + ip0->src_address.as_u32 = new0; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + src_address /* offset of changed member */); + ip0->checksum = ip_csum_fold (sum0); + + vlib_buffer_advance (b0, -(sizeof(ethernet_header_t))); + si0 = vnet_get_sw_interface (vnm, original_sw_if_index); + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + vlib_buffer_advance (b0, -4 /* space for VLAN tag */); + + mac0 = vlib_buffer_get_current (b0); + + hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index); + ei0 = 
pool_elt_at_index (em->interfaces, hi0->hw_instance); + clib_memcpy (mac0->src_address, ei0->address, sizeof (ei0->address)); + memset (mac0->dst_address, 0xff, sizeof (mac0->dst_address)); + mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ? + clib_net_to_host_u16(0x8100) : clib_net_to_host_u16 (0x0800); + + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + { + u32 * vlan_tag = (u32 *)(mac0+1); + u32 tmp; + tmp = (si0->sub.id << 16) | 0x0800; + *vlan_tag = clib_host_to_net_u32 (tmp); + } + + /* $$$ This needs to be rewritten, for sure */ + f0 = vlib_get_frame_to_node (vm, hi0->output_node_index); + to_next0 = vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, hi0->output_node_index, f0); + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 1; /* to client */ + tr->trace_ip4_address.as_u32 = ia0 ? ia0->as_u32 : 0; + tr->error = error0; + tr->original_sw_if_index = original_sw_if_index; + tr->sw_if_index = sw_if_index; + } + } + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dhcp_proxy_to_client_node) = { + .function = dhcp_proxy_to_client_input, + .name = "dhcp-proxy-to-client", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .n_errors = DHCP_PROXY_N_ERROR, + .error_strings = dhcp_proxy_error_strings, + .format_buffer = format_dhcp_proxy_header_with_length, + .format_trace = format_dhcp_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcp_proxy_header, +#endif +}; + +clib_error_t * dhcp_proxy_init (vlib_main_t * vm) +{ + dhcp_proxy_main_t * dm = &dhcp_proxy_main; + vlib_node_t * error_drop_node; + dhcp_server_t * server; + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main(); + error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); + dm->error_drop_node_index = error_drop_node->index; + + dm->opt82vss_index_by_vrf_id = hash_create (0, sizeof (uword)); + + udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_client, + dhcp_proxy_to_client_node.index, 1 /* is_ip4 */); + + udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_server, + dhcp_proxy_to_server_node.index, 1 /* is_ip4 */); + + /* Create the default server, don't mark it valid */ + pool_get (dm->dhcp_servers, server); + memset (server, 0, sizeof (*server)); + + return 0; +} + +VLIB_INIT_FUNCTION (dhcp_proxy_init); + +int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, + u32 rx_fib_id, + u32 server_fib_id, + int insert_option_82, int is_del) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + dhcp_server_t * server = 0; + u32 server_index = 0; + u32 rx_fib_index = 0; + + if (addr->as_u32 == 0) + return VNET_API_ERROR_INVALID_DST_ADDRESS; + + if (src_address->as_u32 == 0) + return VNET_API_ERROR_INVALID_SRC_ADDRESS; + + rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, + rx_fib_id); + + if (rx_fib_id == 0) + { + server = pool_elt_at_index (dpm->dhcp_servers, 0); + + if (is_del) + { + memset (server, 0, sizeof (*server)); + return 0; + } + goto initialize_it; + } + + if (is_del) + { + if (rx_fib_index >= vec_len(dpm->dhcp_server_index_by_rx_fib_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + server_index = 
dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index]; + ASSERT(server_index > 0); + + /* Use the default server again. */ + dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = 0; + server = pool_elt_at_index (dpm->dhcp_servers, server_index); + memset (server, 0, sizeof (*server)); + pool_put (dpm->dhcp_servers, server); + return 0; + } + + if (rx_fib_index < vec_len(dpm->dhcp_server_index_by_rx_fib_index)) + { + server_index = dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index]; + if (server_index != 0) + { + server = pool_elt_at_index (dpm->dhcp_servers, server_index); + goto initialize_it; + } + } + + pool_get (dpm->dhcp_servers, server); + + initialize_it: + + server->dhcp_server.as_u32 = addr->as_u32; + server->server_fib_index = + fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, + server_fib_id); + server->dhcp_src_address.as_u32 = src_address->as_u32; + server->insert_option_82 = insert_option_82; + server->valid = 1; + if (rx_fib_index) + { + vec_validate (dpm->dhcp_server_index_by_rx_fib_index, rx_fib_index); + dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = + server - dpm->dhcp_servers; + } + + return 0; +} + +/* Old API, manipulates the default server (only) */ +int dhcp_proxy_set_server (ip4_address_t *addr, ip4_address_t *src_address, + u32 fib_id, int insert_option_82, int is_del) +{ + return dhcp_proxy_set_server_2 (addr, src_address, 0 /* rx_fib_id */, + fib_id /* server_fib_id */, + insert_option_82, is_del); +} + + +static clib_error_t * +dhcp_proxy_set_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip4_address_t server_addr, src_addr; + u32 server_fib_id = 0, rx_fib_id = 0; + int is_del = 0; + int add_option_82 = 0; + int set_src = 0, set_server = 0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "server %U", + unformat_ip4_address, &server_addr)) + set_server = 1; + else if (unformat (input, "server-fib-id %d", &server_fib_id)) + ; + else if 
(unformat (input, "rx-fib-id %d", &rx_fib_id)) + ; + else if (unformat(input, "src-address %U", + unformat_ip4_address, &src_addr)) + set_src = 1; + else if (unformat (input, "add-option-82") + || unformat (input, "insert-option-82")) + add_option_82 = 1; + else if (unformat (input, "delete") || + unformat (input, "del")) + is_del = 1; + else + break; + } + + if (is_del || (set_server && set_src)) + { + int rv; + + rv = dhcp_proxy_set_server_2 (&server_addr, &src_addr, rx_fib_id, + server_fib_id, add_option_82, is_del); + switch (rv) + { + case 0: + return 0; + + case VNET_API_ERROR_INVALID_DST_ADDRESS: + return clib_error_return (0, "Invalid server address"); + + case VNET_API_ERROR_INVALID_SRC_ADDRESS: + return clib_error_return (0, "Invalid src address"); + + case VNET_API_ERROR_NO_SUCH_INNER_FIB: + return clib_error_return (0, "No such rx fib id %d", rx_fib_id); + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "No such server fib id %d", + server_fib_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return + (0, "Fib id %d: no per-fib DHCP server configured", rx_fib_id); + + default: + return clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (dhcp_proxy_set_command, static) = { + .path = "set dhcp proxy", + .short_help = "set dhcp proxy [del] server src-address [add-option-82] [server-fib-id ] [rx-fib-id ]", + .function = dhcp_proxy_set_command_fn, +}; + +u8 * format_dhcp_proxy_server (u8 * s, va_list * args) +{ + dhcp_proxy_main_t * dm = va_arg (*args, dhcp_proxy_main_t *); + dhcp_server_t * server = va_arg (*args, dhcp_server_t *); + u32 rx_fib_index = va_arg (*args, u32); + ip4_fib_t * rx_fib, * server_fib; + u32 server_fib_id = ~0, rx_fib_id = ~0; + + if (dm == 0) + { + s = format (s, "%=16s%=16s%=14s%=14s%=20s", "Server", "Src Address", + "Server FIB", "RX FIB", "Insert Option 82"); + return s; + } + + server_fib 
= ip4_fib_get(server->server_fib_index); + + if (server_fib) + server_fib_id = server_fib->table_id; + + rx_fib = ip4_fib_get(rx_fib_index); + + if (rx_fib) + rx_fib_id = rx_fib->table_id; + + s = format (s, "%=16U%=16U%=14u%=14u%=20s", + format_ip4_address, &server->dhcp_server, + format_ip4_address, &server->dhcp_src_address, + server_fib_id, rx_fib_id, + server->insert_option_82 ? "yes" : "no"); + return s; +} + +static clib_error_t * +dhcp_proxy_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + ip4_main_t * im = &ip4_main; + dhcp_server_t * server; + u32 server_index; + int i; + + vlib_cli_output (vm, "%U", format_dhcp_proxy_server, 0 /* header line */, + 0, 0); + + for (i = 0; i < vec_len (im->fibs); i++) + { + if (i < vec_len(dpm->dhcp_server_index_by_rx_fib_index)) + server_index = dpm->dhcp_server_index_by_rx_fib_index[i]; + else + server_index = 0; + server = pool_elt_at_index (dpm->dhcp_servers, server_index); + if (server->valid) + vlib_cli_output (vm, "%U", format_dhcp_proxy_server, dpm, + server, i); + } + + return 0; +} + +VLIB_CLI_COMMAND (dhcp_proxy_show_command, static) = { + .path = "show dhcp proxy", + .short_help = "Display dhcp proxy server info", + .function = dhcp_proxy_show_command_fn, +}; + + +int dhcp_proxy_set_option82_vss( u32 vrf_id, + u32 oui, + u32 fib_id, + int is_del) +{ + dhcp_proxy_main_t *dm = &dhcp_proxy_main; + uword *p; + vss_info *a; + u32 old_oui=0, old_fib_id=0; + + p = hash_get (dm->opt82vss_index_by_vrf_id, vrf_id); + + if (p) + { + a = pool_elt_at_index (dm->opt82vss, p[0]); + if (!a) + return VNET_API_ERROR_NO_SUCH_FIB; + old_oui = a->vpn_id.oui; + old_fib_id = a->vpn_id.fib_id; + + if (is_del) + { + if (old_oui == oui && + old_fib_id == fib_id) + { + pool_put(dm->opt82vss, a); + hash_unset (dm->opt82vss_index_by_vrf_id, vrf_id); + return 0; + } + else + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + pool_put(dm->opt82vss, a); + 
hash_unset (dm->opt82vss_index_by_vrf_id, vrf_id); + } else if (is_del) + return VNET_API_ERROR_NO_SUCH_ENTRY; + pool_get (dm->opt82vss, a); + memset (a, ~0, sizeof (a[0])); + a->vpn_id.oui = oui; + a->vpn_id.fib_id = fib_id; + hash_set (dm->opt82vss_index_by_vrf_id, vrf_id, a - dm->opt82vss); + + return 0; +} + +static clib_error_t * +dhcp_option_82_vss_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0, got_new_vpn_id=0; + u32 oui=0, fib_id=0, tbl_id=~0; + + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat(input, "delete") || unformat(input, "del")) + is_del = 1; + else if (unformat (input, "oui %d", &oui)) + got_new_vpn_id = 1; + else if (unformat (input, "vpn-id %d", &fib_id)) + got_new_vpn_id = 1; + else if (unformat (input, "table %d", &tbl_id)) + got_new_vpn_id = 1; + else + break; + } + if (tbl_id == ~0) + return clib_error_return (0, "no table ID specified."); + + if (is_del || got_new_vpn_id) + { + int rv; + rv = dhcp_proxy_set_option82_vss(tbl_id, oui, fib_id, is_del); + switch (rv) + { + case 0: + return 0; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "option 82 vss(oui:%d, vpn-id:%d) not found in table %d", + oui, fib_id, tbl_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "option 82 vss for table %d not found in in pool.", + tbl_id); + default: + return clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (dhcp_proxy_vss_command,static) = { + .path = "set dhcp option-82 vss", + .short_help = "set dhcp option-82 vss [del] table oui vpn-id ", + .function = dhcp_option_82_vss_fn, +}; + + +static clib_error_t * +dhcp_vss_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + dhcp_proxy_main_t * dm = &dhcp_proxy_main; + vss_info *v; + u32 oui; + u32 fib_id; + u32 tbl_id; + 
uword index; + + vlib_cli_output (vm, "%=9s%=11s%=12s","Table", "OUI", "VPN-ID"); + hash_foreach (tbl_id, index, dm->opt82vss_index_by_vrf_id, + ({ + v = pool_elt_at_index (dm->opt82vss, index); + oui = v->vpn_id.oui; + fib_id = v->vpn_id.fib_id; + vlib_cli_output (vm, "%=9d 0x%08x%=12d", + tbl_id, oui, fib_id); + })); + + return 0; +} + +VLIB_CLI_COMMAND (dhcp_proxy_vss_show_command, static) = { + .path = "show dhcp vss", + .short_help = "show dhcp VSS", + .function = dhcp_vss_show_command_fn, +}; + +static clib_error_t * +dhcp_option_82_address_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + dhcp_proxy_main_t *dm = &dhcp_proxy_main; + vnet_main_t *vnm = vnet_get_main(); + u32 sw_if_index0=0, sw_if_index; + ip4_address_t *ia0; + vnet_sw_interface_t *swif; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat(input, "%U", + unformat_vnet_sw_interface, dm->vnet_main, &sw_if_index0)) + { + swif = vnet_get_sw_interface (vnm, sw_if_index0); + sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ? 
+ swif->unnumbered_sw_if_index : sw_if_index0; + ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0); + if (ia0) + { + vlib_cli_output (vm, "%=20s%=20s", "interface", + "source IP address"); + + vlib_cli_output (vm, "%=20U%=20U", + format_vnet_sw_if_index_name, + dm->vnet_main, sw_if_index0, + format_ip4_address, ia0); + } + else + vlib_cli_output (vm, "%=34s %=20U", + "No IPv4 address configured on", + format_vnet_sw_if_index_name, + dm->vnet_main, sw_if_index); + } + else + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (dhcp_proxy_address_show_command,static) = { + .path = "show dhcp option-82-address interface", + .short_help = "show dhcp option-82-address interface ", + .function = dhcp_option_82_address_show_command_fn, +}; -- cgit 1.2.3-korg From c8d8770a3e09c300eeff461a11ef3723b8e029cb Mon Sep 17 00:00:00 2001 From: Pavel Kotucek Date: Wed, 25 Jan 2017 07:25:32 +0100 Subject: API refactoring : dhcp Change-Id: I3829835ed2126e51e96690c907deac623dc77151 Signed-off-by: Pavel Kotucek --- src/vnet.am | 8 +- src/vnet/dhcp/dhcp.api | 166 ++++++++++++++++++++++++++++++ src/vnet/dhcp/dhcp_api.c | 253 ++++++++++++++++++++++++++++++++++++++++++++++ src/vnet/vnet_all_api_h.h | 1 + src/vpp/api/api.c | 158 ----------------------------- src/vpp/api/vpe.api | 147 +-------------------------- 6 files changed, 427 insertions(+), 306 deletions(-) create mode 100644 src/vnet/dhcp/dhcp.api create mode 100644 src/vnet/dhcp/dhcp_api.c (limited to 'src/vnet/dhcp') diff --git a/src/vnet.am b/src/vnet.am index 669ea1ff..c6922493 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -657,10 +657,14 @@ endif ######################################## libvnet_la_SOURCES += \ vnet/dhcp/client.c \ - vnet/dhcp/client.h + vnet/dhcp/client.h \ + vnet/dhcp/dhcp_api.c nobase_include_HEADERS += \ - vnet/dhcp/client.h + vnet/dhcp/client.h \ + vnet/dhcp/dhcp.api.h + +API_FILES += vnet/dhcp/dhcp.api ######################################## # DHCP proxy diff --git a/src/vnet/dhcp/dhcp.api 
b/src/vnet/dhcp/dhcp.api new file mode 100644 index 00000000..c228cd04 --- /dev/null +++ b/src/vnet/dhcp/dhcp.api @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief DHCP Proxy config add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf_id - vrf id + @param is_ipv6 - ipv6 if non-zero, else ipv4 + @param is_add - add the config if non-zero, else delete + @param insert_circuit_id - option82 suboption 1 fib number + @param dhcp_server[] - server address + @param dhcp_src_address[] - +*/ +define dhcp_proxy_config +{ + u32 client_index; + u32 context; + u32 vrf_id; + u8 is_ipv6; + u8 is_add; + u8 insert_circuit_id; + u8 dhcp_server[16]; + u8 dhcp_src_address[16]; +}; + +/** \brief DHCP Proxy config response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define dhcp_proxy_config_reply +{ + u32 context; + i32 retval; +}; + +/** \brief DHCP Proxy config 2 add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param rx_vrf_id - receive vrf id + @param server_vrf_id - server vrf id + @param is_ipv6 - ipv6 if non-zero, else ipv4 + @param is_add - add the config if non-zero, else delete + @param insert_circuit_id - option82
suboption 1 fib number + @param dhcp_server[] - server address + @param dhcp_src_address[] - +*/ +define dhcp_proxy_config_2 +{ + u32 client_index; + u32 context; + u32 rx_vrf_id; + u32 server_vrf_id; + u8 is_ipv6; + u8 is_add; + u8 insert_circuit_id; + u8 dhcp_server[16]; + u8 dhcp_src_address[16]; +}; + +/** \brief DHCP Proxy config 2 add / del response + @param context - sender context, to match reply w/ request + @param retval - return code for request +*/ +define dhcp_proxy_config_2_reply +{ + u32 context; + i32 retval; +}; + +/** \brief DHCP Proxy set / unset vss request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param tbl_id - table id + @param oui - first part of vpn id + @param fib_id - second part of vpn id + @param is_ipv6 - ip6 if non-zero, else ip4 + @param is_add - set vss if non-zero, else delete +*/ +define dhcp_proxy_set_vss +{ + u32 client_index; + u32 context; + u32 tbl_id; + u32 oui; + u32 fib_id; + u8 is_ipv6; + u8 is_add; +}; + +/** \brief DHCP proxy set / unset vss response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define dhcp_proxy_set_vss_reply +{ + u32 context; + i32 retval; +}; + +/** \brief DHCP Client config add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - index of the interface for DHCP client + @param hostname - hostname + @param is_add - add the config if non-zero, else delete + @param want_dhcp_event - DHCP event sent to the sender + via dhcp_compl_event API message if non-zero + @param pid - sender's pid +*/ +define dhcp_client_config +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 hostname[64]; + u8 is_add; + u8 want_dhcp_event; + u32 pid; +}; + +/** \brief DHCP Client config response + @param context - sender context, to match reply w/ request + @param 
retval - return code for the request +*/ +define dhcp_client_config_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Tell client about a DHCP completion event + @param client_index - opaque cookie to identify the sender + @param pid - client pid registered to receive notification + @param is_ipv6 - if non-zero the address is ipv6, else ipv4 + @param host_address - Host IP address + @param router_address - Router IP address + @param host_mac - Host MAC address +*/ +define dhcp_compl_event +{ + u32 client_index; + u32 pid; + u8 hostname[64]; + u8 is_ipv6; + u8 host_address[16]; + u8 router_address[16]; + u8 host_mac[6]; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ \ No newline at end of file diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c new file mode 100644 index 00000000..88b32b24 --- /dev/null +++ b/src/vnet/dhcp/dhcp_api.c @@ -0,0 +1,253 @@ +/* + *------------------------------------------------------------------ + * dhcp_api.c - dhcp api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(DHCP_PROXY_CONFIG,dhcp_proxy_config) \ +_(DHCP_PROXY_CONFIG_2,dhcp_proxy_config_2) \ +_(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \ +_(DHCP_CLIENT_CONFIG, dhcp_client_config) + +static void +dhcpv4_proxy_config (vl_api_dhcp_proxy_config_t * mp) +{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv; + + rv = dhcp_proxy_set_server ((ip4_address_t *) (&mp->dhcp_server), + (ip4_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->vrf_id), + (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); +} + + +static void +dhcpv6_proxy_config (vl_api_dhcp_proxy_config_t * mp) +{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv = -1; + + rv = dhcpv6_proxy_set_server ((ip6_address_t *) (&mp->dhcp_server), + (ip6_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->vrf_id), + (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); +} + +static void +dhcpv4_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) +{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv; + + rv = dhcp_proxy_set_server_2 ((ip4_address_t *) (&mp->dhcp_server), + (ip4_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->rx_vrf_id), + (u32) ntohl (mp->server_vrf_id), + (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); +} + + +static void +dhcpv6_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) 
+{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv = -1; + + rv = dhcpv6_proxy_set_server_2 ((ip6_address_t *) (&mp->dhcp_server), + (ip6_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->rx_vrf_id), + (u32) ntohl (mp->server_vrf_id), + (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); +} + + +static void +vl_api_dhcp_proxy_set_vss_t_handler (vl_api_dhcp_proxy_set_vss_t * mp) +{ + vl_api_dhcp_proxy_set_vss_reply_t *rmp; + int rv; + if (!mp->is_ipv6) + rv = dhcp_proxy_set_option82_vss (ntohl (mp->tbl_id), + ntohl (mp->oui), + ntohl (mp->fib_id), + (int) mp->is_add == 0); + else + rv = dhcpv6_proxy_set_vss (ntohl (mp->tbl_id), + ntohl (mp->oui), + ntohl (mp->fib_id), (int) mp->is_add == 0); + + REPLY_MACRO (VL_API_DHCP_PROXY_SET_VSS_REPLY); +} + + +static void vl_api_dhcp_proxy_config_t_handler + (vl_api_dhcp_proxy_config_t * mp) +{ + if (mp->is_ipv6 == 0) + dhcpv4_proxy_config (mp); + else + dhcpv6_proxy_config (mp); +} + +void +dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, + u8 is_ipv6, u8 * host_address, u8 * router_address, + u8 * host_mac) +{ + unix_shared_memory_queue_t *q; + vl_api_dhcp_compl_event_t *mp; + + q = vl_api_client_index_to_input_queue (client_index); + if (!q) + return; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->client_index = client_index; + mp->pid = pid; + mp->is_ipv6 = is_ipv6; + clib_memcpy (&mp->hostname, hostname, vec_len (hostname)); + mp->hostname[vec_len (hostname) + 1] = '\n'; + clib_memcpy (&mp->host_address[0], host_address, 16); + clib_memcpy (&mp->router_address[0], router_address, 16); + + if (NULL != host_mac) + clib_memcpy (&mp->host_mac[0], host_mac, 6); + + mp->_vl_msg_id = ntohs (VL_API_DHCP_COMPL_EVENT); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void vl_api_dhcp_proxy_config_2_t_handler + (vl_api_dhcp_proxy_config_2_t * mp) +{ + if (mp->is_ipv6 == 0) + dhcpv4_proxy_config_2 (mp); + else + dhcpv6_proxy_config_2 (mp); 
+} + +static void vl_api_dhcp_client_config_t_handler + (vl_api_dhcp_client_config_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_dhcp_client_config_reply_t *rmp; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + rv = dhcp_client_config (vm, ntohl (mp->sw_if_index), + mp->hostname, mp->is_add, mp->client_index, + mp->want_dhcp_event ? dhcp_compl_event_callback : + NULL, mp->pid); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_DHCP_CLIENT_CONFIG_REPLY); +} + +/* + * dhcp_api_hookup + * Add vpe's API message handlers to the table. + * vlib has already mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_dhcp; +#undef _ +} + +static clib_error_t * +dhcp_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (dhcp_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index d76eee5a..4ba3a0e3 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -54,6 +54,7 @@ #include #include #include +#include /* * fd.io coding-style-patch-verification: ON diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 9f6f260b..6317f557 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -52,12 +52,9 @@ #include #include #include
-#include -#include #if WITH_LIBSSL > 0 #include #endif -#include #include #include #include @@ -124,10 +121,6 @@ _(PROXY_ARP_ADD_DEL, proxy_arp_add_del) \ _(PROXY_ARP_INTFC_ENABLE_DISABLE, proxy_arp_intfc_enable_disable) \ _(VNET_GET_SUMMARY_STATS, vnet_get_summary_stats) \ _(RESET_FIB, reset_fib) \ -_(DHCP_PROXY_CONFIG,dhcp_proxy_config) \ -_(DHCP_PROXY_CONFIG_2,dhcp_proxy_config_2) \ -_(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \ -_(DHCP_CLIENT_CONFIG, dhcp_client_config) \ _(CREATE_LOOPBACK, create_loopback) \ _(CONTROL_PING, control_ping) \ _(CLI_REQUEST, cli_request) \ @@ -1064,157 +1057,6 @@ vl_api_reset_fib_t_handler (vl_api_reset_fib_t * mp) REPLY_MACRO (VL_API_RESET_FIB_REPLY); } - -static void -dhcpv4_proxy_config (vl_api_dhcp_proxy_config_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv; - - rv = dhcp_proxy_set_server ((ip4_address_t *) (&mp->dhcp_server), - (ip4_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->vrf_id), - (int) mp->insert_circuit_id, - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); -} - - -static void -dhcpv6_proxy_config (vl_api_dhcp_proxy_config_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv = -1; - - rv = dhcpv6_proxy_set_server ((ip6_address_t *) (&mp->dhcp_server), - (ip6_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->vrf_id), - (int) mp->insert_circuit_id, - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); -} - -static void -dhcpv4_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv; - - rv = dhcp_proxy_set_server_2 ((ip4_address_t *) (&mp->dhcp_server), - (ip4_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->rx_vrf_id), - (u32) ntohl (mp->server_vrf_id), - (int) mp->insert_circuit_id, - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); -} - - -static void -dhcpv6_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) -{ - 
vl_api_dhcp_proxy_config_reply_t *rmp; - int rv = -1; - - rv = dhcpv6_proxy_set_server_2 ((ip6_address_t *) (&mp->dhcp_server), - (ip6_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->rx_vrf_id), - (u32) ntohl (mp->server_vrf_id), - (int) mp->insert_circuit_id, - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); -} - - -static void -vl_api_dhcp_proxy_set_vss_t_handler (vl_api_dhcp_proxy_set_vss_t * mp) -{ - vl_api_dhcp_proxy_set_vss_reply_t *rmp; - int rv; - if (!mp->is_ipv6) - rv = dhcp_proxy_set_option82_vss (ntohl (mp->tbl_id), - ntohl (mp->oui), - ntohl (mp->fib_id), - (int) mp->is_add == 0); - else - rv = dhcpv6_proxy_set_vss (ntohl (mp->tbl_id), - ntohl (mp->oui), - ntohl (mp->fib_id), (int) mp->is_add == 0); - - REPLY_MACRO (VL_API_DHCP_PROXY_SET_VSS_REPLY); -} - - -static void vl_api_dhcp_proxy_config_t_handler - (vl_api_dhcp_proxy_config_t * mp) -{ - if (mp->is_ipv6 == 0) - dhcpv4_proxy_config (mp); - else - dhcpv6_proxy_config (mp); -} - -static void vl_api_dhcp_proxy_config_2_t_handler - (vl_api_dhcp_proxy_config_2_t * mp) -{ - if (mp->is_ipv6 == 0) - dhcpv4_proxy_config_2 (mp); - else - dhcpv6_proxy_config_2 (mp); -} - -void -dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, - u8 is_ipv6, u8 * host_address, u8 * router_address, - u8 * host_mac) -{ - unix_shared_memory_queue_t *q; - vl_api_dhcp_compl_event_t *mp; - - q = vl_api_client_index_to_input_queue (client_index); - if (!q) - return; - - mp = vl_msg_api_alloc (sizeof (*mp)); - mp->client_index = client_index; - mp->pid = pid; - mp->is_ipv6 = is_ipv6; - clib_memcpy (&mp->hostname, hostname, vec_len (hostname)); - mp->hostname[vec_len (hostname) + 1] = '\n'; - clib_memcpy (&mp->host_address[0], host_address, 16); - clib_memcpy (&mp->router_address[0], router_address, 16); - - if (NULL != host_mac) - clib_memcpy (&mp->host_mac[0], host_mac, 6); - - mp->_vl_msg_id = ntohs (VL_API_DHCP_COMPL_EVENT); - - vl_msg_api_send_shmem (q, (u8 *) & mp); -} - 
-static void vl_api_dhcp_client_config_t_handler - (vl_api_dhcp_client_config_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - vl_api_dhcp_client_config_reply_t *rmp; - int rv = 0; - - VALIDATE_SW_IF_INDEX (mp); - - rv = dhcp_client_config (vm, ntohl (mp->sw_if_index), - mp->hostname, mp->is_add, mp->client_index, - mp->want_dhcp_event ? dhcp_compl_event_callback : - NULL, mp->pid); - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_DHCP_CLIENT_CONFIG_REPLY); -} - static void vl_api_create_loopback_t_handler (vl_api_create_loopback_t * mp) { diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index a00033c5..3a35a54a 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -43,6 +43,7 @@ * DPDK APIs: see ... /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} * CLASSIFY APIs: see ... /src/vnet/classify/{classify.api, classify_api.c} * FLOW APIs: see ... /src/vnet/flow/{flow.api, flow_api.c} + * DHCP APIs: see ... /src/vnet/dhcp/{dhcp.api, dhcp_api.c} */ /** \brief Create a new subinterface with the given vlan id @@ -398,68 +399,6 @@ define reset_fib_reply i32 retval; }; -/** \brief DHCP Proxy config add / del request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param vrf_id - vrf id - @param if_ipv6 - ipv6 if non-zero, else ipv4 - @param is_add - add the config if non-zero, else delete - @param insert_circuit_id - option82 suboption 1 fib number - @param dhcp_server[] - server address - @param dhcp_src_address[] - -*/ -define dhcp_proxy_config -{ - u32 client_index; - u32 context; - u32 vrf_id; - u8 is_ipv6; - u8 is_add; - u8 insert_circuit_id; - u8 dhcp_server[16]; - u8 dhcp_src_address[16]; -}; - -/** \brief DHCP Proxy config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_config_reply -{ - u32 context; - i32 retval; -}; - -/** \brief DHCP Proxy set / unset vss request - @param client_index
- opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param tbl_id - table id - @param oui - first part of vpn id - @param fib_id - second part of vpn id - @param is_ipv6 - ip6 if non-zero, else ip4 - @param is_add - set vss if non-zero, else delete -*/ -define dhcp_proxy_set_vss -{ - u32 client_index; - u32 context; - u32 tbl_id; - u32 oui; - u32 fib_id; - u8 is_ipv6; - u8 is_add; -}; - -/** \brief DHCP proxy set / unset vss response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_set_vss_reply -{ - u32 context; - i32 retval; -}; - /** \brief Create loopback interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -834,40 +773,6 @@ define add_node_next_reply u32 next_index; }; -/** \brief DHCP Proxy config 2 add / del request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param rx_vrf_id - receive vrf id - @param server_vrf_id - server vrf id - @param if_ipv6 - ipv6 if non-zero, else ipv4 - @param is_add - add the config if non-zero, else delete - @param insert_circuit_id - option82 suboption 1 fib number - @param dhcp_server[] - server address - @param dhcp_src_address[] - -*/ -define dhcp_proxy_config_2 -{ - u32 client_index; - u32 context; - u32 rx_vrf_id; - u32 server_vrf_id; - u8 is_ipv6; - u8 is_add; - u8 insert_circuit_id; - u8 dhcp_server[16]; - u8 dhcp_src_address[16]; -}; - -/** \brief DHCP Proxy config 2 add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for request -*/ -define dhcp_proxy_config_2_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface ethernet flow point filtering enable/disable request @param client_index - opaque cookie to identify the sender @param context - sender context, to 
match reply w/ request @@ -1083,37 +988,6 @@ define ip6_nd_event u8 mac_ip; }; -/** \brief DHCP Client config add / del request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - index of the interface for DHCP client - @param hostname - hostname - @param is_add - add the config if non-zero, else delete - @param want_dhcp_event - DHCP event sent to the sender - via dhcp_compl_event API message if non-zero - @param pid - sender's pid -*/ -define dhcp_client_config -{ - u32 client_index; - u32 context; - u32 sw_if_index; - u8 hostname[64]; - u8 is_add; - u8 want_dhcp_event; - u32 pid; -}; - -/** \brief DHCP Client config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_client_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset input ACL interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1146,25 +1020,6 @@ define input_acl_set_interface_reply i32 retval; }; -/** \brief Tell client about a DHCP completion event - @param client_index - opaque cookie to identify the sender - @param pid - client pid registered to receive notification - @param is_ipv6 - if non-zero the address is ipv6, else ipv4 - @param host_address - Host IP address - @param router_address - Router IP address - @param host_mac - Host MAC address -*/ -define dhcp_compl_event -{ - u32 client_index; - u32 pid; - u8 hostname[64]; - u8 is_ipv6; - u8 host_address[16]; - u8 router_address[16]; - u8 host_mac[6]; -}; - /** \brief cop: enable/disable junk filtration features on an interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request -- cgit 1.2.3-korg From fca0c242e4edfdb05231ef18d60c14273067ff0a Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Fri, 13 Jan 2017 
07:57:46 -0800 Subject: DHCPv[46] proxy tests Change-Id: I6aaf9c602cd515ed9d4416d286f9191d048c1a87 Signed-off-by: Neale Ranns --- src/vnet/dhcp/proxy_node.c | 30 +- test/patches/scapy-2.3.3/dhcp6-options.patch | 58 +++ test/test_dhcp.py | 739 +++++++++++++++++++++++++++ test/vpp_ip_route.py | 67 ++- test/vpp_papi_provider.py | 36 ++ 5 files changed, 911 insertions(+), 19 deletions(-) create mode 100644 test/patches/scapy-2.3.3/dhcp6-options.patch create mode 100644 test/test_dhcp.py (limited to 'src/vnet/dhcp') diff --git a/src/vnet/dhcp/proxy_node.c b/src/vnet/dhcp/proxy_node.c index d0d99d7e..6a58fcdb 100644 --- a/src/vnet/dhcp/proxy_node.c +++ b/src/vnet/dhcp/proxy_node.c @@ -703,6 +703,12 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, dhcp_server_t * server = 0; u32 server_index = 0; u32 rx_fib_index = 0; + const fib_prefix_t all_1s = + { + .fp_len = 32, + .fp_addr.ip4.as_u32 = 0xffffffff, + .fp_proto = FIB_PROTOCOL_IP4, + }; if (addr->as_u32 == 0) return VNET_API_ERROR_INVALID_DST_ADDRESS; @@ -720,8 +726,18 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, if (is_del) { memset (server, 0, sizeof (*server)); - return 0; + fib_table_entry_special_remove(rx_fib_index, + &all_1s, + FIB_SOURCE_DHCP); + return 0; } + if (!server->valid) + fib_table_entry_special_add(rx_fib_index, + &all_1s, + FIB_SOURCE_DHCP, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + goto initialize_it; } @@ -738,6 +754,11 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, server = pool_elt_at_index (dpm->dhcp_servers, server_index); memset (server, 0, sizeof (*server)); pool_put (dpm->dhcp_servers, server); + + fib_table_entry_special_remove(rx_fib_index, + &all_1s, + FIB_SOURCE_DHCP); + return 0; } @@ -752,9 +773,16 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, } pool_get (dpm->dhcp_servers, server); + + fib_table_entry_special_add(rx_fib_index, + &all_1s, + 
FIB_SOURCE_DHCP, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); initialize_it: + server->dhcp_server.as_u32 = addr->as_u32; server->server_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, diff --git a/test/patches/scapy-2.3.3/dhcp6-options.patch b/test/patches/scapy-2.3.3/dhcp6-options.patch new file mode 100644 index 00000000..0e649398 --- /dev/null +++ b/test/patches/scapy-2.3.3/dhcp6-options.patch @@ -0,0 +1,58 @@ +diff --git a/scapy/layers/dhcp6.py b/scapy/layers/dhcp6.py +index 4cb9291..a1adcfc 100644 +--- a/scapy/layers/dhcp6.py ++++ b/scapy/layers/dhcp6.py +@@ -74,7 +74,9 @@ dhcp6opts = { 1: "CLIENTID", + 36: "OPTION_GEOCONF_CIVIC", #RFC-ietf-geopriv-dhcp-civil-09.txt + 37: "OPTION_REMOTE_ID", #RFC4649 + 38: "OPTION_SUBSCRIBER_ID", #RFC4580 +- 39: "OPTION_CLIENT_FQDN" } #RFC4704 ++ 39: "OPTION_CLIENT_FQDN", #RFC4704 ++ 68: "OPTION_VSS", #RFC6607 ++ 79: "OPTION_CLIENT_LINKLAYER_ADDR" } #RFC6939 + + dhcp6opts_by_code = { 1: "DHCP6OptClientId", + 2: "DHCP6OptServerId", +@@ -116,12 +118,14 @@ dhcp6opts_by_code = { 1: "DHCP6OptClientId", + #40: "DHCP6OptPANAAgent", #RFC-ietf-dhc-paa-option-05.txt + #41: "DHCP6OptNewPOSIXTimeZone, #RFC4833 + #42: "DHCP6OptNewTZDBTimeZone, #RFC4833 +- 43: "DHCP6OptRelayAgentERO" #RFC4994 ++ 43: "DHCP6OptRelayAgentERO", #RFC4994 + #44: "DHCP6OptLQQuery", #RFC5007 + #45: "DHCP6OptLQClientData", #RFC5007 + #46: "DHCP6OptLQClientTime", #RFC5007 + #47: "DHCP6OptLQRelayData", #RFC5007 + #48: "DHCP6OptLQClientLink", #RFC5007 ++ 68: "DHCP6OptVSS", #RFC6607 ++ 79: "DHCP6OptClientLinkLayerAddr", #RFC6939 + } + + +@@ -838,6 +842,26 @@ class DHCP6OptRelayAgentERO(_DHCP6OptGuessPayload): # RFC4994 + _OptReqListField("reqopts", [23, 24], + length_from = lambda pkt: pkt.optlen) ] + ++# "Client link-layer address type. 
The link-layer type MUST be a valid hardware ++# type assigned by the IANA, as described in [RFC0826] ++class DHCP6OptClientLinkLayerAddr(_DHCP6OptGuessPayload): #RFC6939 ++ name = "DHCP6 Option - Client Link Layer address" ++ fields_desc = [ ShortEnumField("optcode", 79, dhcp6opts), ++ FieldLenField("optlen", None, length_of="clladdr", ++ adjust = lambda pkt,x: x+1), ++ ShortField("lltype", 1), # ethernet ++ _LLAddrField("clladdr", ETHER_ANY) ] ++ ++# Virtual Subnet selection ++class DHCP6OptVSS(_DHCP6OptGuessPayload): #RFC6607 ++ name = "DHCP6 Option - Virtual Subnet Selection" ++ fields_desc = [ ShortEnumField("optcode", 68, dhcp6opts), ++ FieldLenField("optlen", None, length_of="data", ++ adjust = lambda pkt,x: x+1), ++ ByteField("type", 255), # Default Global/default table ++ StrLenField("data", "", ++ length_from = lambda pkt: pkt.optlen) ] ++ + ##################################################################### + ### DHCPv6 messages ### + ##################################################################### diff --git a/test/test_dhcp.py b/test/test_dhcp.py new file mode 100644 index 00000000..bdff679c --- /dev/null +++ b/test/test_dhcp.py @@ -0,0 +1,739 @@ +#!/usr/bin/env python + +import unittest +import socket + +from framework import VppTestCase, VppTestRunner +from vpp_ip_route import IpRoute, RoutePath +from vpp_lo_interface import VppLoInterface + +from scapy.layers.l2 import Ether, getmacbyip +from scapy.layers.inet import IP, UDP, ICMP +from scapy.layers.inet6 import IPv6, in6_getnsmac, in6_mactoifaceid +from scapy.layers.dhcp import DHCP, BOOTP, DHCPTypes +from scapy.layers.dhcp6 import DHCP6, DHCP6_Solicit, DHCP6_RelayForward, \ + DHCP6_RelayReply, DHCP6_Advertise, DHCP6OptRelayMsg, DHCP6OptIfaceId, \ + DHCP6OptStatusCode, DHCP6OptVSS, DHCP6OptClientLinkLayerAddr +from socket import AF_INET, AF_INET6 +from scapy.utils import inet_pton, inet_ntop +from scapy.utils6 import in6_ptop + +DHCP4_CLIENT_PORT = 68 +DHCP4_SERVER_PORT = 67 
+DHCP6_CLIENT_PORT = 547 +DHCP6_SERVER_PORT = 546 + + +def mk_ll_addr(mac): + + euid = in6_mactoifaceid(mac) + addr = "fe80::" + euid + return addr + + +class TestDHCP(VppTestCase): + """ DHCP Test Case """ + + def setUp(self): + super(TestDHCP, self).setUp() + + # create 4 pg interfaces + self.create_pg_interfaces(range(4)) + + # pg0 and 1 are IP configured in VRF 0 and 1. + # pg2 and 3 are non IP-configured in VRF 0 and 1 + table_id = 0 + for i in self.pg_interfaces[:2]: + i.admin_up() + i.set_table_ip4(table_id) + i.set_table_ip6(table_id) + i.config_ip4() + i.resolve_arp() + i.config_ip6() + i.resolve_ndp() + table_id += 1 + + table_id = 0 + for i in self.pg_interfaces[2:]: + i.admin_up() + i.set_table_ip4(table_id) + i.set_table_ip6(table_id) + table_id += 1 + + def send_and_assert_no_replies(self, intf, pkts, remark): + intf.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + for i in self.pg_interfaces: + i.assert_nothing_captured(remark=remark) + + def validate_option_82(self, pkt, intf, ip_addr): + dhcp = pkt[DHCP] + found = 0 + data = [] + + for i in dhcp.options: + if type(i) is tuple: + if i[0] == "relay_agent_Information": + # + # There are two sub-options present - each of length 6. + # + data = i[1] + self.assertEqual(len(data), 12) + + # + # First sub-option is ID 1, len 4, then encoded + # sw_if_index. This test uses low valued indices + # so [2:5] are 0. + # The ID space is VPP internal - so no matching value + # in scapy + # + self.assertEqual(ord(data[0]), 1) + self.assertEqual(ord(data[1]), 4) + self.assertEqual(ord(data[2]), 0) + self.assertEqual(ord(data[3]), 0) + self.assertEqual(ord(data[4]), 0) + self.assertEqual(ord(data[5]), intf._sw_if_index) + + # + # next sub-option is the IP address of the client side + # interface.
+ # sub-option ID=5, length (of a v4 address)=4 + # + claddr = socket.inet_pton(AF_INET, ip_addr) + + self.assertEqual(ord(data[6]), 5) + self.assertEqual(ord(data[7]), 4) + self.assertEqual(data[8], claddr[0]) + self.assertEqual(data[9], claddr[1]) + self.assertEqual(data[10], claddr[2]) + self.assertEqual(data[11], claddr[3]) + + found = 1 + self.assertTrue(found) + + return data + + def verify_dhcp_offer(self, pkt, intf, check_option_82=True): + ether = pkt[Ether] + self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff") + self.assertEqual(ether.src, intf.local_mac) + + ip = pkt[IP] + self.assertEqual(ip.dst, "255.255.255.255") + self.assertEqual(ip.src, intf.local_ip4) + + udp = pkt[UDP] + self.assertEqual(udp.dport, DHCP4_CLIENT_PORT) + self.assertEqual(udp.sport, DHCP4_SERVER_PORT) + + dhcp = pkt[DHCP] + is_offer = False + for o in dhcp.options: + if type(o) is tuple: + if o[0] == "message-type" \ + and DHCPTypes[o[1]] == "offer": + is_offer = True + self.assertTrue(is_offer) + + if check_option_82: + data = self.validate_option_82(pkt, intf, intf.local_ip4) + + def verify_dhcp_discover(self, pkt, intf, src_intf=None, + option_82_present=True): + ether = pkt[Ether] + self.assertEqual(ether.dst, intf.remote_mac) + self.assertEqual(ether.src, intf.local_mac) + + ip = pkt[IP] + self.assertEqual(ip.dst, intf.remote_ip4) + self.assertEqual(ip.src, intf.local_ip4) + + udp = pkt[UDP] + self.assertEqual(udp.dport, DHCP4_SERVER_PORT) + self.assertEqual(udp.sport, DHCP4_CLIENT_PORT) + + dhcp = pkt[DHCP] + + is_discover = False + for o in dhcp.options: + if type(o) is tuple: + if o[0] == "message-type" \ + and DHCPTypes[o[1]] == "discover": + is_discover = True + self.assertTrue(is_discover) + + if option_82_present: + data = self.validate_option_82(pkt, src_intf, src_intf.local_ip4) + return data + else: + for i in dhcp.options: + if type(i) is tuple: + self.assertNotEqual(i[0], "relay_agent_Information") + + def verify_dhcp6_solicit(self, pkt, intf, + peer_ip, peer_mac, + 
fib_id=0, + oui=0): + ether = pkt[Ether] + self.assertEqual(ether.dst, intf.remote_mac) + self.assertEqual(ether.src, intf.local_mac) + + ip = pkt[IPv6] + self.assertEqual(in6_ptop(ip.dst), in6_ptop(intf.remote_ip6)) + self.assertEqual(in6_ptop(ip.src), in6_ptop(intf.local_ip6)) + + udp = pkt[UDP] + self.assertEqual(udp.dport, DHCP6_CLIENT_PORT) + self.assertEqual(udp.sport, DHCP6_SERVER_PORT) + + relay = pkt[DHCP6_RelayForward] + self.assertEqual(in6_ptop(relay.peeraddr), in6_ptop(peer_ip)) + oid = pkt[DHCP6OptIfaceId] + cll = pkt[DHCP6OptClientLinkLayerAddr] + self.assertEqual(cll.optlen, 8) + self.assertEqual(cll.lltype, 1) + self.assertEqual(cll.clladdr, peer_mac) + + vss = pkt[DHCP6OptVSS] + self.assertEqual(vss.optlen, 8) + self.assertEqual(vss.type, 1) + # the OUI and FIB-id are really 3 and 4 bytes resp. + # but the tested range is small + self.assertEqual(ord(vss.data[0]), 0) + self.assertEqual(ord(vss.data[1]), 0) + self.assertEqual(ord(vss.data[2]), oui) + self.assertEqual(ord(vss.data[3]), 0) + self.assertEqual(ord(vss.data[4]), 0) + self.assertEqual(ord(vss.data[5]), 0) + self.assertEqual(ord(vss.data[6]), fib_id) + + # the relay message should be an encoded Solicit + msg = pkt[DHCP6OptRelayMsg] + sol = DHCP6_Solicit() + self.assertEqual(msg.optlen, len(str(sol))) + self.assertEqual(str(sol), (str(msg[1]))[:msg.optlen]) + + def verify_dhcp6_advert(self, pkt, intf, peer): + ether = pkt[Ether] + self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff") + self.assertEqual(ether.src, intf.local_mac) + + ip = pkt[IPv6] + self.assertEqual(in6_ptop(ip.dst), in6_ptop(peer)) + self.assertEqual(in6_ptop(ip.src), in6_ptop(intf.local_ip6)) + + udp = pkt[UDP] + self.assertEqual(udp.dport, DHCP6_SERVER_PORT) + self.assertEqual(udp.sport, DHCP6_CLIENT_PORT) + + # not sure why this is not decoding + # adv = pkt[DHCP6_Advertise] + + def test_dhcp_proxy(self): + """ DHCPv4 Proxy """ + + # + # Verify no response to DHCP request without DHCP config + # + p_disc_vrf0 = 
(Ether(dst="ff:ff:ff:ff:ff:ff", + src=self.pg2.remote_mac) / + IP(src="0.0.0.0", dst="255.255.255.255") / + UDP(sport=DHCP4_CLIENT_PORT, + dport=DHCP4_SERVER_PORT) / + BOOTP(op=1) / + DHCP(options=[('message-type', 'discover'), ('end')])) + pkts_disc_vrf0 = [p_disc_vrf0] + p_disc_vrf1 = (Ether(dst="ff:ff:ff:ff:ff:ff", + src=self.pg3.remote_mac) / + IP(src="0.0.0.0", dst="255.255.255.255") / + UDP(sport=DHCP4_CLIENT_PORT, + dport=DHCP4_SERVER_PORT) / + BOOTP(op=1) / + DHCP(options=[('message-type', 'discover'), ('end')])) + pkts_disc_vrf1 = [p_disc_vrf0] + + self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0, + "DHCP with no configuration") + self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1, + "DHCP with no configuration") + + # + # Enable DHCP proxy in VRF 0 + # + server_addr = self.pg0.remote_ip4n + src_addr = self.pg0.local_ip4n + + self.vapi.dhcp_proxy_config(server_addr, + src_addr, + rx_table_id=0) + + # + # Now a DHCP request on pg2, which is in the same VRF + # as the DHCP config, will result in a relayed DHCP + # message to the [fake] server + # + self.pg2.add_stream(pkts_disc_vrf0) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg0.get_capture(1) + rx = rx[0] + + # + # Rx'd packet should be to the server address and from the configured + # source address + # UDP source ports are unchanged + # we've no option 82 config so that should be absent + # + self.verify_dhcp_discover(rx, self.pg0, option_82_present=False) + + # + # Inject a response from the server + # VPP will only relay the offer if option 82 is present. 
+ # so this one is dropped + # + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_SERVER_PORT) / + BOOTP(op=1) / + DHCP(options=[('message-type', 'offer'), ('end')])) + pkts = [p] + + self.send_and_assert_no_replies(self.pg0, pkts, + "DHCP offer no option 82") + + # + # Configure sending option 82 in relayed messages + # + self.vapi.dhcp_proxy_config(server_addr, + src_addr, + rx_table_id=0, + insert_circuit_id=1) + + # + # Send a request: + # again dropped, but ths time because there is no IP addrees on the + # clinet interfce to fill in the option. + # + self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0, + "DHCP no relay address") + + # + # configure an IP address on the client facing interface + # + self.pg2.config_ip4() + + # + # Try again with a discover packet + # Rx'd packet should be to the server address and from the configured + # source address + # UDP source ports are unchanged + # we've no option 82 config so that should be absent + # + self.pg2.add_stream(pkts_disc_vrf0) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg0.get_capture(1) + rx = rx[0] + + option_82 = self.verify_dhcp_discover(rx, self.pg0, src_intf=self.pg2) + + # + # Create an DHCP offer reply from the server with a correctly formatted + # option 82. i.e. send back what we just captured + # The offer, sent mcast to the client, still has option 82. 
+ # + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_SERVER_PORT) / + BOOTP(op=1) / + DHCP(options=[('message-type', 'offer'), + ('relay_agent_Information', option_82), + ('end')])) + pkts = [p] + + self.pg0.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg2.get_capture(1) + rx = rx[0] + + self.verify_dhcp_offer(rx, self.pg2) + + # + # Bogus Option 82: + # + # 1. not our IP address = not checked by VPP? so offer is replayed + # to client + bad_ip = option_82[0:8] + chr(33) + option_82[9:] + + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_SERVER_PORT) / + BOOTP(op=1) / + DHCP(options=[('message-type', 'offer'), + ('relay_agent_Information', bad_ip), + ('end')])) + pkts = [p] + + self.pg0.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + rx = self.pg2.get_capture(1) + rx = rx[0] + + self.verify_dhcp_offer(rx, self.pg2, check_option_82=False) + self.pg0.assert_nothing_captured(remark="") + + # 2. Not a sw_if_index VPP knows + bad_if_index = option_82[0:2] + chr(33) + option_82[3:] + + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_SERVER_PORT) / + BOOTP(op=1) / + DHCP(options=[('message-type', 'offer'), + ('relay_agent_Information', bad_if_index), + ('end')])) + pkts = [p] + self.send_and_assert_no_replies(self.pg0, pkts, + "DHCP offer option 82 bad if index") + + # + # Send a DHCP request in VRF 1. should be dropped. + # + self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1, + "DHCP with no configuration VRF 1") + + # + # Delete the DHCP config in VRF 0 + # Should now drop requests. 
+ # + self.vapi.dhcp_proxy_config(server_addr, + src_addr, + rx_table_id=0, + is_add=0, + insert_circuit_id=1) + + self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0, + "DHCP config removed VRF 0") + self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1, + "DHCP config removed VRF 1") + + # + # Add DHCP config for VRF 1 + # + server_addr = self.pg1.remote_ip4n + src_addr = self.pg1.local_ip4n + self.vapi.dhcp_proxy_config(server_addr, + src_addr, + rx_table_id=1, + server_table_id=1, + insert_circuit_id=1) + + # + # Confim DHCP requests ok in VRF 1. + # - dropped on IP config on client interface + # + self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1, + "DHCP config removed VRF 1") + + # + # configure an IP address on the client facing interface + # + self.pg3.config_ip4() + + self.pg3.add_stream(pkts_disc_vrf1) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(1) + rx = rx[0] + self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3) + + # + # remove DHCP config to cleanup + # + self.vapi.dhcp_proxy_config(server_addr, + src_addr, + rx_table_id=1, + server_table_id=1, + insert_circuit_id=1, + is_add=0) + + self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0, + "DHCP cleanup VRF 0") + self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1, + "DHCP cleanup VRF 1") + + def test_dhcp6_proxy(self): + """ DHCPv6 Proxy""" + # + # Verify no response to DHCP request without DHCP config + # + dhcp_solicit_dst = "ff02::1:2" + dhcp_solicit_src_vrf0 = mk_ll_addr(self.pg2.remote_mac) + dhcp_solicit_src_vrf1 = mk_ll_addr(self.pg3.remote_mac) + server_addr_vrf0 = self.pg0.remote_ip6n + src_addr_vrf0 = self.pg0.local_ip6n + server_addr_vrf1 = self.pg1.remote_ip6n + src_addr_vrf1 = self.pg1.local_ip6n + + # + # Add the Route to receive the DHCP packets + # + route_dhcp_vrf0 = IpRoute(self, dhcp_solicit_dst, 128, + [], is_local=1, is_ip6=1) + route_dhcp_vrf0.add_vpp_config() + route_dhcp_vrf1 = IpRoute(self, 
dhcp_solicit_dst, 128, + [], is_local=1, is_ip6=1, + table_id=1) + route_dhcp_vrf1.add_vpp_config() + + dmac = in6_getnsmac(inet_pton(socket.AF_INET6, dhcp_solicit_dst)) + p_solicit_vrf0 = (Ether(dst=dmac, src=self.pg2.remote_mac) / + IPv6(src=dhcp_solicit_src_vrf0, + dst=dhcp_solicit_dst) / + UDP(sport=DHCP6_SERVER_PORT, + dport=DHCP6_CLIENT_PORT) / + DHCP6_Solicit()) + pkts_solicit_vrf0 = [p_solicit_vrf0] + p_solicit_vrf1 = (Ether(dst=dmac, src=self.pg3.remote_mac) / + IPv6(src=dhcp_solicit_src_vrf1, + dst=dhcp_solicit_dst) / + UDP(sport=DHCP6_SERVER_PORT, + dport=DHCP6_CLIENT_PORT) / + DHCP6_Solicit()) + pkts_solicit_vrf1 = [p_solicit_vrf1] + + self.send_and_assert_no_replies(self.pg2, pkts_solicit_vrf0, + "DHCP with no configuration") + self.send_and_assert_no_replies(self.pg3, pkts_solicit_vrf1, + "DHCP with no configuration") + + # + # DHCPv6 config in VRF 0. + # Packets still dropped because the client facing interface has no + # IPv6 config + # + self.vapi.dhcp_proxy_config(server_addr_vrf0, + src_addr_vrf0, + rx_table_id=0, + server_table_id=0, + insert_circuit_id=1, + is_ipv6=1) + + self.send_and_assert_no_replies(self.pg2, pkts_solicit_vrf0, + "DHCP with no configuration") + self.send_and_assert_no_replies(self.pg3, pkts_solicit_vrf1, + "DHCP with no configuration") + + # + # configure an IP address on the client facing interface + # + self.pg2.config_ip6() + + # + # Now the DHCP requests are relayed to the server + # + self.pg2.add_stream(pkts_solicit_vrf0) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg0.get_capture(1) + rx = rx[0] + self.verify_dhcp6_solicit(rx, self.pg0, + dhcp_solicit_src_vrf0, + self.pg2.remote_mac) + + # + # Exception cases for rejected relay responses + # + + # 1 - not a relay reply + p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) / + UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / + DHCP6_Advertise()) + 
pkts_adv_vrf0 = [p_adv_vrf0] + self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0, + "DHCP6 not a relay reply") + + # 2 - no relay message option + p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) / + UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / + DHCP6_RelayReply() / + DHCP6_Advertise()) + pkts_adv_vrf0 = [p_adv_vrf0] + self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0, + "DHCP not a relay message") + + # 3 - no circuit ID + p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) / + UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / + DHCP6_RelayReply() / + DHCP6OptRelayMsg(optlen=0) / + DHCP6_Advertise()) + pkts_adv_vrf0 = [p_adv_vrf0] + self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0, + "DHCP6 no circuit ID") + # 4 - wrong circuit ID + p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) / + UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / + DHCP6_RelayReply() / + DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x05') / + DHCP6OptRelayMsg(optlen=0) / + DHCP6_Advertise()) + pkts_adv_vrf0 = [p_adv_vrf0] + self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0, + "DHCP6 wrong circuit ID") + + # + # Send the relay response (the advertisement) + # - no peer address + p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) / + UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / + DHCP6_RelayReply() / + DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x03') / + DHCP6OptRelayMsg(optlen=0) / + DHCP6_Advertise(trid=1) / + DHCP6OptStatusCode(statuscode=0)) + pkts_adv_vrf0 = [p_adv_vrf0] + + self.pg0.add_stream(pkts_adv_vrf0) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg2.get_capture(1) + rx = rx[0] + 
self.verify_dhcp6_advert(rx, self.pg2, "::") + + # + # Send the relay response (the advertisement) + # - with peer address + p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) / + UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / + DHCP6_RelayReply(peeraddr=dhcp_solicit_src_vrf0) / + DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x03') / + DHCP6OptRelayMsg(optlen=0) / + DHCP6_Advertise(trid=1) / + DHCP6OptStatusCode(statuscode=0)) + pkts_adv_vrf0 = [p_adv_vrf0] + + self.pg0.add_stream(pkts_adv_vrf0) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg2.get_capture(1) + rx = rx[0] + self.verify_dhcp6_advert(rx, self.pg2, dhcp_solicit_src_vrf0) + + # + # Add all the config for VRF 1 + # + self.vapi.dhcp_proxy_config(server_addr_vrf1, + src_addr_vrf1, + rx_table_id=1, + server_table_id=1, + insert_circuit_id=1, + is_ipv6=1) + self.pg3.config_ip6() + + # + # VRF 1 solicit + # + self.pg3.add_stream(pkts_solicit_vrf1) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(1) + rx = rx[0] + self.verify_dhcp6_solicit(rx, self.pg1, + dhcp_solicit_src_vrf1, + self.pg3.remote_mac) + + # + # VRF 1 Advert + # + p_adv_vrf1 = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / + IPv6(dst=self.pg1.local_ip6, src=self.pg1.remote_ip6) / + UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / + DHCP6_RelayReply(peeraddr=dhcp_solicit_src_vrf1) / + DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x04') / + DHCP6OptRelayMsg(optlen=0) / + DHCP6_Advertise(trid=1) / + DHCP6OptStatusCode(statuscode=0)) + pkts_adv_vrf1 = [p_adv_vrf1] + + self.pg1.add_stream(pkts_adv_vrf1) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg3.get_capture(1) + rx = rx[0] + self.verify_dhcp6_advert(rx, self.pg3, dhcp_solicit_src_vrf1) + + # + # Add VSS config + # table=1, fib=id=1, oui=4 + self.vapi.dhcp_proxy_set_vss(1, 1, 4, 
is_ip6=1) + + self.pg3.add_stream(pkts_solicit_vrf1) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(1) + rx = rx[0] + self.verify_dhcp6_solicit(rx, self.pg1, + dhcp_solicit_src_vrf1, + self.pg3.remote_mac, + fib_id=1, + oui=4) + + # + # Remove the VSS config + # relayed DHCP has default vlaues in the option. + # + self.vapi.dhcp_proxy_set_vss(1, 1, 4, is_ip6=1, is_add=0) + + self.pg3.add_stream(pkts_solicit_vrf1) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(1) + rx = rx[0] + self.verify_dhcp6_solicit(rx, self.pg1, + dhcp_solicit_src_vrf1, + self.pg3.remote_mac) + + # + # Cleanup + # + self.vapi.dhcp_proxy_config(server_addr_vrf1, + src_addr_vrf1, + rx_table_id=1, + server_table_id=1, + insert_circuit_id=1, + is_ipv6=1, + is_add=0) + self.vapi.dhcp_proxy_config(server_addr_vrf1, + src_addr_vrf1, + rx_table_id=0, + server_table_id=0, + insert_circuit_id=1, + is_ipv6=1, + is_add=0) + + route_dhcp_vrf0.remove_vpp_config() + route_dhcp_vrf1.remove_vpp_config() + +if __name__ == '__main__': + unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py index 975e3934..fc9133fb 100644 --- a/test/vpp_ip_route.py +++ b/test/vpp_ip_route.py @@ -19,12 +19,16 @@ class RoutePath: nh_sw_if_index, nh_table_id=0, labels=[], - nh_via_label=MPLS_LABEL_INVALID): - self.nh_addr = socket.inet_pton(socket.AF_INET, nh_addr) + nh_via_label=MPLS_LABEL_INVALID, + is_ip6=0): self.nh_itf = nh_sw_if_index self.nh_table_id = nh_table_id self.nh_via_label = nh_via_label self.nh_labels = labels + if is_ip6: + self.nh_addr = socket.inet_pton(socket.AF_INET6, nh_addr) + else: + self.nh_addr = socket.inet_pton(socket.AF_INET, nh_addr) class IpRoute: @@ -33,34 +37,61 @@ class IpRoute: """ def __init__(self, test, dest_addr, - dest_addr_len, paths, table_id=0): + dest_addr_len, paths, table_id=0, is_ip6=0, is_local=0): self._test = test self.paths = paths - self.dest_addr = 
socket.inet_pton(socket.AF_INET, dest_addr) self.dest_addr_len = dest_addr_len self.table_id = table_id + self.is_ip6 = is_ip6 + self.is_local = is_local + if is_ip6: + self.dest_addr = socket.inet_pton(socket.AF_INET6, dest_addr) + else: + self.dest_addr = socket.inet_pton(socket.AF_INET, dest_addr) def add_vpp_config(self): - for path in self.paths: + if self.is_local: self._test.vapi.ip_add_del_route( self.dest_addr, self.dest_addr_len, - path.nh_addr, - path.nh_itf, + socket.inet_pton(socket.AF_INET6, "::"), + 0xffffffff, + is_local=1, table_id=self.table_id, - next_hop_out_label_stack=path.nh_labels, - next_hop_n_out_labels=len( - path.nh_labels), - next_hop_via_label=path.nh_via_label) + is_ipv6=self.is_ip6) + else: + for path in self.paths: + self._test.vapi.ip_add_del_route( + self.dest_addr, + self.dest_addr_len, + path.nh_addr, + path.nh_itf, + table_id=self.table_id, + next_hop_out_label_stack=path.nh_labels, + next_hop_n_out_labels=len( + path.nh_labels), + next_hop_via_label=path.nh_via_label, + is_ipv6=self.is_ip6) def remove_vpp_config(self): - for path in self.paths: - self._test.vapi.ip_add_del_route(self.dest_addr, - self.dest_addr_len, - path.nh_addr, - path.nh_itf, - table_id=self.table_id, - is_add=0) + if self.is_local: + self._test.vapi.ip_add_del_route( + self.dest_addr, + self.dest_addr_len, + socket.inet_pton(socket.AF_INET6, "::"), + 0xffffffff, + is_local=1, + is_add=0, + table_id=self.table_id, + is_ipv6=self.is_ip6) + else: + for path in self.paths: + self._test.vapi.ip_add_del_route(self.dest_addr, + self.dest_addr_len, + path.nh_addr, + path.nh_itf, + table_id=self.table_id, + is_add=0) class MplsIpBind: diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 4f2cea8b..1b2895e9 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -1142,3 +1142,39 @@ class VppPapiProvider(object): 'template_interval': template_interval, 'udp_checksum': udp_checksum, }) + + def dhcp_proxy_config(self, + 
dhcp_server, + dhcp_src_address, + rx_table_id=0, + server_table_id=0, + is_add=1, + is_ipv6=0, + insert_circuit_id=0): + return self.api( + self.papi.dhcp_proxy_config_2, + { + 'rx_vrf_id': rx_table_id, + 'server_vrf_id': server_table_id, + 'is_ipv6': is_ipv6, + 'is_add': is_add, + 'insert_circuit_id': insert_circuit_id, + 'dhcp_server': dhcp_server, + 'dhcp_src_address': dhcp_src_address, + }) + + def dhcp_proxy_set_vss(self, + table_id, + fib_id, + oui, + is_add=1, + is_ip6=0): + return self.api( + self.papi.dhcp_proxy_set_vss, + { + 'tbl_id': table_id, + 'fib_id': fib_id, + 'is_ipv6': is_ip6, + 'is_add': is_add, + 'oui': oui, + }) -- cgit 1.2.3-korg From 20a175a18414c67e38b5ce0709b33fb1df8069c9 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 14 Feb 2017 07:28:41 -0800 Subject: dhcp: multiple additions DHCP additions: 1) DHCPv4 will only relay a message back to the client, if the Option82 information is present. So make this the default. 2) It is no longer possible to select via the API to "insert circuit ID" - since this is now default 3) Remove the version 2 API since it's now the same as version 1. 
4) Adding the VSS option is now conditional only on the presence of VSS config (not the 'insert' option in the set API) 5) DHCP proxy dump via API Change-Id: Ia7271ba8c1d4dbf34a02c401d268ccfbb1b74f17 Signed-off-by: Neale Ranns --- src/scripts/vnet/dhcp/proxy | 21 ++ src/vat/api_format.c | 173 ++++++----- src/vnet/dhcp/client.c | 1 + src/vnet/dhcp/client.h | 3 + src/vnet/dhcp/dhcp.api | 67 ++--- src/vnet/dhcp/dhcp_api.c | 112 ++++--- src/vnet/dhcp/proxy.h | 33 ++- src/vnet/dhcp/proxy_error.def | 3 +- src/vnet/dhcp/proxy_node.c | 676 +++++++++++++++++++++++------------------- src/vnet/dhcpv6/proxy.h | 19 +- src/vnet/dhcpv6/proxy_node.c | 513 ++++++++++++++++++-------------- src/vnet/dpo/receive_dpo.c | 5 + src/vpp/api/custom_dump.c | 34 --- test/test_dhcp.py | 166 ++++++----- test/vpp_papi_provider.py | 6 +- 15 files changed, 978 insertions(+), 854 deletions(-) create mode 100644 src/scripts/vnet/dhcp/proxy (limited to 'src/vnet/dhcp') diff --git a/src/scripts/vnet/dhcp/proxy b/src/scripts/vnet/dhcp/proxy new file mode 100644 index 00000000..c709d87d --- /dev/null +++ b/src/scripts/vnet/dhcp/proxy @@ -0,0 +1,21 @@ +loop create +loop create + +set int state loop0 up +set int state loop1 up + +set int ip table loop1 1 +set int ip6 table loop1 1 + +set int ip addr loop0 10.0.0.1/24 +set int ip addr loop0 10.0.1.1/24 + +set int ip addr loop0 2001::1/64 +set int ip addr loop0 2001:1::1/64 + +set dhcp proxy server 10.255.0.1 src-address 10.0.0.1 server-fib-id 0 rx-fib-id 0 +set dhcp proxy server 10.255.0.2 src-address 10.0.1.1 server-fib-id 1 rx-fib-id 1 + +set dhcpv6 proxy server 3001::1 src-address 2001::1 server-fib-id 0 rx-fib-id 0 +set dhcpv6 proxy server 3002::1 src-address 2001:1::1 server-fib-id 1 rx-fib-id 1 + diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 11e68214..78c5e279 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -3819,7 +3819,6 @@ _(reset_vrf_reply) \ _(oam_add_del_reply) \ _(reset_fib_reply) \ _(dhcp_proxy_config_reply) \ 
-_(dhcp_proxy_config_2_reply) \ _(dhcp_proxy_set_vss_reply) \ _(dhcp_client_config_reply) \ _(set_ip_flow_hash_reply) \ @@ -4033,8 +4032,8 @@ _(CREATE_SUBIF_REPLY, create_subif_reply) \ _(OAM_ADD_DEL_REPLY, oam_add_del_reply) \ _(RESET_FIB_REPLY, reset_fib_reply) \ _(DHCP_PROXY_CONFIG_REPLY, dhcp_proxy_config_reply) \ -_(DHCP_PROXY_CONFIG_2_REPLY, dhcp_proxy_config_2_reply) \ _(DHCP_PROXY_SET_VSS_REPLY, dhcp_proxy_set_vss_reply) \ +_(DHCP_PROXY_DETAILS, dhcp_proxy_details) \ _(DHCP_CLIENT_CONFIG_REPLY, dhcp_client_config_reply) \ _(SET_IP_FLOW_HASH_REPLY, set_ip_flow_hash_reply) \ _(SW_INTERFACE_IP6_ENABLE_DISABLE_REPLY, \ @@ -7635,9 +7634,9 @@ api_dhcp_proxy_config (vat_main_t * vam) { unformat_input_t *i = vam->input; vl_api_dhcp_proxy_config_t *mp; - u32 vrf_id = 0; + u32 rx_vrf_id = 0; + u32 server_vrf_id = 0; u8 is_add = 1; - u8 insert_cid = 1; u8 v4_address_set = 0; u8 v6_address_set = 0; ip4_address_t v4address; @@ -7653,9 +7652,9 @@ api_dhcp_proxy_config (vat_main_t * vam) { if (unformat (i, "del")) is_add = 0; - else if (unformat (i, "vrf %d", &vrf_id)) + else if (unformat (i, "rx_vrf_id %d", &rx_vrf_id)) ; - else if (unformat (i, "insert-cid %d", &insert_cid)) + else if (unformat (i, "server_vrf_id %d", &server_vrf_id)) ; else if (unformat (i, "svr %U", unformat_ip4_address, &v4address)) v4_address_set = 1; @@ -7701,9 +7700,9 @@ api_dhcp_proxy_config (vat_main_t * vam) /* Construct the API message */ M (DHCP_PROXY_CONFIG, mp); - mp->insert_circuit_id = insert_cid; mp->is_add = is_add; - mp->vrf_id = ntohl (vrf_id); + mp->rx_vrf_id = ntohl (rx_vrf_id); + mp->server_vrf_id = ntohl (server_vrf_id); if (v6_address_set) { mp->is_ipv6 = 1; @@ -7724,100 +7723,98 @@ api_dhcp_proxy_config (vat_main_t * vam) return ret; } -static int -api_dhcp_proxy_config_2 (vat_main_t * vam) +#define vl_api_dhcp_proxy_details_t_endian vl_noop_handler +#define vl_api_dhcp_proxy_details_t_print vl_noop_handler + +static void +vl_api_dhcp_proxy_details_t_handler 
(vl_api_dhcp_proxy_details_t * mp) { - unformat_input_t *i = vam->input; - vl_api_dhcp_proxy_config_2_t *mp; - u32 rx_vrf_id = 0; - u32 server_vrf_id = 0; - u8 is_add = 1; - u8 insert_cid = 1; - u8 v4_address_set = 0; - u8 v6_address_set = 0; - ip4_address_t v4address; - ip6_address_t v6address; - u8 v4_src_address_set = 0; - u8 v6_src_address_set = 0; - ip4_address_t v4srcaddress; - ip6_address_t v6srcaddress; - int ret; + vat_main_t *vam = &vat_main; - /* Parse args required to build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "del")) - is_add = 0; - else if (unformat (i, "rx_vrf_id %d", &rx_vrf_id)) - ; - else if (unformat (i, "server_vrf_id %d", &server_vrf_id)) - ; - else if (unformat (i, "insert-cid %d", &insert_cid)) - ; - else if (unformat (i, "svr %U", unformat_ip4_address, &v4address)) - v4_address_set = 1; - else if (unformat (i, "svr %U", unformat_ip6_address, &v6address)) - v6_address_set = 1; - else if (unformat (i, "src %U", unformat_ip4_address, &v4srcaddress)) - v4_src_address_set = 1; - else if (unformat (i, "src %U", unformat_ip6_address, &v6srcaddress)) - v6_src_address_set = 1; - else - break; - } + if (mp->is_ipv6) + print (vam->ofp, + "RX Table-ID %d, Server Table-ID %d, Server Address %U, Source Address %U, VSS FIB-ID %d, VSS OUI %d", + ntohl (mp->rx_vrf_id), + ntohl (mp->server_vrf_id), + format_ip6_address, mp->dhcp_server, + format_ip6_address, mp->dhcp_src_address, + ntohl (mp->vss_oui), ntohl (mp->vss_fib_id)); + else + print (vam->ofp, + "RX Table-ID %d, Server Table-ID %d, Server Address %U, Source Address %U, VSS FIB-ID %d, VSS OUI %d", + ntohl (mp->rx_vrf_id), + ntohl (mp->server_vrf_id), + format_ip4_address, mp->dhcp_server, + format_ip4_address, mp->dhcp_src_address, + ntohl (mp->vss_oui), ntohl (mp->vss_fib_id)); +} - if (v4_address_set && v6_address_set) - { - errmsg ("both v4 and v6 server addresses set"); - return -99; - } - if (!v4_address_set && !v6_address_set) - { - 
errmsg ("no server addresses set"); - return -99; - } +static void vl_api_dhcp_proxy_details_t_handler_json + (vl_api_dhcp_proxy_details_t * mp) +{ + vat_main_t *vam = &vat_main; + vat_json_node_t *node = NULL; + struct in_addr ip4; + struct in6_addr ip6; - if (v4_src_address_set && v6_src_address_set) + if (VAT_JSON_ARRAY != vam->json_tree.type) { - errmsg ("both v4 and v6 src addresses set"); - return -99; + ASSERT (VAT_JSON_NONE == vam->json_tree.type); + vat_json_init_array (&vam->json_tree); } - if (!v4_src_address_set && !v6_src_address_set) + node = vat_json_array_add (&vam->json_tree); + + vat_json_init_object (node); + vat_json_object_add_uint (node, "rx-table-id", ntohl (mp->rx_vrf_id)); + vat_json_object_add_uint (node, "server-table-id", + ntohl (mp->server_vrf_id)); + if (mp->is_ipv6) { - errmsg ("no src addresses set"); - return -99; + clib_memcpy (&ip6, &mp->dhcp_server, sizeof (ip6)); + vat_json_object_add_ip6 (node, "server_address", ip6); + clib_memcpy (&ip6, &mp->dhcp_src_address, sizeof (ip6)); + vat_json_object_add_ip6 (node, "src_address", ip6); } - - if (!(v4_src_address_set && v4_address_set) && - !(v6_src_address_set && v6_address_set)) + else { - errmsg ("no matching server and src addresses set"); - return -99; + clib_memcpy (&ip4, &mp->dhcp_server, sizeof (ip4)); + vat_json_object_add_ip4 (node, "server_address", ip4); + clib_memcpy (&ip4, &mp->dhcp_src_address, sizeof (ip4)); + vat_json_object_add_ip4 (node, "src_address", ip4); } + vat_json_object_add_uint (node, "vss-fib-id", ntohl (mp->vss_fib_id)); + vat_json_object_add_uint (node, "vss-oui", ntohl (mp->vss_oui)); +} - /* Construct the API message */ - M (DHCP_PROXY_CONFIG_2, mp); +static int +api_dhcp_proxy_dump (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_control_ping_t *mp_ping; + vl_api_dhcp_proxy_dump_t *mp; + u8 is_ipv6 = 0; + int ret; - mp->insert_circuit_id = insert_cid; - mp->is_add = is_add; - mp->rx_vrf_id = ntohl (rx_vrf_id); - mp->server_vrf_id = 
ntohl (server_vrf_id); - if (v6_address_set) - { - mp->is_ipv6 = 1; - clib_memcpy (mp->dhcp_server, &v6address, sizeof (v6address)); - clib_memcpy (mp->dhcp_src_address, &v6srcaddress, sizeof (v6address)); - } - else + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { - clib_memcpy (mp->dhcp_server, &v4address, sizeof (v4address)); - clib_memcpy (mp->dhcp_src_address, &v4srcaddress, sizeof (v4address)); + if (unformat (i, "ipv6")) + is_ipv6 = 1; + else + { + clib_warning ("parse error '%U'", format_unformat_error, i); + return -99; + } } - /* send it... */ + M (DHCP_PROXY_DUMP, mp); + + mp->is_ip6 = is_ipv6; S (mp); - /* Wait for a reply, return good/bad news */ + /* Use a control ping for synchronization */ + M (CONTROL_PING, mp_ping); + S (mp_ping); + W (ret); return ret; } @@ -18187,12 +18184,10 @@ _(oam_add_del, "src dst [vrf ] [del]") \ _(reset_fib, "vrf [ipv6]") \ _(dhcp_proxy_config, \ "svr src \n" \ - "insert-cid [del]") \ -_(dhcp_proxy_config_2, \ - "svr src \n" \ - "rx_vrf_id server_vrf_id insert-cid [del]") \ + "rx_vrf_id server_vrf_id [del]") \ _(dhcp_proxy_set_vss, \ "tbl_id fib_id oui [ipv6] [del]") \ +_(dhcp_proxy_dump, "ip6") \ _(dhcp_client_config, \ " | sw_if_index [hostname ] [disable_event] [del]") \ _(set_ip_flow_hash, \ diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c index c352e310..8a1a43b3 100644 --- a/src/vnet/dhcp/client.c +++ b/src/vnet/dhcp/client.c @@ -13,6 +13,7 @@ * limitations under the License. 
*/ #include +#include #include #include diff --git a/src/vnet/dhcp/client.h b/src/vnet/dhcp/client.h index d15e686b..a74368cb 100644 --- a/src/vnet/dhcp/client.h +++ b/src/vnet/dhcp/client.h @@ -19,6 +19,9 @@ #ifndef included_dhcp_client_h #define included_dhcp_client_h +#include +#include + #define foreach_dhcp_client_state \ _(DHCP_DISCOVER) \ _(DHCP_REQUEST) \ diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api index c228cd04..8daadd8c 100644 --- a/src/vnet/dhcp/dhcp.api +++ b/src/vnet/dhcp/dhcp.api @@ -16,7 +16,8 @@ /** \brief DHCP Proxy config add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param vrf_id - vrf id + @param rx_vrf_id - Rx/interface vrf id + @param server_vrf_id - server vrf id @param if_ipv6 - ipv6 if non-zero, else ipv4 @param is_add - add the config if non-zero, else delete @param insert_circuit_id - option82 suboption 1 fib number @@ -27,10 +28,10 @@ define dhcp_proxy_config { u32 client_index; u32 context; - u32 vrf_id; + u32 rx_vrf_id; + u32 server_vrf_id; u8 is_ipv6; u8 is_add; - u8 insert_circuit_id; u8 dhcp_server[16]; u8 dhcp_src_address[16]; }; @@ -45,40 +46,6 @@ define dhcp_proxy_config_reply i32 retval; }; -/** \brief DHCP Proxy config 2 add / del request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param rx_vrf_id - receive vrf id - @param server_vrf_id - server vrf id - @param if_ipv6 - ipv6 if non-zero, else ipv4 - @param is_add - add the config if non-zero, else delete - @param insert_circuit_id - option82 suboption 1 fib number - @param dhcp_server[] - server address - @param dhcp_src_address[] - -*/ -define dhcp_proxy_config_2 -{ - u32 client_index; - u32 context; - u32 rx_vrf_id; - u32 server_vrf_id; - u8 is_ipv6; - u8 is_add; - u8 insert_circuit_id; - u8 dhcp_server[16]; - u8 dhcp_src_address[16]; -}; - -/** \brief DHCP Proxy config 2 add / del 
response - @param context - sender context, to match reply w/ request - @param retval - return code for request -*/ -define dhcp_proxy_config_2_reply -{ - u32 context; - i32 retval; -}; - /** \brief DHCP Proxy set / unset vss request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -159,6 +126,32 @@ define dhcp_compl_event u8 host_mac[6]; }; +/** \brief Dump DHCP proxy table + @param client_index - opaque cookie to identify the sender + @param True for IPv6 proxy table +*/ +define dhcp_proxy_dump +{ + u32 client_index; + u32 context; + u8 is_ip6; +}; + +/** \brief Tell client about a DHCP completion event + @param client_index - opaque cookie to identify the sender +*/ +define dhcp_proxy_details +{ + u32 context; + u32 rx_vrf_id; + u32 server_vrf_id; + u32 vss_oui; + u32 vss_fib_id; + u8 is_ipv6; + u8 dhcp_server[16]; + u8 dhcp_src_address[16]; +}; + /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c index 88b32b24..ce9039b7 100644 --- a/src/vnet/dhcp/dhcp_api.c +++ b/src/vnet/dhcp/dhcp_api.c @@ -46,7 +46,8 @@ #define foreach_vpe_api_msg \ _(DHCP_PROXY_CONFIG,dhcp_proxy_config) \ -_(DHCP_PROXY_CONFIG_2,dhcp_proxy_config_2) \ +_(DHCP_PROXY_DUMP,dhcp_proxy_dump) \ +_(DHCP_PROXY_DETAILS,dhcp_proxy_details) \ _(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \ _(DHCP_CLIENT_CONFIG, dhcp_client_config) @@ -58,8 +59,8 @@ dhcpv4_proxy_config (vl_api_dhcp_proxy_config_t * mp) rv = dhcp_proxy_set_server ((ip4_address_t *) (&mp->dhcp_server), (ip4_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->vrf_id), - (int) mp->insert_circuit_id, + (u32) ntohl (mp->rx_vrf_id), + (u32) ntohl (mp->server_vrf_id), (int) (mp->is_add == 0)); REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); @@ -74,44 +75,11 @@ dhcpv6_proxy_config (vl_api_dhcp_proxy_config_t * mp) rv = dhcpv6_proxy_set_server ((ip6_address_t *) (&mp->dhcp_server), (ip6_address_t *) 
(&mp->dhcp_src_address), - (u32) ntohl (mp->vrf_id), - (int) mp->insert_circuit_id, - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); -} - -static void -dhcpv4_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv; - - rv = dhcp_proxy_set_server_2 ((ip4_address_t *) (&mp->dhcp_server), - (ip4_address_t *) (&mp->dhcp_src_address), (u32) ntohl (mp->rx_vrf_id), (u32) ntohl (mp->server_vrf_id), - (int) mp->insert_circuit_id, (int) (mp->is_add == 0)); - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); -} - - -static void -dhcpv6_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv = -1; - - rv = dhcpv6_proxy_set_server_2 ((ip6_address_t *) (&mp->dhcp_server), - (ip6_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->rx_vrf_id), - (u32) ntohl (mp->server_vrf_id), - (int) mp->insert_circuit_id, - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); } @@ -143,6 +111,67 @@ static void vl_api_dhcp_proxy_config_t_handler dhcpv6_proxy_config (mp); } +static void +vl_api_dhcp_proxy_dump_t_handler (vl_api_dhcp_proxy_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + if (mp->is_ip6 == 0) + dhcp_proxy_dump (q, mp->context); + else + dhcpv6_proxy_dump (q, mp->context); +} + +void +dhcp_send_details (void *opaque, + u32 context, + const ip46_address_t * server, + const ip46_address_t * src, + u32 server_fib_id, + u32 rx_fib_id, u32 vss_fib_id, u32 vss_oui) +{ + vl_api_dhcp_proxy_details_t *mp; + unix_shared_memory_queue_t *q = opaque; + + mp = vl_msg_api_alloc (sizeof (*mp)); + if (!mp) + return; + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_DETAILS); + mp->context = context; + + mp->rx_vrf_id = htonl (rx_fib_id); + mp->server_vrf_id = htonl (server_fib_id); + 
mp->vss_oui = htonl (vss_oui); + mp->vss_fib_id = htonl (vss_fib_id); + + mp->is_ipv6 = !ip46_address_is_ip4 (server); + + if (mp->is_ipv6) + { + memcpy (mp->dhcp_server, server, 16); + memcpy (mp->dhcp_src_address, src, 16); + } + else + { + /* put the address in the first bytes */ + memcpy (mp->dhcp_server, &server->ip4, 4); + memcpy (mp->dhcp_src_address, &src->ip4, 4); + } + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + + +static void +vl_api_dhcp_proxy_details_t_handler (vl_api_dhcp_proxy_details_t * mp) +{ + clib_warning ("BUG"); +} + void dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, u8 is_ipv6, u8 * host_address, u8 * router_address, @@ -172,15 +201,6 @@ dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, vl_msg_api_send_shmem (q, (u8 *) & mp); } -static void vl_api_dhcp_proxy_config_2_t_handler - (vl_api_dhcp_proxy_config_2_t * mp) -{ - if (mp->is_ipv6 == 0) - dhcpv4_proxy_config_2 (mp); - else - dhcpv6_proxy_config_2 (mp); -} - static void vl_api_dhcp_client_config_t_handler (vl_api_dhcp_client_config_t * mp) { diff --git a/src/vnet/dhcp/proxy.h b/src/vnet/dhcp/proxy.h index e12c0d00..4b115c74 100644 --- a/src/vnet/dhcp/proxy.h +++ b/src/vnet/dhcp/proxy.h @@ -27,7 +27,6 @@ #include #include #include -#include typedef enum { #define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n, @@ -49,9 +48,7 @@ typedef union { typedef struct { ip4_address_t dhcp_server; ip4_address_t dhcp_src_address; - u32 insert_option_82; u32 server_fib_index; - u32 valid; } dhcp_server_t; typedef struct { @@ -64,29 +61,39 @@ typedef struct { /* to drop pkts in server-to-client direction */ u32 error_drop_node_index; - vss_info *opt82vss; + vss_info *vss; /* hash lookup specific vrf_id -> option 82 vss suboption */ - uword * opt82vss_index_by_vrf_id; + u32 *vss_index_by_rx_fib_index; /* convenience */ - dhcp_client_main_t * dhcp_client_main; vlib_main_t * vlib_main; vnet_main_t * vnet_main; } dhcp_proxy_main_t; -dhcp_proxy_main_t dhcp_proxy_main; 
+extern dhcp_proxy_main_t dhcp_proxy_main; -int dhcp_proxy_set_server (ip4_address_t *addr, ip4_address_t *src_address, - u32 fib_id, int insert_option_82, int is_del); +void dhcp_send_details (void *opaque, + u32 context, + const ip46_address_t *server, + const ip46_address_t *src, + u32 server_fib_id, + u32 rx_fib_id, + u32 vss_fib_id, + u32 vss_oui); -int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, - u32 rx_fib_id, - u32 server_fib_id, - int insert_option_82, int is_del); +int dhcp_proxy_set_server (ip4_address_t *addr, + ip4_address_t *src_address, + u32 fib_id, + u32 server_fib_id, + int is_del); int dhcp_proxy_set_option82_vss(u32 vrf_id, u32 oui, u32 fib_id, int is_del); + +void dhcp_proxy_dump(void *opaque, + u32 context); + #endif /* included_dhcp_proxy_h */ diff --git a/src/vnet/dhcp/proxy_error.def b/src/vnet/dhcp/proxy_error.def index 6aa06eb5..6d790d73 100644 --- a/src/vnet/dhcp/proxy_error.def +++ b/src/vnet/dhcp/proxy_error.def @@ -21,7 +21,8 @@ dhcp_proxy_error (RELAY_TO_SERVER, "DHCP packets relayed to the server") dhcp_proxy_error (RELAY_TO_CLIENT, "DHCP packets relayed to clients") dhcp_proxy_error (OPTION_82_ERROR, "DHCP failed to insert option 82") dhcp_proxy_error (NO_OPTION_82, "DHCP option 82 missing") -dhcp_proxy_error (BAD_OPTION_82, "Bad DHCP option 82 value") +dhcp_proxy_error (BAD_OPTION_82_ITF, "Bad DHCP option 82 interface value") +dhcp_proxy_error (BAD_OPTION_82_ADDR, "Bad DHCP option 82 address value") dhcp_proxy_error (BAD_FIB_ID, "DHCP option 82 fib-id to fib-index map failure") dhcp_proxy_error (NO_INTERFACE_ADDRESS, "DHCP no interface address") dhcp_proxy_error (OPTION_82_VSS_NOT_PROCESSED, "DHCP VSS not processed by DHCP server") diff --git a/src/vnet/dhcp/proxy_node.c b/src/vnet/dhcp/proxy_node.c index 6a58fcdb..ab6819fe 100644 --- a/src/vnet/dhcp/proxy_node.c +++ b/src/vnet/dhcp/proxy_node.c @@ -18,6 +18,7 @@ #include #include #include +#include #include static char * dhcp_proxy_error_strings[] = 
{ @@ -57,6 +58,8 @@ typedef struct { vlib_node_registration_t dhcp_proxy_to_server_node; vlib_node_registration_t dhcp_proxy_to_client_node; +dhcp_proxy_main_t dhcp_proxy_main; + u8 * format_dhcp_proxy_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); @@ -94,6 +97,42 @@ u8 * format_dhcp_proxy_header_with_length (u8 * s, va_list * args) return s; } +static inline vss_info * +dhcp_get_vss_info (dhcp_proxy_main_t *dm, + u32 rx_fib_index) +{ + vss_info *v; + + if (vec_len(dm->vss_index_by_rx_fib_index) <= rx_fib_index || + dm->vss_index_by_rx_fib_index[rx_fib_index] == ~0) + { + v = NULL; + } + else + { + v = pool_elt_at_index (dm->vss, + dm->vss_index_by_rx_fib_index[rx_fib_index]); + } + + return (v); +} + +static inline dhcp_server_t * +dhcp_get_server (dhcp_proxy_main_t *dm, + u32 rx_fib_index) +{ + dhcp_server_t *s = NULL; + + if (vec_len(dm->dhcp_server_index_by_rx_fib_index) > rx_fib_index && + dm->dhcp_server_index_by_rx_fib_index[rx_fib_index] != ~0) + { + s = pool_elt_at_index (dm->dhcp_servers, + dm->dhcp_server_index_by_rx_fib_index[rx_fib_index]); + } + + return (s); +} + static uword dhcp_proxy_to_server_input (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -131,9 +170,12 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, u32 sw_if_index = 0; u32 original_sw_if_index = 0; u8 *end = NULL; - u32 fib_index, server_index; + u32 fib_index; dhcp_server_t * server; u32 rx_sw_if_index; + dhcp_option_t *o; + u32 len = 0; + vlib_buffer_free_list_t *fl; bi0 = from[0]; to_next[0] = bi0; @@ -166,26 +208,16 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, rx_sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; fib_index = im->fib_index_by_sw_if_index [rx_sw_if_index]; - - if (fib_index < vec_len(dpm->dhcp_server_index_by_rx_fib_index)) - server_index = dpm->dhcp_server_index_by_rx_fib_index[fib_index]; - else - server_index = 0; + server = dhcp_get_server(dpm, fib_index); - if (PREDICT_FALSE (pool_is_free_index 
(dpm->dhcp_servers, - server_index))) + if (PREDICT_FALSE (NULL == server)) { - no_server: error0 = DHCP_PROXY_ERROR_NO_SERVER; next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; pkts_no_server++; goto do_trace; } - server = pool_elt_at_index (dpm->dhcp_servers, server_index); - if (server->valid == 0) - goto no_server; - vlib_buffer_advance (b0, -(sizeof(*ip0))); ip0 = vlib_buffer_get_current (b0); @@ -216,142 +248,131 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, h0->gateway_ip_address.as_u32 = server->dhcp_src_address.as_u32; pkts_to_server++; - if (server->insert_option_82) - { - u32 fib_index, fib_id, opt82_fib_id=0, opt82_oui=0; - ip4_fib_t * fib; - dhcp_option_t *o = (dhcp_option_t *) h0->options; - u32 len = 0; - vlib_buffer_free_list_t *fl; + o = (dhcp_option_t *) h0->options; - fib_index = im->fib_index_by_sw_if_index - [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; - fib = ip4_fib_get (fib_index); - fib_id = fib->table_id; - - end = b0->data + b0->current_data + b0->current_length; - /* TLVs are not performance-friendly... */ - while (o->option != 0xFF /* end of options */ && (u8 *)o < end) - o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); - - fl = vlib_buffer_get_free_list (vm, b0->free_list_index); - // start write at (option*)o, some packets have padding - if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes) - { - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_too_big++; - goto do_trace; - } + fib_index = im->fib_index_by_sw_if_index + [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; + + end = b0->data + b0->current_data + b0->current_length; + /* TLVs are not performance-friendly... 
*/ + while (o->option != 0xFF /* end of options */ && (u8 *)o < end) + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); - if ((o->option == 0xFF) && ((u8 *)o <= end)) - { - vnet_main_t *vnm = vnet_get_main(); - u16 old_l0, new_l0; - ip4_address_t _ia0, * ia0 = &_ia0; - uword *p_vss; - vss_info *vss; - vnet_sw_interface_t *swif; - sw_if_index = 0; - original_sw_if_index = 0; + fl = vlib_buffer_get_free_list (vm, b0->free_list_index); + // start write at (option*)o, some packets have padding + if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes) + { + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_too_big++; + goto do_trace; + } + + if ((o->option == 0xFF) && ((u8 *)o <= end)) + { + vnet_main_t *vnm = vnet_get_main(); + u16 old_l0, new_l0; + ip4_address_t _ia0, * ia0 = &_ia0; + vss_info *vss; + vnet_sw_interface_t *swif; + sw_if_index = 0; + original_sw_if_index = 0; - original_sw_if_index = sw_if_index = - vnet_buffer(b0)->sw_if_index[VLIB_RX]; - swif = vnet_get_sw_interface (vnm, sw_if_index); - if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) - sw_if_index = swif->unnumbered_sw_if_index; + original_sw_if_index = sw_if_index = + vnet_buffer(b0)->sw_if_index[VLIB_RX]; + swif = vnet_get_sw_interface (vnm, sw_if_index); + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; - p_vss = hash_get (dpm->opt82vss_index_by_vrf_id, - fib_id); - if (p_vss) - { - vss = pool_elt_at_index (dpm->opt82vss, p_vss[0]); - opt82_oui = vss->vpn_id.oui; - opt82_fib_id = vss->vpn_id.fib_id; - } - /* - * Get the first ip4 address on the [client-side] - * RX interface, if not unnumbered. otherwise use - * the loopback interface's ip address. - */ - ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0); + /* + * Get the first ip4 address on the [client-side] + * RX interface, if not unnumbered. otherwise use + * the loopback interface's ip address. 
+ */ + ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0); - if (ia0 == 0) - { - error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_interface_address++; - goto do_trace; - } - - /* Add option 82 */ - o->option = 82; /* option 82 */ - o->length = 12; /* 12 octets to follow */ - o->data[0] = 1; /* suboption 1, circuit ID (=FIB id) */ - o->data[1] = 4; /* length of suboption */ - o->data[2] = (original_sw_if_index >> 24) & 0xFF; - o->data[3] = (original_sw_if_index >> 16) & 0xFF; - o->data[4] = (original_sw_if_index >> 8) & 0xFF; - o->data[5] = (original_sw_if_index >> 0) & 0xFF; - o->data[6] = 5; /* suboption 5 (client RX intfc address) */ - o->data[7] = 4; /* length 4 */ - o->data[8] = ia0->as_u8[0]; - o->data[9] = ia0->as_u8[1]; - o->data[10] = ia0->as_u8[2]; - o->data[11] = ia0->as_u8[3]; - o->data[12] = 0xFF; - if (opt82_oui !=0 || opt82_fib_id != 0) - { - o->data[12] = 151; /* vss suboption */ - if (255 == opt82_fib_id) { - o->data[13] = 1; /* length */ - o->data[14] = 255; /* vss option type */ - o->data[15] = 152; /* vss control suboption */ - o->data[16] = 0; /* length */ - /* and a new "end-of-options" option (0xff) */ - o->data[17] = 0xFF; - o->length += 5; - } else { - o->data[13] = 8; /* length */ - o->data[14] = 1; /* vss option type */ - o->data[15] = (opt82_oui >> 16) & 0xff; - o->data[16] = (opt82_oui >> 8) & 0xff; - o->data[17] = (opt82_oui ) & 0xff; - o->data[18] = (opt82_fib_id >> 24) & 0xff; - o->data[19] = (opt82_fib_id >> 16) & 0xff; - o->data[20] = (opt82_fib_id >> 8) & 0xff; - o->data[21] = (opt82_fib_id) & 0xff; - o->data[22] = 152; /* vss control suboption */ - o->data[23] = 0; /* length */ + if (ia0 == 0) + { + error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_interface_address++; + goto do_trace; + } + + /* Add option 82 */ + o->option = 82; /* option 82 */ + o->length = 12; /* 12 octets to follow */ + o->data[0] = 1; /* 
suboption 1, circuit ID (=FIB id) */ + o->data[1] = 4; /* length of suboption */ + o->data[2] = (original_sw_if_index >> 24) & 0xFF; + o->data[3] = (original_sw_if_index >> 16) & 0xFF; + o->data[4] = (original_sw_if_index >> 8) & 0xFF; + o->data[5] = (original_sw_if_index >> 0) & 0xFF; + o->data[6] = 5; /* suboption 5 (client RX intfc address) */ + o->data[7] = 4; /* length 4 */ + o->data[8] = ia0->as_u8[0]; + o->data[9] = ia0->as_u8[1]; + o->data[10] = ia0->as_u8[2]; + o->data[11] = ia0->as_u8[3]; + o->data[12] = 0xFF; + + vss = dhcp_get_vss_info (dpm, fib_index); + if (NULL != vss) + { + u32 opt82_fib_id=0, opt82_oui=0; + + opt82_oui = vss->vpn_id.oui; + opt82_fib_id = vss->vpn_id.fib_id; + + o->data[12] = 151; /* vss suboption */ + if (255 == opt82_fib_id) { + o->data[13] = 1; /* length */ + o->data[14] = 255; /* vss option type */ + o->data[15] = 152; /* vss control suboption */ + o->data[16] = 0; /* length */ + /* and a new "end-of-options" option (0xff) */ + o->data[17] = 0xFF; + o->length += 5; + } else { + o->data[13] = 8; /* length */ + o->data[14] = 1; /* vss option type */ + o->data[15] = (opt82_oui >> 16) & 0xff; + o->data[16] = (opt82_oui >> 8) & 0xff; + o->data[17] = (opt82_oui ) & 0xff; + o->data[18] = (opt82_fib_id >> 24) & 0xff; + o->data[19] = (opt82_fib_id >> 16) & 0xff; + o->data[20] = (opt82_fib_id >> 8) & 0xff; + o->data[21] = (opt82_fib_id) & 0xff; + o->data[22] = 152; /* vss control suboption */ + o->data[23] = 0; /* length */ - /* and a new "end-of-options" option (0xff) */ - o->data[24] = 0xFF; - o->length += 12; - } + /* and a new "end-of-options" option (0xff) */ + o->data[24] = 0xFF; + o->length += 12; } - - len = o->length + 3; - b0->current_length += len; - /* Fix IP header length and checksum */ - old_l0 = ip0->length; - new_l0 = clib_net_to_host_u16 (old_l0); - new_l0 += len; - new_l0 = clib_host_to_net_u16 (new_l0); - ip0->length = new_l0; - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length 
/* changed member */); - ip0->checksum = ip_csum_fold (sum0); - - /* Fix UDP length */ - new_l0 = clib_net_to_host_u16 (u0->length); - new_l0 += len; - u0->length = clib_host_to_net_u16 (new_l0); - } else { - vlib_node_increment_counter - (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_OPTION_82_ERROR, 1); - } - } + } + + len = o->length + 3; + b0->current_length += len; + /* Fix IP header length and checksum */ + old_l0 = ip0->length; + new_l0 = clib_net_to_host_u16 (old_l0); + new_l0 += len; + new_l0 = clib_host_to_net_u16 (new_l0); + ip0->length = new_l0; + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + /* Fix UDP length */ + new_l0 = clib_net_to_host_u16 (u0->length); + new_l0 += len; + u0->length = clib_host_to_net_u16 (new_l0); + } else { + vlib_node_increment_counter + (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_OPTION_82_ERROR, 1); + } next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; @@ -451,11 +472,13 @@ dhcp_proxy_to_client_input (vlib_main_t * vm, vnet_sw_interface_t *si0; u32 error0 = (u32)~0; vnet_sw_interface_t *swif; - u32 server_index; u32 fib_index; dhcp_server_t * server; u32 original_sw_if_index = (u32) ~0; - + ip4_address_t relay_addr = { + .as_u32 = 0, + }; + bi0 = from[0]; from += 1; n_left_from -= 1; @@ -501,13 +524,21 @@ dhcp_proxy_to_client_input (vlib_main_t * vm, and the sw_if_index */ if (sub->option == 1 && sub->length == 4) { - sw_if_index = (o->data[2] << 24) - | (o->data[3] << 16) - | (o->data[4] << 8) - | (o->data[5]); - } else if (sub->option == 151 && - sub->length == 7 && - sub->data[0] == 1) + sw_if_index = ((sub->data[0] << 24) | + (sub->data[1] << 16) | + (sub->data[2] << 8) | + (sub->data[3])); + } + else if (sub->option == 5 && sub->length == 4) + { + relay_addr.as_u8[0] = sub->data[0]; + relay_addr.as_u8[1] = sub->data[1]; + relay_addr.as_u8[2] = sub->data[2]; + relay_addr.as_u8[3] = 
sub->data[3]; + } + else if (sub->option == 151 && + sub->length == 7 && + sub->data[0] == 1) vss_exist = 1; else if (sub->option == 152 && sub->length == 0) vss_ctrl = 1; @@ -539,34 +570,27 @@ dhcp_proxy_to_client_input (vlib_main_t * vm, goto do_trace; } + if (relay_addr.as_u32 == 0) + { + error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ADDR; + goto drop_packet; + } if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index)) { - error0 = DHCP_PROXY_ERROR_BAD_OPTION_82; + error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ITF; goto drop_packet; } fib_index = im->fib_index_by_sw_if_index [sw_if_index]; + server = dhcp_get_server(dpm, fib_index); - if (fib_index < vec_len(dpm->dhcp_server_index_by_rx_fib_index)) - server_index = dpm->dhcp_server_index_by_rx_fib_index[fib_index]; - else - server_index = 0; - - if (PREDICT_FALSE (pool_is_free_index (dpm->dhcp_servers, - server_index))) - { - error0 = DHCP_PROXY_ERROR_BAD_OPTION_82; - goto drop_packet; - } - - server = pool_elt_at_index (dpm->dhcp_servers, server_index); - if (server->valid == 0) + if (PREDICT_FALSE (NULL == server)) { error0 = DHCP_PROXY_ERROR_NO_SERVER; goto drop_packet; } - + if (ip0->src_address.as_u32 != server->dhcp_server.as_u32) { error0 = DHCP_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; @@ -587,6 +611,12 @@ dhcp_proxy_to_client_input (vlib_main_t * vm, goto drop_packet; } + if (relay_addr.as_u32 != ia0->as_u32) + { + error0 = DHCP_PROXY_ERROR_BAD_YIADDR; + goto drop_packet; + } + u0->checksum = 0; u0->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcp_to_client); sum0 = ip0->checksum; @@ -677,7 +707,7 @@ clib_error_t * dhcp_proxy_init (vlib_main_t * vm) error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); dm->error_drop_node_index = error_drop_node->index; - dm->opt82vss_index_by_vrf_id = hash_create (0, sizeof (uword)); + dm->vss_index_by_rx_fib_index = NULL; udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_client, dhcp_proxy_to_client_node.index, 1 /* is_ip4 */); @@ -694,15 +724,17 @@ clib_error_t * 
dhcp_proxy_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (dhcp_proxy_init); -int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, - u32 rx_fib_id, - u32 server_fib_id, - int insert_option_82, int is_del) +int dhcp_proxy_set_server (ip4_address_t *addr, + ip4_address_t *src_address, + u32 rx_fib_id, + u32 server_fib_id, + int is_del) { dhcp_proxy_main_t * dpm = &dhcp_proxy_main; dhcp_server_t * server = 0; u32 server_index = 0; u32 rx_fib_index = 0; + const fib_prefix_t all_1s = { .fp_len = 32, @@ -719,97 +751,68 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, rx_fib_id); - if (rx_fib_id == 0) - { - server = pool_elt_at_index (dpm->dhcp_servers, 0); - - if (is_del) - { - memset (server, 0, sizeof (*server)); - fib_table_entry_special_remove(rx_fib_index, - &all_1s, - FIB_SOURCE_DHCP); - return 0; - } - if (!server->valid) - fib_table_entry_special_add(rx_fib_index, - &all_1s, - FIB_SOURCE_DHCP, - FIB_ENTRY_FLAG_LOCAL, - ADJ_INDEX_INVALID); - - goto initialize_it; - } - if (is_del) { if (rx_fib_index >= vec_len(dpm->dhcp_server_index_by_rx_fib_index)) return VNET_API_ERROR_NO_SUCH_ENTRY; server_index = dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index]; - ASSERT(server_index > 0); + + if (server_index == ~0) + return VNET_API_ERROR_NO_SUCH_ENTRY; /* Use the default server again. 
*/ - dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = 0; + dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = ~0; server = pool_elt_at_index (dpm->dhcp_servers, server_index); - memset (server, 0, sizeof (*server)); - pool_put (dpm->dhcp_servers, server); fib_table_entry_special_remove(rx_fib_index, &all_1s, FIB_SOURCE_DHCP); + fib_table_unlock (rx_fib_index, + FIB_PROTOCOL_IP4); + fib_table_unlock (server->server_fib_index, + FIB_PROTOCOL_IP4); + memset (server, 0, sizeof (*server)); + pool_put (dpm->dhcp_servers, server); return 0; } - - if (rx_fib_index < vec_len(dpm->dhcp_server_index_by_rx_fib_index)) - { - server_index = dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index]; - if (server_index != 0) - { - server = pool_elt_at_index (dpm->dhcp_servers, server_index); - goto initialize_it; - } - } - - pool_get (dpm->dhcp_servers, server); - - fib_table_entry_special_add(rx_fib_index, - &all_1s, - FIB_SOURCE_DHCP, - FIB_ENTRY_FLAG_LOCAL, - ADJ_INDEX_INVALID); - - initialize_it: - - - server->dhcp_server.as_u32 = addr->as_u32; - server->server_fib_index = - fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, - server_fib_id); - server->dhcp_src_address.as_u32 = src_address->as_u32; - server->insert_option_82 = insert_option_82; - server->valid = 1; - if (rx_fib_index) - { - vec_validate (dpm->dhcp_server_index_by_rx_fib_index, rx_fib_index); + else + { + vec_validate_init_empty(dpm->dhcp_server_index_by_rx_fib_index, + rx_fib_index, + ~0); + + pool_get (dpm->dhcp_servers, server); + + server->dhcp_server.as_u32 = addr->as_u32; + server->dhcp_src_address.as_u32 = src_address->as_u32; + + fib_table_entry_special_add(rx_fib_index, + &all_1s, + FIB_SOURCE_DHCP, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + fib_table_lock (rx_fib_index, + FIB_PROTOCOL_IP4); + + server->server_fib_index = + fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, + server_fib_id); + + vec_validate_init_empty (dpm->dhcp_server_index_by_rx_fib_index, + rx_fib_index, + ~0); 
dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = - server - dpm->dhcp_servers; - } + server - dpm->dhcp_servers; + } - return 0; -} + fib_table_unlock (rx_fib_index, + FIB_PROTOCOL_IP4); -/* Old API, manipulates the default server (only) */ -int dhcp_proxy_set_server (ip4_address_t *addr, ip4_address_t *src_address, - u32 fib_id, int insert_option_82, int is_del) -{ - return dhcp_proxy_set_server_2 (addr, src_address, 0 /* rx_fib_id */, - fib_id /* server_fib_id */, - insert_option_82, is_del); + return 0; } - static clib_error_t * dhcp_proxy_set_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -818,7 +821,6 @@ dhcp_proxy_set_command_fn (vlib_main_t * vm, ip4_address_t server_addr, src_addr; u32 server_fib_id = 0, rx_fib_id = 0; int is_del = 0; - int add_option_82 = 0; int set_src = 0, set_server = 0; while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) @@ -833,9 +835,6 @@ dhcp_proxy_set_command_fn (vlib_main_t * vm, else if (unformat(input, "src-address %U", unformat_ip4_address, &src_addr)) set_src = 1; - else if (unformat (input, "add-option-82") - || unformat (input, "insert-option-82")) - add_option_82 = 1; else if (unformat (input, "delete") || unformat (input, "del")) is_del = 1; @@ -847,8 +846,8 @@ dhcp_proxy_set_command_fn (vlib_main_t * vm, { int rv; - rv = dhcp_proxy_set_server_2 (&server_addr, &src_addr, rx_fib_id, - server_fib_id, add_option_82, is_del); + rv = dhcp_proxy_set_server (&server_addr, &src_addr, rx_fib_id, + server_fib_id, is_del); switch (rv) { case 0: @@ -882,7 +881,7 @@ dhcp_proxy_set_command_fn (vlib_main_t * vm, VLIB_CLI_COMMAND (dhcp_proxy_set_command, static) = { .path = "set dhcp proxy", - .short_help = "set dhcp proxy [del] server src-address [add-option-82] [server-fib-id ] [rx-fib-id ]", + .short_help = "set dhcp proxy [del] server src-address [server-fib-id ] [rx-fib-id ]", .function = dhcp_proxy_set_command_fn, }; @@ -896,8 +895,8 @@ u8 * format_dhcp_proxy_server (u8 * s, va_list * args) if (dm == 0) { 
- s = format (s, "%=16s%=16s%=14s%=14s%=20s", "Server", "Src Address", - "Server FIB", "RX FIB", "Insert Option 82"); + s = format (s, "%=16s%=16s%=14s%=14s", "Server", "Src Address", + "Server FIB", "RX FIB"); return s; } @@ -911,11 +910,10 @@ u8 * format_dhcp_proxy_server (u8 * s, va_list * args) if (rx_fib) rx_fib_id = rx_fib->table_id; - s = format (s, "%=16U%=16U%=14u%=14u%=20s", + s = format (s, "%=16U%=16U%=14u%=14u", format_ip4_address, &server->dhcp_server, format_ip4_address, &server->dhcp_src_address, - server_fib_id, rx_fib_id, - server->insert_option_82 ? "yes" : "no"); + server_fib_id, rx_fib_id); return s; } @@ -925,24 +923,22 @@ dhcp_proxy_show_command_fn (vlib_main_t * vm, vlib_cli_command_t * cmd) { dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - ip4_main_t * im = &ip4_main; dhcp_server_t * server; - u32 server_index; - int i; + u32 server_index, i; vlib_cli_output (vm, "%U", format_dhcp_proxy_server, 0 /* header line */, 0, 0); - for (i = 0; i < vec_len (im->fibs); i++) - { - if (i < vec_len(dpm->dhcp_server_index_by_rx_fib_index)) - server_index = dpm->dhcp_server_index_by_rx_fib_index[i]; - else - server_index = 0; + vec_foreach_index (i, dpm->dhcp_server_index_by_rx_fib_index) + { + server_index = dpm->dhcp_server_index_by_rx_fib_index[i]; + if (~0 == server_index) + continue; + server = pool_elt_at_index (dpm->dhcp_servers, server_index); - if (server->valid) - vlib_cli_output (vm, "%U", format_dhcp_proxy_server, dpm, - server, i); + + vlib_cli_output (vm, "%U", format_dhcp_proxy_server, dpm, + server, i); } return 0; @@ -954,50 +950,104 @@ VLIB_CLI_COMMAND (dhcp_proxy_show_command, static) = { .function = dhcp_proxy_show_command_fn, }; +void +dhcp_proxy_dump (void *opaque, + u32 context) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + ip4_fib_t *s_fib, *r_fib; + dhcp_server_t * server; + u32 server_index, i; + vss_info *v; + + vec_foreach_index (i, dpm->dhcp_server_index_by_rx_fib_index) + { + server_index = 
dpm->dhcp_server_index_by_rx_fib_index[i]; + if (~0 == server_index) + continue; + + server = pool_elt_at_index (dpm->dhcp_servers, server_index); + v = dhcp_get_vss_info(dpm, i); + + ip46_address_t src_addr = { + .ip4 = server->dhcp_src_address, + }; + ip46_address_t server_addr = { + .ip4 = server->dhcp_server, + }; + + s_fib = ip4_fib_get(server->server_fib_index); + r_fib = ip4_fib_get(i); + + dhcp_send_details(opaque, + context, + &server_addr, + &src_addr, + s_fib->table_id, + r_fib->table_id, + (v ? v->vpn_id.fib_id : 0), + (v ? v->vpn_id.oui : 0)); + } +} -int dhcp_proxy_set_option82_vss( u32 vrf_id, - u32 oui, - u32 fib_id, - int is_del) +int dhcp_proxy_set_option82_vss(u32 tbl_id, + u32 oui, + u32 fib_id, + int is_del) { dhcp_proxy_main_t *dm = &dhcp_proxy_main; - uword *p; - vss_info *a; - u32 old_oui=0, old_fib_id=0; + vss_info *v = NULL; + u32 rx_fib_index; + int rc = 0; - p = hash_get (dm->opt82vss_index_by_vrf_id, vrf_id); + rx_fib_index = ip4_fib_table_find_or_create_and_lock(tbl_id); + v = dhcp_get_vss_info(dm, rx_fib_index); - if (p) - { - a = pool_elt_at_index (dm->opt82vss, p[0]); - if (!a) - return VNET_API_ERROR_NO_SUCH_FIB; - old_oui = a->vpn_id.oui; - old_fib_id = a->vpn_id.fib_id; - + if (NULL != v) + { if (is_del) - { - if (old_oui == oui && - old_fib_id == fib_id) - { - pool_put(dm->opt82vss, a); - hash_unset (dm->opt82vss_index_by_vrf_id, vrf_id); - return 0; - } - else - return VNET_API_ERROR_NO_SUCH_ENTRY; - } - pool_put(dm->opt82vss, a); - hash_unset (dm->opt82vss_index_by_vrf_id, vrf_id); - } else if (is_del) - return VNET_API_ERROR_NO_SUCH_ENTRY; - pool_get (dm->opt82vss, a); - memset (a, ~0, sizeof (a[0])); - a->vpn_id.oui = oui; - a->vpn_id.fib_id = fib_id; - hash_set (dm->opt82vss_index_by_vrf_id, vrf_id, a - dm->opt82vss); + { + /* release the lock held on the table when the VSS + * info was created */ + fib_table_unlock (rx_fib_index, + FIB_PROTOCOL_IP4); + + pool_put (dm->vss, v); + dm->vss_index_by_rx_fib_index[rx_fib_index] 
= ~0; + } + else + { + /* this is a modify */ + v->vpn_id.fib_id = fib_id; + v->vpn_id.oui = oui; + } + } + else + { + if (is_del) + rc = VNET_API_ERROR_NO_SUCH_ENTRY; + else + { + /* create a new entry */ + vec_validate_init_empty(dm->vss_index_by_rx_fib_index, + rx_fib_index, ~0); + + /* hold a lock on the table whilst the VSS info exist */ + fib_table_lock (rx_fib_index, + FIB_PROTOCOL_IP4); + + pool_get (dm->vss, v); + v->vpn_id.fib_id = fib_id; + v->vpn_id.oui = oui; + dm->vss_index_by_rx_fib_index[rx_fib_index] = v - dm->vss; + } + } + + /* Release the lock taken during the create_or_lock at the start */ + fib_table_unlock (rx_fib_index, + FIB_PROTOCOL_IP4); - return 0; + return (rc); } static clib_error_t * @@ -1065,20 +1115,20 @@ dhcp_vss_show_command_fn (vlib_main_t * vm, { dhcp_proxy_main_t * dm = &dhcp_proxy_main; + ip4_fib_t *fib; + u32 *fib_index; vss_info *v; - u32 oui; - u32 fib_id; - u32 tbl_id; - uword index; vlib_cli_output (vm, "%=9s%=11s%=12s","Table", "OUI", "VPN-ID"); - hash_foreach (tbl_id, index, dm->opt82vss_index_by_vrf_id, + pool_foreach (fib_index, dm->vss_index_by_rx_fib_index, ({ - v = pool_elt_at_index (dm->opt82vss, index); - oui = v->vpn_id.oui; - fib_id = v->vpn_id.fib_id; - vlib_cli_output (vm, "%=9d 0x%08x%=12d", - tbl_id, oui, fib_id); + fib = ip4_fib_get (*fib_index); + v = pool_elt_at_index (dm->vss, *fib_index); + + vlib_cli_output (vm, "%=6d%=6d%=12d", + fib->table_id, + v->vpn_id.oui, + v->vpn_id.fib_id); })); return 0; diff --git a/src/vnet/dhcpv6/proxy.h b/src/vnet/dhcpv6/proxy.h index 9e18913a..77ced361 100644 --- a/src/vnet/dhcpv6/proxy.h +++ b/src/vnet/dhcpv6/proxy.h @@ -48,9 +48,7 @@ typedef union { typedef struct { ip6_address_t dhcp6_server; ip6_address_t dhcp6_src_address; - u32 insert_vss; u32 server_fib6_index; - u32 valid; } dhcpv6_server_t; typedef struct { @@ -70,7 +68,7 @@ typedef struct { dhcpv6_vss_info *vss; /* hash lookup specific vrf_id -> VSS vector index*/ - uword *vss_index_by_vrf_id; + u32 
*vss_index_by_rx_fib_index; /* convenience */ vlib_main_t * vlib_main; @@ -79,17 +77,18 @@ typedef struct { dhcpv6_proxy_main_t dhcpv6_proxy_main; -int dhcpv6_proxy_set_server (ip6_address_t *addr, ip6_address_t *src_address, - u32 fib_id, int insert_vss, int is_del); - int dhcpv6_proxy_set_vss(u32 tbl_id, u32 oui, u32 fib_id, int is_del); -int dhcpv6_proxy_set_server_2 (ip6_address_t *addr, ip6_address_t *src_address, - u32 rx_fib_id, - u32 server_fib_id, - int insert_vss, int is_del); +int dhcpv6_proxy_set_server(ip6_address_t *addr, + ip6_address_t *src_address, + u32 rx_fib_id, + u32 server_fib_id, + int is_del); + +void dhcpv6_proxy_dump(void *opaque, + u32 context); #endif /* included_dhcpv6_proxy_h */ diff --git a/src/vnet/dhcpv6/proxy_node.c b/src/vnet/dhcpv6/proxy_node.c index 4137624c..f40798e6 100644 --- a/src/vnet/dhcpv6/proxy_node.c +++ b/src/vnet/dhcpv6/proxy_node.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -117,6 +118,42 @@ static inline void copy_ip6_address (ip6_address_t *dst, ip6_address_t *src) dst->as_u64[1] = src->as_u64[1]; } +static inline dhcpv6_vss_info * +dhcpv6_get_vss_info (dhcpv6_proxy_main_t *dm, + u32 rx_fib_index) +{ + dhcpv6_vss_info *v; + + if (vec_len(dm->vss_index_by_rx_fib_index) <= rx_fib_index || + dm->vss_index_by_rx_fib_index[rx_fib_index] == ~0) + { + v = NULL; + } + else + { + v = pool_elt_at_index (dm->vss, + dm->vss_index_by_rx_fib_index[rx_fib_index]); + } + + return (v); +} + +static inline dhcpv6_server_t * +dhcpv6_get_server (dhcpv6_proxy_main_t *dm, + u32 rx_fib_index) +{ + dhcpv6_server_t *s = NULL; + + if (vec_len(dm->dhcp6_server_index_by_rx_fib_index) > rx_fib_index && + dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] != ~0) + { + s = pool_elt_at_index (dm->dhcp6_servers, + dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index]); + } + + return (s); +} + static uword dhcpv6_proxy_to_server_input (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -132,13 +169,10 @@ 
dhcpv6_proxy_to_server_input (vlib_main_t * vm, u32 pkts_wrong_msg_type=0; u32 pkts_too_big=0; ip6_main_t * im = &ip6_main; - ip6_fib_t * fib; ip6_address_t * src; int bogus_length; dhcpv6_server_t * server; u32 rx_fib_idx = 0, server_fib_idx = 0; - u32 server_idx; - u32 fib_id1 = 0; next_index = node->cached_next_index; @@ -172,12 +206,8 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, ethernet_header_t * e_h0; u8 client_src_mac[6]; vlib_buffer_free_list_t *fl; - - uword *p_vss; - u32 oui1=0; dhcpv6_vss_info *vss; - bi0 = from[0]; to_next[0] = bi0; from += 1; @@ -228,25 +258,15 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, /* Send to DHCPV6 server via the configured FIB */ rx_sw_if_index = sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; rx_fib_idx = im->fib_index_by_sw_if_index [rx_sw_if_index]; + server = dhcpv6_get_server(dpm, rx_fib_idx); - if (vec_len(dpm->dhcp6_server_index_by_rx_fib_index) <= rx_fib_idx) - goto no_server; - - server_idx = dpm->dhcp6_server_index_by_rx_fib_index[rx_fib_idx]; - - if (PREDICT_FALSE (pool_is_free_index (dpm->dhcp6_servers, - server_idx))) - { - no_server: - error0 = DHCPV6_PROXY_ERROR_NO_SERVER; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_server++; - goto do_trace; - } - - server = pool_elt_at_index(dpm->dhcp6_servers, server_idx); - if (server->valid == 0) - goto no_server; + if (PREDICT_FALSE (NULL == server)) + { + error0 = DHCPV6_PROXY_ERROR_NO_SERVER; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_server++; + goto do_trace; + } server_fib_idx = server->server_fib6_index; vnet_buffer(b0)->sw_if_index[VLIB_TX] = server_fib_idx; @@ -331,19 +351,6 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, id1 = (dhcpv6_int_id_t *) (((uword) ip1) + b0->current_length); b0->current_length += (sizeof (*id1)); - - fib = ip6_fib_get (rx_fib_idx); - - //TODO: Revisit if hash makes sense here - p_vss = hash_get (dpm->vss_index_by_vrf_id, - fib->table_id); - if (p_vss) - { - vss = pool_elt_at_index 
(dpm->vss, p_vss[0]); - oui1 = vss->vpn_id.oui; - fib_id1 = vss->vpn_id.fib_id; - } - id1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_INTERFACE_ID); id1->opt.length = clib_host_to_net_u16(sizeof(rx_sw_if_index)); id1->int_idx = clib_host_to_net_u32(rx_sw_if_index); @@ -360,20 +367,24 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, clib_memcpy(cmac->data, client_src_mac, 6); u1->length += sizeof(*cmac); } - if (server->insert_vss !=0 ) { + + //TODO: Revisit if hash makes sense here + vss = dhcpv6_get_vss_info(dpm, rx_fib_idx); + + if (NULL != vss) { vss1 = (dhcpv6_vss_t *) (((uword) ip1) + b0->current_length); b0->current_length += (sizeof (*vss1)); vss1->opt.length =clib_host_to_net_u16(sizeof(*vss1) - sizeof(vss1->opt)); vss1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_VSS); vss1->data[0] = 1; // type - vss1->data[1] = oui1>>16 & 0xff; - vss1->data[2] = oui1>>8 & 0xff; - vss1->data[3] = oui1 & 0xff; - vss1->data[4] = fib_id1>>24 & 0xff; - vss1->data[5] = fib_id1>>16 & 0xff; - vss1->data[6] = fib_id1>>8 & 0xff; - vss1->data[7] = fib_id1 & 0xff; + vss1->data[1] = vss->vpn_id.oui >>16 & 0xff; + vss1->data[2] = vss->vpn_id.oui >>8 & 0xff; + vss1->data[3] = vss->vpn_id.oui & 0xff; + vss1->data[4] = vss->vpn_id.fib_id >> 24 & 0xff; + vss1->data[5] = vss->vpn_id.fib_id >> 16 & 0xff; + vss1->data[6] = vss->vpn_id.fib_id >> 8 & 0xff; + vss1->data[7] = vss->vpn_id.fib_id & 0xff; u1->length += sizeof(*vss1); } @@ -524,9 +535,8 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm, u16 len = 0; u8 interface_opt_flag = 0; u8 relay_msg_opt_flag = 0; - ip6_fib_t * svr_fib; ip6_main_t * im = &ip6_main; - u32 server_fib_idx, svr_fib_id, client_fib_idx, server_idx; + u32 server_fib_idx, client_fib_idx; bi0 = from[0]; from += 1; @@ -608,31 +618,18 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm, vlib_buffer_advance (b0, sizeof(*r0)); client_fib_idx = im->fib_index_by_sw_if_index[sw_if_index]; - if (client_fib_idx < vec_len(dm->dhcp6_server_index_by_rx_fib_index)) - 
server_idx = dm->dhcp6_server_index_by_rx_fib_index[client_fib_idx]; - else - server_idx = 0; - - if (PREDICT_FALSE (pool_is_free_index (dm->dhcp6_servers, server_idx))) - { - error0 = DHCPV6_PROXY_ERROR_WRONG_INTERFACE_ID_OPTION; - goto drop_packet; - } + server = dhcpv6_get_server(dm, client_fib_idx); - server = pool_elt_at_index (dm->dhcp6_servers, server_idx); - if (server->valid == 0) + if (NULL == server) { error0 = DHCPV6_PROXY_ERROR_NO_SERVER; goto drop_packet; } - server_fib_idx = im->fib_index_by_sw_if_index [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; - svr_fib = ip6_fib_get (server_fib_idx); - svr_fib_id = svr_fib->table_id; - if (svr_fib_id != server->server_fib6_index || + if (server_fib_idx != server->server_fib6_index || ip0->src_address.as_u64[0] != server->dhcp6_server.as_u64[0] || ip0->src_address.as_u64[1] != server->dhcp6_server.as_u64[1]) { @@ -760,7 +757,7 @@ clib_error_t * dhcpv6_proxy_init (vlib_main_t * vm) error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); dm->error_drop_node_index = error_drop_node->index; - dm->vss_index_by_vrf_id = hash_create (0, sizeof (uword)); + dm->vss_index_by_rx_fib_index = NULL; /* RFC says this is the dhcpv6 server address */ dm->all_dhcpv6_server_address.as_u64[0] = clib_host_to_net_u64 (0xFF05000000000000); @@ -785,121 +782,138 @@ clib_error_t * dhcpv6_proxy_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (dhcpv6_proxy_init); -/* Old API, manipulates a single server (only) shared by all Rx VRFs */ -int dhcpv6_proxy_set_server (ip6_address_t *addr, ip6_address_t *src_address, - u32 fib_id, int insert_vss, int is_del) -{ - return dhcpv6_proxy_set_server_2 (addr, src_address, - 0, fib_id, - insert_vss, is_del); -} - -int dhcpv6_proxy_set_server_2 (ip6_address_t *addr, ip6_address_t *src_address, - u32 rx_fib_id, u32 server_fib_id, - int insert_vss, int is_del) +int dhcpv6_proxy_set_server (ip6_address_t *addr, + ip6_address_t *src_address, + u32 rx_fib_id, + u32 server_fib_id, + int is_del) { 
dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; dhcpv6_server_t * server = 0; - u32 server_fib_index = 0; u32 rx_fib_index = 0; + int rc = 0; rx_fib_index = ip6_mfib_table_find_or_create_and_lock(rx_fib_id); - server_fib_index = ip6_fib_table_find_or_create_and_lock(server_fib_id); - - if (is_del) - { - - if (rx_fib_index >= vec_len(dm->dhcp6_server_index_by_rx_fib_index)) - return VNET_API_ERROR_NO_SUCH_ENTRY; - server_fib_index = dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index]; - - dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] = 0; - server = pool_elt_at_index (dm->dhcp6_servers, server_fib_index); - memset (server, 0, sizeof (*server)); - pool_put (dm->dhcp6_servers, server); - return 0; + const mfib_prefix_t all_dhcp_servers = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_grp_addr = { + .ip6 = dm->all_dhcpv6_server_relay_agent_address, } + }; - if (addr->as_u64[0] == 0 && - addr->as_u64[1] == 0 ) - return VNET_API_ERROR_INVALID_DST_ADDRESS; - - if (src_address->as_u64[0] == 0 && - src_address->as_u64[1] == 0) - return VNET_API_ERROR_INVALID_SRC_ADDRESS; - - if (rx_fib_id == 0) + if (is_del) { - server = pool_elt_at_index (dm->dhcp6_servers, 0); - if (server->valid) - goto reconfigure_it; - else - goto initialize_it; - } + server = dhcpv6_get_server(dm, rx_fib_index); - if (rx_fib_index < vec_len(dm->dhcp6_server_index_by_rx_fib_index)) - { - server_fib_index = dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index]; - if (server_fib_index != 0) + if (NULL == server) { - server = pool_elt_at_index (dm->dhcp6_servers, server_fib_index); - goto initialize_it; + rc = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; } - } - /*Allocate a new server*/ - pool_get (dm->dhcp6_servers, server); - - initialize_it: - { - const mfib_prefix_t all_dhcp_servers = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_grp_addr = { - .ip6 = dm->all_dhcpv6_server_relay_agent_address, - } - }; - const fib_route_path_t path_for_us = { - .frp_proto = FIB_PROTOCOL_IP6, - 
.frp_addr = zero_addr, - .frp_sw_if_index = 0xffffffff, - .frp_fib_index = ~0, - .frp_weight = 0, - .frp_flags = FIB_ROUTE_PATH_LOCAL, - }; - mfib_table_entry_path_update(rx_fib_index, - &all_dhcp_servers, - MFIB_SOURCE_DHCP, - &path_for_us, - MFIB_ITF_FLAG_FORWARD); /* - * Each interface that is enabled in this table, needs to be added - * as an accepting interface, but this is not easily doable in VPP. - * So we cheat. Add a flag to the entry that indicates accept form - * any interface. - * We will still only accept on v6 enabled interfaces, since the input - * feature ensures this. + * release the locks held on the server fib and rx mfib */ - mfib_table_entry_update(rx_fib_index, + mfib_table_entry_delete(rx_fib_index, &all_dhcp_servers, - MFIB_SOURCE_DHCP, - MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); - } + MFIB_SOURCE_DHCP); + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + fib_table_unlock(server->server_fib6_index, FIB_PROTOCOL_IP6); -reconfigure_it: + dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] = ~0; - copy_ip6_address(&server->dhcp6_server, addr); - copy_ip6_address(&server->dhcp6_src_address, src_address); - server->server_fib6_index = server_fib_index; - server->valid = 1; - server->insert_vss = insert_vss; + memset (server, 0, sizeof (*server)); + pool_put (dm->dhcp6_servers, server); + } + else + { + if (addr->as_u64[0] == 0 && + addr->as_u64[1] == 0 ) + { + rc = VNET_API_ERROR_INVALID_DST_ADDRESS; + goto out; + } + if (src_address->as_u64[0] == 0 && + src_address->as_u64[1] == 0) + { + rc = VNET_API_ERROR_INVALID_SRC_ADDRESS; + goto out; + } - vec_validate (dm->dhcp6_server_index_by_rx_fib_index, rx_fib_index); - dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] = - server - dm->dhcp6_servers; + server = dhcpv6_get_server(dm, rx_fib_index); - return 0; + if (NULL != server) + { + /* modify of an existing entry */ + ip6_fib_t *fib; + + fib = ip6_fib_get(server->server_fib6_index); + + if (fib->table_id != server_fib_id) + { + /* swap tables */ + 
fib_table_unlock(server->server_fib6_index, FIB_PROTOCOL_IP6); + server->server_fib6_index = + ip6_fib_table_find_or_create_and_lock(server_fib_id); + } + } + else + { + /* Allocate a new server */ + pool_get (dm->dhcp6_servers, server); + + vec_validate_init_empty (dm->dhcp6_server_index_by_rx_fib_index, + rx_fib_index, ~0); + dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] = + server - dm->dhcp6_servers; + + server->server_fib6_index = + ip6_fib_table_find_or_create_and_lock(server_fib_id); + mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6); + + const mfib_prefix_t all_dhcp_servers = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_grp_addr = { + .ip6 = dm->all_dhcpv6_server_relay_agent_address, + } + }; + const fib_route_path_t path_for_us = { + .frp_proto = FIB_PROTOCOL_IP6, + .frp_addr = zero_addr, + .frp_sw_if_index = 0xffffffff, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = FIB_ROUTE_PATH_LOCAL, + }; + mfib_table_entry_path_update(rx_fib_index, + &all_dhcp_servers, + MFIB_SOURCE_DHCP, + &path_for_us, + MFIB_ITF_FLAG_FORWARD); + /* + * Each interface that is enabled in this table, needs to be added + * as an accepting interface, but this is not easily doable in VPP. + * So we cheat. Add a flag to the entry that indicates accept form + * any interface. + * We will still only accept on v6 enabled interfaces, since the + * input feature ensures this. 
+ */ + mfib_table_entry_update(rx_fib_index, + &all_dhcp_servers, + MFIB_SOURCE_DHCP, + MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); + } + copy_ip6_address(&server->dhcp6_server, addr); + copy_ip6_address(&server->dhcp6_src_address, src_address); + } + +out: + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + + return (rc); } static clib_error_t * @@ -910,7 +924,7 @@ dhcpv6_proxy_set_command_fn (vlib_main_t * vm, ip6_address_t addr, src_addr; int set_server = 0, set_src_address = 0; u32 rx_fib_id = 0, server_fib_id = 0; - int is_del = 0, add_vss = 0; + int is_del = 0; while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) { @@ -924,9 +938,6 @@ dhcpv6_proxy_set_command_fn (vlib_main_t * vm, ; else if (unformat (input, "rx-fib-id %d", &rx_fib_id)) ; - else if (unformat (input, "add-vss-option") - || unformat (input, "insert-option")) - add_vss = 1; else if (unformat (input, "delete") || unformat (input, "del")) is_del = 1; @@ -938,8 +949,8 @@ dhcpv6_proxy_set_command_fn (vlib_main_t * vm, { int rv; - rv = dhcpv6_proxy_set_server_2 (&addr, &src_addr, rx_fib_id, - server_fib_id, add_vss, is_del); + rv = dhcpv6_proxy_set_server (&addr, &src_addr, rx_fib_id, + server_fib_id, is_del); //TODO: Complete the errors switch (rv) @@ -962,7 +973,7 @@ dhcpv6_proxy_set_command_fn (vlib_main_t * vm, VLIB_CLI_COMMAND (dhcpv6_proxy_set_command, static) = { .path = "set dhcpv6 proxy", .short_help = "set dhcpv6 proxy [del] server src-address " - "[add-vss-option] [server-fib-id ] [rx-fib-id ] ", + "[server-fib-id ] [rx-fib-id ] ", .function = dhcpv6_proxy_set_command_fn, }; @@ -976,8 +987,8 @@ u8 * format_dhcpv6_proxy_server (u8 * s, va_list * args) if (dm == 0) { - s = format (s, "%=40s%=40s%=14s%=14s%=20s", "Server Address", "Source Address", - "Server FIB", "RX FIB", "Insert VSS Option"); + s = format (s, "%=40s%=40s%=14s%=14s", "Server Address", "Source Address", + "Server FIB", "RX FIB"); return s; } @@ -990,11 +1001,10 @@ u8 * format_dhcpv6_proxy_server (u8 * s, va_list * args) if 
(rx_fib) rx_fib_id = rx_fib->table_id; - s = format (s, "%=40U%=40U%=14u%=14u%=20s", + s = format (s, "%=40U%=40U%=14u%=14u", format_ip6_address, &server->dhcp6_server, format_ip6_address, &server->dhcp6_src_address, - server_fib_id, rx_fib_id, - server->insert_vss ? "yes" : "no"); + server_fib_id, rx_fib_id); return s; } @@ -1003,25 +1013,25 @@ dhcpv6_proxy_show_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; - ip6_main_t * im = &ip6_main; + dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main; int i; u32 server_index; dhcpv6_server_t * server; vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, 0 /* header line */, 0, 0); - for (i = 0; i < vec_len (im->fibs); i++) - { - if (i < vec_len(dm->dhcp6_server_index_by_rx_fib_index)) - server_index = dm->dhcp6_server_index_by_rx_fib_index[i]; - else - server_index = 0; - server = pool_elt_at_index (dm->dhcp6_servers, server_index); - if (server->valid) - vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, dm, - server, i); - } + vec_foreach_index (i, dpm->dhcp6_server_index_by_rx_fib_index) + { + server_index = dpm->dhcp6_server_index_by_rx_fib_index[i]; + if (~0 == server_index) + continue; + + server = pool_elt_at_index (dpm->dhcp6_servers, server_index); + + vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, dpm, + server, i); + } + return 0; } @@ -1031,51 +1041,104 @@ VLIB_CLI_COMMAND (dhcpv6_proxy_show_command, static) = { .function = dhcpv6_proxy_show_command_fn, }; +void +dhcpv6_proxy_dump (void *opaque, + u32 context) +{ + dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main; + ip6_fib_t *s_fib, *r_fib; + dhcpv6_server_t * server; + u32 server_index, i; + dhcpv6_vss_info *v; + + vec_foreach_index (i, dpm->dhcp6_server_index_by_rx_fib_index) + { + server_index = dpm->dhcp6_server_index_by_rx_fib_index[i]; + if (~0 == server_index) + continue; + + server = pool_elt_at_index (dpm->dhcp6_servers, server_index); + v = 
dhcpv6_get_vss_info(dpm, i); + + ip46_address_t src_addr = { + .ip6 = server->dhcp6_src_address, + }; + ip46_address_t server_addr = { + .ip6 = server->dhcp6_server, + }; + + s_fib = ip6_fib_get(server->server_fib6_index); + r_fib = ip6_fib_get(i); + + dhcp_send_details(opaque, + context, + &server_addr, + &src_addr, + s_fib->table_id, + r_fib->table_id, + (v ? v->vpn_id.fib_id : 0), + (v ? v->vpn_id.oui : 0)); + } +} + int dhcpv6_proxy_set_vss(u32 tbl_id, u32 oui, u32 fib_id, int is_del) { dhcpv6_proxy_main_t *dm = &dhcpv6_proxy_main; - u32 old_oui, old_fib_id; - uword *p; - dhcpv6_vss_info *v; + dhcpv6_vss_info *v = NULL; + u32 rx_fib_index; + int rc = 0; - p = hash_get (dm->vss_index_by_vrf_id, tbl_id); + rx_fib_index = ip6_fib_table_find_or_create_and_lock(tbl_id); + v = dhcpv6_get_vss_info(dm, rx_fib_index); - if (p) { - v = pool_elt_at_index (dm->vss, p[0]); - if (!v) - return VNET_API_ERROR_NO_SUCH_FIB; - - old_oui = v->vpn_id.oui; - old_fib_id = v->vpn_id.fib_id; + if (NULL != v) + { + if (is_del) + { + /* release the lock held on the table when the VSS + * info was created */ + fib_table_unlock (rx_fib_index, + FIB_PROTOCOL_IP6); + pool_put (dm->vss, v); + dm->vss_index_by_rx_fib_index[rx_fib_index] = ~0; + } + else + { + /* this is a modify */ + v->vpn_id.fib_id = fib_id; + v->vpn_id.oui = oui; + } + } + else + { if (is_del) + rc = VNET_API_ERROR_NO_SUCH_ENTRY; + else { - if (old_oui == oui && - old_fib_id == fib_id ) - { - pool_put(dm->vss, v); - hash_unset (dm->vss_index_by_vrf_id, tbl_id); - return 0; - } - else - return VNET_API_ERROR_NO_SUCH_ENTRY; + /* create a new entry */ + vec_validate_init_empty(dm->vss_index_by_rx_fib_index, + rx_fib_index, ~0); + + /* hold a lock on the table whilst the VSS info exist */ + fib_table_lock (rx_fib_index, + FIB_PROTOCOL_IP6); + + pool_get (dm->vss, v); + v->vpn_id.fib_id = fib_id; + v->vpn_id.oui = oui; + dm->vss_index_by_rx_fib_index[rx_fib_index] = v - dm->vss; } + } - pool_put(dm->vss, v); - hash_unset 
(dm->vss_index_by_vrf_id, tbl_id); - } else if (is_del) - return VNET_API_ERROR_NO_SUCH_ENTRY; - - pool_get (dm->vss, v); - memset (v, ~0, sizeof (*v)); - v->vpn_id.fib_id = fib_id; - v->vpn_id.oui = oui; - hash_set (dm->vss_index_by_vrf_id, tbl_id, v - dm->vss); + /* Release the lock taken during the create_or_lock at the start */ + fib_table_unlock (rx_fib_index, + FIB_PROTOCOL_IP6); - return 0; + return (rc); } @@ -1147,19 +1210,19 @@ dhcpv6_vss_show_command_fn (vlib_main_t * vm, { dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; dhcpv6_vss_info *v; - u32 oui; - u32 fib_id; - u32 tbl_id; - uword index; + ip6_fib_t *fib; + u32 *fib_index; vlib_cli_output (vm, "%=6s%=6s%=12s","Table", "OUI", "VPN ID"); - hash_foreach (tbl_id, index, dm->vss_index_by_vrf_id, + pool_foreach (fib_index, dm->vss_index_by_rx_fib_index, ({ - v = pool_elt_at_index (dm->vss, index); - oui = v->vpn_id.oui; - fib_id = v->vpn_id.fib_id; - vlib_cli_output (vm, "%=6d%=6d%=12d", - tbl_id, oui, fib_id); + fib = ip6_fib_get (*fib_index); + v = pool_elt_at_index (dm->vss, *fib_index); + + vlib_cli_output (vm, "%=6d%=6d%=12d", + fib->table_id, + v->vpn_id.oui, + v->vpn_id.fib_id); })); return 0; diff --git a/src/vnet/dpo/receive_dpo.c b/src/vnet/dpo/receive_dpo.c index 2b2571c6..83e33ed8 100644 --- a/src/vnet/dpo/receive_dpo.c +++ b/src/vnet/dpo/receive_dpo.c @@ -102,6 +102,11 @@ format_receive_dpo (u8 *s, va_list *ap) vnet_main_t * vnm = vnet_get_main(); receive_dpo_t *rd; + if (pool_is_free_index(receive_dpo_pool, index)) + { + return (format(s, "dpo-receive DELETED")); + } + rd = receive_dpo_get(index); if (~0 != rd->rd_sw_if_index) diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index a7dca989..70b4e4c9 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -772,37 +772,6 @@ static void *vl_api_dhcp_proxy_config_t_print { u8 *s; - s = format (0, "SCRIPT: dhcp_proxy_config "); - - s = format (s, "vrf_id %d ", ntohl (mp->vrf_id)); - - if (mp->is_ipv6) - { - s = 
format (s, "svr %U ", format_ip6_address, - (ip6_address_t *) mp->dhcp_server); - s = format (s, "src %U ", format_ip6_address, - (ip6_address_t *) mp->dhcp_src_address); - } - else - { - s = format (s, "svr %U ", format_ip4_address, - (ip4_address_t *) mp->dhcp_server); - s = format (s, "src %U ", format_ip4_address, - (ip4_address_t *) mp->dhcp_src_address); - } - if (mp->is_add == 0) - s = format (s, "del "); - - s = format (s, "insert-cid %d ", mp->insert_circuit_id); - - FINISH; -} - -static void *vl_api_dhcp_proxy_config_2_t_print - (vl_api_dhcp_proxy_config_2_t * mp, void *handle) -{ - u8 *s; - s = format (0, "SCRIPT: dhcp_proxy_config_2 "); s = format (s, "rx_vrf_id %d ", ntohl (mp->rx_vrf_id)); @@ -825,8 +794,6 @@ static void *vl_api_dhcp_proxy_config_2_t_print if (mp->is_add == 0) s = format (s, "del "); - s = format (s, "insert-cid %d ", mp->insert_circuit_id); - FINISH; } @@ -2954,7 +2921,6 @@ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ _(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \ _(CLASSIFY_SET_INTERFACE_L2_TABLES, classify_set_interface_l2_tables) \ _(ADD_NODE_NEXT, add_node_next) \ -_(DHCP_PROXY_CONFIG_2, dhcp_proxy_config_2) \ _(DHCP_CLIENT_CONFIG, dhcp_client_config) \ _(L2TPV3_CREATE_TUNNEL, l2tpv3_create_tunnel) \ _(L2TPV3_SET_TUNNEL_COOKIES, l2tpv3_set_tunnel_cookies) \ diff --git a/test/test_dhcp.py b/test/test_dhcp.py index 04ab2e11..fbfb8a0c 100644 --- a/test/test_dhcp.py +++ b/test/test_dhcp.py @@ -65,7 +65,7 @@ class TestDHCP(VppTestCase): for i in self.pg_interfaces: i.assert_nothing_captured(remark=remark) - def validate_option_82(self, pkt, intf, ip_addr): + def validate_relay_options(self, pkt, intf, ip_addr, fib_id, oui): dhcp = pkt[DHCP] found = 0 data = [] @@ -77,7 +77,10 @@ class TestDHCP(VppTestCase): # There are two sb-options present - each of length 6. 
# data = i[1] - self.assertEqual(len(data), 12) + if oui != 0: + self.assertEqual(len(data), 24) + else: + self.assertEqual(len(data), 12) # # First sub-option is ID 1, len 4, then encoded @@ -107,12 +110,30 @@ class TestDHCP(VppTestCase): self.assertEqual(data[10], claddr[2]) self.assertEqual(data[11], claddr[3]) + if oui != 0: + # sub-option 151 encodes the 3 byte oui + # and the 4 byte fib_id + self.assertEqual(ord(data[12]), 151) + self.assertEqual(ord(data[13]), 8) + self.assertEqual(ord(data[14]), 1) + self.assertEqual(ord(data[15]), 0) + self.assertEqual(ord(data[16]), 0) + self.assertEqual(ord(data[17]), oui) + self.assertEqual(ord(data[18]), 0) + self.assertEqual(ord(data[19]), 0) + self.assertEqual(ord(data[20]), 0) + self.assertEqual(ord(data[21]), fib_id) + + # VSS control sub-option + self.assertEqual(ord(data[22]), 152) + self.assertEqual(ord(data[23]), 0) + found = 1 self.assertTrue(found) return data - def verify_dhcp_offer(self, pkt, intf, check_option_82=True): + def verify_dhcp_offer(self, pkt, intf): ether = pkt[Ether] self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff") self.assertEqual(ether.src, intf.local_mac) @@ -134,11 +155,9 @@ class TestDHCP(VppTestCase): is_offer = True self.assertTrue(is_offer) - if check_option_82: - data = self.validate_option_82(pkt, intf, intf.local_ip4) + data = self.validate_relay_options(pkt, intf, intf.local_ip4, 0, 0) - def verify_dhcp_discover(self, pkt, intf, src_intf=None, - option_82_present=True): + def verify_dhcp_discover(self, pkt, intf, src_intf=None, fib_id=0, oui=0): ether = pkt[Ether] self.assertEqual(ether.dst, intf.remote_mac) self.assertEqual(ether.src, intf.local_mac) @@ -161,13 +180,10 @@ class TestDHCP(VppTestCase): is_discover = True self.assertTrue(is_discover) - if option_82_present: - data = self.validate_option_82(pkt, src_intf, src_intf.local_ip4) - return data - else: - for i in dhcp.options: - if type(i) is tuple: - self.assertNotEqual(i[0], "relay_agent_Information") + data = 
self.validate_relay_options(pkt, src_intf, + src_intf.local_ip4, + fib_id, oui) + return data def verify_dhcp6_solicit(self, pkt, intf, peer_ip, peer_mac, @@ -193,18 +209,19 @@ class TestDHCP(VppTestCase): self.assertEqual(cll.lltype, 1) self.assertEqual(cll.clladdr, peer_mac) - vss = pkt[DHCP6OptVSS] - self.assertEqual(vss.optlen, 8) - self.assertEqual(vss.type, 1) - # the OUI and FIB-id are really 3 and 4 bytes resp. - # but the tested range is small - self.assertEqual(ord(vss.data[0]), 0) - self.assertEqual(ord(vss.data[1]), 0) - self.assertEqual(ord(vss.data[2]), oui) - self.assertEqual(ord(vss.data[3]), 0) - self.assertEqual(ord(vss.data[4]), 0) - self.assertEqual(ord(vss.data[5]), 0) - self.assertEqual(ord(vss.data[6]), fib_id) + if fib_id != 0: + vss = pkt[DHCP6OptVSS] + self.assertEqual(vss.optlen, 8) + self.assertEqual(vss.type, 1) + # the OUI and FIB-id are really 3 and 4 bytes resp. + # but the tested range is small + self.assertEqual(ord(vss.data[0]), 0) + self.assertEqual(ord(vss.data[1]), 0) + self.assertEqual(ord(vss.data[2]), oui) + self.assertEqual(ord(vss.data[3]), 0) + self.assertEqual(ord(vss.data[4]), 0) + self.assertEqual(ord(vss.data[5]), 0) + self.assertEqual(ord(vss.data[6]), fib_id) # the relay message should be an encoded Solicit msg = pkt[DHCP6OptRelayMsg] @@ -267,29 +284,16 @@ class TestDHCP(VppTestCase): rx_table_id=0) # - # Now a DHCP request on pg2, which is in the same VRF - # as the DHCP config, will result in a relayed DHCP - # message to the [fake] server - # - self.pg2.add_stream(pkts_disc_vrf0) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - - rx = self.pg0.get_capture(1) - rx = rx[0] - - # - # Rx'd packet should be to the server address and from the configured - # source address - # UDP source ports are unchanged - # we've no option 82 config so that should be absent + # Discover packets from the client are dropped because there is no + # IP address configured on the client facing interface # - 
self.verify_dhcp_discover(rx, self.pg0, option_82_present=False) + self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0, + "Discover DHCP no relay address") # # Inject a response from the server - # VPP will only relay the offer if option 82 is present. - # so this one is dropped + # dropped, because there is no IP addrees on the + # clinet interfce to fill in the option. # p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) / @@ -298,24 +302,8 @@ class TestDHCP(VppTestCase): DHCP(options=[('message-type', 'offer'), ('end')])) pkts = [p] - self.send_and_assert_no_replies(self.pg0, pkts, - "DHCP offer no option 82") - - # - # Configure sending option 82 in relayed messages - # - self.vapi.dhcp_proxy_config(server_addr, - src_addr, - rx_table_id=0, - insert_circuit_id=1) - - # - # Send a request: - # again dropped, but ths time because there is no IP addrees on the - # clinet interfce to fill in the option. - # - self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0, - "DHCP no relay address") + self.send_and_assert_no_replies(self.pg2, pkts, + "Offer DHCP no relay address") # # configure an IP address on the client facing interface @@ -376,15 +364,8 @@ class TestDHCP(VppTestCase): ('relay_agent_Information', bad_ip), ('end')])) pkts = [p] - - self.pg0.add_stream(pkts) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - rx = self.pg2.get_capture(1) - rx = rx[0] - - self.verify_dhcp_offer(rx, self.pg2, check_option_82=False) - self.pg0.assert_nothing_captured(remark="") + self.send_and_assert_no_replies(self.pg0, pkts, + "DHCP offer option 82 bad address") # 2. 
Not a sw_if_index VPP knows bad_if_index = option_82[0:2] + chr(33) + option_82[3:] @@ -413,8 +394,7 @@ class TestDHCP(VppTestCase): self.vapi.dhcp_proxy_config(server_addr, src_addr, rx_table_id=0, - is_add=0, - insert_circuit_id=1) + is_add=0) self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0, "DHCP config removed VRF 0") @@ -429,8 +409,7 @@ class TestDHCP(VppTestCase): self.vapi.dhcp_proxy_config(server_addr, src_addr, rx_table_id=1, - server_table_id=1, - insert_circuit_id=1) + server_table_id=1) # # Confim DHCP requests ok in VRF 1. @@ -452,14 +431,41 @@ class TestDHCP(VppTestCase): rx = rx[0] self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3) + # + # Add VSS config + # table=1, fib=id=1, oui=4 + self.vapi.dhcp_proxy_set_vss(1, 1, 4) + + self.pg3.add_stream(pkts_disc_vrf1) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(1) + rx = rx[0] + self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3, + fib_id=1, oui=4) + + # + # Remove the VSS config + # relayed DHCP has default vlaues in the option. 
+ # + self.vapi.dhcp_proxy_set_vss(1, 1, 4, is_add=0) + + self.pg3.add_stream(pkts_disc_vrf1) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(1) + rx = rx[0] + self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3) + # # remove DHCP config to cleanup # self.vapi.dhcp_proxy_config(server_addr, src_addr, rx_table_id=1, - server_table_id=1, - insert_circuit_id=1, + server_table_id=11, is_add=0) self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0, @@ -510,7 +516,6 @@ class TestDHCP(VppTestCase): src_addr_vrf0, rx_table_id=0, server_table_id=0, - insert_circuit_id=1, is_ipv6=1) self.send_and_assert_no_replies(self.pg2, pkts_solicit_vrf0, @@ -630,7 +635,6 @@ class TestDHCP(VppTestCase): src_addr_vrf1, rx_table_id=1, server_table_id=1, - insert_circuit_id=1, is_ipv6=1) self.pg3.config_ip6() @@ -708,14 +712,12 @@ class TestDHCP(VppTestCase): src_addr_vrf1, rx_table_id=1, server_table_id=1, - insert_circuit_id=1, is_ipv6=1, is_add=0) self.vapi.dhcp_proxy_config(server_addr_vrf1, src_addr_vrf1, rx_table_id=0, server_table_id=0, - insert_circuit_id=1, is_ipv6=1, is_add=0) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 32680424..59e58ad0 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -1240,16 +1240,14 @@ class VppPapiProvider(object): rx_table_id=0, server_table_id=0, is_add=1, - is_ipv6=0, - insert_circuit_id=0): + is_ipv6=0): return self.api( - self.papi.dhcp_proxy_config_2, + self.papi.dhcp_proxy_config, { 'rx_vrf_id': rx_table_id, 'server_vrf_id': server_table_id, 'is_ipv6': is_ipv6, 'is_add': is_add, - 'insert_circuit_id': insert_circuit_id, 'dhcp_server': dhcp_server, 'dhcp_src_address': dhcp_src_address, }) -- cgit 1.2.3-korg From 2dd6852d8109e39d15a5c60f7ba58f1abcf9e455 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Thu, 16 Feb 2017 03:38:59 -0800 Subject: Consolidate DHCP v4 and V6 implementation. 
No functional change intended. The DHCP proxy and VSS information maintained by VPP is the same for v4 and v6, so we can manage this state using the same code. Packet handling is clearly different, so this is kept separate. Change-Id: I10f10cc1f7f19debcd4c4b099c6de64e56bb0c69 Signed-off-by: Neale Ranns --- src/vnet.am | 26 +- src/vnet/dhcp/client.c | 2 +- src/vnet/dhcp/client.h | 2 +- src/vnet/dhcp/dhcp4_packet.h | 61 ++ src/vnet/dhcp/dhcp4_proxy_error.def | 32 + src/vnet/dhcp/dhcp4_proxy_node.c | 983 +++++++++++++++++++++++++++ src/vnet/dhcp/dhcp6_packet.h | 183 +++++ src/vnet/dhcp/dhcp6_proxy_error.def | 29 + src/vnet/dhcp/dhcp6_proxy_node.c | 1065 +++++++++++++++++++++++++++++ src/vnet/dhcp/dhcp_api.c | 95 ++- src/vnet/dhcp/dhcp_proxy.c | 275 ++++++++ src/vnet/dhcp/dhcp_proxy.h | 248 +++++++ src/vnet/dhcp/packet.h | 61 -- src/vnet/dhcp/proxy.h | 99 --- src/vnet/dhcp/proxy_error.def | 31 - src/vnet/dhcp/proxy_node.c | 1192 -------------------------------- src/vnet/dhcpv6/packet.h | 183 ----- src/vnet/dhcpv6/proxy.h | 94 --- src/vnet/dhcpv6/proxy_error.def | 29 - src/vnet/dhcpv6/proxy_node.c | 1280 ----------------------------------- src/vpp/api/custom_dump.c | 3 +- test/test_dhcp.py | 2 +- 22 files changed, 2933 insertions(+), 3042 deletions(-) create mode 100644 src/vnet/dhcp/dhcp4_packet.h create mode 100644 src/vnet/dhcp/dhcp4_proxy_error.def create mode 100644 src/vnet/dhcp/dhcp4_proxy_node.c create mode 100644 src/vnet/dhcp/dhcp6_packet.h create mode 100644 src/vnet/dhcp/dhcp6_proxy_error.def create mode 100644 src/vnet/dhcp/dhcp6_proxy_node.c create mode 100644 src/vnet/dhcp/dhcp_proxy.c create mode 100644 src/vnet/dhcp/dhcp_proxy.h delete mode 100644 src/vnet/dhcp/packet.h delete mode 100644 src/vnet/dhcp/proxy.h delete mode 100644 src/vnet/dhcp/proxy_error.def delete mode 100644 src/vnet/dhcp/proxy_node.c delete mode 100644 src/vnet/dhcpv6/packet.h delete mode 100644 src/vnet/dhcpv6/proxy.h delete mode 100644 src/vnet/dhcpv6/proxy_error.def delete mode
100644 src/vnet/dhcpv6/proxy_node.c (limited to 'src/vnet/dhcp') diff --git a/src/vnet.am b/src/vnet.am index 70f1e7e9..64484e18 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -674,7 +674,7 @@ libvnet_la_SOURCES += \ vnet/dhcp/dhcp_api.c nobase_include_HEADERS += \ - vnet/dhcp/client.h \ + vnet/dhcp/client.h \ vnet/dhcp/dhcp.api.h API_FILES += vnet/dhcp/dhcp.api @@ -683,13 +683,16 @@ API_FILES += vnet/dhcp/dhcp.api # DHCP proxy ######################################## libvnet_la_SOURCES += \ - vnet/dhcp/proxy_node.c \ - vnet/dhcp/proxy.h + vnet/dhcp/dhcp6_proxy_node.c \ + vnet/dhcp/dhcp4_proxy_node.c \ + vnet/dhcp/dhcp_proxy.c nobase_include_HEADERS += \ - vnet/dhcp/packet.h \ - vnet/dhcp/proxy.h \ - vnet/dhcp/proxy_error.def + vnet/dhcp/dhcp4_packet.h \ + vnet/dhcp/dhcp6_packet.h \ + vnet/dhcp/dhcp_proxy.h \ + vnet/dhcp/dhcp6_proxy_error.def \ + vnet/dhcp/dhcp4_proxy_error.def ######################################## # ipv6 segment routing @@ -709,17 +712,6 @@ nobase_include_HEADERS += \ API_FILES += vnet/sr/sr.api -######################################## -# DHCPv6 proxy -######################################## -libvnet_la_SOURCES += \ - vnet/dhcpv6/proxy_node.c - -nobase_include_HEADERS += \ - vnet/dhcpv6/packet.h \ - vnet/dhcpv6/proxy.h \ - vnet/dhcpv6/proxy_error.def - ######################################## # IPFIX / netflow v10 ######################################## diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c index 8a1a43b3..d34c5a64 100644 --- a/src/vnet/dhcp/client.c +++ b/src/vnet/dhcp/client.c @@ -14,7 +14,7 @@ */ #include #include -#include +#include #include dhcp_client_main_t dhcp_client_main; diff --git a/src/vnet/dhcp/client.h b/src/vnet/dhcp/client.h index a74368cb..1f85d7ce 100644 --- a/src/vnet/dhcp/client.h +++ b/src/vnet/dhcp/client.h @@ -20,7 +20,7 @@ #define included_dhcp_client_h #include -#include +#include #define foreach_dhcp_client_state \ _(DHCP_DISCOVER) \ diff --git a/src/vnet/dhcp/dhcp4_packet.h 
b/src/vnet/dhcp/dhcp4_packet.h new file mode 100644 index 00000000..28c4b156 --- /dev/null +++ b/src/vnet/dhcp/dhcp4_packet.h @@ -0,0 +1,61 @@ +#ifndef included_vnet_dhcp4_packet_h +#define included_vnet_dhcp4_packet_h + +/* + * DHCP packet format + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +typedef struct { + u8 opcode; /* 1 = request, 2 = reply */ + u8 hardware_type; /* 1 = ethernet */ + u8 hardware_address_length; + u8 hops; + u32 transaction_identifier; + u16 seconds; + u16 flags; +#define DHCP_FLAG_BROADCAST (1<<15) + ip4_address_t client_ip_address; + ip4_address_t your_ip_address; /* use this one */ + ip4_address_t server_ip_address; + ip4_address_t gateway_ip_address; /* use option 3, not this one */ + u8 client_hardware_address[16]; + u8 server_name[64]; + u8 boot_filename[128]; + ip4_address_t magic_cookie; + u8 options[0]; +} dhcp_header_t; + +typedef struct { + u8 option; + u8 length; + union { + u8 data[0]; + u32 data_as_u32[0]; + }; +} __attribute__((packed)) dhcp_option_t; + +typedef enum { + DHCP_PACKET_DISCOVER=1, + DHCP_PACKET_OFFER, + DHCP_PACKET_REQUEST, + DHCP_PACKET_ACK=5, +} dhcp_packet_type_t; + +/* charming antique: 99.130.83.99 is the dhcp magic cookie */ +#define DHCP_MAGIC (clib_host_to_net_u32(0x63825363)) + +#endif /* included_vnet_dhcp4_packet_h */ diff --git a/src/vnet/dhcp/dhcp4_proxy_error.def b/src/vnet/dhcp/dhcp4_proxy_error.def new file mode 100644 
index 00000000..adf04808 --- /dev/null +++ b/src/vnet/dhcp/dhcp4_proxy_error.def @@ -0,0 +1,32 @@ +/* + * dhcp_proxy_error.def: dhcp proxy errors + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +dhcp_proxy_error (NONE, "no error") +dhcp_proxy_error (NO_SERVER, "no dhcp server configured") +dhcp_proxy_error (RELAY_TO_SERVER, "DHCP packets relayed to the server") +dhcp_proxy_error (RELAY_TO_CLIENT, "DHCP packets relayed to clients") +dhcp_proxy_error (OPTION_82_ERROR, "DHCP failed to insert option 82") +dhcp_proxy_error (NO_OPTION_82, "DHCP option 82 missing") +dhcp_proxy_error (BAD_OPTION_82_ITF, "Bad DHCP option 82 interface value") +dhcp_proxy_error (BAD_OPTION_82_ADDR, "Bad DHCP option 82 address value") +dhcp_proxy_error (BAD_FIB_ID, "DHCP option 82 fib-id to fib-index map failure") +dhcp_proxy_error (NO_INTERFACE_ADDRESS, "DHCP no interface address") +dhcp_proxy_error (OPTION_82_VSS_NOT_PROCESSED, "DHCP VSS not processed by DHCP server") +dhcp_proxy_error (BAD_YIADDR, "DHCP packets with bad your_ip_address fields") +dhcp_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCP packets not from DHCP server or server FIB.") +dhcp_proxy_error (PKT_TOO_BIG, "DHCP packets which are too big.") + diff --git a/src/vnet/dhcp/dhcp4_proxy_node.c b/src/vnet/dhcp/dhcp4_proxy_node.c new file mode 100644 index 00000000..88a99249 --- /dev/null +++ b/src/vnet/dhcp/dhcp4_proxy_node.c @@ -0,0 +1,983 @@ +/* + * proxy_node.c: 
dhcp proxy node processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +static char * dhcp_proxy_error_strings[] = { +#define dhcp_proxy_error(n,s) s, +#include +#undef dhcp_proxy_error +}; + +#define foreach_dhcp_proxy_to_server_input_next \ + _ (DROP, "error-drop") \ + _ (LOOKUP, "ip4-lookup") \ + _ (SEND_TO_CLIENT, "dhcp-proxy-to-client") + +typedef enum { +#define _(s,n) DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s, + foreach_dhcp_proxy_to_server_input_next +#undef _ + DHCP_PROXY_TO_SERVER_INPUT_N_NEXT, +} dhcp_proxy_to_server_input_next_t; + +typedef struct { + /* 0 => to server, 1 => to client */ + int which; + ip4_address_t trace_ip4_address; + u32 error; + u32 sw_if_index; + u32 original_sw_if_index; +} dhcp_proxy_trace_t; + +#define VPP_DHCP_OPTION82_SUB1_SIZE 6 +#define VPP_DHCP_OPTION82_SUB5_SIZE 6 +#define VPP_DHCP_OPTION82_VSS_SIZE 12 +#define VPP_DHCP_OPTION82_SIZE (VPP_DHCP_OPTION82_SUB1_SIZE + \ + VPP_DHCP_OPTION82_SUB5_SIZE + \ + VPP_DHCP_OPTION82_VSS_SIZE +3) + +static vlib_node_registration_t dhcp_proxy_to_server_node; +static vlib_node_registration_t dhcp_proxy_to_client_node; + +static u8 * +format_dhcp_proxy_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + dhcp_proxy_trace_t * t = va_arg (*args, 
dhcp_proxy_trace_t *); + + if (t->which == 0) + s = format (s, "DHCP proxy: sent to server %U\n", + format_ip4_address, &t->trace_ip4_address, t->error); + else + s = format (s, "DHCP proxy: broadcast to client from %U\n", + format_ip4_address, &t->trace_ip4_address); + + if (t->error != (u32)~0) + s = format (s, " error: %s\n", dhcp_proxy_error_strings[t->error]); + + s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n", + t->original_sw_if_index, t->sw_if_index); + + return s; +} + +static u8 * +format_dhcp_proxy_header_with_length (u8 * s, va_list * args) +{ + dhcp_header_t * h = va_arg (*args, dhcp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "dhcp header truncated"); + + s = format (s, "DHCP Proxy"); + + return s; +} + +static uword +dhcp_proxy_to_server_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + u32 pkts_to_server=0, pkts_to_client=0, pkts_no_server=0; + u32 pkts_no_interface_address=0; + u32 pkts_too_big=0; + ip4_main_t * im = &ip4_main; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0; + dhcp_header_t * h0; + ip4_header_t * ip0; + u32 next0; + u32 old0, new0; + ip_csum_t sum0; + u32 error0 = (u32) ~0; + u32 sw_if_index = 0; + u32 original_sw_if_index = 0; + u8 *end = NULL; + u32 fib_index; + dhcp_server_t * server; + u32 rx_sw_if_index; + dhcp_option_t *o; + u32 len = 0; + vlib_buffer_free_list_t *fl; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; 
+ to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = vlib_buffer_get_current (b0); + + /* + * udp_local hands us the DHCP header, need udp hdr, + * ip hdr to relay to server + */ + vlib_buffer_advance (b0, -(sizeof(*u0))); + u0 = vlib_buffer_get_current (b0); + + /* This blows. Return traffic has src_port = 67, dst_port = 67 */ + if (u0->src_port == clib_net_to_host_u16(UDP_DST_PORT_dhcp_to_server)) + { + vlib_buffer_advance (b0, sizeof(*u0)); + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT; + error0 = 0; + pkts_to_client++; + goto do_enqueue; + } + + rx_sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + fib_index = im->fib_index_by_sw_if_index [rx_sw_if_index]; + server = dhcp_get_server(dpm, fib_index, FIB_PROTOCOL_IP4); + + if (PREDICT_FALSE (NULL == server)) + { + error0 = DHCP_PROXY_ERROR_NO_SERVER; + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_server++; + goto do_trace; + } + + vlib_buffer_advance (b0, -(sizeof(*ip0))); + ip0 = vlib_buffer_get_current (b0); + + /* disable UDP checksum */ + u0->checksum = 0; + sum0 = ip0->checksum; + old0 = ip0->dst_address.as_u32; + new0 = server->dhcp_server.ip4.as_u32; + ip0->dst_address.as_u32 = server->dhcp_server.ip4.as_u32; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + sum0 = ip0->checksum; + old0 = ip0->src_address.as_u32; + new0 = server->dhcp_src_address.ip4.as_u32; + ip0->src_address.as_u32 = new0; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + /* Send to DHCP server via the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + server->server_fib_index; + + h0->gateway_ip_address.as_u32 = server->dhcp_src_address.ip4.as_u32; + pkts_to_server++; + + o = (dhcp_option_t *) h0->options; + + fib_index = 
im->fib_index_by_sw_if_index + [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; + + end = b0->data + b0->current_data + b0->current_length; + /* TLVs are not performance-friendly... */ + while (o->option != 0xFF /* end of options */ && (u8 *)o < end) + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + + fl = vlib_buffer_get_free_list (vm, b0->free_list_index); + // start write at (option*)o, some packets have padding + if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes) + { + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_too_big++; + goto do_trace; + } + + if ((o->option == 0xFF) && ((u8 *)o <= end)) + { + vnet_main_t *vnm = vnet_get_main(); + u16 old_l0, new_l0; + ip4_address_t _ia0, * ia0 = &_ia0; + dhcp_vss_t *vss; + vnet_sw_interface_t *swif; + sw_if_index = 0; + original_sw_if_index = 0; + + original_sw_if_index = sw_if_index = + vnet_buffer(b0)->sw_if_index[VLIB_RX]; + swif = vnet_get_sw_interface (vnm, sw_if_index); + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + /* + * Get the first ip4 address on the [client-side] + * RX interface, if not unnumbered. otherwise use + * the loopback interface's ip address. 
+ */ + ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0); + + if (ia0 == 0) + { + error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_interface_address++; + goto do_trace; + } + + /* Add option 82 */ + o->option = 82; /* option 82 */ + o->length = 12; /* 12 octets to follow */ + o->data[0] = 1; /* suboption 1, circuit ID (=FIB id) */ + o->data[1] = 4; /* length of suboption */ + o->data[2] = (original_sw_if_index >> 24) & 0xFF; + o->data[3] = (original_sw_if_index >> 16) & 0xFF; + o->data[4] = (original_sw_if_index >> 8) & 0xFF; + o->data[5] = (original_sw_if_index >> 0) & 0xFF; + o->data[6] = 5; /* suboption 5 (client RX intfc address) */ + o->data[7] = 4; /* length 4 */ + o->data[8] = ia0->as_u8[0]; + o->data[9] = ia0->as_u8[1]; + o->data[10] = ia0->as_u8[2]; + o->data[11] = ia0->as_u8[3]; + o->data[12] = 0xFF; + + vss = dhcp_get_vss_info (dpm, fib_index, FIB_PROTOCOL_IP4); + if (NULL != vss) + { + u32 opt82_fib_id=0, opt82_oui=0; + + opt82_oui = vss->oui; + opt82_fib_id = vss->fib_id; + + o->data[12] = 151; /* vss suboption */ + if (255 == opt82_fib_id) { + o->data[13] = 1; /* length */ + o->data[14] = 255; /* vss option type */ + o->data[15] = 152; /* vss control suboption */ + o->data[16] = 0; /* length */ + /* and a new "end-of-options" option (0xff) */ + o->data[17] = 0xFF; + o->length += 5; + } else { + o->data[13] = 8; /* length */ + o->data[14] = 1; /* vss option type */ + o->data[15] = (opt82_oui >> 16) & 0xff; + o->data[16] = (opt82_oui >> 8) & 0xff; + o->data[17] = (opt82_oui ) & 0xff; + o->data[18] = (opt82_fib_id >> 24) & 0xff; + o->data[19] = (opt82_fib_id >> 16) & 0xff; + o->data[20] = (opt82_fib_id >> 8) & 0xff; + o->data[21] = (opt82_fib_id) & 0xff; + o->data[22] = 152; /* vss control suboption */ + o->data[23] = 0; /* length */ + + /* and a new "end-of-options" option (0xff) */ + o->data[24] = 0xFF; + o->length += 12; + } + } + + len = o->length + 3; + b0->current_length += len; + 
/* Fix IP header length and checksum */ + old_l0 = ip0->length; + new_l0 = clib_net_to_host_u16 (old_l0); + new_l0 += len; + new_l0 = clib_host_to_net_u16 (new_l0); + ip0->length = new_l0; + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + /* Fix UDP length */ + new_l0 = clib_net_to_host_u16 (u0->length); + new_l0 += len; + u0->length = clib_host_to_net_u16 (new_l0); + } else { + vlib_node_increment_counter + (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_OPTION_82_ERROR, 1); + } + + next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 0; /* to server */ + tr->error = error0; + tr->original_sw_if_index = original_sw_if_index; + tr->sw_if_index = sw_if_index; + if (next0 == DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP) + tr->trace_ip4_address.as_u32 = server->dhcp_server.ip4.as_u32; + } + + do_enqueue: + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_RELAY_TO_CLIENT, + pkts_to_client); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_RELAY_TO_SERVER, + pkts_to_server); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_NO_SERVER, + pkts_no_server); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS, + pkts_no_interface_address); + vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, + DHCP_PROXY_ERROR_PKT_TOO_BIG, + pkts_too_big); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dhcp_proxy_to_server_node, static) = { + .function = 
dhcp_proxy_to_server_input, + .name = "dhcp-proxy-to-server", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = DHCP_PROXY_N_ERROR, + .error_strings = dhcp_proxy_error_strings, + + .n_next_nodes = DHCP_PROXY_TO_SERVER_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s] = n, + foreach_dhcp_proxy_to_server_input_next +#undef _ + }, + + .format_buffer = format_dhcp_proxy_header_with_length, + .format_trace = format_dhcp_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcp_proxy_header, +#endif +}; + +static uword +dhcp_proxy_to_client_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, * from; + ethernet_main_t *em = ethernet_get_main (vm); + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im = &ip4_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0; + dhcp_header_t * h0; + ip4_header_t * ip0 = 0; + ip4_address_t * ia0 = 0; + u32 old0, new0; + ip_csum_t sum0; + ethernet_interface_t *ei0; + ethernet_header_t *mac0; + vnet_hw_interface_t *hi0; + vlib_frame_t *f0; + u32 * to_next0; + u32 sw_if_index = ~0; + vnet_sw_interface_t *si0; + u32 error0 = (u32)~0; + vnet_sw_interface_t *swif; + u32 fib_index; + dhcp_server_t * server; + u32 original_sw_if_index = (u32) ~0; + ip4_address_t relay_addr = { + .as_u32 = 0, + }; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + /* + * udp_local hands us the DHCP header, need udp hdr, + * ip hdr to relay to client + */ + vlib_buffer_advance (b0, -(sizeof(*u0))); + u0 = vlib_buffer_get_current (b0); + + vlib_buffer_advance (b0, -(sizeof(*ip0))); + ip0 = vlib_buffer_get_current (b0); + + /* Consumed by dhcp client code? 
*/ + if (dhcp_client_for_us (bi0, b0, ip0, u0, h0)) + continue; + + if (1 /* dpm->insert_option_82 */) + { + dhcp_option_t *o = (dhcp_option_t *) h0->options; + dhcp_option_t *sub; + + /* Parse through TLVs looking for option 82. + The circuit-ID is the FIB number we need + to track down the client-facing interface */ + + while (o->option != 0xFF /* end of options */ && + (u8 *) o < (b0->data + b0->current_data + b0->current_length)) + { + if (o->option == 82) + { + u32 vss_exist = 0; + u32 vss_ctrl = 0; + sub = (dhcp_option_t *) &o->data[0]; + while (sub->option != 0xFF /* end of options */ && + (u8 *) sub < (u8 *)(o + o->length)) { + /* If this is one of ours, it will have + total length 12, circuit-id suboption type, + and the sw_if_index */ + if (sub->option == 1 && sub->length == 4) + { + sw_if_index = ((sub->data[0] << 24) | + (sub->data[1] << 16) | + (sub->data[2] << 8) | + (sub->data[3])); + } + else if (sub->option == 5 && sub->length == 4) + { + relay_addr.as_u8[0] = sub->data[0]; + relay_addr.as_u8[1] = sub->data[1]; + relay_addr.as_u8[2] = sub->data[2]; + relay_addr.as_u8[3] = sub->data[3]; + } + else if (sub->option == 151 && + sub->length == 7 && + sub->data[0] == 1) + vss_exist = 1; + else if (sub->option == 152 && sub->length == 0) + vss_ctrl = 1; + sub = (dhcp_option_t *) + (((uword) sub) + (sub->length + 2)); + } + if (vss_ctrl && vss_exist) + vlib_node_increment_counter + (vm, dhcp_proxy_to_client_node.index, + DHCP_PROXY_ERROR_OPTION_82_VSS_NOT_PROCESSED, 1); + + } + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + } + } + + if (sw_if_index == (u32)~0) + { + error0 = DHCP_PROXY_ERROR_NO_OPTION_82; + + drop_packet: + vlib_node_increment_counter (vm, dhcp_proxy_to_client_node.index, + error0, 1); + f0 = vlib_get_frame_to_node (vm, dpm->error_drop_node_index); + to_next0 = vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, dpm->error_drop_node_index, f0); + goto do_trace; + } + + if 
(relay_addr.as_u32 == 0) + { + error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ADDR; + goto drop_packet; + } + + if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index)) + { + error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ITF; + goto drop_packet; + } + + fib_index = im->fib_index_by_sw_if_index [sw_if_index]; + server = dhcp_get_server(dpm, fib_index, FIB_PROTOCOL_IP4); + + if (PREDICT_FALSE (NULL == server)) + { + error0 = DHCP_PROXY_ERROR_NO_SERVER; + goto drop_packet; + } + + if (ip0->src_address.as_u32 != server->dhcp_server.ip4.as_u32) + { + error0 = DHCP_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; + goto drop_packet; + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index; + + swif = vnet_get_sw_interface (vnm, sw_if_index); + original_sw_if_index = sw_if_index; + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + ia0 = ip4_interface_first_address (&ip4_main, sw_if_index, 0); + if (ia0 == 0) + { + error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; + goto drop_packet; + } + + if (relay_addr.as_u32 != ia0->as_u32) + { + error0 = DHCP_PROXY_ERROR_BAD_YIADDR; + goto drop_packet; + } + + u0->checksum = 0; + u0->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcp_to_client); + sum0 = ip0->checksum; + old0 = ip0->dst_address.as_u32; + new0 = 0xFFFFFFFF; + ip0->dst_address.as_u32 = new0; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + dst_address /* offset of changed member */); + ip0->checksum = ip_csum_fold (sum0); + + sum0 = ip0->checksum; + old0 = ip0->src_address.as_u32; + new0 = ia0->as_u32; + ip0->src_address.as_u32 = new0; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + src_address /* offset of changed member */); + ip0->checksum = ip_csum_fold (sum0); + + vlib_buffer_advance (b0, -(sizeof(ethernet_header_t))); + si0 = vnet_get_sw_interface (vnm, original_sw_if_index); + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + vlib_buffer_advance (b0, -4 /* space for VLAN tag 
*/); + + mac0 = vlib_buffer_get_current (b0); + + hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index); + ei0 = pool_elt_at_index (em->interfaces, hi0->hw_instance); + clib_memcpy (mac0->src_address, ei0->address, sizeof (ei0->address)); + memset (mac0->dst_address, 0xff, sizeof (mac0->dst_address)); + mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ? + clib_net_to_host_u16(0x8100) : clib_net_to_host_u16 (0x0800); + + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + { + u32 * vlan_tag = (u32 *)(mac0+1); + u32 tmp; + tmp = (si0->sub.id << 16) | 0x0800; + *vlan_tag = clib_host_to_net_u32 (tmp); + } + + /* $$$ This needs to be rewritten, for sure */ + f0 = vlib_get_frame_to_node (vm, hi0->output_node_index); + to_next0 = vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, hi0->output_node_index, f0); + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 1; /* to client */ + tr->trace_ip4_address.as_u32 = ia0 ? ia0->as_u32 : 0; + tr->error = error0; + tr->original_sw_if_index = original_sw_if_index; + tr->sw_if_index = sw_if_index; + } + } + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dhcp_proxy_to_client_node, static) = { + .function = dhcp_proxy_to_client_input, + .name = "dhcp-proxy-to-client", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .n_errors = DHCP_PROXY_N_ERROR, + .error_strings = dhcp_proxy_error_strings, + .format_buffer = format_dhcp_proxy_header_with_length, + .format_trace = format_dhcp_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcp_proxy_header, +#endif +}; + +static clib_error_t * +dhcp4_proxy_init (vlib_main_t * vm) +{ + dhcp_proxy_main_t * dm = &dhcp_proxy_main; + vlib_node_t * error_drop_node; + + error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); + dm->error_drop_node_index = error_drop_node->index; + + udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_client, + dhcp_proxy_to_client_node.index, 1 /* is_ip4 */); + + udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_server, + dhcp_proxy_to_server_node.index, 1 /* is_ip4 */); + + return 0; +} + + +VLIB_INIT_FUNCTION (dhcp4_proxy_init); + +int +dhcp4_proxy_set_server (ip46_address_t *addr, + ip46_address_t *src_addr, + u32 rx_table_id, + u32 server_table_id, + int is_del) +{ + u32 rx_fib_index = 0; + int rc = 0; + + const fib_prefix_t all_1s = + { + .fp_len = 32, + .fp_addr.ip4.as_u32 = 0xffffffff, + .fp_proto = FIB_PROTOCOL_IP4, + }; + + if (ip46_address_is_zero(addr)) + return VNET_API_ERROR_INVALID_DST_ADDRESS; + + if (ip46_address_is_zero(src_addr)) + return VNET_API_ERROR_INVALID_SRC_ADDRESS; + + rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, + rx_table_id); + + if (is_del) + { + rc = dhcp_proxy_server_del (FIB_PROTOCOL_IP4, rx_fib_index); + + if (0 == rc) + { + fib_table_entry_special_remove(rx_fib_index, + &all_1s, + FIB_SOURCE_DHCP); + fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4); + } + } + else + { + if (dhcp_proxy_server_add (FIB_PROTOCOL_IP4, + addr, src_addr, + rx_fib_index, server_table_id)) + { + fib_table_entry_special_add(rx_fib_index, + &all_1s, + FIB_SOURCE_DHCP, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + fib_table_lock (rx_fib_index, FIB_PROTOCOL_IP4); + } + } + fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4); + + return (rc); 
+} + +static clib_error_t * +dhcp4_proxy_set_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip46_address_t server_addr, src_addr; + u32 server_table_id = 0, rx_table_id = 0; + int is_del = 0; + int set_src = 0, set_server = 0; + + memset(&server_addr, 0, sizeof(server_addr)); + memset(&src_addr, 0, sizeof(src_addr)); + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "server %U", + unformat_ip4_address, &server_addr.ip4)) + set_server = 1; + else if (unformat (input, "server-fib-id %d", &server_table_id)) + ; + else if (unformat (input, "rx-fib-id %d", &rx_table_id)) + ; + else if (unformat(input, "src-address %U", + unformat_ip4_address, &src_addr.ip4)) + set_src = 1; + else if (unformat (input, "delete") || + unformat (input, "del")) + is_del = 1; + else + break; + } + + if (is_del || (set_server && set_src)) + { + int rv; + + rv = dhcp4_proxy_set_server (&server_addr, &src_addr, rx_table_id, + server_table_id, is_del); + switch (rv) + { + case 0: + return 0; + + case VNET_API_ERROR_INVALID_DST_ADDRESS: + return clib_error_return (0, "Invalid server address"); + + case VNET_API_ERROR_INVALID_SRC_ADDRESS: + return clib_error_return (0, "Invalid src address"); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return + (0, "Fib id %d: no per-fib DHCP server configured", rx_table_id); + + default: + return clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (dhcp_proxy_set_command, static) = { + .path = "set dhcp proxy", + .short_help = "set dhcp proxy [del] server src-address [server-fib-id ] [rx-fib-id ]", + .function = dhcp4_proxy_set_command_fn, +}; + +static u8 * +format_dhcp4_proxy_server (u8 * s, va_list * args) +{ + dhcp_server_t * server = va_arg (*args, dhcp_server_t *); + ip4_fib_t * rx_fib, * server_fib; + + if (server == 0) + { + s = format (s, 
"%=16s%=16s%=14s%=14s", "Server", "Src Address", + "Server FIB", "RX FIB"); + return s; + } + + server_fib = ip4_fib_get(server->server_fib_index); + rx_fib = ip4_fib_get(server->rx_fib_index); + + s = format (s, "%=16U%=16U%=14u%=14u", + format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY, + format_ip46_address, &server->dhcp_src_address, IP46_TYPE_ANY, + server_fib->table_id, + rx_fib->table_id); + return s; +} + +static int +dhcp4_proxy_show_walk (dhcp_server_t *server, + void *ctx) +{ + vlib_main_t * vm = ctx; + + vlib_cli_output (vm, "%U", format_dhcp4_proxy_server, server); + + return (1); +} + +static clib_error_t * +dhcp4_proxy_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_cli_output (vm, "%U", format_dhcp4_proxy_server, NULL /* header line */); + + dhcp_proxy_walk(FIB_PROTOCOL_IP4, dhcp4_proxy_show_walk, vm); + + return (NULL); +} + +VLIB_CLI_COMMAND (dhcp_proxy_show_command, static) = { + .path = "show dhcp proxy", + .short_help = "Display dhcp proxy server info", + .function = dhcp4_proxy_show_command_fn, +}; + +static clib_error_t * +dhcp_option_82_vss_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0, got_new_vpn_id=0; + u32 oui=0, fib_id=0, tbl_id=~0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat(input, "delete") || unformat(input, "del")) + is_del = 1; + else if (unformat (input, "oui %d", &oui)) + got_new_vpn_id = 1; + else if (unformat (input, "vpn-id %d", &fib_id)) + got_new_vpn_id = 1; + else if (unformat (input, "table %d", &tbl_id)) + got_new_vpn_id = 1; + else + break; + } + if (tbl_id == ~0) + return clib_error_return (0, "no table ID specified."); + + if (is_del || got_new_vpn_id) + { + int rv; + rv = dhcp_proxy_set_vss(FIB_PROTOCOL_IP4, tbl_id, oui, fib_id, is_del); + switch (rv) + { + case 0: + return 0; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "option 82 vss(oui:%d, 
vpn-id:%d) not found in table %d", + oui, fib_id, tbl_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "option 82 vss for table %d not found in in pool.", + tbl_id); + default: + return clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (dhcp_proxy_vss_command,static) = { + .path = "set dhcp option-82 vss", + .short_help = "set dhcp option-82 vss [del] table
oui vpn-id ", + .function = dhcp_option_82_vss_fn, +}; + +static clib_error_t * +dhcp_vss_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + dhcp_vss_walk(FIB_PROTOCOL_IP4, dhcp_vss_show_walk, vm); + + return (NULL); +} + +VLIB_CLI_COMMAND (dhcp_proxy_vss_show_command, static) = { + .path = "show dhcp vss", + .short_help = "show dhcp VSS", + .function = dhcp_vss_show_command_fn, +}; + +static clib_error_t * +dhcp_option_82_address_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + vnet_main_t *vnm = vnet_get_main(); + u32 sw_if_index0=0, sw_if_index; + vnet_sw_interface_t *swif; + ip4_address_t *ia0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat(input, "%U", + unformat_vnet_sw_interface, vnm, &sw_if_index0)) + { + swif = vnet_get_sw_interface (vnm, sw_if_index0); + sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ? + swif->unnumbered_sw_if_index : sw_if_index0; + ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0); + if (ia0) + { + vlib_cli_output (vm, "%=20s%=20s", "interface", + "source IP address"); + + vlib_cli_output (vm, "%=20U%=20U", + format_vnet_sw_if_index_name, + vnm, sw_if_index0, + format_ip4_address, ia0); + } + else + vlib_cli_output (vm, "%=34s %=20U", + "No IPv4 address configured on", + format_vnet_sw_if_index_name, + vnm, sw_if_index); + } + else + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (dhcp_proxy_address_show_command,static) = { + .path = "show dhcp option-82-address interface", + .short_help = "show dhcp option-82-address interface ", + .function = dhcp_option_82_address_show_command_fn, +}; diff --git a/src/vnet/dhcp/dhcp6_packet.h b/src/vnet/dhcp/dhcp6_packet.h new file mode 100644 index 00000000..ddcde7a0 --- /dev/null +++ b/src/vnet/dhcp/dhcp6_packet.h @@ -0,0 +1,183 @@ +#ifndef included_vnet_dhcp6_packet_h +#define included_vnet_dhcp6_packet_h + +/* + * DHCP packet format + 
* + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +// #define DHCP_VRF_NAME_MAX_LEN L3VM_MAX_NAME_STR_LEN +// #define DHCPV6_MAX_VRF_NAME_LEN L3VM_MAX_NAME_STR_LEN +#define DHCP_MAX_RELAY_ADDR 16 +#define PROTO_UDP 17 +#define DHCPV6_CLIENT_PORT 546 +#define DHCPV6_SERVER_PORT 547 +#define HOP_COUNT_LIMIT 32 +#define DHCPV6_CISCO_ENT_NUM 9 + +/* + * DHCPv6 message types + */ +typedef enum dhcpv6_msg_type_{ + DHCPV6_MSG_SOLICIT = 1, + DHCPV6_MSG_ADVERTISE = 2, + DHCPV6_MSG_REQUEST = 3, + DHCPV6_MSG_CONFIRM = 4, + DHCPV6_MSG_RENEW = 5, + DHCPV6_MSG_REBIND = 6, + DHCPV6_MSG_REPLY = 7, + DHCPV6_MSG_RELEASE = 8, + DHCPV6_MSG_DECLINE = 9, + DHCPV6_MSG_RECONFIGURE = 10, + DHCPV6_MSG_INFORMATION_REQUEST = 11, + DHCPV6_MSG_RELAY_FORW = 12, + DHCPV6_MSG_RELAY_REPL = 13, +} dhcpv6_msg_type_t; + +/* + * DHCPv6 options types + */ +enum { + DHCPV6_OPTION_CLIENTID = 1, + DHCPV6_OPTION_SERVERID = 2, + DHCPV6_OPTION_IA_NA = 3, + DHCPV6_OPTION_IA_TA = 4, + DHCPV6_OPTION_IAADDR = 5, + DHCPV6_OPTION_ORO = 6, + DHCPV6_OPTION_PREFERENCE = 7, + DHCPV6_OPTION_ELAPSED_TIME = 8, + DHCPV6_OPTION_RELAY_MSG = 9, + DHCPV6_OPTION_AUTH = 11, + DHCPV6_OPTION_UNICAST = 12, + DHCPV6_OPTION_STATUS_CODE = 13, + DHCPV6_OPTION_RAPID_COMMIT = 14, + DHCPV6_OPTION_USER_CLASS = 15, + DHCPV6_OPTION_VENDOR_CLASS = 16, + DHCPV6_OPTION_VENDOR_OPTS = 17, + DHCPV6_OPTION_INTERFACE_ID = 18, // relay agent fills this + 
DHCPV6_OPTION_RECONF_MSG = 19, + DHCPV6_OPTION_RECONF_ACCEPT = 20, + DHCPV6_OPTION_REMOTEID = 37, // relay agent fills this + DHCPV6_OPTION_VSS = 68, // relay agent fills this + DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS = 79, + DHCPV6_OPTION_MAX +}; + +/* +* DHCPv6 status codes + */ +enum { + DHCPV6_STATUS_SUCCESS = 0, + DHCPV6_STATUS_UNSPEC_FAIL = 1, + DHCPV6_STATUS_NOADDRS_AVAIL = 2, + DHCPV6_STATUS_NO_BINDING = 3, + DHCPV6_STATUS_NOT_ONLINK = 4, + DHCPV6_STATUS_USE_MULTICAST = 5, +}; + +/* + * DHCPv6 DUID types + */ +enum { + DHCPV6_DUID_LLT = 1, /* DUID Based on Link-layer Address Plus Time */ + DHCPV6_DUID_EN = 2, /* DUID Based on Enterprise Number */ + DHCPV6_DUID_LL = 3, /* DUID Based on Link-layer Address */ +}; + +//Structure for DHCPv6 payload from client +typedef struct dhcpv6_hdr_ { + union { + u8 msg_type; //DHCP msg type + u32 xid; // transaction id + }u; + u8 data[0]; +} dhcpv6_header_t; + + + +typedef CLIB_PACKED (struct dhcpv6_relay_ctx_ { + dhcpv6_header_t *pkt; + u32 pkt_len; + u32 dhcpv6_len; //DHCPv6 payload load +// if_ordinal iod; + u32 if_index; + u32 ctx_id; + char ctx_name[32+1]; + u8 dhcp_msg_type; +}) dhcpv6_relay_ctx_t; + +//Structure for DHCPv6 RELAY-FORWARD and DHCPv6 RELAY-REPLY pkts +typedef CLIB_PACKED (struct dhcpv6_relay_hdr_ { + u8 msg_type; + u8 hop_count; + ip6_address_t link_addr; + ip6_address_t peer_addr; + u8 data[0]; +}) dhcpv6_relay_hdr_t; + +typedef enum dhcp_stats_action_type_ { + DHCP_STATS_ACTION_FORWARDED=1, + DHCP_STATS_ACTION_RECEIVED, + DHCP_STATS_ACTION_DROPPED +} dhcp_stats_action_type_t; +//Generic counters for a packet +typedef struct dhcp_stats_counters_ { + u64 rx_pkts; //counter for received pkts + u64 tx_pkts; //counter for forwarded pkts + u64 drops; //counter for dropped pkts +} dhcp_stats_counters_t; + + +typedef enum dhcpv6_stats_drop_reason_ { + DHCPV6_RELAY_PKT_DROP_RELAYDISABLE = 1, + DHCPV6_RELAY_PKT_DROP_MAX_HOPS, + DHCPV6_RELAY_PKT_DROP_VALIDATION_FAIL, + DHCPV6_RELAY_PKT_DROP_UNKNOWN_OP_INTF, + 
DHCPV6_RELAY_PKT_DROP_BAD_CONTEXT, + DHCPV6_RELAY_PKT_DROP_OPT_INSERT_FAIL, + DHCPV6_RELAY_PKT_DROP_REPLY_FROM_CLIENT, +} dhcpv6_stats_drop_reason_t; + +typedef CLIB_PACKED (struct { + u16 option; + u16 length; + u8 data[0]; +}) dhcpv6_option_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u32 int_idx; +}) dhcpv6_int_id_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u8 data[8]; // data[0]:type, data[1..7]: VPN ID +}) dhcpv6_vss_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u32 ent_num; + u32 rmt_id; +}) dhcpv6_rmt_id_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u16 link_type; + u8 data[6]; // data[0]:data[5]: MAC address +}) dhcpv6_client_mac_t; + + +#endif /* included_vnet_dhcp6_packet_h */ diff --git a/src/vnet/dhcp/dhcp6_proxy_error.def b/src/vnet/dhcp/dhcp6_proxy_error.def new file mode 100644 index 00000000..55fa7317 --- /dev/null +++ b/src/vnet/dhcp/dhcp6_proxy_error.def @@ -0,0 +1,29 @@ +/* + * dhcp_proxy_error.def: dhcp proxy errors + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +dhcpv6_proxy_error (NONE, "no error") +dhcpv6_proxy_error (NO_SERVER, "no dhcpv6 server configured") +dhcpv6_proxy_error (RELAY_TO_SERVER, "DHCPV6 packets relayed to the server") +dhcpv6_proxy_error (RELAY_TO_CLIENT, "DHCPV6 packets relayed to clients") +dhcpv6_proxy_error (NO_INTERFACE_ADDRESS, "DHCPV6 no interface address") +dhcpv6_proxy_error (WRONG_MESSAGE_TYPE, "DHCPV6 wrong message type.") +dhcpv6_proxy_error (NO_SRC_ADDRESS, "DHCPV6 no srouce IPv6 address configured.") +dhcpv6_proxy_error (NO_CIRCUIT_ID_OPTION, "DHCPv6 reply packets without circuit ID option") +dhcpv6_proxy_error (NO_RELAY_MESSAGE_OPTION, "DHCPv6 reply packets without relay message option") +dhcpv6_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCPv6 packets not from DHCPv6 server or server FIB.") +dhcpv6_proxy_error (PKT_TOO_BIG, "DHCPv6 packets which are too big.") +dhcpv6_proxy_error (WRONG_INTERFACE_ID_OPTION, "DHCPv6 reply to invalid interface.") diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c new file mode 100644 index 00000000..ed44977d --- /dev/null +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -0,0 +1,1065 @@ +/* + * dhcp6_proxy_node.c: dhcpv6 proxy node processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +static char * dhcpv6_proxy_error_strings[] = { +#define dhcpv6_proxy_error(n,s) s, +#include +#undef dhcpv6_proxy_error +}; + +#define foreach_dhcpv6_proxy_to_server_input_next \ + _ (DROP, "error-drop") \ + _ (LOOKUP, "ip6-lookup") \ + _ (SEND_TO_CLIENT, "dhcpv6-proxy-to-client") + + +typedef enum { +#define _(s,n) DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s, + foreach_dhcpv6_proxy_to_server_input_next +#undef _ + DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT, +} dhcpv6_proxy_to_server_input_next_t; + +typedef struct { + /* 0 => to server, 1 => to client */ + int which; + u8 packet_data[64]; + u32 error; + u32 sw_if_index; + u32 original_sw_if_index; +} dhcpv6_proxy_trace_t; + +static vlib_node_registration_t dhcpv6_proxy_to_server_node; +static vlib_node_registration_t dhcpv6_proxy_to_client_node; + +/* all DHCP servers address */ +static ip6_address_t all_dhcpv6_server_address; +static ip6_address_t all_dhcpv6_server_relay_agent_address; + +static u8 * +format_dhcpv6_proxy_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + dhcpv6_proxy_trace_t * t = va_arg (*args, dhcpv6_proxy_trace_t *); + + if (t->which == 0) + s = format (s, "DHCPV6 proxy: sent to server %U", + format_ip6_address, &t->packet_data, sizeof (ip6_address_t)); + else + s = format (s, "DHCPV6 proxy: sent to client from %U", + format_ip6_address, &t->packet_data, sizeof (ip6_address_t)); + if (t->error != (u32)~0) + s = format (s, " error: %s\n", dhcpv6_proxy_error_strings[t->error]); + + s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n", + t->original_sw_if_index, t->sw_if_index); + + return s; +} + +static u8 * +format_dhcpv6_proxy_header_with_length (u8 * s, va_list * args) +{ + dhcpv6_header_t * h = va_arg (*args, dhcpv6_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 header_bytes; + + 
header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "dhcpv6 header truncated"); + + s = format (s, "DHCPV6 Proxy"); + + return s; +} +/* get first interface address */ +static ip6_address_t * +ip6_interface_first_global_or_site_address (ip6_main_t * im, u32 sw_if_index) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia = 0; + ip6_address_t * result = 0; + + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip6_address_t * a = ip_interface_address_get_address (lm, ia); + if ((a->as_u8[0] & 0xe0) == 0x20 || + (a->as_u8[0] & 0xfe) == 0xfc) { + result = a; + break; + } + })); + return result; +} + +static inline void copy_ip6_address (ip6_address_t *dst, + ip6_address_t *src) +{ + dst->as_u64[0] = src->as_u64[0]; + dst->as_u64[1] = src->as_u64[1]; +} + +static uword +dhcpv6_proxy_to_server_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + u32 pkts_to_server=0, pkts_to_client=0, pkts_no_server=0; + u32 pkts_no_interface_address=0, pkts_no_exceeding_max_hop=0; + u32 pkts_no_src_address=0; + u32 pkts_wrong_msg_type=0; + u32 pkts_too_big=0; + ip6_main_t * im = &ip6_main; + ip6_address_t * src; + int bogus_length; + dhcp_server_t * server; + u32 rx_fib_idx = 0, server_fib_idx = 0; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vnet_main_t *vnm = vnet_get_main(); + u32 sw_if_index = 0; + u32 rx_sw_if_index = 0; + vnet_sw_interface_t *swif; + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0, *u1; + dhcpv6_header_t * h0; // client msg hdr + ip6_header_t * ip0, 
*ip1; + ip6_address_t _ia0, *ia0=&_ia0; + u32 next0; + u32 error0 = (u32) ~0; + dhcpv6_option_t *fwd_opt; + dhcpv6_relay_hdr_t *r1; + u16 len; + dhcpv6_int_id_t *id1; + dhcpv6_vss_t *vss1; + dhcpv6_client_mac_t *cmac; // client mac + ethernet_header_t * e_h0; + u8 client_src_mac[6]; + vlib_buffer_free_list_t *fl; + dhcp_vss_t *vss; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = vlib_buffer_get_current (b0); + + /* + * udp_local hands us the DHCPV6 header. + */ + u0 = (void *)h0 -(sizeof(*u0)); + ip0 = (void *)u0 -(sizeof(*ip0)); + e_h0 = (void *)ip0 - ethernet_buffer_header_size(b0); + + clib_memcpy(client_src_mac, e_h0->src_address, 6); + + switch (h0->u.msg_type) { + case DHCPV6_MSG_SOLICIT: + case DHCPV6_MSG_REQUEST: + case DHCPV6_MSG_CONFIRM: + case DHCPV6_MSG_RENEW: + case DHCPV6_MSG_REBIND: + case DHCPV6_MSG_RELEASE: + case DHCPV6_MSG_DECLINE: + case DHCPV6_MSG_INFORMATION_REQUEST: + case DHCPV6_MSG_RELAY_FORW: + /* send to server */ + break; + case DHCPV6_MSG_RELAY_REPL: + /* send to client */ + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT; + error0 = 0; + pkts_to_client++; + goto do_enqueue; + default: + /* drop the packet */ + pkts_wrong_msg_type++; + error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + goto do_trace; + + } + + /* Send to DHCPV6 server via the configured FIB */ + rx_sw_if_index = sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_idx = im->fib_index_by_sw_if_index [rx_sw_if_index]; + server = dhcp_get_server(dpm, rx_fib_idx, FIB_PROTOCOL_IP6); + + if (PREDICT_FALSE (NULL == server)) + { + error0 = DHCPV6_PROXY_ERROR_NO_SERVER; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_server++; + goto do_trace; + } + + server_fib_idx = server->server_fib_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = server_fib_idx; + + + /* relay-option header pointer */ + 
vlib_buffer_advance(b0, -(sizeof(*fwd_opt))); + fwd_opt = vlib_buffer_get_current(b0); + /* relay message header pointer */ + vlib_buffer_advance(b0, -(sizeof(*r1))); + r1 = vlib_buffer_get_current(b0); + + vlib_buffer_advance(b0, -(sizeof(*u1))); + u1 = vlib_buffer_get_current(b0); + + vlib_buffer_advance(b0, -(sizeof(*ip1))); + ip1 = vlib_buffer_get_current(b0); + + /* fill in all that rubbish... */ + len = clib_net_to_host_u16(u0->length) - sizeof(udp_header_t); + copy_ip6_address(&r1->peer_addr, &ip0->src_address); + + r1->msg_type = DHCPV6_MSG_RELAY_FORW; + fwd_opt->length = clib_host_to_net_u16(len); + fwd_opt->option = clib_host_to_net_u16(DHCPV6_OPTION_RELAY_MSG); + + /* + * r1 points at header space that was only just prepended to the + * buffer, so r1->hop_count holds stale bytes and must be written + * before it is ever read. A client message starts at hop 0; when the + * received message is itself a Relay-forward, h0 points at that + * message's relay header and its hop count plus one is used, + * saturating at HOP_COUNT_LIMIT so the check below drops the packet + * instead of letting the u8 wrap around. + */ + r1->hop_count = 0; + if (h0->u.msg_type == DHCPV6_MSG_RELAY_FORW) + { + u8 rx_hop_count = ((dhcpv6_relay_hdr_t *) h0)->hop_count; + r1->hop_count = (rx_hop_count < HOP_COUNT_LIMIT) ? + rx_hop_count + 1 : HOP_COUNT_LIMIT; + } + + if (PREDICT_FALSE(r1->hop_count >= HOP_COUNT_LIMIT)) + { + /* NOTE(review): this constant comes from dhcpv6_stats_drop_reason_t, not from the dhcpv6_proxy_error table that the trace formatter indexes - confirm which is intended */ + error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_exceeding_max_hop++; + goto do_trace; + } + + + /* If relay-fwd and src address is site or global unicast address */ + if (h0->u.msg_type == DHCPV6_MSG_RELAY_FORW && + ((ip0->src_address.as_u8[0] & 0xe0) == 0x20 || + (ip0->src_address.as_u8[0] & 0xfe) == 0xfc)) + { + /* Set link address to zero */ + r1->link_addr.as_u64[0] = 0; + r1->link_addr.as_u64[1] = 0; + goto link_address_set; + } + + /* if receiving interface is unnumbered, use receiving interface + * IP address as link address, otherwise use the loopback interface + * IP address as link address. 
+ */ + + swif = vnet_get_sw_interface (vnm, rx_sw_if_index); + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + ia0 = ip6_interface_first_global_or_site_address(&ip6_main, sw_if_index); + if (ia0 == 0) + { + error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_interface_address++; + goto do_trace; + } + + copy_ip6_address(&r1->link_addr, ia0); + + link_address_set: + fl = vlib_buffer_get_free_list (vm, b0->free_list_index); + + if ((b0->current_length+sizeof(*id1)+sizeof(*vss1)+sizeof(*cmac)) + > fl->n_data_bytes) + { + error0 = DHCPV6_PROXY_ERROR_PKT_TOO_BIG; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_too_big++; + goto do_trace; + } + + id1 = (dhcpv6_int_id_t *) (((uword) ip1) + b0->current_length); + b0->current_length += (sizeof (*id1)); + + id1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_INTERFACE_ID); + id1->opt.length = clib_host_to_net_u16(sizeof(rx_sw_if_index)); + id1->int_idx = clib_host_to_net_u32(rx_sw_if_index); + + u1->length =0; + if (h0->u.msg_type != DHCPV6_MSG_RELAY_FORW) + { + cmac = (dhcpv6_client_mac_t *) (((uword) ip1) + b0->current_length); + b0->current_length += (sizeof (*cmac)); + cmac->opt.length =clib_host_to_net_u16(sizeof(*cmac) - + sizeof(cmac->opt)); + cmac->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS); + cmac->link_type = clib_host_to_net_u16(1); // ethernet + clib_memcpy(cmac->data, client_src_mac, 6); + u1->length += sizeof(*cmac); + } + + vss = dhcp_get_vss_info(dpm, rx_fib_idx, FIB_PROTOCOL_IP6); + + if (NULL != vss) { + vss1 = (dhcpv6_vss_t *) (((uword) ip1) + b0->current_length); + b0->current_length += (sizeof (*vss1)); + vss1->opt.length =clib_host_to_net_u16(sizeof(*vss1) - + sizeof(vss1->opt)); + vss1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_VSS); + vss1->data[0] = 1; // type + vss1->data[1] = vss->oui >>16 & 0xff; + vss1->data[2] = vss->oui >>8 & 0xff; + 
vss1->data[3] = vss->oui & 0xff; + vss1->data[4] = vss->fib_id >> 24 & 0xff; + vss1->data[5] = vss->fib_id >> 16 & 0xff; + vss1->data[6] = vss->fib_id >> 8 & 0xff; + vss1->data[7] = vss->fib_id & 0xff; + u1->length += sizeof(*vss1); + } + + pkts_to_server++; + u1->checksum = 0; + u1->src_port = clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_client); + u1->dst_port = clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_server); + + u1->length = + clib_host_to_net_u16( clib_net_to_host_u16(fwd_opt->length) + + sizeof(*r1) + sizeof(*fwd_opt) + + sizeof(*u1) + sizeof(*id1) + u1->length); + + memset(ip1, 0, sizeof(*ip1)); + ip1->ip_version_traffic_class_and_flow_label = 0x60; + ip1->payload_length = u1->length; + ip1->protocol = PROTO_UDP; + ip1->hop_limit = HOP_COUNT_LIMIT; + src = (server->dhcp_server.ip6.as_u64[0] || + server->dhcp_server.ip6.as_u64[1]) ? + &server->dhcp_server.ip6 : &all_dhcpv6_server_address; + copy_ip6_address(&ip1->dst_address, src); + + + ia0 = ip6_interface_first_global_or_site_address + (&ip6_main, vnet_buffer(b0)->sw_if_index[VLIB_RX]); + + src = (server->dhcp_src_address.ip6.as_u64[0] || + server->dhcp_src_address.ip6.as_u64[1]) ? 
+ &server->dhcp_src_address.ip6 : ia0; + if (ia0 == 0) + { + error0 = DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_src_address++; + goto do_trace; + } + + copy_ip6_address (&ip1->src_address, src); + + + u1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1, + &bogus_length); + ASSERT(bogus_length == 0); + + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 0; /* to server */ + tr->error = error0; + tr->original_sw_if_index = rx_sw_if_index; + tr->sw_if_index = sw_if_index; + if (DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP == next0) + copy_ip6_address((ip6_address_t *)&tr->packet_data[0], &server->dhcp_server.ip6); + } + + do_enqueue: + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_RELAY_TO_CLIENT, + pkts_to_client); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_RELAY_TO_SERVER, + pkts_to_server); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS, + pkts_no_interface_address); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE, + pkts_wrong_msg_type); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS, + pkts_no_src_address); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_PKT_TOO_BIG, + pkts_too_big); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dhcpv6_proxy_to_server_node, static) = { + .function = dhcpv6_proxy_to_server_input, + .name = "dhcpv6-proxy-to-server", + 
/* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = DHCPV6_PROXY_N_ERROR, + .error_strings = dhcpv6_proxy_error_strings, + + .n_next_nodes = DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s] = n, + foreach_dhcpv6_proxy_to_server_input_next +#undef _ + }, + + .format_buffer = format_dhcpv6_proxy_header_with_length, + .format_trace = format_dhcpv6_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcpv6_proxy_header, +#endif +}; + +static uword +dhcpv6_proxy_to_client_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + + u32 n_left_from, * from; + ethernet_main_t *em = ethernet_get_main (vm); + dhcp_proxy_main_t * dm = &dhcp_proxy_main; + dhcp_server_t * server; + vnet_main_t * vnm = vnet_get_main(); + int bogus_length; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0, *u1=0; + dhcpv6_relay_hdr_t * h0; + ip6_header_t * ip1 = 0, *ip0; + ip6_address_t _ia0, * ia0 = &_ia0; + ip6_address_t client_address; + ethernet_interface_t *ei0; + ethernet_header_t *mac0; + vnet_hw_interface_t *hi0; + vlib_frame_t *f0; + u32 * to_next0; + u32 sw_if_index = ~0; + u32 original_sw_if_index = ~0; + vnet_sw_interface_t *si0; + u32 error0 = (u32)~0; + vnet_sw_interface_t *swif; + dhcpv6_option_t *r0 = 0, *o; + u16 len = 0; + u8 interface_opt_flag = 0; + u8 relay_msg_opt_flag = 0; + ip6_main_t * im = &ip6_main; + u32 server_fib_idx, client_fib_idx; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + if (DHCPV6_MSG_RELAY_REPL != h0->msg_type) + { + error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE; + + drop_packet: + vlib_node_increment_counter (vm, dhcpv6_proxy_to_client_node.index, + error0, 1); + + f0 = vlib_get_frame_to_node (vm, dm->error_drop_node_index); + to_next0 = 
vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, dm->error_drop_node_index, f0); + goto do_trace; + } + /* hop count seems not need to be checked */ + if (HOP_COUNT_LIMIT < h0->hop_count) + { + error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS; + goto drop_packet; + } + u0 = (void *)h0 -(sizeof(*u0)); + ip0 = (void *)u0 -(sizeof(*ip0)); + + vlib_buffer_advance (b0, sizeof(*h0)); + o = vlib_buffer_get_current (b0); + + /* Parse through TLVs looking for option 18 (DHCPV6_OPTION_INTERFACE_ID) + _and_ option 9 (DHCPV6_OPTION_RELAY_MSG) option which must be there. + Currently assuming no other options need to be processed + The interface-ID is the FIB number we need + to track down the client-facing interface */ + + while ((u8 *) o < (b0->data + b0->current_data + b0->current_length)) + { + if (DHCPV6_OPTION_INTERFACE_ID == clib_net_to_host_u16(o->option)) + { + interface_opt_flag = 1; + if (clib_net_to_host_u16(o->length) == sizeof(sw_if_index)) + sw_if_index = clib_net_to_host_u32(((dhcpv6_int_id_t*)o)->int_idx); + if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index)) + { + error0 = DHCPV6_PROXY_ERROR_WRONG_INTERFACE_ID_OPTION; + goto drop_packet; + } + } + if (DHCPV6_OPTION_RELAY_MSG == clib_net_to_host_u16(o->option)) + { + relay_msg_opt_flag = 1; + r0 = vlib_buffer_get_current (b0); + } + if ((relay_msg_opt_flag == 1) && (interface_opt_flag == 1)) + break; + vlib_buffer_advance (b0, sizeof(*o) + clib_net_to_host_u16(o->length)); + o = (dhcpv6_option_t *) (((uword) o) + clib_net_to_host_u16(o->length) + sizeof(*o)); + } + + if ((relay_msg_opt_flag == 0) || (r0 == 0)) + { + error0 = DHCPV6_PROXY_ERROR_NO_RELAY_MESSAGE_OPTION; + goto drop_packet; + } + + if ((u32)~0 == sw_if_index) + { + error0 = DHCPV6_PROXY_ERROR_NO_CIRCUIT_ID_OPTION; + goto drop_packet; + } + + //Advance buffer to start of encapsulated DHCPv6 message + vlib_buffer_advance (b0, sizeof(*r0)); + + client_fib_idx = 
im->fib_index_by_sw_if_index[sw_if_index]; + server = dhcp_get_server(dm, client_fib_idx, FIB_PROTOCOL_IP6); + + if (NULL == server) + { + error0 = DHCPV6_PROXY_ERROR_NO_SERVER; + goto drop_packet; + } + + server_fib_idx = im->fib_index_by_sw_if_index + [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; + + if (server_fib_idx != server->server_fib_index || + ip0->src_address.as_u64[0] != server->dhcp_server.ip6.as_u64[0] || + ip0->src_address.as_u64[1] != server->dhcp_server.ip6.as_u64[1]) + { + //drop packet if not from server with configured address or FIB + error0 = DHCPV6_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; + goto drop_packet; + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = original_sw_if_index + = sw_if_index; + + swif = vnet_get_sw_interface (vnm, original_sw_if_index); + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + + /* + * udp_local hands us the DHCPV6 header, need udp hdr, + * ip hdr to relay to client + */ + vlib_buffer_advance (b0, -(sizeof(*u1))); + u1 = vlib_buffer_get_current (b0); + + vlib_buffer_advance (b0, -(sizeof(*ip1))); + ip1 = vlib_buffer_get_current (b0); + + copy_ip6_address(&client_address, &h0->peer_addr); + + ia0 = ip6_interface_first_address (&ip6_main, sw_if_index); + if (ia0 == 0) + { + error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS; + goto drop_packet; + } + + len = clib_net_to_host_u16(r0->length); + memset(ip1, 0, sizeof(*ip1)); + copy_ip6_address(&ip1->dst_address, &client_address); + u1->checksum = 0; + u1->src_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcpv6_to_server); + u1->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcpv6_to_client); + u1->length = clib_host_to_net_u16 (len + sizeof(udp_header_t)); + + ip1->ip_version_traffic_class_and_flow_label = + ip0->ip_version_traffic_class_and_flow_label & + 0x00000fff; + ip1->payload_length = u1->length; + ip1->protocol = PROTO_UDP; + ip1->hop_limit = HOP_COUNT_LIMIT; + copy_ip6_address(&ip1->src_address, ia0); + + u1->checksum = 
ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1, + &bogus_length); + ASSERT(bogus_length == 0); + + vlib_buffer_advance (b0, -(sizeof(ethernet_header_t))); + si0 = vnet_get_sw_interface (vnm, original_sw_if_index); + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + vlib_buffer_advance (b0, -4 /* space for VLAN tag */); + + mac0 = vlib_buffer_get_current (b0); + + hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index); + ei0 = pool_elt_at_index (em->interfaces, hi0->hw_instance); + clib_memcpy (mac0->src_address, ei0->address, sizeof (ei0->address)); + memset (&mac0->dst_address, 0xff, sizeof (mac0->dst_address)); + mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ? + clib_net_to_host_u16(0x8100) : clib_net_to_host_u16 (0x86dd); + + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + { + u32 * vlan_tag = (u32 *)(mac0+1); + u32 tmp; + tmp = (si0->sub.id << 16) | 0x0800; + *vlan_tag = clib_host_to_net_u32 (tmp); + } + + /* $$$ consider adding a dynamic next to the graph node, for performance */ + f0 = vlib_get_frame_to_node (vm, hi0->output_node_index); + to_next0 = vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, hi0->output_node_index, f0); + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 1; /* to client */ + if (ia0) + copy_ip6_address((ip6_address_t*)tr->packet_data, ia0); + tr->error = error0; + tr->original_sw_if_index = original_sw_if_index; + tr->sw_if_index = sw_if_index; + } + } + return from_frame->n_vectors; + +} + +VLIB_REGISTER_NODE (dhcpv6_proxy_to_client_node, static) = { + .function = dhcpv6_proxy_to_client_input, + .name = "dhcpv6-proxy-to-client", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .n_errors = DHCPV6_PROXY_N_ERROR, + .error_strings = dhcpv6_proxy_error_strings, + .format_buffer = format_dhcpv6_proxy_header_with_length, + .format_trace = format_dhcpv6_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcpv6_proxy_header, +#endif +}; + +/* + * Init hook: caches the index of the "error-drop" node, fills in the two + * DHCPv6 multicast group addresses used by this file and registers both + * proxy nodes as handlers for the DHCPv6 UDP destination ports. + */ +static clib_error_t * +dhcp6_proxy_init (vlib_main_t * vm) +{ + dhcp_proxy_main_t * dm = &dhcp_proxy_main; + vlib_node_t * error_drop_node; + + error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); + dm->error_drop_node_index = error_drop_node->index; + + /* ff05::1:3 - the All_DHCP_Servers group of RFC 3315, section 5.1 */ + all_dhcpv6_server_address.as_u64[0] = clib_host_to_net_u64 (0xFF05000000000000); + all_dhcpv6_server_address.as_u64[1] = clib_host_to_net_u64 (0x00010003); + + /* ff02::1:2 - the All_DHCP_Relay_Agents_and_Servers group of RFC 3315, section 5.1 */ + all_dhcpv6_server_relay_agent_address.as_u64[0] = clib_host_to_net_u64 (0xFF02000000000000); + all_dhcpv6_server_relay_agent_address.as_u64[1] = clib_host_to_net_u64 (0x00010002); + + udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_client, + dhcpv6_proxy_to_client_node.index, 0 /* is_ip4 == 0, i.e. IPv6 - NOTE(review): confirm against the udp_register_dst_port() prototype */); + + udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_server, + dhcpv6_proxy_to_server_node.index, 0 /* is_ip4 == 0, i.e. IPv6 - NOTE(review): confirm against the udp_register_dst_port() prototype */); + + return 0; +} + +VLIB_INIT_FUNCTION (dhcp6_proxy_init); + +/* + * Add (is_del == 0) or delete (is_del != 0) the DHCPv6 proxy server for the + * RX table rx_table_id. Both addr and src_addr must be non-zero, for delete + * as well as for add; otherwise VNET_API_ERROR_INVALID_DST_ADDRESS or + * VNET_API_ERROR_INVALID_SRC_ADDRESS is returned before any table is touched. + */ +int +dhcp6_proxy_set_server (ip46_address_t *addr, + ip46_address_t *src_addr, + u32 rx_table_id, + u32 server_table_id, + int is_del) +{ + u32 rx_fib_index = 0; + int rc = 0; + + /* /128 multicast prefix for the relay-agents-and-servers group */ + const mfib_prefix_t all_dhcp_servers = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_grp_addr = { + .ip6 = all_dhcpv6_server_relay_agent_address, + } + }; + + if (ip46_address_is_zero(addr)) + return VNET_API_ERROR_INVALID_DST_ADDRESS; + + if (ip46_address_is_zero(src_addr)) + return VNET_API_ERROR_INVALID_SRC_ADDRESS; + + /* takes a lock on the table for the duration of this call */ + rx_fib_index = mfib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, + rx_table_id); + + if (is_del) + { + rc = dhcp_proxy_server_del (FIB_PROTOCOL_IP6, rx_fib_index); + + if (0 == 
rc) + { + mfib_table_entry_delete(rx_fib_index, + &all_dhcp_servers, + MFIB_SOURCE_DHCP); + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + } + } + else + { + const fib_route_path_t path_for_us = { + .frp_proto = FIB_PROTOCOL_IP6, + .frp_addr = zero_addr, + .frp_sw_if_index = 0xffffffff, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = FIB_ROUTE_PATH_LOCAL, + }; + if (dhcp_proxy_server_add (FIB_PROTOCOL_IP6, addr, src_addr, + rx_fib_index, server_table_id)) + { + mfib_table_entry_path_update(rx_fib_index, + &all_dhcp_servers, + MFIB_SOURCE_DHCP, + &path_for_us, + MFIB_ITF_FLAG_FORWARD); + /* + * Each interface that is enabled in this table, needs to be added + * as an accepting interface, but this is not easily doable in VPP. + * So we cheat. Add a flag to the entry that indicates accept form + * any interface. + * We will still only accept on v6 enabled interfaces, since the + * input feature ensures this. + */ + mfib_table_entry_update(rx_fib_index, + &all_dhcp_servers, + MFIB_SOURCE_DHCP, + MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); + mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6); + } + } + + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + + return (rc); +} + +static clib_error_t * +dhcpv6_proxy_set_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip46_address_t addr, src_addr; + int set_server = 0, set_src_address = 0; + u32 rx_table_id = 0, server_table_id = 0; + int is_del = 0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "server %U", + unformat_ip6_address, &addr.ip6)) + set_server = 1; + else if (unformat(input, "src-address %U", + unformat_ip6_address, &src_addr.ip6)) + set_src_address =1; + else if (unformat (input, "server-fib-id %d", &server_table_id)) + ; + else if (unformat (input, "rx-fib-id %d", &rx_table_id)) + ; + else if (unformat (input, "delete") || + unformat (input, "del")) + is_del = 1; + else + break; + } + + if (is_del || (set_server && 
set_src_address)) + { + int rv; + + rv = dhcp6_proxy_set_server (&addr, &src_addr, rx_table_id, + server_table_id, is_del); + + //TODO: Complete the errors + switch (rv) + { + case 0: + return 0; + + case VNET_API_ERROR_INVALID_DST_ADDRESS: + return clib_error_return (0, "Invalid server address"); + + case VNET_API_ERROR_INVALID_SRC_ADDRESS: + return clib_error_return (0, "Invalid src address"); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return + (0, "Fib id %d: no per-fib DHCP server configured", rx_table_id); + + default: + return clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_set_command, static) = { + .path = "set dhcpv6 proxy", + .short_help = "set dhcpv6 proxy [del] server src-address " + "[server-fib-id ] [rx-fib-id ] ", + .function = dhcpv6_proxy_set_command_fn, +}; + +static u8 * +format_dhcp6_proxy_server (u8 * s, va_list * args) +{ + dhcp_server_t * server = va_arg (*args, dhcp_server_t *); + ip6_fib_t * rx_fib, * server_fib; + + if (NULL == server) + { + s = format (s, "%=40s%=40s%=14s%=14s", "Server Address", "Source Address", + "Server FIB", "RX FIB"); + return s; + } + + server_fib = ip6_fib_get(server->server_fib_index); + rx_fib = ip6_fib_get(server->rx_fib_index); + + + s = format (s, "%=40U%=40U%=14u%=14u", + format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY, + format_ip46_address, &server->dhcp_src_address, IP46_TYPE_ANY, + server_fib->table_id, rx_fib->table_id); + return s; +} + +static int +dhcp6_proxy_show_walk (dhcp_server_t *server, + void *ctx) +{ + vlib_main_t * vm = ctx; + + vlib_cli_output (vm, "%U", format_dhcp6_proxy_server, server); + + return (1); +} + +static clib_error_t * +dhcpv6_proxy_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_cli_output (vm, "%U", format_dhcp6_proxy_server, NULL /* header line */); + + 
dhcp_proxy_walk(FIB_PROTOCOL_IP6, dhcp6_proxy_show_walk, vm); + + return (NULL); +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_show_command, static) = { + .path = "show dhcpv6 proxy", + .short_help = "Display dhcpv6 proxy info", + .function = dhcpv6_proxy_show_command_fn, +}; + +static clib_error_t * +dhcpv6_vss_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0, got_new_vss=0; + u32 oui=0; + u32 fib_id=0, tbl_id=~0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "oui %d", &oui)) + got_new_vss = 1; + else if (unformat (input, "vpn-id %d", &fib_id)) + got_new_vss = 1; + else if (unformat (input, "table %d", &tbl_id)) + got_new_vss = 1; + else if (unformat(input, "delete") || unformat(input, "del")) + is_del = 1; + else + break; + } + + if (tbl_id ==~0) + return clib_error_return (0, "no table ID specified."); + + if (is_del || got_new_vss) + { + int rv; + + rv = dhcp_proxy_set_vss(FIB_PROTOCOL_IP6, tbl_id, oui, fib_id, is_del); + switch (rv) + { + case 0: + return 0; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "vss info (oui:%d, vpn-id:%d) not found in table %d.", + oui, fib_id, tbl_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "vss for table %d not found in pool.", + tbl_id); + + default: + return clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); + +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_vss_command, static) = { + .path = "set dhcpv6 vss", + .short_help = "set dhcpv6 vss table oui vpn-idx ", + .function = dhcpv6_vss_command_fn, +}; + +static clib_error_t * +dhcpv6_vss_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + dhcp_vss_walk(FIB_PROTOCOL_IP6, dhcp_vss_show_walk, vm); + + return (NULL); +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_vss_show_command, static) = { + .path = "show dhcpv6 vss", + 
.short_help = "show dhcpv6 VSS", + .function = dhcpv6_vss_show_command_fn, +}; + +static clib_error_t * +dhcpv6_link_address_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + vnet_main_t *vnm = vnet_get_main(); + u32 sw_if_index0=0, sw_if_index; + vnet_sw_interface_t *swif; + ip6_address_t *ia0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat(input, "%U", + unformat_vnet_sw_interface, vnm, &sw_if_index0)) + { + swif = vnet_get_sw_interface (vnm, sw_if_index0); + sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ? + swif->unnumbered_sw_if_index : sw_if_index0; + ia0 = ip6_interface_first_address(&ip6_main, sw_if_index); + if (ia0) + { + vlib_cli_output (vm, "%=20s%=48s", "interface", "link-address"); + + vlib_cli_output (vm, "%=20U%=48U", + format_vnet_sw_if_index_name, vnm, sw_if_index0, + format_ip6_address, ia0); + } else + vlib_cli_output (vm, "%=34s%=20U", "No IPv6 address configured on", + format_vnet_sw_if_index_name, vnm, sw_if_index); + } else + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_address_show_command, static) = { + .path = "show dhcpv6 link-address interface", + .short_help = "show dhcpv6 link-address interface ", + .function = dhcpv6_link_address_show_command_fn, +}; diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c index ce9039b7..bdf02cae 100644 --- a/src/vnet/dhcp/dhcp_api.c +++ b/src/vnet/dhcp/dhcp_api.c @@ -22,9 +22,8 @@ #include #include -#include +#include #include -#include #include @@ -51,52 +50,19 @@ _(DHCP_PROXY_DETAILS,dhcp_proxy_details) \ _(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \ _(DHCP_CLIENT_CONFIG, dhcp_client_config) -static void -dhcpv4_proxy_config (vl_api_dhcp_proxy_config_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv; - - rv = dhcp_proxy_set_server ((ip4_address_t *) (&mp->dhcp_server), - (ip4_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->rx_vrf_id), - (u32) ntohl 
(mp->server_vrf_id), - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); -} - - -static void -dhcpv6_proxy_config (vl_api_dhcp_proxy_config_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv = -1; - - rv = dhcpv6_proxy_set_server ((ip6_address_t *) (&mp->dhcp_server), - (ip6_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->rx_vrf_id), - (u32) ntohl (mp->server_vrf_id), - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); -} - static void vl_api_dhcp_proxy_set_vss_t_handler (vl_api_dhcp_proxy_set_vss_t * mp) { vl_api_dhcp_proxy_set_vss_reply_t *rmp; int rv; - if (!mp->is_ipv6) - rv = dhcp_proxy_set_option82_vss (ntohl (mp->tbl_id), - ntohl (mp->oui), - ntohl (mp->fib_id), - (int) mp->is_add == 0); - else - rv = dhcpv6_proxy_set_vss (ntohl (mp->tbl_id), - ntohl (mp->oui), - ntohl (mp->fib_id), (int) mp->is_add == 0); + + rv = dhcp_proxy_set_vss ((mp->is_ipv6 ? + FIB_PROTOCOL_IP6 : + FIB_PROTOCOL_IP4), + ntohl (mp->tbl_id), + ntohl (mp->oui), + ntohl (mp->fib_id), (int) mp->is_add == 0); REPLY_MACRO (VL_API_DHCP_PROXY_SET_VSS_REPLY); } @@ -105,10 +71,38 @@ vl_api_dhcp_proxy_set_vss_t_handler (vl_api_dhcp_proxy_set_vss_t * mp) static void vl_api_dhcp_proxy_config_t_handler (vl_api_dhcp_proxy_config_t * mp) { - if (mp->is_ipv6 == 0) - dhcpv4_proxy_config (mp); + vl_api_dhcp_proxy_config_reply_t *rmp; /* reply type paired with VL_API_DHCP_PROXY_CONFIG_REPLY below */ + ip46_address_t src, server; + int rv = -1; + + if (mp->is_ipv6) + { + clib_memcpy (&src.ip6, mp->dhcp_src_address, sizeof (src.ip6)); + clib_memcpy (&server.ip6, mp->dhcp_server, sizeof (server.ip6)); + + rv = dhcp6_proxy_set_server (&server, + &src, + (u32) ntohl (mp->rx_vrf_id), + (u32) ntohl (mp->server_vrf_id), + (int) (mp->is_add == 0)); + } else - dhcpv6_proxy_config (mp); + { + ip46_address_reset (&src); + ip46_address_reset (&server); + + clib_memcpy (&src.ip4, mp->dhcp_src_address, sizeof (src.ip4)); + clib_memcpy (&server.ip4, mp->dhcp_server, sizeof (server.ip4)); + + rv = 
dhcp4_proxy_set_server (&server, + &src, + (u32) ntohl (mp->rx_vrf_id), + (u32) ntohl (mp->server_vrf_id), + (int) (mp->is_add == 0)); + } + + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); } static void @@ -120,14 +114,13 @@ vl_api_dhcp_proxy_dump_t_handler (vl_api_dhcp_proxy_dump_t * mp) if (q == 0) return; - if (mp->is_ip6 == 0) - dhcp_proxy_dump (q, mp->context); - else - dhcpv6_proxy_dump (q, mp->context); + dhcp_proxy_dump ((mp->is_ip6 == 0 ? /* 0 selects IPv4, as in the if/else this replaces */ + FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6), q, mp->context); } void -dhcp_send_details (void *opaque, +dhcp_send_details (fib_protocol_t proto, + void *opaque, u32 context, const ip46_address_t * server, const ip46_address_t * src, @@ -149,7 +142,7 @@ dhcp_send_details (void *opaque, mp->vss_oui = htonl (vss_oui); mp->vss_fib_id = htonl (vss_fib_id); - mp->is_ipv6 = !ip46_address_is_ip4 (server); + mp->is_ipv6 = (proto == FIB_PROTOCOL_IP6); if (mp->is_ipv6) { diff --git a/src/vnet/dhcp/dhcp_proxy.c b/src/vnet/dhcp/dhcp_proxy.c new file mode 100644 index 00000000..da2deea6 --- /dev/null +++ b/src/vnet/dhcp/dhcp_proxy.c @@ -0,0 +1,275 @@ +/* + * dhcp_proxy.c: common dhcp v4 and v6 proxy node processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +/** + * @brief Shared 4/6 instance of DHCP main + */ +dhcp_proxy_main_t dhcp_proxy_main; + +void +dhcp_proxy_walk (fib_protocol_t proto, + dhcp_proxy_walk_fn_t fn, + void *ctx) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + dhcp_server_t * server; + u32 server_index, i; + + vec_foreach_index (i, dpm->dhcp_server_index_by_rx_fib_index[proto]) + { + server_index = dpm->dhcp_server_index_by_rx_fib_index[proto][i]; + if (~0 == server_index) /* slot holds no server for this RX FIB index */ + continue; + + server = pool_elt_at_index (dpm->dhcp_servers[proto], server_index); + + if (!fn(server, ctx)) /* callback returned 0: stop walking */ + break; + } +} + +void +dhcp_vss_walk (fib_protocol_t proto, + dhcp_vss_walk_fn_t fn, + void *ctx) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + dhcp_vss_t * vss; + u32 vss_index, i; + fib_table_t *fib; + + + vec_foreach_index (i, dpm->vss_index_by_rx_fib_index[proto]) + { + vss_index = dpm->vss_index_by_rx_fib_index[proto][i]; + if (~0 == vss_index) + continue; + + vss = pool_elt_at_index (dpm->vss[proto], vss_index); + + fib = fib_table_get(i, proto); + + if (!fn(vss, fib->ft_table_id, ctx)) + break; + } +} + +int +dhcp_proxy_server_del (fib_protocol_t proto, + u32 rx_fib_index) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + dhcp_server_t * server = 0; + int rc = 0; + + server = dhcp_get_server(dpm, rx_fib_index, proto); + + if (NULL == server) + { + rc = VNET_API_ERROR_NO_SUCH_ENTRY; + } + else + { + /* Use the default server again. 
*/ + dpm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] = ~0; + + fib_table_unlock (server->server_fib_index, proto); + + pool_put (dpm->dhcp_servers[proto], server); + } + + return (rc); +} + +int +dhcp_proxy_server_add (fib_protocol_t proto, + ip46_address_t *addr, + ip46_address_t *src_address, + u32 rx_fib_index, + u32 server_table_id) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + dhcp_server_t * server = 0; + int new = 0; + + server = dhcp_get_server(dpm, rx_fib_index, proto); + + if (NULL == server) + { + vec_validate_init_empty(dpm->dhcp_server_index_by_rx_fib_index[proto], + rx_fib_index, + ~0); + + pool_get (dpm->dhcp_servers[proto], server); + memset (server, 0, sizeof (*server)); + new = 1; + + dpm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] = + server - dpm->dhcp_servers[proto]; + + server->rx_fib_index = rx_fib_index; + server->server_fib_index = + fib_table_find_or_create_and_lock(proto, server_table_id); + } + else + { + /* modify, may need to swap server FIBs */ + u32 tmp_index; + + tmp_index = fib_table_find(proto, server_table_id); + + if (tmp_index != server->server_fib_index) + { + tmp_index = server->server_fib_index; + + /* certainly swapping if the fib doesn't exist */ + server->server_fib_index = + fib_table_find_or_create_and_lock(proto, server_table_id); + fib_table_unlock (tmp_index, proto); + } + } + + server->dhcp_server = *addr; + server->dhcp_src_address = *src_address; + + return (new); +} + +typedef struct dhcp4_proxy_dump_walk_ctx_t_ +{ + fib_protocol_t proto; + void *opaque; + u32 context; +} dhcp_proxy_dump_walk_cxt_t; + +static int +dhcp_proxy_dump_walk (dhcp_server_t *server, + void *arg) +{ + dhcp_proxy_dump_walk_cxt_t *ctx = arg; + fib_table_t *s_fib, *r_fib; + dhcp_vss_t *v; + + v = dhcp_get_vss_info(&dhcp_proxy_main, + server->rx_fib_index, + ctx->proto); + + s_fib = fib_table_get(server->server_fib_index, ctx->proto); + r_fib = fib_table_get(server->rx_fib_index, ctx->proto); + + 
dhcp_send_details(ctx->proto, + ctx->opaque, + ctx->context, + &server->dhcp_server, + &server->dhcp_src_address, + s_fib->ft_table_id, + r_fib->ft_table_id, + (v ? v->fib_id : 0), + (v ? v->oui : 0)); + + return (1); +} + +void +dhcp_proxy_dump (fib_protocol_t proto, + void *opaque, + u32 context) +{ + dhcp_proxy_dump_walk_cxt_t ctx = { + .proto = proto, + .opaque = opaque, + .context = context, + }; + dhcp_proxy_walk(proto, dhcp_proxy_dump_walk, &ctx); +} + +int +dhcp_vss_show_walk (dhcp_vss_t *vss, + u32 rx_table_id, + void *ctx) +{ + vlib_main_t * vm = ctx; + + vlib_cli_output (vm, "%=6d%=6d%=12d", + rx_table_id, + vss->oui, + vss->fib_id); + + return (1); +} + +int dhcp_proxy_set_vss (fib_protocol_t proto, + u32 tbl_id, + u32 oui, + u32 fib_id, + int is_del) +{ + dhcp_proxy_main_t *dm = &dhcp_proxy_main; + dhcp_vss_t *v = NULL; + u32 rx_fib_index; + int rc = 0; + + rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id); + v = dhcp_get_vss_info(dm, rx_fib_index, proto); + + if (NULL != v) + { + if (is_del) + { + /* release the lock held on the table when the VSS + * info was created */ + fib_table_unlock (rx_fib_index, proto); + + pool_put (dm->vss[proto], v); + dm->vss_index_by_rx_fib_index[proto][rx_fib_index] = ~0; + } + else + { + /* this is a modify */ + v->fib_id = fib_id; + v->oui = oui; + } + } + else + { + if (is_del) + rc = VNET_API_ERROR_NO_SUCH_ENTRY; + else + { + /* create a new entry */ + vec_validate_init_empty(dm->vss_index_by_rx_fib_index[proto], + rx_fib_index, ~0); + + /* hold a lock on the table whilst the VSS info exist */ + fib_table_lock (rx_fib_index, proto); + + pool_get (dm->vss[proto], v); + v->fib_id = fib_id; + v->oui = oui; + dm->vss_index_by_rx_fib_index[proto][rx_fib_index] = + v - dm->vss[proto]; + } + } + + /* Release the lock taken during the create_or_lock at the start */ + fib_table_unlock (rx_fib_index, proto); + + return (rc); +} diff --git a/src/vnet/dhcp/dhcp_proxy.h b/src/vnet/dhcp/dhcp_proxy.h new file mode 
100644 index 00000000..c0d79c41 --- /dev/null +++ b/src/vnet/dhcp/dhcp_proxy.h @@ -0,0 +1,248 @@ +/* + * dhcp_proxy.h: DHCP v4 & v6 proxy common functions/types + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_dhcp_proxy_h +#define included_dhcp_proxy_h + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef enum { +#define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n, +#include +#undef dhcp_proxy_error + DHCP_PROXY_N_ERROR, +} dhcp_proxy_error_t; + +typedef enum { +#define dhcpv6_proxy_error(n,s) DHCPV6_PROXY_ERROR_##n, +#include +#undef dhcpv6_proxy_error + DHCPV6_PROXY_N_ERROR, +} dhcpv6_proxy_error_t; + + +/** + * @brief The Virtual Sub-net Selection information for a given RX FIB + */ +typedef struct dhcp_vss_t_ { + /** + * @brief ?? RFC doesn't say + */ + u32 oui; + /** + * @brief VPN-ID + */ + u32 fib_id; +} dhcp_vss_t; + +/** + * @brief A DHCP proxy server representation + */ +typedef struct dhcp_server_t_ { + /** + * @brief The address of the DHCP server to which to relay the client's + * messages + */ + ip46_address_t dhcp_server; + + /** + * @brief The source address to use in relayed messages + */ + ip46_address_t dhcp_src_address; + + /** + * @brief The FIB index (not the external Table-ID) in which the server + * is reachable. 
+ */ + u32 server_fib_index; + + /** + * @brief The FIB index (not the external Table-ID) in which the client + * resides. + */ + u32 rx_fib_index; +} dhcp_server_t; + +#define DHCP_N_PROTOS (FIB_PROTOCOL_IP6 + 1) + +/** + * @brief Collection of global DHCP proxy data + */ +typedef struct { + /* Pool of DHCP servers */ + dhcp_server_t *dhcp_servers[DHCP_N_PROTOS]; + + /* Vector, indexed by RX FIB index, of indices into the dhcp_servers pool; ~0 means no server */ + u32 * dhcp_server_index_by_rx_fib_index[DHCP_N_PROTOS]; + + /* to drop pkts in server-to-client direction */ + u32 error_drop_node_index; + + dhcp_vss_t *vss[DHCP_N_PROTOS]; + + /* Vector, indexed by RX FIB index, of indices into the vss pool; ~0 means no VSS info */ + u32 *vss_index_by_rx_fib_index[DHCP_N_PROTOS]; + +} dhcp_proxy_main_t; + +extern dhcp_proxy_main_t dhcp_proxy_main; + +/** + * @brief Send the details of a proxy session to the API client during a dump + */ +void dhcp_send_details (fib_protocol_t proto, + void *opaque, + u32 context, + const ip46_address_t *server, + const ip46_address_t *src, + u32 server_fib_id, + u32 rx_fib_id, + u32 vss_fib_id, + u32 vss_oui); + +/** + * @brief Show (on CLI) a VSS config during a show walk + */ +int dhcp_vss_show_walk (dhcp_vss_t *vss, + u32 rx_table_id, + void *ctx); + +/** + * @brief Configure/set a new VSS info + */ +int dhcp_proxy_set_vss(fib_protocol_t proto, + u32 vrf_id, + u32 oui, + u32 fib_id, + int is_del); + +/** + * @brief Dump the proxy configs to the API + */ +void dhcp_proxy_dump(fib_protocol_t proto, + void *opaque, + u32 context); + +/** + * @brief Add a new DHCP proxy server configuration. 
+ * @return 1 if the config is new, + * 0 otherwise (implying a modify of an existing) + */ +int dhcp_proxy_server_add(fib_protocol_t proto, + ip46_address_t *addr, + ip46_address_t *src_address, + u32 rx_fib_iindex, + u32 server_table_id); + +/** + * @brief Delete a DHCP proxy config + * @return 0 if deleted, otherwise an error code + */ +int dhcp_proxy_server_del(fib_protocol_t proto, + u32 rx_fib_index); + +/** + * @brief Callback function invoked for each DHCP proxy entry + * return 0 to break the walk, non-zero otherwise. + */ +typedef int (*dhcp_proxy_walk_fn_t)(dhcp_server_t *server, + void *ctx); + +/** + * @brief Walk/Visit each DHCP proxy server + */ +void dhcp_proxy_walk(fib_protocol_t proto, + dhcp_proxy_walk_fn_t fn, + void *ctx); + +/** + * @brief Callback function invoked for each DHCP VSS entry + * return 0 to break the walk, non-zero otherwise. + */ +typedef int (*dhcp_vss_walk_fn_t)(dhcp_vss_t *server, + u32 rx_table_id, + void *ctx); + +/** + * @brief Walk/Visit each DHCP proxy VSS + */ +void dhcp_vss_walk(fib_protocol_t proto, + dhcp_vss_walk_fn_t fn, + void *ctx); + +/** + * @brief Get the VSS data for the FIB index, or NULL if none is set + */ +static inline dhcp_vss_t * +dhcp_get_vss_info (dhcp_proxy_main_t *dm, + u32 rx_fib_index, + fib_protocol_t proto) +{ + dhcp_vss_t *v = NULL; + + if (vec_len(dm->vss_index_by_rx_fib_index[proto]) > rx_fib_index && + dm->vss_index_by_rx_fib_index[proto][rx_fib_index] != ~0) + { + v = pool_elt_at_index ( + dm->vss[proto], + dm->vss_index_by_rx_fib_index[proto][rx_fib_index]); + } + + return (v); +} + +/** + * @brief Get the DHCP proxy server data for the FIB index, or NULL if none is set + */ +static inline dhcp_server_t * +dhcp_get_server (dhcp_proxy_main_t *dm, + u32 rx_fib_index, + fib_protocol_t proto) +{ + dhcp_server_t *s = NULL; + + if (vec_len(dm->dhcp_server_index_by_rx_fib_index[proto]) > rx_fib_index && + dm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] != ~0) + { + s = pool_elt_at_index ( + 
dm->dhcp_servers[proto], + 
dm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index]); + } + + return (s); +} + +int dhcp6_proxy_set_server (ip46_address_t *addr, + ip46_address_t *src_addr, + u32 rx_table_id, + u32 server_table_id, + int is_del); +int dhcp4_proxy_set_server (ip46_address_t *addr, + ip46_address_t *src_addr, + u32 rx_table_id, + u32 server_table_id, + int is_del); + +#endif /* included_dhcp_proxy_h */ diff --git a/src/vnet/dhcp/packet.h b/src/vnet/dhcp/packet.h deleted file mode 100644 index 267a8eaf..00000000 --- a/src/vnet/dhcp/packet.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef included_vnet_dhcp_packet_h -#define included_vnet_dhcp_packet_h - -/* - * DHCP packet format - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include - -typedef struct { - u8 opcode; /* 1 = request, 2 = reply */ - u8 hardware_type; /* 1 = ethernet */ - u8 hardware_address_length; - u8 hops; - u32 transaction_identifier; - u16 seconds; - u16 flags; -#define DHCP_FLAG_BROADCAST (1<<15) - ip4_address_t client_ip_address; - ip4_address_t your_ip_address; /* use this one */ - ip4_address_t server_ip_address; - ip4_address_t gateway_ip_address; /* use option 3, not this one */ - u8 client_hardware_address[16]; - u8 server_name[64]; - u8 boot_filename[128]; - ip4_address_t magic_cookie; - u8 options[0]; -} dhcp_header_t; - -typedef struct { - u8 option; - u8 length; - union { - u8 data[0]; - u32 data_as_u32[0]; - }; -} __attribute__((packed)) dhcp_option_t; - -typedef enum { - DHCP_PACKET_DISCOVER=1, - DHCP_PACKET_OFFER, - DHCP_PACKET_REQUEST, - DHCP_PACKET_ACK=5, -} dhcp_packet_type_t; - -/* charming antique: 99.130.83.99 is the dhcp magic cookie */ -#define DHCP_MAGIC (clib_host_to_net_u32(0x63825363)) - -#endif /* included_vnet_dhcp_packet_h */ diff --git a/src/vnet/dhcp/proxy.h b/src/vnet/dhcp/proxy.h deleted file mode 100644 index 4b115c74..00000000 --- a/src/vnet/dhcp/proxy.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * proxy.h: dhcp proxy - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef included_dhcp_proxy_h -#define included_dhcp_proxy_h - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef enum { -#define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n, -#include -#undef dhcp_proxy_error - DHCP_PROXY_N_ERROR, -} dhcp_proxy_error_t; - -typedef struct { - u32 oui; - u32 fib_id; -} vss_id; - -typedef union { - u8 as_u8[8]; - vss_id vpn_id; -} vss_info; - -typedef struct { - ip4_address_t dhcp_server; - ip4_address_t dhcp_src_address; - u32 server_fib_index; -} dhcp_server_t; - -typedef struct { - /* Pool of DHCP servers */ - dhcp_server_t * dhcp_servers; - - /* Pool of selected DHCP server. Zero is the default server */ - u32 * dhcp_server_index_by_rx_fib_index; - - /* to drop pkts in server-to-client direction */ - u32 error_drop_node_index; - - vss_info *vss; - - /* hash lookup specific vrf_id -> option 82 vss suboption */ - u32 *vss_index_by_rx_fib_index; - - /* convenience */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; -} dhcp_proxy_main_t; - -extern dhcp_proxy_main_t dhcp_proxy_main; - -void dhcp_send_details (void *opaque, - u32 context, - const ip46_address_t *server, - const ip46_address_t *src, - u32 server_fib_id, - u32 rx_fib_id, - u32 vss_fib_id, - u32 vss_oui); - -int dhcp_proxy_set_server (ip4_address_t *addr, - ip4_address_t *src_address, - u32 fib_id, - u32 server_fib_id, - int is_del); - -int dhcp_proxy_set_option82_vss(u32 vrf_id, - u32 oui, - u32 fib_id, - int is_del); - -void dhcp_proxy_dump(void *opaque, - u32 context); - -#endif /* included_dhcp_proxy_h */ diff --git a/src/vnet/dhcp/proxy_error.def b/src/vnet/dhcp/proxy_error.def deleted file mode 100644 index 6d790d73..00000000 --- a/src/vnet/dhcp/proxy_error.def +++ /dev/null @@ -1,31 +0,0 @@ -/* - * dhcp_proxy_error.def: dhcp proxy errors - * - * Copyright (c) 2013 Cisco and/or its affiliates. 
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -dhcp_proxy_error (NONE, "no error") -dhcp_proxy_error (NO_SERVER, "no dhcp server configured") -dhcp_proxy_error (RELAY_TO_SERVER, "DHCP packets relayed to the server") -dhcp_proxy_error (RELAY_TO_CLIENT, "DHCP packets relayed to clients") -dhcp_proxy_error (OPTION_82_ERROR, "DHCP failed to insert option 82") -dhcp_proxy_error (NO_OPTION_82, "DHCP option 82 missing") -dhcp_proxy_error (BAD_OPTION_82_ITF, "Bad DHCP option 82 interface value") -dhcp_proxy_error (BAD_OPTION_82_ADDR, "Bad DHCP option 82 address value") -dhcp_proxy_error (BAD_FIB_ID, "DHCP option 82 fib-id to fib-index map failure") -dhcp_proxy_error (NO_INTERFACE_ADDRESS, "DHCP no interface address") -dhcp_proxy_error (OPTION_82_VSS_NOT_PROCESSED, "DHCP VSS not processed by DHCP server") -dhcp_proxy_error (BAD_YIADDR, "DHCP packets with bad your_ip_address fields") -dhcp_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCP packets not from DHCP server or server FIB.") -dhcp_proxy_error (PKT_TOO_BIG, "DHCP packets which are too big.") diff --git a/src/vnet/dhcp/proxy_node.c b/src/vnet/dhcp/proxy_node.c deleted file mode 100644 index ab6819fe..00000000 --- a/src/vnet/dhcp/proxy_node.c +++ /dev/null @@ -1,1192 +0,0 @@ -/* - * proxy_node.c: dhcp proxy node processing - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -static char * dhcp_proxy_error_strings[] = { -#define dhcp_proxy_error(n,s) s, -#include "proxy_error.def" -#undef dhcp_proxy_error -}; - -#define foreach_dhcp_proxy_to_server_input_next \ - _ (DROP, "error-drop") \ - _ (LOOKUP, "ip4-lookup") \ - _ (SEND_TO_CLIENT, "dhcp-proxy-to-client") - -typedef enum { -#define _(s,n) DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s, - foreach_dhcp_proxy_to_server_input_next -#undef _ - DHCP_PROXY_TO_SERVER_INPUT_N_NEXT, -} dhcp_proxy_to_server_input_next_t; - -typedef struct { - /* 0 => to server, 1 => to client */ - int which; - ip4_address_t trace_ip4_address; - u32 error; - u32 sw_if_index; - u32 original_sw_if_index; -} dhcp_proxy_trace_t; - -#define VPP_DHCP_OPTION82_SUB1_SIZE 6 -#define VPP_DHCP_OPTION82_SUB5_SIZE 6 -#define VPP_DHCP_OPTION82_VSS_SIZE 12 -#define VPP_DHCP_OPTION82_SIZE (VPP_DHCP_OPTION82_SUB1_SIZE + \ - VPP_DHCP_OPTION82_SUB5_SIZE + \ - VPP_DHCP_OPTION82_VSS_SIZE +3) - -vlib_node_registration_t dhcp_proxy_to_server_node; -vlib_node_registration_t dhcp_proxy_to_client_node; - -dhcp_proxy_main_t dhcp_proxy_main; - -u8 * format_dhcp_proxy_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - dhcp_proxy_trace_t * t = va_arg (*args, dhcp_proxy_trace_t *); - - if (t->which == 0) - s = format (s, "DHCP proxy: sent to server %U\n", - format_ip4_address, &t->trace_ip4_address, t->error); - else - s = format (s, "DHCP proxy: broadcast 
to client from %U\n", - format_ip4_address, &t->trace_ip4_address); - - if (t->error != (u32)~0) - s = format (s, " error: %s\n", dhcp_proxy_error_strings[t->error]); - - s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n", - t->original_sw_if_index, t->sw_if_index); - - return s; -} - -u8 * format_dhcp_proxy_header_with_length (u8 * s, va_list * args) -{ - dhcp_header_t * h = va_arg (*args, dhcp_header_t *); - u32 max_header_bytes = va_arg (*args, u32); - u32 header_bytes; - - header_bytes = sizeof (h[0]); - if (max_header_bytes != 0 && header_bytes > max_header_bytes) - return format (s, "dhcp header truncated"); - - s = format (s, "DHCP Proxy"); - - return s; -} - -static inline vss_info * -dhcp_get_vss_info (dhcp_proxy_main_t *dm, - u32 rx_fib_index) -{ - vss_info *v; - - if (vec_len(dm->vss_index_by_rx_fib_index) <= rx_fib_index || - dm->vss_index_by_rx_fib_index[rx_fib_index] == ~0) - { - v = NULL; - } - else - { - v = pool_elt_at_index (dm->vss, - dm->vss_index_by_rx_fib_index[rx_fib_index]); - } - - return (v); -} - -static inline dhcp_server_t * -dhcp_get_server (dhcp_proxy_main_t *dm, - u32 rx_fib_index) -{ - dhcp_server_t *s = NULL; - - if (vec_len(dm->dhcp_server_index_by_rx_fib_index) > rx_fib_index && - dm->dhcp_server_index_by_rx_fib_index[rx_fib_index] != ~0) - { - s = pool_elt_at_index (dm->dhcp_servers, - dm->dhcp_server_index_by_rx_fib_index[rx_fib_index]); - } - - return (s); -} - -static uword -dhcp_proxy_to_server_input (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, * from, * to_next; - dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - u32 pkts_to_server=0, pkts_to_client=0, pkts_no_server=0; - u32 pkts_no_interface_address=0; - u32 pkts_too_big=0; - ip4_main_t * im = &ip4_main; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - 
vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t * b0; - udp_header_t * u0; - dhcp_header_t * h0; - ip4_header_t * ip0; - u32 next0; - u32 old0, new0; - ip_csum_t sum0; - u32 error0 = (u32) ~0; - u32 sw_if_index = 0; - u32 original_sw_if_index = 0; - u8 *end = NULL; - u32 fib_index; - dhcp_server_t * server; - u32 rx_sw_if_index; - dhcp_option_t *o; - u32 len = 0; - vlib_buffer_free_list_t *fl; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - h0 = vlib_buffer_get_current (b0); - - /* - * udp_local hands us the DHCP header, need udp hdr, - * ip hdr to relay to server - */ - vlib_buffer_advance (b0, -(sizeof(*u0))); - u0 = vlib_buffer_get_current (b0); - - /* This blows. Return traffic has src_port = 67, dst_port = 67 */ - if (u0->src_port == clib_net_to_host_u16(UDP_DST_PORT_dhcp_to_server)) - { - vlib_buffer_advance (b0, sizeof(*u0)); - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT; - error0 = 0; - pkts_to_client++; - goto do_enqueue; - } - - rx_sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - - fib_index = im->fib_index_by_sw_if_index [rx_sw_if_index]; - server = dhcp_get_server(dpm, fib_index); - - if (PREDICT_FALSE (NULL == server)) - { - error0 = DHCP_PROXY_ERROR_NO_SERVER; - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_server++; - goto do_trace; - } - - vlib_buffer_advance (b0, -(sizeof(*ip0))); - ip0 = vlib_buffer_get_current (b0); - - /* disable UDP checksum */ - u0->checksum = 0; - sum0 = ip0->checksum; - old0 = ip0->dst_address.as_u32; - new0 = server->dhcp_server.as_u32; - ip0->dst_address.as_u32 = server->dhcp_server.as_u32; - sum0 = ip_csum_update (sum0, old0, new0, - ip4_header_t /* structure */, - dst_address /* changed member */); - ip0->checksum = ip_csum_fold (sum0); - - sum0 = ip0->checksum; - old0 = 
ip0->src_address.as_u32; - new0 = server->dhcp_src_address.as_u32; - ip0->src_address.as_u32 = new0; - sum0 = ip_csum_update (sum0, old0, new0, - ip4_header_t /* structure */, - src_address /* changed member */); - ip0->checksum = ip_csum_fold (sum0); - - /* Send to DHCP server via the configured FIB */ - vnet_buffer(b0)->sw_if_index[VLIB_TX] = - server->server_fib_index; - - h0->gateway_ip_address.as_u32 = server->dhcp_src_address.as_u32; - pkts_to_server++; - - o = (dhcp_option_t *) h0->options; - - fib_index = im->fib_index_by_sw_if_index - [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; - - end = b0->data + b0->current_data + b0->current_length; - /* TLVs are not performance-friendly... */ - while (o->option != 0xFF /* end of options */ && (u8 *)o < end) - o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); - - fl = vlib_buffer_get_free_list (vm, b0->free_list_index); - // start write at (option*)o, some packets have padding - if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes) - { - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_too_big++; - goto do_trace; - } - - if ((o->option == 0xFF) && ((u8 *)o <= end)) - { - vnet_main_t *vnm = vnet_get_main(); - u16 old_l0, new_l0; - ip4_address_t _ia0, * ia0 = &_ia0; - vss_info *vss; - vnet_sw_interface_t *swif; - sw_if_index = 0; - original_sw_if_index = 0; - - original_sw_if_index = sw_if_index = - vnet_buffer(b0)->sw_if_index[VLIB_RX]; - swif = vnet_get_sw_interface (vnm, sw_if_index); - if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) - sw_if_index = swif->unnumbered_sw_if_index; - - /* - * Get the first ip4 address on the [client-side] - * RX interface, if not unnumbered. otherwise use - * the loopback interface's ip address. 
- */ - ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0); - - if (ia0 == 0) - { - error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_interface_address++; - goto do_trace; - } - - /* Add option 82 */ - o->option = 82; /* option 82 */ - o->length = 12; /* 12 octets to follow */ - o->data[0] = 1; /* suboption 1, circuit ID (=FIB id) */ - o->data[1] = 4; /* length of suboption */ - o->data[2] = (original_sw_if_index >> 24) & 0xFF; - o->data[3] = (original_sw_if_index >> 16) & 0xFF; - o->data[4] = (original_sw_if_index >> 8) & 0xFF; - o->data[5] = (original_sw_if_index >> 0) & 0xFF; - o->data[6] = 5; /* suboption 5 (client RX intfc address) */ - o->data[7] = 4; /* length 4 */ - o->data[8] = ia0->as_u8[0]; - o->data[9] = ia0->as_u8[1]; - o->data[10] = ia0->as_u8[2]; - o->data[11] = ia0->as_u8[3]; - o->data[12] = 0xFF; - - vss = dhcp_get_vss_info (dpm, fib_index); - if (NULL != vss) - { - u32 opt82_fib_id=0, opt82_oui=0; - - opt82_oui = vss->vpn_id.oui; - opt82_fib_id = vss->vpn_id.fib_id; - - o->data[12] = 151; /* vss suboption */ - if (255 == opt82_fib_id) { - o->data[13] = 1; /* length */ - o->data[14] = 255; /* vss option type */ - o->data[15] = 152; /* vss control suboption */ - o->data[16] = 0; /* length */ - /* and a new "end-of-options" option (0xff) */ - o->data[17] = 0xFF; - o->length += 5; - } else { - o->data[13] = 8; /* length */ - o->data[14] = 1; /* vss option type */ - o->data[15] = (opt82_oui >> 16) & 0xff; - o->data[16] = (opt82_oui >> 8) & 0xff; - o->data[17] = (opt82_oui ) & 0xff; - o->data[18] = (opt82_fib_id >> 24) & 0xff; - o->data[19] = (opt82_fib_id >> 16) & 0xff; - o->data[20] = (opt82_fib_id >> 8) & 0xff; - o->data[21] = (opt82_fib_id) & 0xff; - o->data[22] = 152; /* vss control suboption */ - o->data[23] = 0; /* length */ - - /* and a new "end-of-options" option (0xff) */ - o->data[24] = 0xFF; - o->length += 12; - } - } - - len = o->length + 3; - b0->current_length += len; - /* 
Fix IP header length and checksum */ - old_l0 = ip0->length; - new_l0 = clib_net_to_host_u16 (old_l0); - new_l0 += len; - new_l0 = clib_host_to_net_u16 (new_l0); - ip0->length = new_l0; - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */); - ip0->checksum = ip_csum_fold (sum0); - - /* Fix UDP length */ - new_l0 = clib_net_to_host_u16 (u0->length); - new_l0 += len; - u0->length = clib_host_to_net_u16 (new_l0); - } else { - vlib_node_increment_counter - (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_OPTION_82_ERROR, 1); - } - - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; - - do_trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->which = 0; /* to server */ - tr->error = error0; - tr->original_sw_if_index = original_sw_if_index; - tr->sw_if_index = sw_if_index; - if (next0 == DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP) - tr->trace_ip4_address.as_u32 = server->dhcp_server.as_u32; - } - - do_enqueue: - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_RELAY_TO_CLIENT, - pkts_to_client); - vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_RELAY_TO_SERVER, - pkts_to_server); - vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_NO_SERVER, - pkts_no_server); - vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS, - pkts_no_interface_address); - vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_PKT_TOO_BIG, - pkts_too_big); - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dhcp_proxy_to_server_node) = { - .function = dhcp_proxy_to_server_input, 
- .name = "dhcp-proxy-to-server", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .n_errors = DHCP_PROXY_N_ERROR, - .error_strings = dhcp_proxy_error_strings, - - .n_next_nodes = DHCP_PROXY_TO_SERVER_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s] = n, - foreach_dhcp_proxy_to_server_input_next -#undef _ - }, - - .format_buffer = format_dhcp_proxy_header_with_length, - .format_trace = format_dhcp_proxy_trace, -#if 0 - .unformat_buffer = unformat_dhcp_proxy_header, -#endif -}; - -static uword -dhcp_proxy_to_client_input (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, * from; - ethernet_main_t *em = ethernet_get_main (vm); - dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - vnet_main_t * vnm = vnet_get_main(); - ip4_main_t * im = &ip4_main; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - while (n_left_from > 0) - { - u32 bi0; - vlib_buffer_t * b0; - udp_header_t * u0; - dhcp_header_t * h0; - ip4_header_t * ip0 = 0; - ip4_address_t * ia0 = 0; - u32 old0, new0; - ip_csum_t sum0; - ethernet_interface_t *ei0; - ethernet_header_t *mac0; - vnet_hw_interface_t *hi0; - vlib_frame_t *f0; - u32 * to_next0; - u32 sw_if_index = ~0; - vnet_sw_interface_t *si0; - u32 error0 = (u32)~0; - vnet_sw_interface_t *swif; - u32 fib_index; - dhcp_server_t * server; - u32 original_sw_if_index = (u32) ~0; - ip4_address_t relay_addr = { - .as_u32 = 0, - }; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - - b0 = vlib_get_buffer (vm, bi0); - h0 = vlib_buffer_get_current (b0); - - /* - * udp_local hands us the DHCP header, need udp hdr, - * ip hdr to relay to client - */ - vlib_buffer_advance (b0, -(sizeof(*u0))); - u0 = vlib_buffer_get_current (b0); - - vlib_buffer_advance (b0, -(sizeof(*ip0))); - ip0 = vlib_buffer_get_current (b0); - - /* Consumed by dhcp client code? 
*/ - if (dhcp_client_for_us (bi0, b0, ip0, u0, h0)) - continue; - - if (1 /* dpm->insert_option_82 */) - { - dhcp_option_t *o = (dhcp_option_t *) h0->options; - dhcp_option_t *sub; - - /* Parse through TLVs looking for option 82. - The circuit-ID is the FIB number we need - to track down the client-facing interface */ - - while (o->option != 0xFF /* end of options */ && - (u8 *) o < (b0->data + b0->current_data + b0->current_length)) - { - if (o->option == 82) - { - u32 vss_exist = 0; - u32 vss_ctrl = 0; - sub = (dhcp_option_t *) &o->data[0]; - while (sub->option != 0xFF /* end of options */ && - (u8 *) sub < (u8 *)(o + o->length)) { - /* If this is one of ours, it will have - total length 12, circuit-id suboption type, - and the sw_if_index */ - if (sub->option == 1 && sub->length == 4) - { - sw_if_index = ((sub->data[0] << 24) | - (sub->data[1] << 16) | - (sub->data[2] << 8) | - (sub->data[3])); - } - else if (sub->option == 5 && sub->length == 4) - { - relay_addr.as_u8[0] = sub->data[0]; - relay_addr.as_u8[1] = sub->data[1]; - relay_addr.as_u8[2] = sub->data[2]; - relay_addr.as_u8[3] = sub->data[3]; - } - else if (sub->option == 151 && - sub->length == 7 && - sub->data[0] == 1) - vss_exist = 1; - else if (sub->option == 152 && sub->length == 0) - vss_ctrl = 1; - sub = (dhcp_option_t *) - (((uword) sub) + (sub->length + 2)); - } - if (vss_ctrl && vss_exist) - vlib_node_increment_counter - (vm, dhcp_proxy_to_client_node.index, - DHCP_PROXY_ERROR_OPTION_82_VSS_NOT_PROCESSED, 1); - - } - o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); - } - } - - if (sw_if_index == (u32)~0) - { - error0 = DHCP_PROXY_ERROR_NO_OPTION_82; - - drop_packet: - vlib_node_increment_counter (vm, dhcp_proxy_to_client_node.index, - error0, 1); - f0 = vlib_get_frame_to_node (vm, dpm->error_drop_node_index); - to_next0 = vlib_frame_vector_args (f0); - to_next0[0] = bi0; - f0->n_vectors = 1; - vlib_put_frame_to_node (vm, dpm->error_drop_node_index, f0); - goto do_trace; - } - - if 
(relay_addr.as_u32 == 0) - { - error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ADDR; - goto drop_packet; - } - - if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index)) - { - error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ITF; - goto drop_packet; - } - - fib_index = im->fib_index_by_sw_if_index [sw_if_index]; - server = dhcp_get_server(dpm, fib_index); - - if (PREDICT_FALSE (NULL == server)) - { - error0 = DHCP_PROXY_ERROR_NO_SERVER; - goto drop_packet; - } - - if (ip0->src_address.as_u32 != server->dhcp_server.as_u32) - { - error0 = DHCP_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; - goto drop_packet; - } - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index; - - swif = vnet_get_sw_interface (vnm, sw_if_index); - original_sw_if_index = sw_if_index; - if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) - sw_if_index = swif->unnumbered_sw_if_index; - - ia0 = ip4_interface_first_address (&ip4_main, sw_if_index, 0); - if (ia0 == 0) - { - error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; - goto drop_packet; - } - - if (relay_addr.as_u32 != ia0->as_u32) - { - error0 = DHCP_PROXY_ERROR_BAD_YIADDR; - goto drop_packet; - } - - u0->checksum = 0; - u0->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcp_to_client); - sum0 = ip0->checksum; - old0 = ip0->dst_address.as_u32; - new0 = 0xFFFFFFFF; - ip0->dst_address.as_u32 = new0; - sum0 = ip_csum_update (sum0, old0, new0, - ip4_header_t /* structure */, - dst_address /* offset of changed member */); - ip0->checksum = ip_csum_fold (sum0); - - sum0 = ip0->checksum; - old0 = ip0->src_address.as_u32; - new0 = ia0->as_u32; - ip0->src_address.as_u32 = new0; - sum0 = ip_csum_update (sum0, old0, new0, - ip4_header_t /* structure */, - src_address /* offset of changed member */); - ip0->checksum = ip_csum_fold (sum0); - - vlib_buffer_advance (b0, -(sizeof(ethernet_header_t))); - si0 = vnet_get_sw_interface (vnm, original_sw_if_index); - if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) - vlib_buffer_advance (b0, -4 /* space for VLAN tag */); - - mac0 = 
vlib_buffer_get_current (b0); - - hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index); - ei0 = pool_elt_at_index (em->interfaces, hi0->hw_instance); - clib_memcpy (mac0->src_address, ei0->address, sizeof (ei0->address)); - memset (mac0->dst_address, 0xff, sizeof (mac0->dst_address)); - mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ? - clib_net_to_host_u16(0x8100) : clib_net_to_host_u16 (0x0800); - - if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) - { - u32 * vlan_tag = (u32 *)(mac0+1); - u32 tmp; - tmp = (si0->sub.id << 16) | 0x0800; - *vlan_tag = clib_host_to_net_u32 (tmp); - } - - /* $$$ This needs to be rewritten, for sure */ - f0 = vlib_get_frame_to_node (vm, hi0->output_node_index); - to_next0 = vlib_frame_vector_args (f0); - to_next0[0] = bi0; - f0->n_vectors = 1; - vlib_put_frame_to_node (vm, hi0->output_node_index, f0); - - do_trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->which = 1; /* to client */ - tr->trace_ip4_address.as_u32 = ia0 ? ia0->as_u32 : 0; - tr->error = error0; - tr->original_sw_if_index = original_sw_if_index; - tr->sw_if_index = sw_if_index; - } - } - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dhcp_proxy_to_client_node) = { - .function = dhcp_proxy_to_client_input, - .name = "dhcp-proxy-to-client", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - - .n_errors = DHCP_PROXY_N_ERROR, - .error_strings = dhcp_proxy_error_strings, - .format_buffer = format_dhcp_proxy_header_with_length, - .format_trace = format_dhcp_proxy_trace, -#if 0 - .unformat_buffer = unformat_dhcp_proxy_header, -#endif -}; - -clib_error_t * dhcp_proxy_init (vlib_main_t * vm) -{ - dhcp_proxy_main_t * dm = &dhcp_proxy_main; - vlib_node_t * error_drop_node; - dhcp_server_t * server; - - dm->vlib_main = vm; - dm->vnet_main = vnet_get_main(); - error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); - dm->error_drop_node_index = error_drop_node->index; - - dm->vss_index_by_rx_fib_index = NULL; - - udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_client, - dhcp_proxy_to_client_node.index, 1 /* is_ip4 */); - - udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_server, - dhcp_proxy_to_server_node.index, 1 /* is_ip4 */); - - /* Create the default server, don't mark it valid */ - pool_get (dm->dhcp_servers, server); - memset (server, 0, sizeof (*server)); - - return 0; -} - -VLIB_INIT_FUNCTION (dhcp_proxy_init); - -int dhcp_proxy_set_server (ip4_address_t *addr, - ip4_address_t *src_address, - u32 rx_fib_id, - u32 server_fib_id, - int is_del) -{ - dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - dhcp_server_t * server = 0; - u32 server_index = 0; - u32 rx_fib_index = 0; - - const fib_prefix_t all_1s = - { - .fp_len = 32, - .fp_addr.ip4.as_u32 = 0xffffffff, - .fp_proto = FIB_PROTOCOL_IP4, - }; - - if (addr->as_u32 == 0) - return VNET_API_ERROR_INVALID_DST_ADDRESS; - - if (src_address->as_u32 == 0) - return VNET_API_ERROR_INVALID_SRC_ADDRESS; - - rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, - rx_fib_id); - - if (is_del) - { - if (rx_fib_index >= vec_len(dpm->dhcp_server_index_by_rx_fib_index)) - return VNET_API_ERROR_NO_SUCH_ENTRY; - - server_index = dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index]; - - if (server_index == ~0) - return VNET_API_ERROR_NO_SUCH_ENTRY; - - /* Use the default 
server again. */ - dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = ~0; - server = pool_elt_at_index (dpm->dhcp_servers, server_index); - - fib_table_entry_special_remove(rx_fib_index, - &all_1s, - FIB_SOURCE_DHCP); - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP4); - fib_table_unlock (server->server_fib_index, - FIB_PROTOCOL_IP4); - - memset (server, 0, sizeof (*server)); - pool_put (dpm->dhcp_servers, server); - return 0; - } - else - { - vec_validate_init_empty(dpm->dhcp_server_index_by_rx_fib_index, - rx_fib_index, - ~0); - - pool_get (dpm->dhcp_servers, server); - - server->dhcp_server.as_u32 = addr->as_u32; - server->dhcp_src_address.as_u32 = src_address->as_u32; - - fib_table_entry_special_add(rx_fib_index, - &all_1s, - FIB_SOURCE_DHCP, - FIB_ENTRY_FLAG_LOCAL, - ADJ_INDEX_INVALID); - fib_table_lock (rx_fib_index, - FIB_PROTOCOL_IP4); - - server->server_fib_index = - fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, - server_fib_id); - - vec_validate_init_empty (dpm->dhcp_server_index_by_rx_fib_index, - rx_fib_index, - ~0); - dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = - server - dpm->dhcp_servers; - } - - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP4); - - return 0; -} - -static clib_error_t * -dhcp_proxy_set_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - ip4_address_t server_addr, src_addr; - u32 server_fib_id = 0, rx_fib_id = 0; - int is_del = 0; - int set_src = 0, set_server = 0; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "server %U", - unformat_ip4_address, &server_addr)) - set_server = 1; - else if (unformat (input, "server-fib-id %d", &server_fib_id)) - ; - else if (unformat (input, "rx-fib-id %d", &rx_fib_id)) - ; - else if (unformat(input, "src-address %U", - unformat_ip4_address, &src_addr)) - set_src = 1; - else if (unformat (input, "delete") || - unformat (input, "del")) - is_del = 1; - else - break; - } - - if (is_del || 
(set_server && set_src)) - { - int rv; - - rv = dhcp_proxy_set_server (&server_addr, &src_addr, rx_fib_id, - server_fib_id, is_del); - switch (rv) - { - case 0: - return 0; - - case VNET_API_ERROR_INVALID_DST_ADDRESS: - return clib_error_return (0, "Invalid server address"); - - case VNET_API_ERROR_INVALID_SRC_ADDRESS: - return clib_error_return (0, "Invalid src address"); - - case VNET_API_ERROR_NO_SUCH_INNER_FIB: - return clib_error_return (0, "No such rx fib id %d", rx_fib_id); - - case VNET_API_ERROR_NO_SUCH_FIB: - return clib_error_return (0, "No such server fib id %d", - server_fib_id); - - case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return - (0, "Fib id %d: no per-fib DHCP server configured", rx_fib_id); - - default: - return clib_error_return (0, "BUG: rv %d", rv); - } - } - else - return clib_error_return (0, "parse error`%U'", - format_unformat_error, input); -} - -VLIB_CLI_COMMAND (dhcp_proxy_set_command, static) = { - .path = "set dhcp proxy", - .short_help = "set dhcp proxy [del] server src-address [server-fib-id ] [rx-fib-id ]", - .function = dhcp_proxy_set_command_fn, -}; - -u8 * format_dhcp_proxy_server (u8 * s, va_list * args) -{ - dhcp_proxy_main_t * dm = va_arg (*args, dhcp_proxy_main_t *); - dhcp_server_t * server = va_arg (*args, dhcp_server_t *); - u32 rx_fib_index = va_arg (*args, u32); - ip4_fib_t * rx_fib, * server_fib; - u32 server_fib_id = ~0, rx_fib_id = ~0; - - if (dm == 0) - { - s = format (s, "%=16s%=16s%=14s%=14s", "Server", "Src Address", - "Server FIB", "RX FIB"); - return s; - } - - server_fib = ip4_fib_get(server->server_fib_index); - - if (server_fib) - server_fib_id = server_fib->table_id; - - rx_fib = ip4_fib_get(rx_fib_index); - - if (rx_fib) - rx_fib_id = rx_fib->table_id; - - s = format (s, "%=16U%=16U%=14u%=14u", - format_ip4_address, &server->dhcp_server, - format_ip4_address, &server->dhcp_src_address, - server_fib_id, rx_fib_id); - return s; -} - -static clib_error_t * -dhcp_proxy_show_command_fn (vlib_main_t 
* vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - dhcp_server_t * server; - u32 server_index, i; - - vlib_cli_output (vm, "%U", format_dhcp_proxy_server, 0 /* header line */, - 0, 0); - - vec_foreach_index (i, dpm->dhcp_server_index_by_rx_fib_index) - { - server_index = dpm->dhcp_server_index_by_rx_fib_index[i]; - if (~0 == server_index) - continue; - - server = pool_elt_at_index (dpm->dhcp_servers, server_index); - - vlib_cli_output (vm, "%U", format_dhcp_proxy_server, dpm, - server, i); - } - - return 0; -} - -VLIB_CLI_COMMAND (dhcp_proxy_show_command, static) = { - .path = "show dhcp proxy", - .short_help = "Display dhcp proxy server info", - .function = dhcp_proxy_show_command_fn, -}; - -void -dhcp_proxy_dump (void *opaque, - u32 context) -{ - dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - ip4_fib_t *s_fib, *r_fib; - dhcp_server_t * server; - u32 server_index, i; - vss_info *v; - - vec_foreach_index (i, dpm->dhcp_server_index_by_rx_fib_index) - { - server_index = dpm->dhcp_server_index_by_rx_fib_index[i]; - if (~0 == server_index) - continue; - - server = pool_elt_at_index (dpm->dhcp_servers, server_index); - v = dhcp_get_vss_info(dpm, i); - - ip46_address_t src_addr = { - .ip4 = server->dhcp_src_address, - }; - ip46_address_t server_addr = { - .ip4 = server->dhcp_server, - }; - - s_fib = ip4_fib_get(server->server_fib_index); - r_fib = ip4_fib_get(i); - - dhcp_send_details(opaque, - context, - &server_addr, - &src_addr, - s_fib->table_id, - r_fib->table_id, - (v ? v->vpn_id.fib_id : 0), - (v ? 
v->vpn_id.oui : 0)); - } -} - -int dhcp_proxy_set_option82_vss(u32 tbl_id, - u32 oui, - u32 fib_id, - int is_del) -{ - dhcp_proxy_main_t *dm = &dhcp_proxy_main; - vss_info *v = NULL; - u32 rx_fib_index; - int rc = 0; - - rx_fib_index = ip4_fib_table_find_or_create_and_lock(tbl_id); - v = dhcp_get_vss_info(dm, rx_fib_index); - - if (NULL != v) - { - if (is_del) - { - /* release the lock held on the table when the VSS - * info was created */ - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP4); - - pool_put (dm->vss, v); - dm->vss_index_by_rx_fib_index[rx_fib_index] = ~0; - } - else - { - /* this is a modify */ - v->vpn_id.fib_id = fib_id; - v->vpn_id.oui = oui; - } - } - else - { - if (is_del) - rc = VNET_API_ERROR_NO_SUCH_ENTRY; - else - { - /* create a new entry */ - vec_validate_init_empty(dm->vss_index_by_rx_fib_index, - rx_fib_index, ~0); - - /* hold a lock on the table whilst the VSS info exist */ - fib_table_lock (rx_fib_index, - FIB_PROTOCOL_IP4); - - pool_get (dm->vss, v); - v->vpn_id.fib_id = fib_id; - v->vpn_id.oui = oui; - dm->vss_index_by_rx_fib_index[rx_fib_index] = v - dm->vss; - } - } - - /* Release the lock taken during the create_or_lock at the start */ - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP4); - - return (rc); -} - -static clib_error_t * -dhcp_option_82_vss_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int is_del = 0, got_new_vpn_id=0; - u32 oui=0, fib_id=0, tbl_id=~0; - - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - - if (unformat(input, "delete") || unformat(input, "del")) - is_del = 1; - else if (unformat (input, "oui %d", &oui)) - got_new_vpn_id = 1; - else if (unformat (input, "vpn-id %d", &fib_id)) - got_new_vpn_id = 1; - else if (unformat (input, "table %d", &tbl_id)) - got_new_vpn_id = 1; - else - break; - } - if (tbl_id == ~0) - return clib_error_return (0, "no table ID specified."); - - if (is_del || got_new_vpn_id) - { - int rv; - rv = 
dhcp_proxy_set_option82_vss(tbl_id, oui, fib_id, is_del); - switch (rv) - { - case 0: - return 0; - - case VNET_API_ERROR_NO_SUCH_FIB: - return clib_error_return (0, "option 82 vss(oui:%d, vpn-id:%d) not found in table %d", - oui, fib_id, tbl_id); - - case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "option 82 vss for table %d not found in in pool.", - tbl_id); - default: - return clib_error_return (0, "BUG: rv %d", rv); - } - } - else - return clib_error_return (0, "parse error`%U'", - format_unformat_error, input); -} - -VLIB_CLI_COMMAND (dhcp_proxy_vss_command,static) = { - .path = "set dhcp option-82 vss", - .short_help = "set dhcp option-82 vss [del] table
oui vpn-id ", - .function = dhcp_option_82_vss_fn, -}; - - -static clib_error_t * -dhcp_vss_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) - -{ - dhcp_proxy_main_t * dm = &dhcp_proxy_main; - ip4_fib_t *fib; - u32 *fib_index; - vss_info *v; - - vlib_cli_output (vm, "%=9s%=11s%=12s","Table", "OUI", "VPN-ID"); - pool_foreach (fib_index, dm->vss_index_by_rx_fib_index, - ({ - fib = ip4_fib_get (*fib_index); - v = pool_elt_at_index (dm->vss, *fib_index); - - vlib_cli_output (vm, "%=6d%=6d%=12d", - fib->table_id, - v->vpn_id.oui, - v->vpn_id.fib_id); - })); - - return 0; -} - -VLIB_CLI_COMMAND (dhcp_proxy_vss_show_command, static) = { - .path = "show dhcp vss", - .short_help = "show dhcp VSS", - .function = dhcp_vss_show_command_fn, -}; - -static clib_error_t * -dhcp_option_82_address_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) - -{ - dhcp_proxy_main_t *dm = &dhcp_proxy_main; - vnet_main_t *vnm = vnet_get_main(); - u32 sw_if_index0=0, sw_if_index; - ip4_address_t *ia0; - vnet_sw_interface_t *swif; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - - if (unformat(input, "%U", - unformat_vnet_sw_interface, dm->vnet_main, &sw_if_index0)) - { - swif = vnet_get_sw_interface (vnm, sw_if_index0); - sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ? 
- swif->unnumbered_sw_if_index : sw_if_index0; - ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0); - if (ia0) - { - vlib_cli_output (vm, "%=20s%=20s", "interface", - "source IP address"); - - vlib_cli_output (vm, "%=20U%=20U", - format_vnet_sw_if_index_name, - dm->vnet_main, sw_if_index0, - format_ip4_address, ia0); - } - else - vlib_cli_output (vm, "%=34s %=20U", - "No IPv4 address configured on", - format_vnet_sw_if_index_name, - dm->vnet_main, sw_if_index); - } - else - break; - } - - return 0; -} - -VLIB_CLI_COMMAND (dhcp_proxy_address_show_command,static) = { - .path = "show dhcp option-82-address interface", - .short_help = "show dhcp option-82-address interface ", - .function = dhcp_option_82_address_show_command_fn, -}; diff --git a/src/vnet/dhcpv6/packet.h b/src/vnet/dhcpv6/packet.h deleted file mode 100644 index 8634b5d8..00000000 --- a/src/vnet/dhcpv6/packet.h +++ /dev/null @@ -1,183 +0,0 @@ -#ifndef included_vnet_dhcp_packet_h -#define included_vnet_dhcp_packet_h - -/* - * DHCP packet format - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include - -// #define DHCP_VRF_NAME_MAX_LEN L3VM_MAX_NAME_STR_LEN -// #define DHCPV6_MAX_VRF_NAME_LEN L3VM_MAX_NAME_STR_LEN -#define DHCP_MAX_RELAY_ADDR 16 -#define PROTO_UDP 17 -#define DHCPV6_CLIENT_PORT 546 -#define DHCPV6_SERVER_PORT 547 -#define HOP_COUNT_LIMIT 32 -#define DHCPV6_CISCO_ENT_NUM 9 - -/* - * DHCPv6 message types - */ -typedef enum dhcpv6_msg_type_{ - DHCPV6_MSG_SOLICIT = 1, - DHCPV6_MSG_ADVERTISE = 2, - DHCPV6_MSG_REQUEST = 3, - DHCPV6_MSG_CONFIRM = 4, - DHCPV6_MSG_RENEW = 5, - DHCPV6_MSG_REBIND = 6, - DHCPV6_MSG_REPLY = 7, - DHCPV6_MSG_RELEASE = 8, - DHCPV6_MSG_DECLINE = 9, - DHCPV6_MSG_RECONFIGURE = 10, - DHCPV6_MSG_INFORMATION_REQUEST = 11, - DHCPV6_MSG_RELAY_FORW = 12, - DHCPV6_MSG_RELAY_REPL = 13, -} dhcpv6_msg_type_t; - -/* - * DHCPv6 options types - */ -enum { - DHCPV6_OPTION_CLIENTID = 1, - DHCPV6_OPTION_SERVERID = 2, - DHCPV6_OPTION_IA_NA = 3, - DHCPV6_OPTION_IA_TA = 4, - DHCPV6_OPTION_IAADDR = 5, - DHCPV6_OPTION_ORO = 6, - DHCPV6_OPTION_PREFERENCE = 7, - DHCPV6_OPTION_ELAPSED_TIME = 8, - DHCPV6_OPTION_RELAY_MSG = 9, - DHCPV6_OPTION_AUTH = 11, - DHCPV6_OPTION_UNICAST = 12, - DHCPV6_OPTION_STATUS_CODE = 13, - DHCPV6_OPTION_RAPID_COMMIT = 14, - DHCPV6_OPTION_USER_CLASS = 15, - DHCPV6_OPTION_VENDOR_CLASS = 16, - DHCPV6_OPTION_VENDOR_OPTS = 17, - DHCPV6_OPTION_INTERFACE_ID = 18, // relay agent fills this - DHCPV6_OPTION_RECONF_MSG = 19, - DHCPV6_OPTION_RECONF_ACCEPT = 20, - DHCPV6_OPTION_REMOTEID = 37, // relay agent fills this - DHCPV6_OPTION_VSS = 68, // relay agent fills this - DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS = 79, - DHCPV6_OPTION_MAX -}; - -/* -* DHCPv6 status codes - */ -enum { - DHCPV6_STATUS_SUCCESS = 0, - DHCPV6_STATUS_UNSPEC_FAIL = 1, - DHCPV6_STATUS_NOADDRS_AVAIL = 2, - DHCPV6_STATUS_NO_BINDING = 3, - DHCPV6_STATUS_NOT_ONLINK = 4, - DHCPV6_STATUS_USE_MULTICAST = 5, -}; - -/* - * DHCPv6 DUID types - */ -enum { - DHCPV6_DUID_LLT = 1, /* DUID Based on Link-layer Address Plus Time */ - DHCPV6_DUID_EN = 2, /* DUID Based 
on Enterprise Number */ - DHCPV6_DUID_LL = 3, /* DUID Based on Link-layer Address */ -}; - -//Structure for DHCPv6 payload from client -typedef struct dhcpv6_hdr_ { - union { - u8 msg_type; //DHCP msg type - u32 xid; // transaction id - }u; - u8 data[0]; -} dhcpv6_header_t; - - - -typedef CLIB_PACKED (struct dhcpv6_relay_ctx_ { - dhcpv6_header_t *pkt; - u32 pkt_len; - u32 dhcpv6_len; //DHCPv6 payload load -// if_ordinal iod; - u32 if_index; - u32 ctx_id; - char ctx_name[32+1]; - u8 dhcp_msg_type; -}) dhcpv6_relay_ctx_t; - -//Structure for DHCPv6 RELAY-FORWARD and DHCPv6 RELAY-REPLY pkts -typedef CLIB_PACKED (struct dhcpv6_relay_hdr_ { - u8 msg_type; - u8 hop_count; - ip6_address_t link_addr; - ip6_address_t peer_addr; - u8 data[0]; -}) dhcpv6_relay_hdr_t; - -typedef enum dhcp_stats_action_type_ { - DHCP_STATS_ACTION_FORWARDED=1, - DHCP_STATS_ACTION_RECEIVED, - DHCP_STATS_ACTION_DROPPED -} dhcp_stats_action_type_t; -//Generic counters for a packet -typedef struct dhcp_stats_counters_ { - u64 rx_pkts; //counter for received pkts - u64 tx_pkts; //counter for forwarded pkts - u64 drops; //counter for dropped pkts -} dhcp_stats_counters_t; - - -typedef enum dhcpv6_stats_drop_reason_ { - DHCPV6_RELAY_PKT_DROP_RELAYDISABLE = 1, - DHCPV6_RELAY_PKT_DROP_MAX_HOPS, - DHCPV6_RELAY_PKT_DROP_VALIDATION_FAIL, - DHCPV6_RELAY_PKT_DROP_UNKNOWN_OP_INTF, - DHCPV6_RELAY_PKT_DROP_BAD_CONTEXT, - DHCPV6_RELAY_PKT_DROP_OPT_INSERT_FAIL, - DHCPV6_RELAY_PKT_DROP_REPLY_FROM_CLIENT, -} dhcpv6_stats_drop_reason_t; - -typedef CLIB_PACKED (struct { - u16 option; - u16 length; - u8 data[0]; -}) dhcpv6_option_t; - -typedef CLIB_PACKED (struct { - dhcpv6_option_t opt; - u32 int_idx; -}) dhcpv6_int_id_t; - -typedef CLIB_PACKED (struct { - dhcpv6_option_t opt; - u8 data[8]; // data[0]:type, data[1..7]: VPN ID -}) dhcpv6_vss_t; - -typedef CLIB_PACKED (struct { - dhcpv6_option_t opt; - u32 ent_num; - u32 rmt_id; -}) dhcpv6_rmt_id_t; - -typedef CLIB_PACKED (struct { - dhcpv6_option_t opt; - u16 link_type; 
- u8 data[6]; // data[0]:data[5]: MAC address -}) dhcpv6_client_mac_t; - - -#endif /* included_vnet_dhcp_packet_h */ diff --git a/src/vnet/dhcpv6/proxy.h b/src/vnet/dhcpv6/proxy.h deleted file mode 100644 index 77ced361..00000000 --- a/src/vnet/dhcpv6/proxy.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * proxy.h: dhcp proxy - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_dhcpv6_proxy_h -#define included_dhcpv6_proxy_h - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef enum { -#define dhcpv6_proxy_error(n,s) DHCPV6_PROXY_ERROR_##n, -#include -#undef dhcpv6_proxy_error - DHCPV6_PROXY_N_ERROR, -} dhcpv6_proxy_error_t; - -typedef struct { - u32 oui; - u32 fib_id; -} dhcpv6_vss_id; - -typedef union { - u8 as_u8[8]; - dhcpv6_vss_id vpn_id; -} dhcpv6_vss_info; - -typedef struct { - ip6_address_t dhcp6_server; - ip6_address_t dhcp6_src_address; - u32 server_fib6_index; -} dhcpv6_server_t; - -typedef struct { - /* Pool of DHCP servers */ - dhcpv6_server_t * dhcp6_servers; - - /* Pool of selected DHCP server. 
Zero is the default server */ - u32 * dhcp6_server_index_by_rx_fib_index; - - /* all DHCP servers address */ - ip6_address_t all_dhcpv6_server_address; - ip6_address_t all_dhcpv6_server_relay_agent_address; - - /* to drop pkts in server-to-client direction */ - u32 error_drop_node_index; - - dhcpv6_vss_info *vss; - - /* hash lookup specific vrf_id -> VSS vector index*/ - u32 *vss_index_by_rx_fib_index; - - /* convenience */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; -} dhcpv6_proxy_main_t; - -dhcpv6_proxy_main_t dhcpv6_proxy_main; - -int dhcpv6_proxy_set_vss(u32 tbl_id, - u32 oui, - u32 fib_id, - int is_del); - -int dhcpv6_proxy_set_server(ip6_address_t *addr, - ip6_address_t *src_address, - u32 rx_fib_id, - u32 server_fib_id, - int is_del); - -void dhcpv6_proxy_dump(void *opaque, - u32 context); - -#endif /* included_dhcpv6_proxy_h */ diff --git a/src/vnet/dhcpv6/proxy_error.def b/src/vnet/dhcpv6/proxy_error.def deleted file mode 100644 index 55fa7317..00000000 --- a/src/vnet/dhcpv6/proxy_error.def +++ /dev/null @@ -1,29 +0,0 @@ -/* - * dhcp_proxy_error.def: dhcp proxy errors - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -dhcpv6_proxy_error (NONE, "no error") -dhcpv6_proxy_error (NO_SERVER, "no dhcpv6 server configured") -dhcpv6_proxy_error (RELAY_TO_SERVER, "DHCPV6 packets relayed to the server") -dhcpv6_proxy_error (RELAY_TO_CLIENT, "DHCPV6 packets relayed to clients") -dhcpv6_proxy_error (NO_INTERFACE_ADDRESS, "DHCPV6 no interface address") -dhcpv6_proxy_error (WRONG_MESSAGE_TYPE, "DHCPV6 wrong message type.") -dhcpv6_proxy_error (NO_SRC_ADDRESS, "DHCPV6 no srouce IPv6 address configured.") -dhcpv6_proxy_error (NO_CIRCUIT_ID_OPTION, "DHCPv6 reply packets without circuit ID option") -dhcpv6_proxy_error (NO_RELAY_MESSAGE_OPTION, "DHCPv6 reply packets without relay message option") -dhcpv6_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCPv6 packets not from DHCPv6 server or server FIB.") -dhcpv6_proxy_error (PKT_TOO_BIG, "DHCPv6 packets which are too big.") -dhcpv6_proxy_error (WRONG_INTERFACE_ID_OPTION, "DHCPv6 reply to invalid interface.") diff --git a/src/vnet/dhcpv6/proxy_node.c b/src/vnet/dhcpv6/proxy_node.c deleted file mode 100644 index f40798e6..00000000 --- a/src/vnet/dhcpv6/proxy_node.c +++ /dev/null @@ -1,1280 +0,0 @@ -/* - * proxy_node.c: dhcpv6 proxy node processing - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -static char * dhcpv6_proxy_error_strings[] = { -#define dhcpv6_proxy_error(n,s) s, -#include "proxy_error.def" -#undef dhcpv6_proxy_error -}; - -#define foreach_dhcpv6_proxy_to_server_input_next \ - _ (DROP, "error-drop") \ - _ (LOOKUP, "ip6-lookup") \ - _ (SEND_TO_CLIENT, "dhcpv6-proxy-to-client") - - -typedef enum { -#define _(s,n) DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s, - foreach_dhcpv6_proxy_to_server_input_next -#undef _ - DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT, -} dhcpv6_proxy_to_server_input_next_t; - -typedef struct { - /* 0 => to server, 1 => to client */ - int which; - u8 packet_data[64]; - u32 error; - u32 sw_if_index; - u32 original_sw_if_index; -} dhcpv6_proxy_trace_t; - -vlib_node_registration_t dhcpv6_proxy_to_server_node; -vlib_node_registration_t dhcpv6_proxy_to_client_node; - - -u8 * format_dhcpv6_proxy_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - dhcpv6_proxy_trace_t * t = va_arg (*args, dhcpv6_proxy_trace_t *); - - if (t->which == 0) - s = format (s, "DHCPV6 proxy: sent to server %U", - format_ip6_address, &t->packet_data, sizeof (ip6_address_t)); - else - s = format (s, "DHCPV6 proxy: sent to client from %U", - format_ip6_address, &t->packet_data, sizeof (ip6_address_t)); - if (t->error != (u32)~0) - s = format (s, " error: %s\n", dhcpv6_proxy_error_strings[t->error]); - - s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n", - t->original_sw_if_index, t->sw_if_index); - - return s; -} - -u8 * format_dhcpv6_proxy_header_with_length (u8 * s, va_list * args) -{ - dhcpv6_header_t * h = va_arg (*args, dhcpv6_header_t *); - u32 max_header_bytes = va_arg (*args, u32); - u32 header_bytes; - - header_bytes = sizeof (h[0]); - if (max_header_bytes != 0 && header_bytes > max_header_bytes) - return format (s, "dhcpv6 header truncated"); - - s = 
format (s, "DHCPV6 Proxy"); - - return s; -} -/* get first interface address */ -static ip6_address_t * -ip6_interface_first_global_or_site_address (ip6_main_t * im, u32 sw_if_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip_interface_address_t * ia = 0; - ip6_address_t * result = 0; - - foreach_ip_interface_address (lm, ia, sw_if_index, - 1 /* honor unnumbered */, - ({ - ip6_address_t * a = ip_interface_address_get_address (lm, ia); - if ((a->as_u8[0] & 0xe0) == 0x20 || - (a->as_u8[0] & 0xfe) == 0xfc) { - result = a; - break; - } - })); - return result; -} - -static inline void copy_ip6_address (ip6_address_t *dst, ip6_address_t *src) -{ - - dst->as_u64[0] = src->as_u64[0]; - dst->as_u64[1] = src->as_u64[1]; -} - -static inline dhcpv6_vss_info * -dhcpv6_get_vss_info (dhcpv6_proxy_main_t *dm, - u32 rx_fib_index) -{ - dhcpv6_vss_info *v; - - if (vec_len(dm->vss_index_by_rx_fib_index) <= rx_fib_index || - dm->vss_index_by_rx_fib_index[rx_fib_index] == ~0) - { - v = NULL; - } - else - { - v = pool_elt_at_index (dm->vss, - dm->vss_index_by_rx_fib_index[rx_fib_index]); - } - - return (v); -} - -static inline dhcpv6_server_t * -dhcpv6_get_server (dhcpv6_proxy_main_t *dm, - u32 rx_fib_index) -{ - dhcpv6_server_t *s = NULL; - - if (vec_len(dm->dhcp6_server_index_by_rx_fib_index) > rx_fib_index && - dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] != ~0) - { - s = pool_elt_at_index (dm->dhcp6_servers, - dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index]); - } - - return (s); -} - -static uword -dhcpv6_proxy_to_server_input (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, * from, * to_next; - dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main; - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - u32 pkts_to_server=0, pkts_to_client=0, pkts_no_server=0; - u32 pkts_no_interface_address=0, pkts_no_exceeding_max_hop=0; - u32 pkts_no_src_address=0; - u32 
pkts_wrong_msg_type=0; - u32 pkts_too_big=0; - ip6_main_t * im = &ip6_main; - ip6_address_t * src; - int bogus_length; - dhcpv6_server_t * server; - u32 rx_fib_idx = 0, server_fib_idx = 0; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - vnet_main_t *vnm = vnet_get_main(); - u32 sw_if_index = 0; - u32 rx_sw_if_index = 0; - vnet_sw_interface_t *swif; - u32 bi0; - vlib_buffer_t * b0; - udp_header_t * u0, *u1; - dhcpv6_header_t * h0; // client msg hdr - ip6_header_t * ip0, *ip1; - ip6_address_t _ia0, *ia0=&_ia0; - u32 next0; - u32 error0 = (u32) ~0; - dhcpv6_option_t *fwd_opt; - dhcpv6_relay_hdr_t *r1; - u16 len; - dhcpv6_int_id_t *id1; - dhcpv6_vss_t *vss1; - dhcpv6_client_mac_t *cmac; // client mac - ethernet_header_t * e_h0; - u8 client_src_mac[6]; - vlib_buffer_free_list_t *fl; - dhcpv6_vss_info *vss; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - h0 = vlib_buffer_get_current (b0); - - /* - * udp_local hands us the DHCPV6 header. 
- */ - u0 = (void *)h0 -(sizeof(*u0)); - ip0 = (void *)u0 -(sizeof(*ip0)); - e_h0 = (void *)ip0 - ethernet_buffer_header_size(b0); - - clib_memcpy(client_src_mac, e_h0->src_address, 6); - - switch (h0->u.msg_type) { - case DHCPV6_MSG_SOLICIT: - case DHCPV6_MSG_REQUEST: - case DHCPV6_MSG_CONFIRM: - case DHCPV6_MSG_RENEW: - case DHCPV6_MSG_REBIND: - case DHCPV6_MSG_RELEASE: - case DHCPV6_MSG_DECLINE: - case DHCPV6_MSG_INFORMATION_REQUEST: - case DHCPV6_MSG_RELAY_FORW: - /* send to server */ - break; - case DHCPV6_MSG_RELAY_REPL: - /* send to client */ - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT; - error0 = 0; - pkts_to_client++; - goto do_enqueue; - default: - /* drop the packet */ - pkts_wrong_msg_type++; - error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - goto do_trace; - - } - - /* Send to DHCPV6 server via the configured FIB */ - rx_sw_if_index = sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - rx_fib_idx = im->fib_index_by_sw_if_index [rx_sw_if_index]; - server = dhcpv6_get_server(dpm, rx_fib_idx); - - if (PREDICT_FALSE (NULL == server)) - { - error0 = DHCPV6_PROXY_ERROR_NO_SERVER; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_server++; - goto do_trace; - } - - server_fib_idx = server->server_fib6_index; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = server_fib_idx; - - - /* relay-option header pointer */ - vlib_buffer_advance(b0, -(sizeof(*fwd_opt))); - fwd_opt = vlib_buffer_get_current(b0); - /* relay message header pointer */ - vlib_buffer_advance(b0, -(sizeof(*r1))); - r1 = vlib_buffer_get_current(b0); - - vlib_buffer_advance(b0, -(sizeof(*u1))); - u1 = vlib_buffer_get_current(b0); - - vlib_buffer_advance(b0, -(sizeof(*ip1))); - ip1 = vlib_buffer_get_current(b0); - - /* fill in all that rubbish... 
*/ - len = clib_net_to_host_u16(u0->length) - sizeof(udp_header_t); - copy_ip6_address(&r1->peer_addr, &ip0->src_address); - - r1->msg_type = DHCPV6_MSG_RELAY_FORW; - fwd_opt->length = clib_host_to_net_u16(len); - fwd_opt->option = clib_host_to_net_u16(DHCPV6_OPTION_RELAY_MSG); - - r1->hop_count++; - r1->hop_count = (h0->u.msg_type != DHCPV6_MSG_RELAY_FORW) ? 0 : r1->hop_count; - - if (PREDICT_FALSE(r1->hop_count >= HOP_COUNT_LIMIT)) - { - error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_exceeding_max_hop++; - goto do_trace; - } - - - /* If relay-fwd and src address is site or global unicast address */ - if (h0->u.msg_type == DHCPV6_MSG_RELAY_FORW && - ((ip0->src_address.as_u8[0] & 0xe0) == 0x20 || - (ip0->src_address.as_u8[0] & 0xfe) == 0xfc)) - { - /* Set link address to zero */ - r1->link_addr.as_u64[0] = 0; - r1->link_addr.as_u64[1] = 0; - goto link_address_set; - } - - /* if receiving interface is unnumbered, use receiving interface - * IP address as link address, otherwise use the loopback interface - * IP address as link address. 
- */ - - swif = vnet_get_sw_interface (vnm, rx_sw_if_index); - if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) - sw_if_index = swif->unnumbered_sw_if_index; - - ia0 = ip6_interface_first_global_or_site_address(&ip6_main, sw_if_index); - if (ia0 == 0) - { - error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_interface_address++; - goto do_trace; - } - - copy_ip6_address(&r1->link_addr, ia0); - - link_address_set: - fl = vlib_buffer_get_free_list (vm, b0->free_list_index); - - if ((b0->current_length+sizeof(*id1)+sizeof(*vss1)+sizeof(*cmac)) - > fl->n_data_bytes) - { - error0 = DHCPV6_PROXY_ERROR_PKT_TOO_BIG; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_too_big++; - goto do_trace; - } - - id1 = (dhcpv6_int_id_t *) (((uword) ip1) + b0->current_length); - b0->current_length += (sizeof (*id1)); - - id1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_INTERFACE_ID); - id1->opt.length = clib_host_to_net_u16(sizeof(rx_sw_if_index)); - id1->int_idx = clib_host_to_net_u32(rx_sw_if_index); - - u1->length =0; - if (h0->u.msg_type != DHCPV6_MSG_RELAY_FORW) - { - cmac = (dhcpv6_client_mac_t *) (((uword) ip1) + b0->current_length); - b0->current_length += (sizeof (*cmac)); - cmac->opt.length =clib_host_to_net_u16(sizeof(*cmac) - - sizeof(cmac->opt)); - cmac->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS); - cmac->link_type = clib_host_to_net_u16(1); // ethernet - clib_memcpy(cmac->data, client_src_mac, 6); - u1->length += sizeof(*cmac); - } - - //TODO: Revisit if hash makes sense here - vss = dhcpv6_get_vss_info(dpm, rx_fib_idx); - - if (NULL != vss) { - vss1 = (dhcpv6_vss_t *) (((uword) ip1) + b0->current_length); - b0->current_length += (sizeof (*vss1)); - vss1->opt.length =clib_host_to_net_u16(sizeof(*vss1) - - sizeof(vss1->opt)); - vss1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_VSS); - vss1->data[0] = 1; // type - vss1->data[1] = vss->vpn_id.oui >>16 & 0xff; - 
vss1->data[2] = vss->vpn_id.oui >>8 & 0xff; - vss1->data[3] = vss->vpn_id.oui & 0xff; - vss1->data[4] = vss->vpn_id.fib_id >> 24 & 0xff; - vss1->data[5] = vss->vpn_id.fib_id >> 16 & 0xff; - vss1->data[6] = vss->vpn_id.fib_id >> 8 & 0xff; - vss1->data[7] = vss->vpn_id.fib_id & 0xff; - u1->length += sizeof(*vss1); - } - - pkts_to_server++; - u1->checksum = 0; - u1->src_port = clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_client); - u1->dst_port = clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_server); - - u1->length = - clib_host_to_net_u16( clib_net_to_host_u16(fwd_opt->length) + - sizeof(*r1) + sizeof(*fwd_opt) + - sizeof(*u1) + sizeof(*id1) + u1->length); - - memset(ip1, 0, sizeof(*ip1)); - ip1->ip_version_traffic_class_and_flow_label = 0x60; - ip1->payload_length = u1->length; - ip1->protocol = PROTO_UDP; - ip1->hop_limit = HOP_COUNT_LIMIT; - src = (server->dhcp6_server.as_u64[0] || server->dhcp6_server.as_u64[1]) ? - &server->dhcp6_server : &dpm->all_dhcpv6_server_address; - copy_ip6_address(&ip1->dst_address, src); - - - ia0 = ip6_interface_first_global_or_site_address - (&ip6_main, vnet_buffer(b0)->sw_if_index[VLIB_RX]); - - src = (server->dhcp6_src_address.as_u64[0] || server->dhcp6_src_address.as_u64[1]) ? 
- &server->dhcp6_src_address : ia0; - if (ia0 == 0) - { - error0 = DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_src_address++; - goto do_trace; - } - - copy_ip6_address (&ip1->src_address, src); - - - u1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1, - &bogus_length); - ASSERT(bogus_length == 0); - - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; - - do_trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->which = 0; /* to server */ - tr->error = error0; - tr->original_sw_if_index = rx_sw_if_index; - tr->sw_if_index = sw_if_index; - if (DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP == next0) - copy_ip6_address((ip6_address_t *)&tr->packet_data[0], &server->dhcp6_server); - } - - do_enqueue: - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_RELAY_TO_CLIENT, - pkts_to_client); - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_RELAY_TO_SERVER, - pkts_to_server); - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS, - pkts_no_interface_address); - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE, - pkts_wrong_msg_type); - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS, - pkts_no_src_address); - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_PKT_TOO_BIG, - pkts_too_big); - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dhcpv6_proxy_to_server_node) = { - .function = dhcpv6_proxy_to_server_input, - .name = "dhcpv6-proxy-to-server", - /* Takes a 
vector of packets. */ - .vector_size = sizeof (u32), - - .n_errors = DHCPV6_PROXY_N_ERROR, - .error_strings = dhcpv6_proxy_error_strings, - - .n_next_nodes = DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s] = n, - foreach_dhcpv6_proxy_to_server_input_next -#undef _ - }, - - .format_buffer = format_dhcpv6_proxy_header_with_length, - .format_trace = format_dhcpv6_proxy_trace, -#if 0 - .unformat_buffer = unformat_dhcpv6_proxy_header, -#endif -}; - -static uword -dhcpv6_proxy_to_client_input (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - - u32 n_left_from, * from; - ethernet_main_t *em = ethernet_get_main (vm); - dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; - dhcpv6_server_t * server; - vnet_main_t * vnm = vnet_get_main(); - int bogus_length; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - while (n_left_from > 0) - { - u32 bi0; - vlib_buffer_t * b0; - udp_header_t * u0, *u1=0; - dhcpv6_relay_hdr_t * h0; - ip6_header_t * ip1 = 0, *ip0; - ip6_address_t _ia0, * ia0 = &_ia0; - ip6_address_t client_address; - ethernet_interface_t *ei0; - ethernet_header_t *mac0; - vnet_hw_interface_t *hi0; - vlib_frame_t *f0; - u32 * to_next0; - u32 sw_if_index = ~0; - u32 original_sw_if_index = ~0; - vnet_sw_interface_t *si0; - u32 error0 = (u32)~0; - vnet_sw_interface_t *swif; - dhcpv6_option_t *r0 = 0, *o; - u16 len = 0; - u8 interface_opt_flag = 0; - u8 relay_msg_opt_flag = 0; - ip6_main_t * im = &ip6_main; - u32 server_fib_idx, client_fib_idx; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - - b0 = vlib_get_buffer (vm, bi0); - h0 = vlib_buffer_get_current (b0); - - if (DHCPV6_MSG_RELAY_REPL != h0->msg_type) - { - error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE; - - drop_packet: - vlib_node_increment_counter (vm, dhcpv6_proxy_to_client_node.index, - error0, 1); - - f0 = vlib_get_frame_to_node (vm, dm->error_drop_node_index); - to_next0 = 
vlib_frame_vector_args (f0); - to_next0[0] = bi0; - f0->n_vectors = 1; - vlib_put_frame_to_node (vm, dm->error_drop_node_index, f0); - goto do_trace; - } - /* hop count seems not need to be checked */ - if (HOP_COUNT_LIMIT < h0->hop_count) - { - error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS; - goto drop_packet; - } - u0 = (void *)h0 -(sizeof(*u0)); - ip0 = (void *)u0 -(sizeof(*ip0)); - - vlib_buffer_advance (b0, sizeof(*h0)); - o = vlib_buffer_get_current (b0); - - /* Parse through TLVs looking for option 18 (DHCPV6_OPTION_INTERFACE_ID) - _and_ option 9 (DHCPV6_OPTION_RELAY_MSG) option which must be there. - Currently assuming no other options need to be processed - The interface-ID is the FIB number we need - to track down the client-facing interface */ - - while ((u8 *) o < (b0->data + b0->current_data + b0->current_length)) - { - if (DHCPV6_OPTION_INTERFACE_ID == clib_net_to_host_u16(o->option)) - { - interface_opt_flag = 1; - if (clib_net_to_host_u16(o->length) == sizeof(sw_if_index)) - sw_if_index = clib_net_to_host_u32(((dhcpv6_int_id_t*)o)->int_idx); - if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index)) - { - error0 = DHCPV6_PROXY_ERROR_WRONG_INTERFACE_ID_OPTION; - goto drop_packet; - } - } - if (DHCPV6_OPTION_RELAY_MSG == clib_net_to_host_u16(o->option)) - { - relay_msg_opt_flag = 1; - r0 = vlib_buffer_get_current (b0); - } - if ((relay_msg_opt_flag == 1) && (interface_opt_flag == 1)) - break; - vlib_buffer_advance (b0, sizeof(*o) + clib_net_to_host_u16(o->length)); - o = (dhcpv6_option_t *) (((uword) o) + clib_net_to_host_u16(o->length) + sizeof(*o)); - } - - if ((relay_msg_opt_flag == 0) || (r0 == 0)) - { - error0 = DHCPV6_PROXY_ERROR_NO_RELAY_MESSAGE_OPTION; - goto drop_packet; - } - - if ((u32)~0 == sw_if_index) - { - error0 = DHCPV6_PROXY_ERROR_NO_CIRCUIT_ID_OPTION; - goto drop_packet; - } - - //Advance buffer to start of encapsulated DHCPv6 message - vlib_buffer_advance (b0, sizeof(*r0)); - - client_fib_idx = 
im->fib_index_by_sw_if_index[sw_if_index]; - server = dhcpv6_get_server(dm, client_fib_idx); - - if (NULL == server) - { - error0 = DHCPV6_PROXY_ERROR_NO_SERVER; - goto drop_packet; - } - - server_fib_idx = im->fib_index_by_sw_if_index - [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; - - if (server_fib_idx != server->server_fib6_index || - ip0->src_address.as_u64[0] != server->dhcp6_server.as_u64[0] || - ip0->src_address.as_u64[1] != server->dhcp6_server.as_u64[1]) - { - //drop packet if not from server with configured address or FIB - error0 = DHCPV6_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; - goto drop_packet; - } - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = original_sw_if_index - = sw_if_index; - - swif = vnet_get_sw_interface (vnm, original_sw_if_index); - if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) - sw_if_index = swif->unnumbered_sw_if_index; - - - /* - * udp_local hands us the DHCPV6 header, need udp hdr, - * ip hdr to relay to client - */ - vlib_buffer_advance (b0, -(sizeof(*u1))); - u1 = vlib_buffer_get_current (b0); - - vlib_buffer_advance (b0, -(sizeof(*ip1))); - ip1 = vlib_buffer_get_current (b0); - - copy_ip6_address(&client_address, &h0->peer_addr); - - ia0 = ip6_interface_first_address (&ip6_main, sw_if_index); - if (ia0 == 0) - { - error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS; - goto drop_packet; - } - - len = clib_net_to_host_u16(r0->length); - memset(ip1, 0, sizeof(*ip1)); - copy_ip6_address(&ip1->dst_address, &client_address); - u1->checksum = 0; - u1->src_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcpv6_to_server); - u1->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcpv6_to_client); - u1->length = clib_host_to_net_u16 (len + sizeof(udp_header_t)); - - ip1->ip_version_traffic_class_and_flow_label = - ip0->ip_version_traffic_class_and_flow_label & - 0x00000fff; - ip1->payload_length = u1->length; - ip1->protocol = PROTO_UDP; - ip1->hop_limit = HOP_COUNT_LIMIT; - copy_ip6_address(&ip1->src_address, ia0); - - u1->checksum = 
ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1, - &bogus_length); - ASSERT(bogus_length == 0); - - vlib_buffer_advance (b0, -(sizeof(ethernet_header_t))); - si0 = vnet_get_sw_interface (vnm, original_sw_if_index); - if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) - vlib_buffer_advance (b0, -4 /* space for VLAN tag */); - - mac0 = vlib_buffer_get_current (b0); - - hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index); - ei0 = pool_elt_at_index (em->interfaces, hi0->hw_instance); - clib_memcpy (mac0->src_address, ei0->address, sizeof (ei0->address)); - memset (&mac0->dst_address, 0xff, sizeof (mac0->dst_address)); - mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ? - clib_net_to_host_u16(0x8100) : clib_net_to_host_u16 (0x86dd); - - if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) - { - u32 * vlan_tag = (u32 *)(mac0+1); - u32 tmp; - tmp = (si0->sub.id << 16) | 0x0800; - *vlan_tag = clib_host_to_net_u32 (tmp); - } - - /* $$$ consider adding a dynamic next to the graph node, for performance */ - f0 = vlib_get_frame_to_node (vm, hi0->output_node_index); - to_next0 = vlib_frame_vector_args (f0); - to_next0[0] = bi0; - f0->n_vectors = 1; - vlib_put_frame_to_node (vm, hi0->output_node_index, f0); - - do_trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->which = 1; /* to client */ - if (ia0) - copy_ip6_address((ip6_address_t*)tr->packet_data, ia0); - tr->error = error0; - tr->original_sw_if_index = original_sw_if_index; - tr->sw_if_index = sw_if_index; - } - } - return from_frame->n_vectors; - -} - -VLIB_REGISTER_NODE (dhcpv6_proxy_to_client_node) = { - .function = dhcpv6_proxy_to_client_input, - .name = "dhcpv6-proxy-to-client", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - - .n_errors = DHCPV6_PROXY_N_ERROR, - .error_strings = dhcpv6_proxy_error_strings, - .format_buffer = format_dhcpv6_proxy_header_with_length, - .format_trace = format_dhcpv6_proxy_trace, -#if 0 - .unformat_buffer = unformat_dhcpv6_proxy_header, -#endif -}; - -clib_error_t * dhcpv6_proxy_init (vlib_main_t * vm) -{ - dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; - vlib_node_t * error_drop_node; - dhcpv6_server_t * server; - - dm->vlib_main = vm; - dm->vnet_main = vnet_get_main(); - error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); - dm->error_drop_node_index = error_drop_node->index; - - dm->vss_index_by_rx_fib_index = NULL; - - /* RFC says this is the dhcpv6 server address */ - dm->all_dhcpv6_server_address.as_u64[0] = clib_host_to_net_u64 (0xFF05000000000000); - dm->all_dhcpv6_server_address.as_u64[1] = clib_host_to_net_u64 (0x00010003); - - /* RFC says this is the server and agent address */ - dm->all_dhcpv6_server_relay_agent_address.as_u64[0] = clib_host_to_net_u64 (0xFF02000000000000); - dm->all_dhcpv6_server_relay_agent_address.as_u64[1] = clib_host_to_net_u64 (0x00010002); - - udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_client, - dhcpv6_proxy_to_client_node.index, 0 /* is_ip6 */); - - udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_server, - dhcpv6_proxy_to_server_node.index, 0 /* is_ip6 */); - - /* Create the default server, don't mark it valid */ - pool_get (dm->dhcp6_servers, server); - memset (server, 0, sizeof (*server)); - - return 0; -} - -VLIB_INIT_FUNCTION (dhcpv6_proxy_init); - -int dhcpv6_proxy_set_server (ip6_address_t *addr, - ip6_address_t *src_address, - u32 rx_fib_id, - u32 server_fib_id, - int is_del) -{ - dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; - dhcpv6_server_t * server = 0; - u32 rx_fib_index = 0; - int rc = 0; - - rx_fib_index = ip6_mfib_table_find_or_create_and_lock(rx_fib_id); - - const mfib_prefix_t all_dhcp_servers = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, 
- .fp_grp_addr = { - .ip6 = dm->all_dhcpv6_server_relay_agent_address, - } - }; - - if (is_del) - { - server = dhcpv6_get_server(dm, rx_fib_index); - - if (NULL == server) - { - rc = VNET_API_ERROR_NO_SUCH_ENTRY; - goto out; - } - - /* - * release the locks held on the server fib and rx mfib - */ - mfib_table_entry_delete(rx_fib_index, - &all_dhcp_servers, - MFIB_SOURCE_DHCP); - mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); - fib_table_unlock(server->server_fib6_index, FIB_PROTOCOL_IP6); - - dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] = ~0; - - memset (server, 0, sizeof (*server)); - pool_put (dm->dhcp6_servers, server); - } - else - { - if (addr->as_u64[0] == 0 && - addr->as_u64[1] == 0 ) - { - rc = VNET_API_ERROR_INVALID_DST_ADDRESS; - goto out; - } - if (src_address->as_u64[0] == 0 && - src_address->as_u64[1] == 0) - { - rc = VNET_API_ERROR_INVALID_SRC_ADDRESS; - goto out; - } - - server = dhcpv6_get_server(dm, rx_fib_index); - - if (NULL != server) - { - /* modify of an existing entry */ - ip6_fib_t *fib; - - fib = ip6_fib_get(server->server_fib6_index); - - if (fib->table_id != server_fib_id) - { - /* swap tables */ - fib_table_unlock(server->server_fib6_index, FIB_PROTOCOL_IP6); - server->server_fib6_index = - ip6_fib_table_find_or_create_and_lock(server_fib_id); - } - } - else - { - /* Allocate a new server */ - pool_get (dm->dhcp6_servers, server); - - vec_validate_init_empty (dm->dhcp6_server_index_by_rx_fib_index, - rx_fib_index, ~0); - dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] = - server - dm->dhcp6_servers; - - server->server_fib6_index = - ip6_fib_table_find_or_create_and_lock(server_fib_id); - mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6); - - const mfib_prefix_t all_dhcp_servers = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_grp_addr = { - .ip6 = dm->all_dhcpv6_server_relay_agent_address, - } - }; - const fib_route_path_t path_for_us = { - .frp_proto = FIB_PROTOCOL_IP6, - .frp_addr = zero_addr, - 
.frp_sw_if_index = 0xffffffff, - .frp_fib_index = ~0, - .frp_weight = 0, - .frp_flags = FIB_ROUTE_PATH_LOCAL, - }; - mfib_table_entry_path_update(rx_fib_index, - &all_dhcp_servers, - MFIB_SOURCE_DHCP, - &path_for_us, - MFIB_ITF_FLAG_FORWARD); - /* - * Each interface that is enabled in this table, needs to be added - * as an accepting interface, but this is not easily doable in VPP. - * So we cheat. Add a flag to the entry that indicates accept form - * any interface. - * We will still only accept on v6 enabled interfaces, since the - * input feature ensures this. - */ - mfib_table_entry_update(rx_fib_index, - &all_dhcp_servers, - MFIB_SOURCE_DHCP, - MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); - } - copy_ip6_address(&server->dhcp6_server, addr); - copy_ip6_address(&server->dhcp6_src_address, src_address); - } - -out: - mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); - - return (rc); -} - -static clib_error_t * -dhcpv6_proxy_set_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - ip6_address_t addr, src_addr; - int set_server = 0, set_src_address = 0; - u32 rx_fib_id = 0, server_fib_id = 0; - int is_del = 0; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "server %U", - unformat_ip6_address, &addr)) - set_server = 1; - else if (unformat(input, "src-address %U", - unformat_ip6_address, &src_addr)) - set_src_address =1; - else if (unformat (input, "server-fib-id %d", &server_fib_id)) - ; - else if (unformat (input, "rx-fib-id %d", &rx_fib_id)) - ; - else if (unformat (input, "delete") || - unformat (input, "del")) - is_del = 1; - else - break; - } - - if (is_del || (set_server && set_src_address)) - { - int rv; - - rv = dhcpv6_proxy_set_server (&addr, &src_addr, rx_fib_id, - server_fib_id, is_del); - - //TODO: Complete the errors - switch (rv) - { - case 0: - return 0; - - case -1: - return clib_error_return (0, "FIB id %d does not exist", server_fib_id); - - default: - return clib_error_return 
(0, "BUG: rv %d", rv); - } - } - else - return clib_error_return (0, "parse error`%U'", - format_unformat_error, input); -} - -VLIB_CLI_COMMAND (dhcpv6_proxy_set_command, static) = { - .path = "set dhcpv6 proxy", - .short_help = "set dhcpv6 proxy [del] server src-address " - "[server-fib-id ] [rx-fib-id ] ", - .function = dhcpv6_proxy_set_command_fn, -}; - -u8 * format_dhcpv6_proxy_server (u8 * s, va_list * args) -{ - dhcpv6_proxy_main_t * dm = va_arg (*args, dhcpv6_proxy_main_t *); - dhcpv6_server_t * server = va_arg (*args, dhcpv6_server_t *); - u32 rx_fib_index = va_arg (*args, u32); - ip6_fib_t * rx_fib, * server_fib; - u32 server_fib_id = (u32)~0, rx_fib_id = ~0; - - if (dm == 0) - { - s = format (s, "%=40s%=40s%=14s%=14s", "Server Address", "Source Address", - "Server FIB", "RX FIB"); - return s; - } - - server_fib = ip6_fib_get(server->server_fib6_index); - if (server_fib) - server_fib_id= server_fib->table_id; - - rx_fib= ip6_fib_get(rx_fib_index); - - if (rx_fib) - rx_fib_id = rx_fib->table_id; - - s = format (s, "%=40U%=40U%=14u%=14u", - format_ip6_address, &server->dhcp6_server, - format_ip6_address, &server->dhcp6_src_address, - server_fib_id, rx_fib_id); - return s; -} - -static clib_error_t * -dhcpv6_proxy_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main; - int i; - u32 server_index; - dhcpv6_server_t * server; - - vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, 0 /* header line */, - 0, 0); - vec_foreach_index (i, dpm->dhcp6_server_index_by_rx_fib_index) - { - server_index = dpm->dhcp6_server_index_by_rx_fib_index[i]; - if (~0 == server_index) - continue; - - server = pool_elt_at_index (dpm->dhcp6_servers, server_index); - - vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, dpm, - server, i); - } - - return 0; -} - -VLIB_CLI_COMMAND (dhcpv6_proxy_show_command, static) = { - .path = "show dhcpv6 proxy", - .short_help = "Display dhcpv6 proxy info", 
- .function = dhcpv6_proxy_show_command_fn, -}; - -void -dhcpv6_proxy_dump (void *opaque, - u32 context) -{ - dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main; - ip6_fib_t *s_fib, *r_fib; - dhcpv6_server_t * server; - u32 server_index, i; - dhcpv6_vss_info *v; - - vec_foreach_index (i, dpm->dhcp6_server_index_by_rx_fib_index) - { - server_index = dpm->dhcp6_server_index_by_rx_fib_index[i]; - if (~0 == server_index) - continue; - - server = pool_elt_at_index (dpm->dhcp6_servers, server_index); - v = dhcpv6_get_vss_info(dpm, i); - - ip46_address_t src_addr = { - .ip6 = server->dhcp6_src_address, - }; - ip46_address_t server_addr = { - .ip6 = server->dhcp6_server, - }; - - s_fib = ip6_fib_get(server->server_fib6_index); - r_fib = ip6_fib_get(i); - - dhcp_send_details(opaque, - context, - &server_addr, - &src_addr, - s_fib->table_id, - r_fib->table_id, - (v ? v->vpn_id.fib_id : 0), - (v ? v->vpn_id.oui : 0)); - } -} - -int dhcpv6_proxy_set_vss(u32 tbl_id, - u32 oui, - u32 fib_id, - int is_del) -{ - dhcpv6_proxy_main_t *dm = &dhcpv6_proxy_main; - dhcpv6_vss_info *v = NULL; - u32 rx_fib_index; - int rc = 0; - - rx_fib_index = ip6_fib_table_find_or_create_and_lock(tbl_id); - v = dhcpv6_get_vss_info(dm, rx_fib_index); - - if (NULL != v) - { - if (is_del) - { - /* release the lock held on the table when the VSS - * info was created */ - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP6); - - pool_put (dm->vss, v); - dm->vss_index_by_rx_fib_index[rx_fib_index] = ~0; - } - else - { - /* this is a modify */ - v->vpn_id.fib_id = fib_id; - v->vpn_id.oui = oui; - } - } - else - { - if (is_del) - rc = VNET_API_ERROR_NO_SUCH_ENTRY; - else - { - /* create a new entry */ - vec_validate_init_empty(dm->vss_index_by_rx_fib_index, - rx_fib_index, ~0); - - /* hold a lock on the table whilst the VSS info exist */ - fib_table_lock (rx_fib_index, - FIB_PROTOCOL_IP6); - - pool_get (dm->vss, v); - v->vpn_id.fib_id = fib_id; - v->vpn_id.oui = oui; - dm->vss_index_by_rx_fib_index[rx_fib_index] = 
v - dm->vss; - } - } - - /* Release the lock taken during the create_or_lock at the start */ - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP6); - - return (rc); -} - - -static clib_error_t * -dhcpv6_vss_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int is_del = 0, got_new_vss=0; - u32 oui=0; - u32 fib_id=0, tbl_id=~0; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "oui %d", &oui)) - got_new_vss = 1; - else if (unformat (input, "vpn-id %d", &fib_id)) - got_new_vss = 1; - else if (unformat (input, "table %d", &tbl_id)) - got_new_vss = 1; - else if (unformat(input, "delete") || unformat(input, "del")) - is_del = 1; - else - break; - } - - if (tbl_id ==~0) - return clib_error_return (0, "no table ID specified."); - - if (is_del || got_new_vss) - { - int rv; - - rv = dhcpv6_proxy_set_vss(tbl_id, oui, fib_id, is_del); - switch (rv) - { - case 0: - return 0; - - case VNET_API_ERROR_NO_SUCH_FIB: - return clib_error_return (0, "vss info (oui:%d, vpn-id:%d) not found in table %d.", - oui, fib_id, tbl_id); - - case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "vss for table %d not found in pool.", - tbl_id); - - default: - return clib_error_return (0, "BUG: rv %d", rv); - } - } - else - return clib_error_return (0, "parse error`%U'", - format_unformat_error, input); - -} - -VLIB_CLI_COMMAND (dhcpv6_proxy_vss_command, static) = { - .path = "set dhcpv6 vss", - .short_help = "set dhcpv6 vss table oui vpn-idx ", - .function = dhcpv6_vss_command_fn, -}; - -static clib_error_t * -dhcpv6_vss_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) - -{ - dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; - dhcpv6_vss_info *v; - ip6_fib_t *fib; - u32 *fib_index; - - vlib_cli_output (vm, "%=6s%=6s%=12s","Table", "OUI", "VPN ID"); - pool_foreach (fib_index, dm->vss_index_by_rx_fib_index, - ({ - fib = ip6_fib_get (*fib_index); - v = 
pool_elt_at_index (dm->vss, *fib_index); - - vlib_cli_output (vm, "%=6d%=6d%=12d", - fib->table_id, - v->vpn_id.oui, - v->vpn_id.fib_id); - })); - - return 0; -} - -VLIB_CLI_COMMAND (dhcpv6_proxy_vss_show_command, static) = { - .path = "show dhcpv6 vss", - .short_help = "show dhcpv6 VSS", - .function = dhcpv6_vss_show_command_fn, -}; - -static clib_error_t * -dhcpv6_link_address_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) - -{ - dhcpv6_proxy_main_t *dm = &dhcpv6_proxy_main; - vnet_main_t *vnm = vnet_get_main(); - u32 sw_if_index0=0, sw_if_index; - ip6_address_t *ia0; - vnet_sw_interface_t *swif; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - - if (unformat(input, "%U", - unformat_vnet_sw_interface, dm->vnet_main, &sw_if_index0)) - { - swif = vnet_get_sw_interface (vnm, sw_if_index0); - sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ? - swif->unnumbered_sw_if_index : sw_if_index0; - ia0 = ip6_interface_first_address(&ip6_main, sw_if_index); - if (ia0) - { - vlib_cli_output (vm, "%=20s%=48s", "interface", "link-address"); - - vlib_cli_output (vm, "%=20U%=48U", - format_vnet_sw_if_index_name, dm->vnet_main, sw_if_index0, - format_ip6_address, ia0); - } else - vlib_cli_output (vm, "%=34s%=20U", "No IPv6 address configured on", - format_vnet_sw_if_index_name, dm->vnet_main, sw_if_index); - } else - break; - } - - return 0; -} - -VLIB_CLI_COMMAND (dhcpv6_proxy_address_show_command, static) = { - .path = "show dhcpv6 link-address interface", - .short_help = "show dhcpv6 link-address interface ", - .function = dhcpv6_link_address_show_command_fn, -}; diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 70b4e4c9..4cc6aa73 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -21,8 +21,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/test/test_dhcp.py b/test/test_dhcp.py index fbfb8a0c..6299975b 
100644 --- a/test/test_dhcp.py +++ b/test/test_dhcp.py @@ -293,7 +293,7 @@ class TestDHCP(VppTestCase): # # Inject a response from the server # dropped, because there is no IP addrees on the - # clinet interfce to fill in the option. + # client interfce to fill in the option. # p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) / -- cgit 1.2.3-korg From 68b0fb0c620c7451ef1a6380c43c39de6614db51 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 28 Feb 2017 15:15:56 -0500 Subject: VPP-598: tcp stack initial commit Change-Id: I49e5ce0aae6e4ff634024387ceaf7dbc432a0351 Signed-off-by: Dave Barach Signed-off-by: Florin Coras --- src/Makefile.am | 1 + src/plugins/ioam/export-common/ioam_export.h | 2 +- src/plugins/ioam/ipfixcollector/ipfixcollector.c | 2 +- src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c | 2 +- src/plugins/snat/in2out.c | 26 +- src/plugins/snat/out2in.c | 24 +- src/scripts/vnet/tcp | 18 +- src/scripts/vnet/udp | 19 + src/scripts/vnet/uri/tcp-setup.sh | 39 + src/scripts/vnet/uri/tcp_server | 4 + src/scripts/vnet/uri/udp | 19 + src/svm.am | 10 +- src/svm/ssvm.c | 16 + src/svm/ssvm.h | 18 +- src/svm/svm_fifo.c | 568 ++++++ src/svm/svm_fifo.h | 157 ++ src/svm/svm_fifo_segment.c | 193 ++ src/svm/svm_fifo_segment.h | 89 + src/svm/test_svm_fifo1.c | 361 ++++ src/uri.am | 22 + src/uri/uri_tcp_test.c | 916 +++++++++ src/uri/uri_udp_test.c | 553 ++++++ src/uri/uri_udp_test2.c | 954 +++++++++ src/uri/uritest.c | 484 +++++ src/vlib/buffer.c | 2 +- src/vlib/buffer.h | 68 + src/vlibmemory/unix_shared_memory_queue.c | 12 +- src/vlibmemory/unix_shared_memory_queue.h | 2 +- src/vnet.am | 66 +- src/vnet/api_errno.h | 21 +- src/vnet/bfd/bfd_udp.c | 4 +- src/vnet/buffer.h | 10 + src/vnet/classify/vnet_classify.c | 4 +- src/vnet/dhcp/dhcp_proxy.h | 2 +- src/vnet/flow/flow_report.h | 2 +- src/vnet/ip/ip.h | 4 +- src/vnet/ip/ip4.h | 42 +- src/vnet/ip/ip4_forward.c | 173 +- src/vnet/ip/ip4_packet.h | 26 +- 
src/vnet/ip/ip6.h | 44 +- src/vnet/ip/ip6_packet.h | 26 +- src/vnet/ip/punt.c | 2 +- src/vnet/ip/tcp_packet.h | 141 -- src/vnet/ip/udp.h | 315 --- src/vnet/ip/udp_error.def | 21 - src/vnet/ip/udp_format.c | 91 - src/vnet/ip/udp_init.c | 71 - src/vnet/ip/udp_local.c | 645 ------ src/vnet/ip/udp_packet.h | 65 - src/vnet/ip/udp_pg.c | 237 --- src/vnet/ipsec/ikev2.c | 2 +- src/vnet/ipsec/ikev2_cli.c | 2 +- src/vnet/ipsec/ikev2_crypto.c | 2 +- src/vnet/lisp-cp/packets.c | 65 +- src/vnet/lisp-cp/packets.h | 45 - src/vnet/lisp-gpe/interface.c | 2 +- src/vnet/lisp-gpe/lisp_gpe.h | 4 +- src/vnet/lisp-gpe/lisp_gpe_adjacency.c | 2 + src/vnet/session/application.c | 343 ++++ src/vnet/session/application.h | 120 ++ src/vnet/session/application_interface.c | 459 +++++ src/vnet/session/application_interface.h | 136 ++ src/vnet/session/hashes.c | 28 + src/vnet/session/node.c | 435 ++++ src/vnet/session/session.api | 429 ++++ src/vnet/session/session.c | 1286 ++++++++++++ src/vnet/session/session.h | 380 ++++ src/vnet/session/session_api.c | 821 ++++++++ src/vnet/session/session_cli.c | 189 ++ src/vnet/session/transport.c | 64 + src/vnet/session/transport.h | 250 +++ src/vnet/tcp/tcp.c | 708 +++++++ src/vnet/tcp/tcp.h | 624 ++++++ src/vnet/tcp/tcp_error.def | 35 + src/vnet/tcp/tcp_format.c | 136 ++ src/vnet/tcp/tcp_input.c | 2316 ++++++++++++++++++++++ src/vnet/tcp/tcp_newreno.c | 93 + src/vnet/tcp/tcp_output.c | 1412 +++++++++++++ src/vnet/tcp/tcp_packet.h | 184 ++ src/vnet/tcp/tcp_pg.c | 236 +++ src/vnet/tcp/tcp_syn_filter4.c | 542 +++++ src/vnet/tcp/tcp_timer.h | 29 + src/vnet/udp/builtin_server.c | 239 +++ src/vnet/udp/udp.c | 342 ++++ src/vnet/udp/udp.h | 362 ++++ src/vnet/udp/udp_error.def | 21 + src/vnet/udp/udp_format.c | 91 + src/vnet/udp/udp_input.c | 314 +++ src/vnet/udp/udp_local.c | 666 +++++++ src/vnet/udp/udp_packet.h | 65 + src/vnet/udp/udp_pg.c | 237 +++ src/vnet/vnet_all_api_h.h | 1 + src/vnet/vxlan-gpe/vxlan_gpe.h | 2 +- src/vnet/vxlan/vxlan.h | 2 +- 
src/vpp/api/vpe.api | 1 + src/vppinfra.am | 5 + src/vppinfra/bihash_16_8.h | 103 + src/vppinfra/bihash_48_8.h | 116 ++ src/vppinfra/tw_timer_16t_1w_2048sl.c | 26 + src/vppinfra/tw_timer_16t_1w_2048sl.h | 46 + 100 files changed, 18737 insertions(+), 1874 deletions(-) create mode 100644 src/scripts/vnet/udp create mode 100755 src/scripts/vnet/uri/tcp-setup.sh create mode 100644 src/scripts/vnet/uri/tcp_server create mode 100644 src/scripts/vnet/uri/udp create mode 100644 src/svm/svm_fifo.c create mode 100644 src/svm/svm_fifo.h create mode 100644 src/svm/svm_fifo_segment.c create mode 100644 src/svm/svm_fifo_segment.h create mode 100644 src/svm/test_svm_fifo1.c create mode 100644 src/uri.am create mode 100644 src/uri/uri_tcp_test.c create mode 100644 src/uri/uri_udp_test.c create mode 100644 src/uri/uri_udp_test2.c create mode 100644 src/uri/uritest.c delete mode 100644 src/vnet/ip/tcp_packet.h delete mode 100644 src/vnet/ip/udp.h delete mode 100644 src/vnet/ip/udp_error.def delete mode 100644 src/vnet/ip/udp_format.c delete mode 100644 src/vnet/ip/udp_init.c delete mode 100644 src/vnet/ip/udp_local.c delete mode 100644 src/vnet/ip/udp_packet.h delete mode 100644 src/vnet/ip/udp_pg.c create mode 100644 src/vnet/session/application.c create mode 100644 src/vnet/session/application.h create mode 100644 src/vnet/session/application_interface.c create mode 100644 src/vnet/session/application_interface.h create mode 100644 src/vnet/session/hashes.c create mode 100644 src/vnet/session/node.c create mode 100644 src/vnet/session/session.api create mode 100644 src/vnet/session/session.c create mode 100644 src/vnet/session/session.h create mode 100644 src/vnet/session/session_api.c create mode 100644 src/vnet/session/session_cli.c create mode 100644 src/vnet/session/transport.c create mode 100644 src/vnet/session/transport.h create mode 100644 src/vnet/tcp/tcp.c create mode 100644 src/vnet/tcp/tcp.h create mode 100644 src/vnet/tcp/tcp_error.def create mode 100644 
src/vnet/tcp/tcp_format.c create mode 100644 src/vnet/tcp/tcp_input.c create mode 100644 src/vnet/tcp/tcp_newreno.c create mode 100644 src/vnet/tcp/tcp_output.c create mode 100644 src/vnet/tcp/tcp_packet.h create mode 100644 src/vnet/tcp/tcp_pg.c create mode 100644 src/vnet/tcp/tcp_syn_filter4.c create mode 100644 src/vnet/tcp/tcp_timer.h create mode 100644 src/vnet/udp/builtin_server.c create mode 100644 src/vnet/udp/udp.c create mode 100644 src/vnet/udp/udp.h create mode 100644 src/vnet/udp/udp_error.def create mode 100644 src/vnet/udp/udp_format.c create mode 100644 src/vnet/udp/udp_input.c create mode 100644 src/vnet/udp/udp_local.c create mode 100644 src/vnet/udp/udp_packet.h create mode 100644 src/vnet/udp/udp_pg.c create mode 100644 src/vppinfra/bihash_16_8.h create mode 100644 src/vppinfra/bihash_48_8.h create mode 100644 src/vppinfra/tw_timer_16t_1w_2048sl.c create mode 100644 src/vppinfra/tw_timer_16t_1w_2048sl.h (limited to 'src/vnet/dhcp') diff --git a/src/Makefile.am b/src/Makefile.am index 08feb29a..641707ed 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -88,6 +88,7 @@ include vlib-api.am include vnet.am include vpp.am include vpp-api-test.am +include uri.am SUBDIRS += plugins diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h index e84dab0b..dd48a93b 100644 --- a/src/plugins/ioam/export-common/ioam_export.h +++ b/src/plugins/ioam/export-common/ioam_export.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/plugins/ioam/ipfixcollector/ipfixcollector.c b/src/plugins/ioam/ipfixcollector/ipfixcollector.c index 4ae47edc..71b934ec 100644 --- a/src/plugins/ioam/ipfixcollector/ipfixcollector.c +++ b/src/plugins/ioam/ipfixcollector/ipfixcollector.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include ipfix_collector_main_t ipfix_collector_main; diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c 
b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c index b42c357c..f334c983 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index e30c913c..b4b7793d 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -689,12 +689,12 @@ snat_hairpinning (snat_main_t *sm, ip4_header_t, dst_address); ip0->checksum = ip_csum_fold (sum0); - old_dst_port0 = tcp0->ports.dst; + old_dst_port0 = tcp0->dst; if (PREDICT_TRUE(new_dst_port0 != old_dst_port0)) { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - tcp0->ports.dst = new_dst_port0; + tcp0->dst = new_dst_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t, dst_address); @@ -872,9 +872,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1012,9 +1012,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) { - old_port1 = tcp1->ports.src; - tcp1->ports.src = s1->out2in.port; - new_port1 = tcp1->ports.src; + old_port1 = tcp1->src_port; + tcp1->src_port = s1->out2in.port; + new_port1 = tcp1->src_port; sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, @@ -1188,9 +1188,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = 
tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1667,8 +1667,8 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm, { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = new_port0; + old_port0 = tcp0->src_port; + tcp0->src_port = new_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c index 328f5ba4..3bfc0aa3 100644 --- a/src/plugins/snat/out2in.c +++ b/src/plugins/snat/out2in.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include @@ -602,9 +602,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = s0->in2out.port; - new_port0 = tcp0->ports.dst; + old_port0 = tcp0->dst_port; + tcp0->dst_port = s0->in2out.port; + new_port0 = tcp0->dst_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -737,9 +737,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) { - old_port1 = tcp1->ports.dst; - tcp1->ports.dst = s1->in2out.port; - new_port1 = tcp1->ports.dst; + old_port1 = tcp1->dst_port; + tcp1->dst_port = s1->in2out.port; + new_port1 = tcp1->dst_port; sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, @@ -907,9 +907,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = s0->in2out.port; - new_port0 = tcp0->ports.dst; + old_port0 = tcp0->dst_port; + tcp0->dst_port = s0->in2out.port; + new_port0 = tcp0->dst_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1369,8 +1369,8 @@ snat_out2in_fast_node_fn (vlib_main_t * vm, { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = new_port0; + old_port0 = tcp0->dst_port; + tcp0->dst_port = 
new_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, diff --git a/src/scripts/vnet/tcp b/src/scripts/vnet/tcp index a2ee8b2d..b9c23c3a 100644 --- a/src/scripts/vnet/tcp +++ b/src/scripts/vnet/tcp @@ -1,16 +1,18 @@ +loop create +set int ip address loop0 192.168.1.1/8 +set int state loop0 up + packet-generator new { name x - limit 1 + limit 2048 node ip4-input - size 64-64 + size 100-100 + interface loop0 no-recycle data { - TCP: 1.2.3.4 -> 5.6.7.8 - TCP: 1234 -> 5678 + TCP: 192.168.1.2 -> 192.168.1.1 + TCP: 32415 -> 80 + SYN incrementing 100 } } - -tr add pg-input 100 -ip route 5.6.7.8/32 via local -ip route 1.2.3.4/32 via local diff --git a/src/scripts/vnet/udp b/src/scripts/vnet/udp new file mode 100644 index 00000000..7dda1eec --- /dev/null +++ b/src/scripts/vnet/udp @@ -0,0 +1,19 @@ +loop create +set int ip address loop0 192.168.1.1/8 +set int state loop0 up + +packet-generator new { + name udp + limit 512 + rate 1e4 + node ip4-input + size 100-100 + interface loop0 + no-recycle + data { + UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1 + UDP: 4321 -> 1234 + length 72 + incrementing 100 + } +} diff --git a/src/scripts/vnet/uri/tcp-setup.sh b/src/scripts/vnet/uri/tcp-setup.sh new file mode 100755 index 00000000..e0b01588 --- /dev/null +++ b/src/scripts/vnet/uri/tcp-setup.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +function topo_setup +{ + ip netns add vppns1 + ip link add veth_vpp1 type veth peer name vpp1 + ip link set dev vpp1 up + ip link set dev veth_vpp1 up netns vppns1 + + ip netns exec vppns1 \ + bash -c " + ip link set dev lo up + ip addr add 6.0.1.2/24 dev veth_vpp1 + " + + ethtool --offload vpp1 rx off tx off + ip netns exec vppns1 ethtool --offload veth_vpp1 rx off tx off + +} + +function topo_clean +{ + ip link del dev veth_vpp1 &> /dev/null + ip netns del vppns1 &> /dev/null +} + +if [ "$1" == "clean" ] ; then + topo_clean + exit 0 +else + topo_setup +fi + +# to test connectivity do: +# sudo ip netns exec vppns1 telnet 
6.0.1.1 1234 +# to push traffic to the server +# dd if=/dev/zero bs=1024K count=512 | nc 6.0.1.1 +# to listen for incoming connection from vpp +# nc -l 1234 diff --git a/src/scripts/vnet/uri/tcp_server b/src/scripts/vnet/uri/tcp_server new file mode 100644 index 00000000..7f5a86de --- /dev/null +++ b/src/scripts/vnet/uri/tcp_server @@ -0,0 +1,4 @@ +create host-interface name vpp1 +set int state host-vpp1 up +set int ip address host-vpp1 6.0.1.1/24 +trace add af-packet-input 10 diff --git a/src/scripts/vnet/uri/udp b/src/scripts/vnet/uri/udp new file mode 100644 index 00000000..ca13b83c --- /dev/null +++ b/src/scripts/vnet/uri/udp @@ -0,0 +1,19 @@ +loop create +set int ip address loop0 10.0.0.1/32 +set int state loop0 up + +packet-generator new { + name udp + limit 512 + rate 1e4 + node ip4-input + size 100-100 + interface loop0 + no-recycle + data { + UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1 + UDP: 4321 -> 1234 + length 72 + incrementing 100 + } +} diff --git a/src/svm.am b/src/svm.am index 2cd385bd..442eba8e 100644 --- a/src/svm.am +++ b/src/svm.am @@ -13,13 +13,14 @@ bin_PROGRAMS += svmtool svmdbtool -nobase_include_HEADERS += svm/svm.h svm/ssvm.h svm/svmdb.h +nobase_include_HEADERS += svm/svm.h svm/ssvm.h svm/svmdb.h \ + svm/svm_fifo.h svm/svm_fifo_segment.h lib_LTLIBRARIES += libsvm.la libsvmdb.la +libsvm_la_SOURCES = svm/svm.c svm/ssvm.c svm/svm_fifo.c svm/svm_fifo_segment.c libsvm_la_LIBADD = libvppinfra.la -lrt -lpthread libsvm_la_DEPENDENCIES = libvppinfra.la -libsvm_la_SOURCES = svm/svm.c svm/ssvm.c svmtool_SOURCES = svm/svmtool.c svmtool_LDADD = libsvm.la libvppinfra.la -lpthread -lrt @@ -31,4 +32,9 @@ libsvmdb_la_SOURCES = svm/svmdb.c svmdbtool_SOURCES = svm/svmdbtool.c svmdbtool_LDADD = libsvmdb.la libsvm.la libvppinfra.la -lpthread -lrt +noinst_PROGRAMS += test_svm_fifo1 +test_svm_fifo1_SOURCES = svm/test_svm_fifo1.c +test_svm_fifo1_LDADD = libsvm.la libvppinfra.la -lpthread -lrt +test_svm_fifo1_LDFLAGS = -static + # vi:syntax=automake diff 
--git a/src/svm/ssvm.c b/src/svm/ssvm.c index 6f409eb6..6cda1f27 100644 --- a/src/svm/ssvm.c +++ b/src/svm/ssvm.c @@ -169,6 +169,22 @@ re_map_it: return 0; } +void +ssvm_delete (ssvm_private_t * ssvm) +{ + u8 *fn; + + fn = format (0, "/dev/shm/%s%c", ssvm->name, 0); + + /* Throw away the backing file */ + if (unlink ((char *) fn) < 0) + clib_unix_warning ("unlink segment '%s'", ssvm->name); + + munmap ((void *) ssvm->requested_va, ssvm->ssvm_size); + vec_free (fn); +} + + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/svm/ssvm.h b/src/svm/ssvm.h index 9e61b9a0..bccfc164 100644 --- a/src/svm/ssvm.h +++ b/src/svm/ssvm.h @@ -38,7 +38,10 @@ #include #include -#define MMAP_PAGESIZE (4<<10) +#ifndef MMAP_PAGESIZE +#define MMAP_PAGESIZE (clib_mem_get_page_size()) +#endif + #define SSVM_N_OPAQUE 7 typedef struct @@ -125,12 +128,12 @@ ssvm_pop_heap (void *oldheap) } #define foreach_ssvm_api_error \ -_(NO_NAME, "No shared segment name", -10) \ -_(NO_SIZE, "Size not set (master)", -11) \ -_(CREATE_FAILURE, "Create failed", -12) \ -_(SET_SIZE, "Set size failed", -13) \ -_(MMAP, "mmap failed", -14) \ -_(SLAVE_TIMEOUT, "Slave map timeout", -15) +_(NO_NAME, "No shared segment name", -100) \ +_(NO_SIZE, "Size not set (master)", -101) \ +_(CREATE_FAILURE, "Create failed", -102) \ +_(SET_SIZE, "Set size failed", -103) \ +_(MMAP, "mmap failed", -104) \ +_(SLAVE_TIMEOUT, "Slave map timeout", -105) typedef enum { @@ -143,6 +146,7 @@ typedef enum int ssvm_master_init (ssvm_private_t * ssvm, u32 master_index); int ssvm_slave_init (ssvm_private_t * ssvm, int timeout_in_seconds); +void ssvm_delete (ssvm_private_t * ssvm); #endif /* __included_ssvm_h__ */ diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c new file mode 100644 index 00000000..11f90193 --- /dev/null +++ b/src/svm/svm_fifo.c @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svm_fifo.h" + +/** Create an svm fifo in the current heap. Returns 0 on allocation failure rather than aborting the process. */ +svm_fifo_t * +svm_fifo_create (u32 data_size_in_bytes) +{ + svm_fifo_t *f; + pthread_mutexattr_t attr; + pthread_condattr_t cattr; + + f = clib_mem_alloc_aligned_or_null (sizeof (*f) + data_size_in_bytes, + CLIB_CACHE_LINE_BYTES); + if (f == 0) + return 0; + + memset (f, 0, sizeof (*f) + data_size_in_bytes); + f->nitems = data_size_in_bytes; + f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX; + + memset (&attr, 0, sizeof (attr)); + memset (&cattr, 0, sizeof (cattr)); + + if (pthread_mutexattr_init (&attr)) + clib_unix_warning ("mutexattr_init"); + if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("pthread_mutexattr_setpshared"); + if (pthread_mutex_init (&f->mutex, &attr)) + clib_unix_warning ("mutex_init"); + if (pthread_mutexattr_destroy (&attr)) + clib_unix_warning ("mutexattr_destroy"); + if (pthread_condattr_init (&cattr)) + clib_unix_warning ("condattr_init"); + if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("condattr_setpshared"); + if (pthread_cond_init (&f->condvar, &cattr)) + clib_unix_warning ("cond_init1"); + if (pthread_condattr_destroy (&cattr)) + clib_unix_warning ("cond_init2"); + + return (f); +} + +always_inline ooo_segment_t * +ooo_segment_new (svm_fifo_t * f, u32 start, u32 length) +{ + ooo_segment_t *s; + 
pool_get (f->ooo_segments, s); + + s->fifo_position = start; + s->length = length; + + s->prev = s->next = OOO_SEGMENT_INVALID_INDEX; + + return s; +} + +always_inline void +ooo_segment_del (svm_fifo_t * f, u32 index) +{ + ooo_segment_t *cur, *prev = 0, *next = 0; + cur = pool_elt_at_index (f->ooo_segments, index); + + if (cur->next != OOO_SEGMENT_INVALID_INDEX) + { + next = pool_elt_at_index (f->ooo_segments, cur->next); + next->prev = cur->prev; + } + + if (cur->prev != OOO_SEGMENT_INVALID_INDEX) + { + prev = pool_elt_at_index (f->ooo_segments, cur->prev); + prev->next = cur->next; + } + else + { + f->ooos_list_head = cur->next; + } + + pool_put (f->ooo_segments, cur); +} + +/** + * Add segment to fifo's out-of-order segment list. Takes care of merging + * adjacent segments and removing overlapping ones. + */ +static void +ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) +{ + ooo_segment_t *s, *new_s, *prev, *next, *it; + u32 new_index, position, end_offset, s_sof, s_eof, s_index; + + position = (f->tail + offset) % f->nitems; + end_offset = offset + length; + + if (f->ooos_list_head == OOO_SEGMENT_INVALID_INDEX) + { + s = ooo_segment_new (f, position, length); + f->ooos_list_head = s - f->ooo_segments; + f->ooos_newest = f->ooos_list_head; + return; + } + + /* Find first segment that starts after new segment */ + s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); + while (s->next != OOO_SEGMENT_INVALID_INDEX + && ooo_segment_offset (f, s) <= offset) + s = pool_elt_at_index (f->ooo_segments, s->next); + + s_index = s - f->ooo_segments; + s_sof = ooo_segment_offset (f, s); + s_eof = ooo_segment_end_offset (f, s); + + /* No overlap, add before current segment */ + if (end_offset < s_sof) + { + new_s = ooo_segment_new (f, position, length); + new_index = new_s - f->ooo_segments; + + /* Pool might've moved, get segment again */ + s = pool_elt_at_index (f->ooo_segments, s_index); + + if (s->prev != OOO_SEGMENT_INVALID_INDEX) + { + new_s->prev = 
s->prev; + + prev = pool_elt_at_index (f->ooo_segments, new_s->prev); + prev->next = new_index; + } + else + { + /* New head */ + f->ooos_list_head = new_index; + } + + new_s->next = s - f->ooo_segments; + s->prev = new_index; + f->ooos_newest = new_index; + return; + } + /* No overlap, add after current segment */ + else if (s_eof < offset) + { + new_s = ooo_segment_new (f, position, length); + new_index = new_s - f->ooo_segments; + + /* Pool might've moved, get segment again */ + s = pool_elt_at_index (f->ooo_segments, s_index); + + if (s->next != OOO_SEGMENT_INVALID_INDEX) + { + new_s->next = s->next; + + next = pool_elt_at_index (f->ooo_segments, new_s->next); + next->prev = new_index; + } + + new_s->prev = s - f->ooo_segments; + s->next = new_index; + f->ooos_newest = new_index; + + return; + } + + /* + * Merge needed + */ + + /* Merge at head */ + if (offset <= s_sof) + { + /* If we have a previous, check if we overlap */ + if (s->prev != OOO_SEGMENT_INVALID_INDEX) + { + prev = pool_elt_at_index (f->ooo_segments, s->prev); + + /* New segment merges prev and current. Remove previous and + * update position of current. */ + if (ooo_segment_end_offset (f, prev) >= offset) + { + s->fifo_position = prev->fifo_position; + s->length = s_eof - ooo_segment_offset (f, prev); + ooo_segment_del (f, s->prev); + } + } + else + { + s->fifo_position = position; + s->length = s_eof - ooo_segment_offset (f, s); + } + + /* The new segment's tail may cover multiple smaller ones */ + if (s_eof < end_offset) + { + /* Remove segments completely covered */ + it = (s->next != OOO_SEGMENT_INVALID_INDEX) ? + pool_elt_at_index (f->ooo_segments, s->next) : 0; + while (it && ooo_segment_end_offset (f, it) < end_offset) + { + next = (it->next != OOO_SEGMENT_INVALID_INDEX) ? + pool_elt_at_index (f->ooo_segments, it->next) : 0; + ooo_segment_del (f, it - f->ooo_segments); + it = next; + } + + /* Update length. Segment's start might have changed. 
*/ + s->length = end_offset - ooo_segment_offset (f, s); + + /* If partial overlap with last, merge */ + if (it && ooo_segment_offset (f, it) < end_offset) + { + s->length += + it->length - (ooo_segment_offset (f, it) - end_offset); + ooo_segment_del (f, it - f->ooo_segments); + } + } + } + /* Last but overlapping previous */ + else if (s_eof <= end_offset) + { + s->length = end_offset - ooo_segment_offset (f, s); + } + /* New segment completely covered by current one */ + else + { + /* Do Nothing */ + } + + /* Most recently updated segment */ + f->ooos_newest = s - f->ooo_segments; +} + +/** + * Removes segments that can now be enqueued because the fifo's tail has + * advanced. Returns the number of bytes added to tail. + */ +static int +ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued) +{ + ooo_segment_t *s; + u32 index, bytes = 0, diff; + + s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); + + /* If last tail update overlaps one/multiple ooo segments, remove them */ + diff = (f->nitems + f->tail - s->fifo_position) % f->nitems; + while (0 < diff && diff < n_bytes_enqueued) + { + /* Segment end is beyond the tail. Advance tail and be done */ + if (diff < s->length) + { + f->tail += s->length - diff; + f->tail %= f->nitems; + break; + } + /* If we have next go on */ + else if (s->next != OOO_SEGMENT_INVALID_INDEX) + { + index = s - f->ooo_segments; + s = pool_elt_at_index (f->ooo_segments, s->next); + diff = (f->nitems + f->tail - s->fifo_position) % f->nitems; + ooo_segment_del (f, index); + } + /* End of search */ + else + { + break; + } + } + + /* If tail is adjacent to an ooo segment, 'consume' it */ + if (diff == 0) + { + bytes = ((f->nitems - f->cursize) >= s->length) ? 
s->length : + f->nitems - f->cursize; + + f->tail += bytes; + f->tail %= f->nitems; + + ooo_segment_del (f, s - f->ooo_segments); + } + + return bytes; +} + +static int +svm_fifo_enqueue_internal (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_from_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == f->nitems)) + return -2; /* fifo stuffed */ + + /* read cursize, which can only decrease while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (nitems - cursize) < max_bytes ? + (nitems - cursize) : max_bytes; + + if (PREDICT_TRUE (copy_from_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - f->tail) < total_copy_bytes) + ? (nitems - f->tail) : total_copy_bytes; + + clib_memcpy (&f->data[f->tail], copy_from_here, first_copy_bytes); + f->tail += first_copy_bytes; + f->tail = (f->tail == nitems) ? 0 : f->tail; + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (&f->data[f->tail], copy_from_here + first_copy_bytes, + second_copy_bytes); + f->tail += second_copy_bytes; + f->tail = (f->tail == nitems) ? 0 : f->tail; + } + } + else + { + /* Account for a zero-copy enqueue done elsewhere */ + ASSERT (max_bytes <= (nitems - cursize)); + f->tail += max_bytes; + f->tail = f->tail % nitems; + total_copy_bytes = max_bytes; + } + + /* Any out-of-order segments to collect? 
*/ + if (PREDICT_FALSE (f->ooos_list_head != OOO_SEGMENT_INVALID_INDEX)) + total_copy_bytes += ooo_segment_try_collect (f, total_copy_bytes); + + /* Atomically increase the queue length */ + __sync_fetch_and_add (&f->cursize, total_copy_bytes); + + return (total_copy_bytes); +} + +int +svm_fifo_enqueue_nowait (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_from_here) +{ + return svm_fifo_enqueue_internal (f, pid, max_bytes, copy_from_here); +} + +/** Enqueue a future segment. + * Two choices: either copies the entire segment, or copies nothing + * Returns 0 if the entire segment was copied + * Returns -1 if none of the segment was copied due to lack of space + */ + +static int +svm_fifo_enqueue_with_offset_internal2 (svm_fifo_t * f, + int pid, + u32 offset, + u32 required_bytes, + u8 * copy_from_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + u32 tail_plus_offset; + + ASSERT (offset > 0); + + /* read cursize, which can only decrease while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Will this request fit? */ + if ((required_bytes + offset) > (nitems - cursize)) + return -1; + + ooo_segment_add (f, offset, required_bytes); + + /* Number of bytes we're going to copy */ + total_copy_bytes = required_bytes; + tail_plus_offset = (f->tail + offset) % nitems; + + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - tail_plus_offset) < total_copy_bytes) + ? 
(nitems - tail_plus_offset) : total_copy_bytes; + + clib_memcpy (&f->data[tail_plus_offset], copy_from_here, first_copy_bytes); + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + tail_plus_offset += first_copy_bytes; + tail_plus_offset %= nitems; + + ASSERT (tail_plus_offset == 0); + + clib_memcpy (&f->data[tail_plus_offset], + copy_from_here + first_copy_bytes, second_copy_bytes); + } + + return (0); +} + + +int +svm_fifo_enqueue_with_offset (svm_fifo_t * f, + int pid, + u32 offset, + u32 required_bytes, u8 * copy_from_here) +{ + return svm_fifo_enqueue_with_offset_internal2 + (f, pid, offset, required_bytes, copy_from_here); +} + + +static int +svm_fifo_dequeue_internal2 (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == 0)) + return -2; /* nothing in the fifo */ + + /* read cursize, which can only increase while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (cursize < max_bytes) ? cursize : max_bytes; + + if (PREDICT_TRUE (copy_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - f->head) < total_copy_bytes) + ? (nitems - f->head) : total_copy_bytes; + clib_memcpy (copy_here, &f->data[f->head], first_copy_bytes); + f->head += first_copy_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (copy_here + first_copy_bytes, + &f->data[f->head], second_copy_bytes); + f->head += second_copy_bytes; + f->head = (f->head == nitems) ? 
0 : f->head; + } + } + else + { + /* Account for a zero-copy dequeue done elsewhere */ + ASSERT (max_bytes <= cursize); + f->head += max_bytes; + f->head = f->head % nitems; + cursize -= max_bytes; + total_copy_bytes = max_bytes; + } + + __sync_fetch_and_sub (&f->cursize, total_copy_bytes); + + return (total_copy_bytes); +} + +int +svm_fifo_dequeue_nowait (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_here) +{ + return svm_fifo_dequeue_internal2 (f, pid, max_bytes, copy_here); +} + +int +svm_fifo_peek (svm_fifo_t * f, int pid, u32 offset, u32 max_bytes, + u8 * copy_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == 0)) + return -2; /* nothing in the fifo */ + + /* read cursize, which can only increase while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (cursize < max_bytes) ? cursize : max_bytes; + + if (PREDICT_TRUE (copy_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = + ((nitems - f->head) < total_copy_bytes) ? + (nitems - f->head) : total_copy_bytes; + clib_memcpy (copy_here, &f->data[f->head], first_copy_bytes); + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (copy_here + first_copy_bytes, &f->data[0], + second_copy_bytes); + } + } + return total_copy_bytes; +} + +int +svm_fifo_dequeue_drop (svm_fifo_t * f, int pid, u32 max_bytes) +{ + u32 total_drop_bytes, first_drop_bytes, second_drop_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == 0)) + return -2; /* nothing in the fifo */ + + /* read cursize, which can only increase while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to drop */ + total_drop_bytes = (cursize < max_bytes) ? 
cursize : max_bytes; + + /* Number of bytes in first copy segment */ + first_drop_bytes = + ((nitems - f->head) < total_drop_bytes) ? + (nitems - f->head) : total_drop_bytes; + f->head += first_drop_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + + /* Number of bytes in second drop segment, if any */ + second_drop_bytes = total_drop_bytes - first_drop_bytes; + if (second_drop_bytes) + { + f->head += second_drop_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + } + + __sync_fetch_and_sub (&f->cursize, total_drop_bytes); + + return total_drop_bytes; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h new file mode 100644 index 00000000..70624b74 --- /dev/null +++ b/src/svm/svm_fifo.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_ssvm_fifo_h__ +#define __included_ssvm_fifo_h__ + +#include +#include +#include +#include +#include +#include +#include + +typedef enum +{ + SVM_FIFO_TAG_NOT_HELD = 0, + SVM_FIFO_TAG_DEQUEUE, + SVM_FIFO_TAG_ENQUEUE, +} svm_lock_tag_t; + +/** Out-of-order segment */ +typedef struct +{ + u32 next; /**< Next linked-list element pool index */ + u32 prev; /**< Previous linked-list element pool index */ + + u32 fifo_position; /**< Start of segment, normalized*/ + u32 length; /**< Length of segment */ +} ooo_segment_t; + +#define OOO_SEGMENT_INVALID_INDEX ((u32)~0) + +typedef struct +{ + pthread_mutex_t mutex; /* 8 bytes */ + pthread_cond_t condvar; /* 8 bytes */ + u32 owner_pid; + svm_lock_tag_t tag; + volatile u32 cursize; + u32 nitems; + + /* Backpointers */ + u32 server_session_index; + u32 client_session_index; + u8 server_thread_index; + u8 client_thread_index; + CLIB_CACHE_LINE_ALIGN_MARK (end_shared); + u32 head; + CLIB_CACHE_LINE_ALIGN_MARK (end_consumer); + + /* producer */ + u32 tail; + + ooo_segment_t *ooo_segments; /**< Pool of ooo segments */ + u32 ooos_list_head; /**< Head of out-of-order linked-list */ + u32 ooos_newest; /**< Last segment to have been updated */ + + CLIB_CACHE_LINE_ALIGN_MARK (data); +} svm_fifo_t; + +static inline int +svm_fifo_lock (svm_fifo_t * f, u32 pid, u32 tag, int nowait) +{ + if (PREDICT_TRUE (nowait == 0)) + pthread_mutex_lock (&f->mutex); + else + { + if (pthread_mutex_trylock (&f->mutex)) + return -1; + } + f->owner_pid = pid; + f->tag = tag; + return 0; +} + +static inline void +svm_fifo_unlock (svm_fifo_t * f) +{ + f->owner_pid = 0; + f->tag = 0; + CLIB_MEMORY_BARRIER (); + pthread_mutex_unlock (&f->mutex); +} + +static inline u32 +svm_fifo_max_dequeue (svm_fifo_t * f) +{ + return f->cursize; +} + +static inline u32 +svm_fifo_max_enqueue (svm_fifo_t * f) +{ + return f->nitems - f->cursize; +} + +static inline u8 +svm_fifo_has_ooo_data (svm_fifo_t * f) +{ + return f->ooos_list_head != 
OOO_SEGMENT_INVALID_INDEX; +} + +svm_fifo_t *svm_fifo_create (u32 data_size_in_bytes); + +int svm_fifo_enqueue_nowait (svm_fifo_t * f, int pid, u32 max_bytes, + u8 * copy_from_here); + +int svm_fifo_enqueue_with_offset (svm_fifo_t * f, int pid, + u32 offset, u32 required_bytes, + u8 * copy_from_here); + +int svm_fifo_dequeue_nowait (svm_fifo_t * f, int pid, u32 max_bytes, + u8 * copy_here); + +int svm_fifo_peek (svm_fifo_t * f, int pid, u32 offset, u32 max_bytes, + u8 * copy_here); +int svm_fifo_dequeue_drop (svm_fifo_t * f, int pid, u32 max_bytes); + +always_inline ooo_segment_t * +svm_fifo_newest_ooo_segment (svm_fifo_t * f) +{ + return f->ooo_segments + f->ooos_newest; +} + +always_inline u32 +ooo_segment_offset (svm_fifo_t * f, ooo_segment_t * s) +{ + return ((f->nitems + s->fifo_position - f->tail) % f->nitems); +} + +always_inline u32 +ooo_segment_end_offset (svm_fifo_t * f, ooo_segment_t * s) +{ + return ((f->nitems + s->fifo_position + s->length - f->tail) % f->nitems); +} + +#endif /* __included_ssvm_fifo_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c new file mode 100644 index 00000000..acabb3bd --- /dev/null +++ b/src/svm/svm_fifo_segment.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +svm_fifo_segment_main_t svm_fifo_segment_main; + +/** (master) create an svm fifo segment */ +int +svm_fifo_segment_create (svm_fifo_segment_create_args_t * a) +{ + int rv; + svm_fifo_segment_private_t *s; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + void *oldheap; + + /* Allocate a fresh segment */ + pool_get (sm->segments, s); + memset (s, 0, sizeof (*s)); + + s->ssvm.ssvm_size = a->segment_size; + s->ssvm.i_am_master = 1; + s->ssvm.my_pid = getpid (); + s->ssvm.name = (u8 *) a->segment_name; + s->ssvm.requested_va = sm->next_baseva; + + rv = ssvm_master_init (&s->ssvm, s - sm->segments); + + if (rv) + { + _vec_len (s) = vec_len (s) - 1; + return (rv); + } + + /* Note; requested_va updated due to seg base addr randomization */ + sm->next_baseva = s->ssvm.requested_va + a->segment_size; + + sh = s->ssvm.sh; + oldheap = ssvm_push_heap (sh); + + /* Set up svm_fifo_segment shared header */ + fsh = clib_mem_alloc (sizeof (*fsh)); + memset (fsh, 0, sizeof (*fsh)); + sh->opaque[0] = fsh; + s->h = fsh; + fsh->segment_name = format (0, "%s%c", a->segment_name, 0); + + /* Avoid vec_add1(...) failure when adding a fifo, etc. 
*/ + vec_validate (fsh->fifos, 64); + _vec_len (fsh->fifos) = 0; + + ssvm_pop_heap (oldheap); + + sh->ready = 1; + a->new_segment_index = s - sm->segments; + return (0); +} + +/** (slave) attach to an svm fifo segment */ +int +svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a) +{ + int rv; + svm_fifo_segment_private_t *s; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + + /* Allocate a fresh segment */ + pool_get (sm->segments, s); + + memset (s, 0, sizeof (*s)); + + s->ssvm.ssvm_size = a->segment_size; + s->ssvm.my_pid = getpid (); + s->ssvm.name = (u8 *) a->segment_name; + s->ssvm.requested_va = sm->next_baseva; + + rv = ssvm_slave_init (&s->ssvm, sm->timeout_in_seconds); + + if (rv) + { + _vec_len (s) = vec_len (s) - 1; + return (rv); + } + + /* Fish the segment header */ + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + s->h = fsh; + + a->new_segment_index = s - sm->segments; + return (0); +} + +void +svm_fifo_segment_delete (svm_fifo_segment_private_t * s) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_delete (&s->ssvm); + pool_put (sm->segments, s); +} + +svm_fifo_t * +svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, + u32 data_size_in_bytes) +{ + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + svm_fifo_t *f; + void *oldheap; + + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + oldheap = ssvm_push_heap (sh); + + /* Note: this can fail, in which case: create another segment */ + f = svm_fifo_create (data_size_in_bytes); + if (f == 0) + { + ssvm_pop_heap (oldheap); + return (0); + } + + vec_add1 (fsh->fifos, f); + + ssvm_pop_heap (oldheap); + return (f); +} + +void +svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f) +{ + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + void *oldheap; + int i; + + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) 
sh->opaque[0]; + oldheap = ssvm_push_heap (sh); + + for (i = 0; i < vec_len (fsh->fifos); i++) + { + if (fsh->fifos[i] == f) + { + vec_delete (fsh->fifos, 1, i); + goto found; + } + } + clib_warning ("fifo 0x%llx not found in fifo table...", f); + +found: + clib_mem_free (f); + ssvm_pop_heap (oldheap); +} + +void +svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + + sm->next_baseva = baseva; + sm->timeout_in_seconds = timeout_in_seconds; +} + +u32 +svm_fifo_segment_index (svm_fifo_segment_private_t * s) +{ + return s - svm_fifo_segment_main.segments; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo_segment.h b/src/svm/svm_fifo_segment.h new file mode 100644 index 00000000..793fa7c8 --- /dev/null +++ b/src/svm/svm_fifo_segment.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_ssvm_fifo_segment_h__ +#define __included_ssvm_fifo_segment_h__ + +#include "svm_fifo.h" +#include "ssvm.h" + +typedef struct +{ + volatile svm_fifo_t **fifos; + u8 *segment_name; +} svm_fifo_segment_header_t; + +typedef struct +{ + ssvm_private_t ssvm; + svm_fifo_segment_header_t *h; +} svm_fifo_segment_private_t; + +typedef struct +{ + /** pool of segments */ + svm_fifo_segment_private_t *segments; + /* Where to put the next one */ + u64 next_baseva; + u32 timeout_in_seconds; +} svm_fifo_segment_main_t; + +extern svm_fifo_segment_main_t svm_fifo_segment_main; + +typedef struct +{ + char *segment_name; + u32 segment_size; + u32 new_segment_index; +} svm_fifo_segment_create_args_t; + +static inline svm_fifo_segment_private_t * +svm_fifo_get_segment (u32 segment_index) +{ + svm_fifo_segment_main_t *ssm = &svm_fifo_segment_main; + return vec_elt_at_index (ssm->segments, segment_index); +} + +#define foreach_ssvm_fifo_segment_api_error \ +_(OUT_OF_SPACE, "Out of space in segment", -200) + +typedef enum +{ +#define _(n,s,c) SSVM_FIFO_SEGMENT_API_ERROR_##n = c, + foreach_ssvm_fifo_segment_api_error +#undef _ +} ssvm_fifo_segment_api_error_enum_t; + +int svm_fifo_segment_create (svm_fifo_segment_create_args_t * a); +int svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a); +void svm_fifo_segment_delete (svm_fifo_segment_private_t * s); + +svm_fifo_t *svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, + u32 data_size_in_bytes); +void svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, + svm_fifo_t * f); + +void svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds); + +u32 svm_fifo_segment_index (svm_fifo_segment_private_t * s); + +#endif /* __included_ssvm_fifo_segment_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/test_svm_fifo1.c b/src/svm/test_svm_fifo1.c new file mode 100644 index 00000000..355653df --- /dev/null 
+++ b/src/svm/test_svm_fifo1.c @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svm_fifo_segment.h" + +clib_error_t * +hello_world (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + clib_error_t *error = 0; + int pid = getpid (); + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + f = svm_fifo_segment_alloc_fifo (sp, 4096); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + while (svm_fifo_max_enqueue (f) >= vec_len (test_data)) + svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data); + + while (svm_fifo_max_dequeue (f) >= vec_len (test_data)) + svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data), + retrieved_data); + + while (svm_fifo_max_enqueue (f) >= vec_len (test_data)) + svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data); + + while (svm_fifo_max_dequeue (f) >= vec_len (test_data)) + svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data), + 
retrieved_data); + + if (!memcmp (retrieved_data, test_data, vec_len (test_data))) + error = clib_error_return (0, "data test OK, got '%s'", retrieved_data); + else + error = clib_error_return (0, "data test FAIL!"); + + svm_fifo_segment_free_fifo (sp, f); + + return error; +} + +clib_error_t * +master (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + int i; + int pid = getpid (); + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + f = svm_fifo_segment_alloc_fifo (sp, 4096); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + for (i = 0; i < 1000; i++) + svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data); + + return clib_error_return (0, "master (enqueue) done"); +} + +clib_error_t * +mempig (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + svm_fifo_t **flist = 0; + int rv; + int i; + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + for (i = 0; i < 1000; i++) + { + f = svm_fifo_segment_alloc_fifo (sp, 4096); + if (f == 0) + break; + vec_add1 (flist, f); + } + + fformat (stdout, "Try #1: created %d fifos...\n", vec_len (flist)); + for (i = 0; i < vec_len (flist); i++) + { + f = flist[i]; + svm_fifo_segment_free_fifo (sp, f); + } + + _vec_len (flist) = 0; + + for (i = 0; i < 
1000; i++) + { + f = svm_fifo_segment_alloc_fifo (sp, 4096); + if (f == 0) + break; + vec_add1 (flist, f); + } + + fformat (stdout, "Try #2: created %d fifos...\n", vec_len (flist)); + for (i = 0; i < vec_len (flist); i++) + { + f = flist[i]; + svm_fifo_segment_free_fifo (sp, f); + } + + return 0; +} + +clib_error_t * +offset (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u32 *test_data = 0; + u32 *recovered_data = 0; + int i; + int pid = getpid (); + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + f = svm_fifo_segment_alloc_fifo (sp, 200 << 10); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + for (i = 0; i < (3 * 1024); i++) + vec_add1 (test_data, i); + + /* Enqueue the first 1024 u32's */ + svm_fifo_enqueue_nowait (f, pid, 4096 /* bytes to enqueue */ , + (u8 *) test_data); + + /* Enqueue the third 1024 u32's 2048 ahead of the current tail */ + svm_fifo_enqueue_with_offset (f, pid, 4096, 4096, (u8 *) & test_data[2048]); + + /* Enqueue the second 1024 u32's at the current tail */ + svm_fifo_enqueue_nowait (f, pid, 4096 /* bytes to enqueue */ , + (u8 *) & test_data[1024]); + + vec_validate (recovered_data, (3 * 1024) - 1); + + svm_fifo_dequeue_nowait (f, pid, 3 * 4096, (u8 *) recovered_data); + + for (i = 0; i < (3 * 1024); i++) + { + if (recovered_data[i] != test_data[i]) + { + clib_warning ("[%d] expected %d recovered %d", i, + test_data[i], recovered_data[i]); + return clib_error_return (0, "offset test FAILED"); + } + } + + return clib_error_return (0, "offset test OK"); +} + +clib_error_t * +slave (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + 
svm_fifo_segment_header_t *fsh; + svm_fifo_t *f; + ssvm_shared_header_t *sh; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + int pid = getpid (); + int i; + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + + rv = svm_fifo_segment_attach (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_attach returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + sh = sp->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + /* might wanna wait.. */ + f = (svm_fifo_t *) fsh->fifos[0]; + + /* Lazy bastards united */ + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + for (i = 0; i < 1000; i++) + { + svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data), + retrieved_data); + if (memcmp (retrieved_data, test_data, vec_len (retrieved_data))) + return clib_error_return (0, "retrieved data incorrect, '%s'", + retrieved_data); + } + + return clib_error_return (0, "slave (dequeue) done"); +} + + +int +test_ssvm_fifo1 (unformat_input_t * input) +{ + clib_error_t *error = 0; + int verbose = 0; + int test_id = 0; + + svm_fifo_segment_init (0x200000000ULL, 20); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose %d", &verbose)) + ; + else if (unformat (input, "verbose")) + verbose = 1; + else if (unformat (input, "master")) + test_id = 1; + else if (unformat (input, "slave")) + test_id = 2; + else if (unformat (input, "mempig")) + test_id = 3; + else if (unformat (input, "offset")) + test_id = 4; + else + { + error = clib_error_create ("unknown input `%U'\n", + format_unformat_error, input); + goto out; + } + } + + switch (test_id) + { + case 0: + error = hello_world (verbose); + break; + + case 1: + error = master (verbose); + break; + + case 2: + error = slave (verbose); + break; + + case 3: + error = mempig (verbose); + break; + + case 4: + error = offset (verbose); + break; + + default: + error = 
clib_error_return (0, "test id %d unknown", test_id); + break; + } + +out: + if (error) + clib_error_report (error); + + return 0; +} + + + +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + int r; + + unformat_init_command_line (&i, argv); + r = test_ssvm_fifo1 (&i); + unformat_free (&i); + return r; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/uri.am b/src/uri.am new file mode 100644 index 00000000..8cdd77c6 --- /dev/null +++ b/src/uri.am @@ -0,0 +1,22 @@ +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +noinst_PROGRAMS += uri_udp_test2 uri_tcp_test + +uri_udp_test2_SOURCES = uri/uri_udp_test2.c +uri_udp_test2_LDADD = libvlibmemoryclient.la libvlibapi.la libsvm.la \ + libvppinfra.la -lpthread -lm -lrt + +uri_tcp_test_SOURCES = uri/uri_tcp_test.c +uri_tcp_test_LDADD = libvlibmemoryclient.la libvlibapi.la libsvm.la \ + libvppinfra.la -lpthread -lm -lrt diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c new file mode 100644 index 00000000..ed5a37d8 --- /dev/null +++ b/src/uri/uri_tcp_test.c @@ -0,0 +1,916 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "../vnet/session/application_interface.h" + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef struct +{ + svm_fifo_t * server_rx_fifo; + svm_fifo_t * server_tx_fifo; + + u32 vpp_session_index; + u32 vpp_session_thread; +} session_t; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, + STATE_FAILED +} connection_state_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 * uri; + + /* Session pool */ + session_t * sessions; + + /* Hash table for disconnect processing */ + uword * session_index_by_vpp_handles; + + /* intermediate rx buffer */ + u8 * rx_buf; + + /* URI for slave's connect */ + u8 * connect_uri; + + u32 connected_session_index; + + int i_am_master; + + /* drop all packets */ + int drop_packets; + + /* Our event queue */ + unix_shared_memory_queue_t * our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t * vpp_event_queue; 
+ + pid_t my_pid; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + /* Signal variables */ + volatile int time_to_stop; + volatile int time_to_print_stats; + + u32 configured_segment_size; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword * error_string_by_error_number; + + /* convenience */ + svm_fifo_segment_main_t * segment_main; + + u8 *connect_test_data; +} uri_tcp_test_main_t; + +uri_tcp_test_main_t uri_tcp_test_main; + +#if CLIB_DEBUG > 0 +#define NITER 10000 +#else +#define NITER 4000000 +#endif + +int +wait_for_state_change (uri_tcp_test_main_t * utm, connection_state_t state) +{ +#if CLIB_DEBUG > 0 +#define TIMEOUT 600.0 +#else +#define TIMEOUT 600.0 +#endif + + f64 timeout = clib_time_now (&utm->clib_time) + TIMEOUT; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + if (utm->state == STATE_FAILED) + return -1; + } + clib_warning ("timeout waiting for STATE_READY"); + return -1; +} + +static void +init_error_string_table (uri_tcp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +static void +stop_signal (int signum) +{ + uri_tcp_test_main_t *um = &uri_tcp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal (int signum) +{ + uri_tcp_test_main_t *um = &uri_tcp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
+{ + clib_warning ("BUG"); +} + +int +connect_to_vpp (char *name) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +static void +vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t *mp) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + clib_warning ("Mapped new segment '%s' size %d", mp->segment_name, + mp->segment_size); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + session_t * session; + vl_api_disconnect_session_reply_t * rmp; + uword * p; + int rv = 0; + u64 key; + + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset (utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +static void +vl_api_reset_session_t_handler (vl_api_reset_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + session_t * session; + vl_api_reset_session_reply_t * rmp; + uword 
* p; + int rv = 0; + u64 key; + + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + + p = hash_get(utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index(utm->sessions, p[0]); + hash_unset(utm->session_index_by_vpp_handles, key); + pool_put(utm->sessions, session); + } + else + { + clib_warning("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +void +handle_fifo_event_connect_rx (uri_tcp_test_main_t *utm, session_fifo_event_t * e) +{ + svm_fifo_t * rx_fifo; + int n_read, bytes; + + rx_fifo = e->fifo; + + bytes = e->enqueue_length; + do + { + n_read = svm_fifo_dequeue_nowait (rx_fifo, 0, vec_len(utm->rx_buf), + utm->rx_buf); + if (n_read > 0) + bytes -= n_read; + } + while (n_read < 0 || bytes > 0); + + // bytes_to_read = svm_fifo_max_dequeue (rx_fifo); + // + // bytes_to_read = vec_len(utm->rx_buf) > bytes_to_read ? 
+ // bytes_to_read : vec_len(utm->rx_buf); + // + // buffer_offset = 0; + // while (bytes_to_read > 0) + // { + // rv = svm_fifo_dequeue_nowait2 (rx_fifo, mypid, + // bytes_to_read, + // utm->rx_buf + buffer_offset); + // if (rv > 0) + // { + // bytes_to_read -= rv; + // buffer_offset += rv; + // bytes_received += rv; + // } + // } + + + // while (bytes_received < bytes_sent) + // { + // rv = svm_fifo_dequeue_nowait2 (rx_fifo, mypid, + // vec_len (utm->rx_buf), + // utm->rx_buf); + // if (rv > 0) + // { + //#if CLIB_DEBUG > 0 + // int j; + // for (j = 0; j < rv; j++) + // { + // if (utm->rx_buf[j] != ((bytes_received + j) & 0xff)) + // { + // clib_warning ("error at byte %lld, 0x%x not 0x%x", + // bytes_received + j, + // utm->rx_buf[j], + // ((bytes_received + j )&0xff)); + // } + // } + //#endif + // bytes_received += (u64) rv; + // } + // } +} + +void +handle_connect_event_queue (uri_tcp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, 0 /* nowait */); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_connect_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning("unknown event type %d", e->event_type); + break; + } +} + +void +uri_tcp_connect_send (uri_tcp_test_main_t *utm) +{ + u8 *test_data = utm->connect_test_data; + u64 bytes_sent = 0; + int rv; + int mypid = getpid(); + session_t * session; + svm_fifo_t *tx_fifo; + int buffer_offset, bytes_to_send = 0; + session_fifo_event_t evt; + static int serial_number = 0; + int i; + u32 max_chunk = 64 << 10, write; + + session = pool_elt_at_index (utm->sessions, utm->connected_session_index); + tx_fifo = session->server_tx_fifo; + + vec_validate (utm->rx_buf, vec_len (test_data) - 1); + + for (i = 0; i < 10; i++) + { + bytes_to_send = vec_len (test_data); + buffer_offset = 0; + while (bytes_to_send > 0) + { + write = bytes_to_send > max_chunk ? 
max_chunk : bytes_to_send; + rv = svm_fifo_enqueue_nowait (tx_fifo, mypid, write, + test_data + buffer_offset); + + if (rv > 0) + { + bytes_to_send -= rv; + buffer_offset += rv; + bytes_sent += rv; + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = rv; + evt.event_id = serial_number++; + + unix_shared_memory_queue_add (utm->vpp_event_queue, (u8 *) &evt, + 0 /* do wait for mutex */); + } + } + } +} + +static void +uri_tcp_client_test (uri_tcp_test_main_t * utm) +{ + vl_api_connect_uri_t * cmp; + vl_api_disconnect_session_t *dmp; + session_t *connected_session; + int i; + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl(0xfeedface); + memcpy (cmp->uri, utm->connect_uri, vec_len (utm->connect_uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + return; + } + + /* Init test data */ + vec_validate (utm->connect_test_data, 64 * 1024 - 1); + for (i = 0; i < vec_len (utm->connect_test_data); i++) + utm->connect_test_data[i] = i & 0xff; + + /* Start reader thread */ + /* handle_connect_event_queue (utm); */ + + /* Start send */ + uri_tcp_connect_send (utm); + + /* Disconnect */ + connected_session = pool_elt_at_index(utm->sessions, + utm->connected_session_index); + dmp = vl_msg_api_alloc (sizeof (*dmp)); + memset (dmp, 0, sizeof (*dmp)); + dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION); + dmp->client_index = utm->my_client_index; + dmp->session_index = connected_session->vpp_session_index; + dmp->session_thread_index = connected_session->vpp_session_thread; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&dmp); +} + +void +handle_fifo_event_server_rx (uri_tcp_test_main_t *utm, session_fifo_event_t * e) +{ + svm_fifo_t * rx_fifo, * tx_fifo; + int 
n_read; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv, bytes; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + bytes = e->enqueue_length; + do + { + n_read = svm_fifo_dequeue_nowait (rx_fifo, 0, vec_len(utm->rx_buf), + utm->rx_buf); + + /* Reflect if a non-drop session */ + if (!utm->drop_packets && n_read > 0) + { + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, n_read, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = n_read; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) &evt, 0 /* do wait for mutex */); + } + + if (n_read > 0) + bytes -= n_read; + } + while (n_read < 0 || bytes > 0); +} + +void +handle_event_queue (uri_tcp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *)e, + 0 /* nowait */); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE(utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE(utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat(stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->retval) + { + clib_warning("bind failed: %d", mp->retval); + return; + } + + if (mp->segment_name_length == 0) + { + clib_warning("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + 
ASSERT(mp->server_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning("svm_fifo_segment_attach ('%s') failed", mp->segment_name); + return; + } + + utm->our_event_queue = + (unix_shared_memory_queue_t *) mp->server_event_queue_address; + + utm->state = STATE_READY; +} + +static void +vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + session_t *session; + u32 session_index; + svm_fifo_t *rx_fifo, *tx_fifo; + int rv; + + if (mp->retval) + { + clib_warning ("connection failed with code: %d", mp->retval); + utm->state = STATE_FAILED; + return; + } + /* + * Attatch to segment + */ + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + utm->state = STATE_FAILED; + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + ASSERT(mp->client_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + + /* + * Save the queues + */ + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->client_event_queue_address; + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + /* + * Setup session + */ + + pool_get (utm->sessions, session); + session_index = session - utm->sessions; + + rx_fifo = (svm_fifo_t *)mp->server_rx_fifo; + rx_fifo->client_session_index = session_index; + tx_fifo = (svm_fifo_t *)mp->server_tx_fifo; + tx_fifo->client_session_index = session_index; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + session->vpp_session_index = mp->session_index; + session->vpp_session_thread = mp->session_thread_index; + + /* Save handle */ + utm->connected_session_index = session_index; 
+ + utm->state = STATE_READY; +} + +void +uri_tcp_bind (uri_tcp_test_main_t *utm) +{ + vl_api_bind_uri_t * bmp; + u32 fifo_size = 3 << 20; + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl(0xfeedface); + bmp->initial_segment_size = 256<<20; /* size of initial segment */ + bmp->options[SESSION_OPTIONS_FLAGS] = + SESSION_OPTIONS_FLAGS_USE_FIFO | SESSION_OPTIONS_FLAGS_ADD_SEGMENT; + bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = fifo_size; + bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = fifo_size; + bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128<<20; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&bmp); +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t *mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl(mp->retval)); + + utm->state = STATE_START; +} + +void +uri_tcp_unbind (uri_tcp_test_main_t *utm) +{ + vl_api_unbind_uri_t * ump; + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&ump); +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t * rx_fifo, * tx_fifo; + session_t * session; + static f64 start_time; + u64 key; + u32 session_index; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + /* Allocate local session and set it up */ + pool_get (utm->sessions, session); + session_index = session - utm->sessions; + + 
rx_fifo = (svm_fifo_t *)mp->server_rx_fifo; + rx_fifo->client_session_index = session_index; + tx_fifo = (svm_fifo_t *)mp->server_tx_fifo; + tx_fifo->client_session_index = session_index; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + /* Add it to lookup table */ + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + hash_set (utm->session_index_by_vpp_handles, key, session_index); + + utm->state = STATE_READY; + + /* Stats printing */ + if (pool_elts (utm->sessions) && (pool_elts(utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts(utm->sessions), now - start_time, + (f64)pool_elts(utm->sessions) / (now - start_time)); + } + + /* Send accept reply to vpp */ + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +void +uri_tcp_server_test (uri_tcp_test_main_t * utm) +{ + + /* Bind to uri */ + uri_tcp_bind (utm); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + /* Enter handle event loop */ + handle_event_queue (utm); + + /* Cleanup */ + uri_tcp_unbind (utm); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(RESET_SESSION, reset_session) \ +_(MAP_ANOTHER_SEGMENT, map_another_segment) + +void +uri_api_hookup 
(uri_tcp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ +} + +int +main (int argc, char **argv) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 * bind_name = (u8 *) "tcp://0.0.0.0/1234"; + u32 tmp; + mheap_t *h; + session_t * session; + int i; + int i_am_master = 1, drop_packets = 0; + + clib_mem_init (0, 256 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 65536); + + utm->session_index_by_vpp_handles = + hash_create (0, sizeof(uword)); + + utm->my_pid = getpid(); + utm->configured_segment_size = 1<<20; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init(0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else if (unformat (a, "segment-size %dM", &tmp)) + utm->configured_segment_size = tmp<<20; + else if (unformat (a, "segment-size %dG", &tmp)) + utm->configured_segment_size = tmp<<30; + else if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else if (unformat (a, "drop")) + drop_packets = 1; + else + { + fformat (stderr, "%s: usage [master|slave]\n"); + exit (1); + } + } + + utm->uri = format (0, "%s%c", bind_name, 0); + utm->i_am_master = i_am_master; + utm->segment_main = &svm_fifo_segment_main; + utm->drop_packets = drop_packets; + + utm->connect_uri = format (0, "tcp://6.0.1.2/1234%c", 0); + + setup_signal_handlers(); + 
uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master? "uri_tcp_server":"uri_tcp_client") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + if (i_am_master == 0) + { + uri_tcp_client_test (utm); + exit (0); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_tcp_server_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} diff --git a/src/uri/uri_udp_test.c b/src/uri/uri_udp_test.c new file mode 100644 index 00000000..6f5284c9 --- /dev/null +++ b/src/uri/uri_udp_test.c @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
+#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; +} session_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 *uri; + + /* Session pool */ + session_t *sessions; + + /* Hash table for disconnect processing */ + uword *session_index_by_vpp_handles; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + /* intermediate rx buffer */ + u8 *rx_buf; + + /* Our event queue */ + unix_shared_memory_queue_t *our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t *vpp_event_queue; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + volatile int time_to_stop; + volatile int time_to_print_stats; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; +} uri_udp_test_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 1000 +#else +#define NITER 1000000 +#endif + +uri_udp_test_main_t uri_udp_test_main; + +static void +stop_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uri_udp_test_main_t *utm = va_arg (*args, uri_udp_test_main_t *); + i32 error = va_arg 
(*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uri_udp_test_main_t * utm, connection_state_t state) +{ + f64 timeout = clib_time_now (&utm->clib_time) + 5.0; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->server_event_queue_address; + + utm->state = STATE_READY; +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t *rx_fifo, *tx_fifo; + session_t *session; + static f64 start_time; + u64 key; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + pool_get (utm->sessions, session); + + rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo->client_session_index = session - utm->sessions; + tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo->client_session_index = session - 
utm->sessions; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + hash_set (utm->session_index_by_vpp_handles, key, session - utm->sessions); + + utm->state = STATE_READY; + + if (pool_elts (utm->sessions) && (pool_elts (utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts (utm->sessions), now - start_time, + (f64) pool_elts (utm->sessions) / (now - start_time)); + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + session_t *session; + vl_api_disconnect_session_reply_t *rmp; + uword *p; + int rv = 0; + u64 key; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset (utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, 
accept_session) \ +_(DISCONNECT_SESSION, disconnect_session) + +void +uri_api_hookup (uri_udp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) +{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uri_udp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +handle_fifo_event_server_rx (uri_udp_test_main_t * utm, + session_fifo_event_t * e) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + int nbytes; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + do + { + nbytes = svm_fifo_dequeue_nowait (rx_fifo, 0, + vec_len (utm->rx_buf), utm->rx_buf); + } + while (nbytes <= 0); + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, nbytes, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = nbytes; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); +} + +void +handle_event_queue (uri_udp_test_main_t * utm) +{ + session_fifo_event_t _e, 
*e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, + 0 /* nowait */ ); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE (utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE (utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat (stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +void +uri_udp_test (uri_udp_test_main_t * utm) +{ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + bmp->segment_size = 2 << 30; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + handle_event_queue (utm); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +int +main (int argc, char **argv) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 *bind_name = (u8 *) "udp4:1234"; + mheap_t *h; + session_t *session; + int i; + + clib_mem_init (0, 256 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap 
thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 8192); + + utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword)); + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else + { + fformat (stderr, "%s: usage [master|slave]\n"); + exit (1); + } + } + + utm->uri = format (0, "%s%c", bind_name, 0); + + setup_signal_handlers (); + + uri_api_hookup (utm); + + if (connect_to_vpp ("uri_udp_test") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_udp_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/uri/uri_udp_test2.c b/src/uri/uri_udp_test2.c new file mode 100644 index 00000000..ddfffaa6 --- /dev/null +++ b/src/uri/uri_udp_test2.c @@ -0,0 +1,954 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../vnet/session/application_interface.h" + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
+#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; +} session_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 *uri; + + /* Session pool */ + session_t *sessions; + + /* Hash table for disconnect processing */ + uword *session_index_by_vpp_handles; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + /* intermediate rx buffer */ + u8 *rx_buf; + + /* URI for connect */ + u8 *connect_uri; + + int i_am_master; + + /* Our event queue */ + unix_shared_memory_queue_t *our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t *vpp_event_queue; + + /* $$$$ hack: cut-through session index */ + volatile u32 cut_through_session_index; + + /* unique segment name counter */ + u32 unique_segment_index; + + pid_t my_pid; + + /* pthread handle */ + pthread_t cut_through_thread_handle; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + volatile int time_to_stop; + volatile int time_to_print_stats; + + u32 configured_segment_size; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; + + /* convenience */ + svm_fifo_segment_main_t *segment_main; + +} uri_udp_test_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 10000 +#else +#define NITER 4000000 +#endif + +uri_udp_test_main_t uri_udp_test_main; + +static void +stop_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal (int signum) +{ 
+ uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uri_udp_test_main_t *utm = va_arg (*args, uri_udp_test_main_t *); + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uri_udp_test_main_t * utm, connection_state_t state) +{ +#if CLIB_DEBUG > 0 +#define TIMEOUT 600.0 +#else +#define TIMEOUT 600.0 +#endif + + f64 timeout = clib_time_now (&utm->clib_time) + TIMEOUT; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +u64 server_bytes_received, server_bytes_sent; + +static void * +cut_through_thread_fn (void *arg) +{ + session_t *s; + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; + u8 *my_copy_buffer = 0; + uri_udp_test_main_t *utm = &uri_udp_test_main; + i32 actual_transfer; + int rv; + u32 buffer_offset; + + while (utm->cut_through_session_index == ~0) + ; + + s = pool_elt_at_index (utm->sessions, utm->cut_through_session_index); + + rx_fifo = s->server_rx_fifo; + tx_fifo = s->server_tx_fifo; + + vec_validate (my_copy_buffer, 64 * 1024 - 1); + + while (true) + { + /* We read from the tx fifo and write to the rx fifo */ + do + { + actual_transfer = svm_fifo_dequeue_nowait (tx_fifo, 0, + vec_len (my_copy_buffer), + my_copy_buffer); + } + while (actual_transfer <= 0); + + server_bytes_received += actual_transfer; + + buffer_offset = 0; + while (actual_transfer > 0) + { + rv = svm_fifo_enqueue_nowait (rx_fifo, 0, actual_transfer, + my_copy_buffer + buffer_offset); + if (rv > 0) + { + actual_transfer -= rv; + buffer_offset += rv; + server_bytes_sent += rv; + } + + } + 
if (PREDICT_FALSE (utm->time_to_stop)) + break; + } + + pthread_exit (0); +} + +static void +uri_udp_slave_test (uri_udp_test_main_t * utm) +{ + vl_api_connect_uri_t *cmp; + int i; + u8 *test_data = 0; + u64 bytes_received = 0, bytes_sent = 0; + i32 bytes_to_read; + int rv; + int mypid = getpid (); + f64 before, after, delta, bytes_per_second; + session_t *session; + svm_fifo_t *rx_fifo, *tx_fifo; + int buffer_offset, bytes_to_send = 0; + + vec_validate (test_data, 64 * 1024 - 1); + for (i = 0; i < vec_len (test_data); i++) + test_data[i] = i & 0xff; + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl (0xfeedface); + memcpy (cmp->uri, utm->connect_uri, vec_len (utm->connect_uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + session = pool_elt_at_index (utm->sessions, utm->cut_through_session_index); + rx_fifo = session->server_rx_fifo; + tx_fifo = session->server_tx_fifo; + + before = clib_time_now (&utm->clib_time); + + vec_validate (utm->rx_buf, vec_len (test_data) - 1); + + for (i = 0; i < NITER; i++) + { + bytes_to_send = vec_len (test_data); + buffer_offset = 0; + while (bytes_to_send > 0) + { + rv = svm_fifo_enqueue_nowait (tx_fifo, mypid, + bytes_to_send, + test_data + buffer_offset); + + if (rv > 0) + { + bytes_to_send -= rv; + buffer_offset += rv; + bytes_sent += rv; + } + } + + bytes_to_read = svm_fifo_max_dequeue (rx_fifo); + + bytes_to_read = vec_len (utm->rx_buf) > bytes_to_read ? 
+ bytes_to_read : vec_len (utm->rx_buf); + + buffer_offset = 0; + while (bytes_to_read > 0) + { + rv = svm_fifo_dequeue_nowait (rx_fifo, mypid, + bytes_to_read, + utm->rx_buf + buffer_offset); + if (rv > 0) + { + bytes_to_read -= rv; + buffer_offset += rv; + bytes_received += rv; + } + } + } + while (bytes_received < bytes_sent) + { + rv = svm_fifo_dequeue_nowait (rx_fifo, mypid, + vec_len (utm->rx_buf), utm->rx_buf); + if (rv > 0) + { +#if CLIB_DEBUG > 0 + int j; + for (j = 0; j < rv; j++) + { + if (utm->rx_buf[j] != ((bytes_received + j) & 0xff)) + { + clib_warning ("error at byte %lld, 0x%x not 0x%x", + bytes_received + j, + utm->rx_buf[j], + ((bytes_received + j) & 0xff)); + } + } +#endif + bytes_received += (u64) rv; + } + } + + after = clib_time_now (&utm->clib_time); + delta = after - before; + bytes_per_second = 0.0; + + if (delta > 0.0) + bytes_per_second = (f64) bytes_received / delta; + + fformat (stdout, + "Done: %lld recv bytes in %.2f seconds, %.2f bytes/sec...\n\n", + bytes_received, delta, bytes_per_second); + fformat (stdout, + "Done: %lld sent bytes in %.2f seconds, %.2f bytes/sec...\n\n", + bytes_sent, delta, bytes_per_second); + fformat (stdout, + "client -> server -> client round trip: %.2f Gbit/sec \n\n", + (bytes_per_second * 8.0) / 1e9); +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + ASSERT (mp->server_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->server_event_queue_address; + + utm->state 
= STATE_READY; +} + +static void +vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + clib_warning ("Mapped new segment '%s' size %d", mp->segment_name, + mp->segment_size); +} + +static void +vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) +{ + u32 segment_index; + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *seg; + unix_shared_memory_queue_t *client_q; + vl_api_connect_uri_reply_t *rmp; + session_t *session; + int rv = 0; + + /* Create the segment */ + a->segment_name = (char *) format (0, "%d:segment%d%c", utm->my_pid, + utm->unique_segment_index++, 0); + a->segment_size = utm->configured_segment_size; + + rv = svm_fifo_segment_create (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", a->segment_name); + rv = VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + goto send_reply; + } + + vec_add2 (utm->seg, seg, 1); + + segment_index = vec_len (sm->segments) - 1; + + memcpy (seg, sm->segments + segment_index, sizeof (utm->seg[0])); + + pool_get (utm->sessions, session); + + /* + * By construction the master's idea of the rx fifo ends up in + * fsh->fifos[0], and the master's idea of the tx fifo ends up in + * fsh->fifos[1]. 
+ */ + session->server_rx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, + 128 * 1024); + ASSERT (session->server_rx_fifo); + + session->server_tx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, + 128 * 1024); + ASSERT (session->server_tx_fifo); + + session->server_rx_fifo->server_session_index = session - utm->sessions; + session->server_tx_fifo->server_session_index = session - utm->sessions; + utm->cut_through_session_index = session - utm->sessions; + + rv = pthread_create (&utm->cut_through_thread_handle, + NULL /*attr */ , cut_through_thread_fn, 0); + if (rv) + { + clib_warning ("pthread_create returned %d", rv); + rv = VNET_API_ERROR_SYSCALL_ERROR_1; + } + +send_reply: + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + + rmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (rv); + rmp->segment_name_length = vec_len (a->segment_name); + memcpy (rmp->segment_name, a->segment_name, vec_len (a->segment_name)); + + vec_free (a->segment_name); + + client_q = (unix_shared_memory_queue_t *) mp->client_queue_address; + vl_msg_api_send_shmem (client_q, (u8 *) & rmp); +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t *rx_fifo, *tx_fifo; + session_t *session; + static f64 start_time; + u64 key; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + pool_get (utm->sessions, session); + + rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo->client_session_index = session - utm->sessions; + tx_fifo = (svm_fifo_t 
*) mp->server_tx_fifo; + tx_fifo->client_session_index = session - utm->sessions; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + hash_set (utm->session_index_by_vpp_handles, key, session - utm->sessions); + + utm->state = STATE_READY; + + if (pool_elts (utm->sessions) && (pool_elts (utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts (utm->sessions), now - start_time, + (f64) pool_elts (utm->sessions) / (now - start_time)); + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + session_t *session; + vl_api_disconnect_session_reply_t *rmp; + uword *p; + int rv = 0; + u64 key; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset (utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_connect_uri_reply_t_handler 
(vl_api_connect_uri_reply_t * mp) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + ssvm_shared_header_t *sh; + svm_fifo_segment_private_t *seg; + svm_fifo_segment_header_t *fsh; + session_t *session; + u32 segment_index; + int rv; + + ASSERT (utm->i_am_master == 0); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + memset (a, 0, sizeof (*a)); + + a->segment_name = (char *) mp->segment_name; + + sleep (1); + + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", mp->segment_name); /* segment_name is used as a C string (see the cast above), so %s rather than the vector-only %v */ + return; + } + + segment_index = vec_len (sm->segments) - 1; + + vec_add2 (utm->seg, seg, 1); + + memcpy (seg, sm->segments + segment_index, sizeof (*seg)); + sh = seg->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + while (vec_len (fsh->fifos) < 2) + sleep (1); + + pool_get (utm->sessions, session); + utm->cut_through_session_index = session - utm->sessions; + + session->server_rx_fifo = (svm_fifo_t *) fsh->fifos[0]; + ASSERT (session->server_rx_fifo); + session->server_tx_fifo = (svm_fifo_t *) fsh->fifos[1]; + ASSERT (session->server_tx_fifo); + + /* security: could unlink /dev/shm/segment_name> here, maybe */ + + utm->state = STATE_READY; +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(CONNECT_URI, connect_uri) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(MAP_ANOTHER_SEGMENT, map_another_segment) + +void +uri_api_hookup (uri_udp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name)
+{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) +{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uri_udp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +handle_fifo_event_server_rx (uri_udp_test_main_t * utm, + session_fifo_event_t * e) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + int nbytes; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + do + { + nbytes = svm_fifo_dequeue_nowait (rx_fifo, 0, + vec_len (utm->rx_buf), utm->rx_buf); + } + while (nbytes <= 0); + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, nbytes, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = nbytes; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); +} + +void +handle_event_queue (uri_udp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, + 0 /* nowait */ ); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE 
(utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE (utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat (stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +void +uri_udp_test (uri_udp_test_main_t * utm) +{ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + bmp->initial_segment_size = 256 << 20; /* size of initial segment */ + bmp->options[SESSION_OPTIONS_FLAGS] = + SESSION_OPTIONS_FLAGS_USE_FIFO | SESSION_OPTIONS_FLAGS_ADD_SEGMENT; + bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = 16 << 10; + bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = 16 << 10; + bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128 << 20; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + handle_event_queue (utm); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +int +main (int argc, char **argv) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 *bind_name = (u8 *) "udp://0.0.0.0/1234"; + u32 tmp; + mheap_t *h; + session_t *session; + int i; + int i_am_master = 1; + + clib_mem_init (0, 256 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap 
thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 8192); + + utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword)); + + utm->my_pid = getpid (); + utm->configured_segment_size = 1 << 20; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else if (unformat (a, "segment-size %dM", &tmp)) + utm->configured_segment_size = tmp << 20; + else if (unformat (a, "segment-size %dG", &tmp)) + utm->configured_segment_size = tmp << 30; + else if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else + { + fformat (stderr, "%s: usage [master|slave]\n", argv[0]); /* supply the program name consumed by %s */ + exit (1); + } + } + + utm->cut_through_session_index = ~0; + utm->uri = format (0, "%s%c", bind_name, 0); + utm->i_am_master = i_am_master; + utm->segment_main = &svm_fifo_segment_main; + + utm->connect_uri = format (0, "udp://10.0.0.1/1234%c", 0); + + setup_signal_handlers (); + + uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master ?
"uri_udp_master" : "uri_udp_slave") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + if (i_am_master == 0) + { + uri_udp_slave_test (utm); + exit (0); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_udp_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +u32 +vl (void *p) +{ + return vec_len (p); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/uri/uritest.c b/src/uri/uritest.c new file mode 100644 index 00000000..edcdb3ad --- /dev/null +++ b/src/uri/uritest.c @@ -0,0 +1,484 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* role */ + int i_am_master; + + /* The URI we're playing with */ + u8 *uri; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; +} uritest_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 1000 +#else +#define NITER 1000000 +#endif + +uritest_main_t uritest_main; + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uritest_main_t *utm = va_arg (*args, uritest_main_t *); + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uritest_main_t * utm, connection_state_t state) +{ + f64 timeout = clib_time_now (&utm->clib_time) + 1.0; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uritest_main_t *utm = 
&uritest_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + ASSERT (utm->i_am_master); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + /* Create the segment */ + rv = svm_fifo_segment_create (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + vec_validate (utm->seg, 0); + + memcpy (utm->seg, a->rv, sizeof (*utm->seg)); + + /* + * By construction the master's idea of the rx fifo ends up in + * fsh->fifos[0], and the master's idea of the tx fifo ends up in + * fsh->fifos[1]. + */ + utm->rx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, 10240); + ASSERT (utm->rx_fifo); + + utm->tx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, 10240); + ASSERT (utm->tx_fifo); + + utm->state = STATE_READY; +} + +static void +vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) +{ + uritest_main_t *utm = &uritest_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + int rv; + + ASSERT (utm->i_am_master == 0); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + memset (a, 0, sizeof (*a)); + + a->segment_name = (char *) mp->segment_name; + + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + vec_validate (utm->seg, 0); + + memcpy (utm->seg, a->rv, sizeof (*utm->seg)); + sh = utm->seg->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + while (vec_len (fsh->fifos) < 2) + sleep (1); + + utm->rx_fifo = (svm_fifo_t *) fsh->fifos[1]; + ASSERT (utm->rx_fifo); + utm->tx_fifo = (svm_fifo_t *) fsh->fifos[0]; + ASSERT (utm->tx_fifo); + + /* security: could unlink /dev/shm/segment_name> here, maybe */ + + utm->state = STATE_READY; +} + +static void 
+vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uritest_main_t *utm = &uritest_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) + +void +uri_api_hookup (uritest_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name) +{ + uritest_main_t *utm = &uritest_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
+{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uritest_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +uritest_master (uritest_main_t * utm) +{ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + int i; + u8 *test_data = 0; + u8 *reply = 0; + u32 reply_len; + int mypid = getpid (); + + for (i = 0; i < 2048; i++) + vec_add1 (test_data, 'a' + (i % 32)); + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + bmp->segment_size = 256 << 10; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + for (i = 0; i < NITER; i++) + svm_fifo_enqueue (utm->tx_fifo, mypid, vec_len (test_data), test_data); + + vec_validate (reply, 0); + + reply_len = svm_fifo_dequeue (utm->rx_fifo, mypid, vec_len (reply), reply); + + if (reply_len != 1) + clib_warning ("reply length %d", reply_len); + + if (reply[0] == 1) + fformat (stdout, "Test OK..."); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); /* this wait is for STATE_START, set by the unbind reply handler */ + return; + } + + fformat (stdout, "Master done...\n"); +} + +void +uritest_slave (uritest_main_t * utm) +{ + vl_api_connect_uri_t *cmp; + int i, j; + u8 *test_data = 0; + u8 *reply = 0; +
u32 bytes_received = 0; + u32 actual_bytes; + int mypid = getpid (); + u8 ok; + f64 before, after, delta, bytes_per_second; + + vec_validate (test_data, 4095); + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl (0xfeedface); + memcpy (cmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + ok = 1; + before = clib_time_now (&utm->clib_time); + for (i = 0; i < NITER; i++) + { + actual_bytes = svm_fifo_dequeue (utm->rx_fifo, mypid, + vec_len (test_data), test_data); + j = 0; + while (j < actual_bytes) + { + if (test_data[j] != ('a' + (bytes_received % 32))) + ok = 0; + bytes_received++; + j++; + } + if (bytes_received == NITER * 2048) + break; + } + + vec_add1 (reply, ok); + + svm_fifo_enqueue (utm->tx_fifo, mypid, vec_len (reply), reply); + after = clib_time_now (&utm->clib_time); + delta = after - before; + bytes_per_second = 0.0; + + if (delta > 0.0) + bytes_per_second = (f64) bytes_received / delta; + + fformat (stdout, + "Slave done, %d bytes in %.2f seconds, %.2f bytes/sec...\n", + bytes_received, delta, bytes_per_second); +} + +int +main (int argc, char **argv) +{ + uritest_main_t *utm = &uritest_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + char *bind_name = "fifo:uritest"; + mheap_t *h; + int i_am_master = 0; + + clib_mem_init (0, 128 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + utm->uri = format (0, "%s%c", bind_name, 0); + + while 
(unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else + { + fformat (stderr, "%s: usage [master|slave]\n", argv[0]); /* supply the program name consumed by %s */ + exit (1); + } + } + + uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master ? "uritest_master" : "uritest_slave") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + utm->i_am_master = i_am_master; + + if (i_am_master) + uritest_master (utm); + else + uritest_slave (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 4f5eb09d..9f26bec7 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -360,7 +360,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, memset (f, 0, sizeof (f[0])); f->index = f - bm->buffer_free_list_pool; f->n_data_bytes = vlib_buffer_round_size (n_data_bytes); - f->min_n_buffers_each_physmem_alloc = 16; + f->min_n_buffers_each_physmem_alloc = VLIB_FRAME_SIZE; f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name); /* Setup free buffer template.
*/ diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 1f723f3b..69c8c7cc 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -240,6 +240,74 @@ vlib_get_buffer_opaque2 (vlib_buffer_t * b) return (void *) b->opaque2; } +/** \brief Get pointer to the end of buffer's data + * @param b pointer to the buffer + * @return pointer to tail of packet's data + */ +always_inline u8 * +vlib_buffer_get_tail (vlib_buffer_t * b) +{ + return b->data + b->current_data + b->current_length; +} + +/** \brief Append uninitialized data to buffer + * @param b pointer to the buffer + * @param size number of uninitialized bytes + * @return pointer to beginning of uninitialized data + */ +always_inline void * +vlib_buffer_put_uninit (vlib_buffer_t * b, u8 size) +{ + void *p = vlib_buffer_get_tail (b); + /* XXX make sure there's enough space */ + b->current_length += size; + return p; +} + +/** \brief Prepend uninitialized data to buffer + * @param b pointer to the buffer + * @param size number of uninitialized bytes + * @return pointer to beginning of uninitialized data + */ +always_inline void * +vlib_buffer_push_uninit (vlib_buffer_t * b, u8 size) +{ + ASSERT (b->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= size); + b->current_data -= size; + b->current_length += size; + + return vlib_buffer_get_current (b); +} + +/** \brief Make head room, typically for packet headers + * @param b pointer to the buffer + * @param size number of head room bytes + * @return pointer to start of buffer (current data) + */ +always_inline void * +vlib_buffer_make_headroom (vlib_buffer_t * b, u8 size) +{ + ASSERT (b->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= size); + b->current_data += size; + return vlib_buffer_get_current (b); +} + +/** \brief Retrieve bytes from buffer head + * @param b pointer to the buffer + * @param size number of bytes to pull + * @return pointer to start of buffer (current data) + */ +always_inline void * +vlib_buffer_pull (vlib_buffer_t * b, u8 size) +{ + if 
(b->current_length + VLIB_BUFFER_PRE_DATA_SIZE < size) + return 0; + + void *data = vlib_buffer_get_current (b); + vlib_buffer_advance (b, size); + return data; +} + /* Forward declaration. */ struct vlib_main_t; diff --git a/src/vlibmemory/unix_shared_memory_queue.c b/src/vlibmemory/unix_shared_memory_queue.c index 25d28910..e86edec3 100644 --- a/src/vlibmemory/unix_shared_memory_queue.c +++ b/src/vlibmemory/unix_shared_memory_queue.c @@ -33,18 +33,13 @@ * nels = number of elements on the queue * elsize = element size, presumably 4 and cacheline-size will * be popular choices. - * coid = consumer coid, from ChannelCreate * pid = consumer pid - * pulse_code = pulse code consumer expects - * pulse_value = pulse value consumer expects - * consumer_prio = consumer's priority, so pulses won't change - * the consumer's priority. * * The idea is to call this function in the queue consumer, * and e-mail the queue pointer to the producer(s). * - * The spp process / main thread allocates one of these - * at startup; its main input queue. The spp main input queue + * The vpp process / main thread allocates one of these + * at startup; its main input queue. The vpp main input queue * has a pointer to it in the shared memory segment header. * * You probably want to be on an svm data heap before calling this @@ -70,7 +65,7 @@ unix_shared_memory_queue_init (int nels, q->signal_when_queue_non_empty = signal_when_queue_non_empty; memset (&attr, 0, sizeof (attr)); - memset (&cattr, 0, sizeof (attr)); + memset (&cattr, 0, sizeof (cattr)); if (pthread_mutexattr_init (&attr)) clib_unix_warning ("mutexattr_init"); @@ -277,6 +272,7 @@ unix_shared_memory_queue_sub (unix_shared_memory_queue_t * q, clib_memcpy (elem, headp, q->elsize); q->head++; + /* $$$$ JFC shouldn't this be == 0? 
*/ if (q->cursize == q->maxsize) need_broadcast = 1; diff --git a/src/vlibmemory/unix_shared_memory_queue.h b/src/vlibmemory/unix_shared_memory_queue.h index f758f17c..13800065 100644 --- a/src/vlibmemory/unix_shared_memory_queue.h +++ b/src/vlibmemory/unix_shared_memory_queue.h @@ -29,7 +29,7 @@ typedef struct _unix_shared_memory_queue pthread_cond_t condvar; /* 8 bytes */ int head; int tail; - int cursize; + volatile int cursize; int maxsize; int elsize; int consumer_pid; diff --git a/src/vnet.am b/src/vnet.am index 64484e18..923f61d8 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -324,11 +324,7 @@ libvnet_la_SOURCES += \ vnet/ip/ip_input_acl.c \ vnet/ip/lookup.c \ vnet/ip/ping.c \ - vnet/ip/punt.c \ - vnet/ip/udp_format.c \ - vnet/ip/udp_init.c \ - vnet/ip/udp_local.c \ - vnet/ip/udp_pg.c + vnet/ip/punt.c nobase_include_HEADERS += \ vnet/ip/format.h \ @@ -354,11 +350,7 @@ nobase_include_HEADERS += \ vnet/ip/ports.def \ vnet/ip/protocols.def \ vnet/ip/punt_error.def \ - vnet/ip/punt.h \ - vnet/ip/tcp_packet.h \ - vnet/ip/udp_error.def \ - vnet/ip/udp.h \ - vnet/ip/udp_packet.h + vnet/ip/punt.h API_FILES += vnet/ip/ip.api @@ -473,6 +465,38 @@ test_map_LDADD = libvnet.la libvppinfra.la libvlib.la \ test_map_LDFLAGS = -static endif +######################################## +# Layer 4 protocol: tcp +######################################## +libvnet_la_SOURCES += \ + vnet/tcp/tcp_format.c \ + vnet/tcp/tcp_pg.c \ + vnet/tcp/tcp_syn_filter4.c \ + vnet/tcp/tcp_output.c \ + vnet/tcp/tcp_input.c \ + vnet/tcp/tcp_newreno.c \ + vnet/tcp/tcp.c + +nobase_include_HEADERS += \ + vnet/tcp/tcp_packet.h \ + vnet/tcp/tcp_timer.h \ + vnet/tcp/tcp.h + +######################################## +# Layer 4 protocol: udp +######################################## +libvnet_la_SOURCES += \ + vnet/udp/udp.c \ + vnet/udp/udp_input.c \ + vnet/udp/builtin_server.c \ + vnet/udp/udp_format.c \ + vnet/udp/udp_local.c \ + vnet/udp/udp_pg.c + +nobase_include_HEADERS += \ + vnet/udp/udp_error.def \ + 
vnet/udp/udp.h \ + vnet/udp/udp_packet.h ######################################## # Tunnel protocol: gre @@ -833,6 +857,28 @@ libvnet_la_SOURCES += \ nobase_include_HEADERS += \ vnet/devices/ssvm/ssvm_eth.h +######################################## +# session management +######################################## + +libvnet_la_SOURCES += \ + vnet/session/session.c \ + vnet/session/node.c \ + vnet/session/transport.c \ + vnet/session/application.c \ + vnet/session/session_cli.c \ + vnet/session/hashes.c \ + vnet/session/application_interface.c \ + vnet/session/session_api.c + +nobase_include_HEADERS += \ + vnet/session/session.h \ + vnet/session/application.h \ + vnet/session/transport.h \ + vnet/session/application_interface.h + +API_FILES += vnet/session/session.api + ######################################## # Linux packet interface ######################################## diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 8680ef7c..861a5767 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -91,14 +91,19 @@ _(INVALID_ADDRESS_FAMILY, -97, "Invalid address family") \ _(INVALID_SUB_SW_IF_INDEX, -98, "Invalid sub-interface sw_if_index") \ _(TABLE_TOO_BIG, -99, "Table too big") \ _(CANNOT_ENABLE_DISABLE_FEATURE, -100, "Cannot enable/disable feature") \ -_(BFD_EEXIST, -101, "Duplicate BFD object") \ -_(BFD_ENOENT, -102, "No such BFD object") \ -_(BFD_EINUSE, -103, "BFD object in use") \ -_(BFD_NOTSUPP, -104, "BFD feature not supported") \ -_(LISP_RLOC_LOCAL, -105, "RLOC address is local") \ -_(BFD_EAGAIN, -106, "BFD object cannot be manipulated at this time") \ -_(INVALID_GPE_MODE, -107, "Invalid GPE mode") \ -_(LISP_GPE_ENTRIES_PRESENT, -108, "LISP GPE entries are present") +_(BFD_EEXIST, -101, "Duplicate BFD object") \ +_(BFD_ENOENT, -102, "No such BFD object") \ +_(BFD_EINUSE, -103, "BFD object in use") \ +_(BFD_NOTSUPP, -104, "BFD feature not supported") \ +_(ADDRESS_IN_USE, -105, "Address in use") \ +_(ADDRESS_NOT_IN_USE, -106, "Address not
in use") \ +_(QUEUE_FULL, -107, "Queue full") \ +_(UNKNOWN_URI_TYPE, -108, "Unknown URI type") \ +_(URI_FIFO_CREATE_FAILED, -109, "URI FIFO segment create failed") \ +_(LISP_RLOC_LOCAL, -110, "RLOC address is local") \ +_(BFD_EAGAIN, -111, "BFD object cannot be manipulated at this time") \ +_(INVALID_GPE_MODE, -112, "Invalid GPE mode") \ +_(LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present") typedef enum { diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 146faad6..cf05089b 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -18,12 +18,12 @@ #include #include #include -#include +#include +#include #include #include #include #include -#include #include #include #include diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index f1cc6371..3de01f2a 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -277,6 +277,16 @@ typedef struct u16 buffer_advance; } device_input_feat; + /* TCP */ + struct + { + u32 connection_index; + u32 seq_number; + u32 seq_end; + u32 ack_number; + u8 flags; + } tcp; + u32 unused[6]; }; } vnet_buffer_opaque_t; diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c index 6093e2ac..b651a1f1 100644 --- a/src/vnet/classify/vnet_classify.c +++ b/src/vnet/classify/vnet_classify.c @@ -695,8 +695,8 @@ int vnet_classify_add_del_table (vnet_classify_main_t * cm, } #define foreach_tcp_proto_field \ -_(src_port) \ -_(dst_port) +_(src) \ +_(dst) #define foreach_udp_proto_field \ _(src_port) \ diff --git a/src/vnet/dhcp/dhcp_proxy.h b/src/vnet/dhcp/dhcp_proxy.h index c0d79c41..4586d883 100644 --- a/src/vnet/dhcp/dhcp_proxy.h +++ b/src/vnet/dhcp/dhcp_proxy.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include typedef enum { #define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n, diff --git a/src/vnet/flow/flow_report.h b/src/vnet/flow/flow_report.h index 4e764377..e8ed3818 100644 --- a/src/vnet/flow/flow_report.h +++ b/src/vnet/flow/flow_report.h @@ -23,7 +23,7 @@ #include 
#include #include -#include +#include #include #include #include diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h index 02a1a963..70b4ccd8 100644 --- a/src/vnet/ip/ip.h +++ b/src/vnet/ip/ip.h @@ -50,8 +50,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index b184fbae..4e075d0f 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -309,8 +309,8 @@ ip4_compute_flow_hash (const ip4_header_t * ip, b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2; b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; - t1 = is_tcp_udp ? tcp->ports.src : 0; - t2 = is_tcp_udp ? tcp->ports.dst : 0; + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? tcp->dst : 0; t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0; @@ -334,6 +334,44 @@ u8 *format_ip4_forward_next_trace (u8 * s, va_list * args); u32 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0); +#define IP_DF 0x4000 /* don't fragment */ + +/** + * Push IPv4 header to buffer + * + * This does not support fragmentation. 
+ * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param src - source IP + * @param dst - destination IP + * @param proto - payload proto + * + * @return - pointer to start of IP header + */ +always_inline void * +vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b, + ip4_address_t * src, ip4_address_t * dst, int proto) +{ + ip4_header_t *ih; + + /* make some room */ + ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t)); + + ih->ip_version_and_header_length = 0x45; + ih->tos = 0; + ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); + + /* No fragments */ + ih->flags_and_fragment_offset = clib_host_to_net_u16 (IP_DF); + ih->ttl = 255; + ih->protocol = proto; + ih->src_address.as_u32 = src->as_u32; + ih->dst_address.as_u32 = dst->as_u32; + + ih->checksum = ip4_header_checksum (ih); + return ih; +} #endif /* included_ip_ip4_h */ /* diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 8081b34b..66d91ab6 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1478,8 +1478,18 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) return p0->flags; } -static uword -ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +/* *INDENT-OFF* */ +VNET_FEATURE_ARC_INIT (ip4_local) = +{ + .arc_name = "ip4-local", + .start_nodes = VNET_FEATURES ("ip4-local"), +}; +/* *INDENT-ON* */ + +static inline uword +ip4_local_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int head_of_feature_arc) { ip4_main_t *im = &ip4_main; ip_lookup_main_t *lm = &im->lookup_main; @@ -1487,6 +1497,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) u32 *from, *to_next, n_left_from, n_left_to_next; vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip4_input_node.index); + u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; from = vlib_frame_vector_args (frame); n_left_from =
frame->n_vectors; @@ -1513,7 +1524,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) i32 len_diff0, len_diff1; u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; - u8 enqueue_code; + u32 sw_if_index0, sw_if_index1; pi0 = to_next[0] = from[0]; pi1 = to_next[1] = from[1]; @@ -1522,6 +1533,8 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) to_next += 2; n_left_to_next -= 2; + next0 = next1 = IP_LOCAL_NEXT_DROP; + p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); @@ -1531,14 +1544,18 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data; - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - fib_index1 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p1)->sw_if_index[VLIB_RX]); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; @@ -1557,6 +1574,13 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) until support of IP frag reassembly is implemented */ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; proto1 = ip4_is_fragment (ip1) ? 
0xfe : ip1->protocol; + + if (head_of_feature_arc == 0) + { + error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL; + goto skip_checks; + } + is_udp0 = proto0 == IP_PROTOCOL_UDP; is_udp1 = proto1 == IP_PROTOCOL_UDP; is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; @@ -1686,6 +1710,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next0 = lm->local_next_by_ip_protocol[proto0]; next1 = lm->local_next_by_ip_protocol[proto1]; + skip_checks: next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; next1 = @@ -1694,44 +1719,17 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0->error = error0 ? error_node->errors[error0] : 0; p1->error = error1 ? error_node->errors[error1] : 0; - enqueue_code = (next0 != next_index) + 2 * (next1 != next_index); - - if (PREDICT_FALSE (enqueue_code != 0)) + if (head_of_feature_arc) { - switch (enqueue_code) - { - case 1: - /* A B A */ - to_next[-2] = pi1; - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - break; - - case 2: - /* A A B */ - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next1, pi1); - break; - - case 3: - /* A B B or A B C */ - to_next -= 2; - n_left_to_next += 2; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - vlib_set_next_frame_buffer (vm, node, next1, pi1); - if (next0 == next1) - { - vlib_put_next_frame (vm, node, next_index, - n_left_to_next); - next_index = next1; - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - } - break; - } + if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); + if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1); } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, + next0, next1); } while (n_left_from > 0 && n_left_to_next 
> 0) @@ -1746,6 +1744,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; load_balance_t *lb0; const dpo_id_t *dpo0; + u32 sw_if_index0; pi0 = to_next[0] = from[0]; from += 1; @@ -1753,14 +1752,18 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) to_next += 1; n_left_to_next -= 1; + next0 = IP_LOCAL_NEXT_DROP; + p0 = vlib_get_buffer (vm, pi0); ip0 = vlib_buffer_get_current (p0); vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; @@ -1775,6 +1778,13 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* Treat IP frag packets as "experimental" protocol for now until support of IP frag reassembly is implemented */ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; + + if (head_of_feature_arc == 0) + { + error0 = IP4_ERROR_UNKNOWN_PROTOCOL; + goto skip_check; + } + is_udp0 = proto0 == IP_PROTOCOL_UDP; is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; @@ -1847,6 +1857,8 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip0->dst_address.as_u32 != 0xFFFFFFFF) ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + skip_check: + next0 = lm->local_next_by_ip_protocol[proto0]; next0 = @@ -1854,18 +1866,15 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0->error = error0 ? 
error_node->errors[error0] : 0; - if (PREDICT_FALSE (next0 != next_index)) + if (head_of_feature_arc) { - n_left_to_next += 1; - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - - next_index = next0; - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - to_next[0] = pi0; - to_next += 1; - n_left_to_next -= 1; + if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + } vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -1874,21 +1883,57 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) return frame->n_vectors; } +static uword +ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_local_node) = { - .function = ip4_local,.name = "ip4-local",.vector_size = - sizeof (u32),.format_trace = - format_ip4_forward_next_trace,.n_next_nodes = - IP_LOCAL_N_NEXT,.next_nodes = + .function = ip4_local, + .name = "ip4-local", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_next_nodes = IP_LOCAL_N_NEXT, + .next_nodes = { - [IP_LOCAL_NEXT_DROP] = "error-drop", - [IP_LOCAL_NEXT_PUNT] = "error-punt", - [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", - [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",} -,}; + [IP_LOCAL_NEXT_DROP] = "error-drop", + [IP_LOCAL_NEXT_PUNT] = "error-punt", + [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", + [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",}, +}; +/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local); +static uword +ip4_local_end_of_arc (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ 
+VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = { + .function = ip4_local_end_of_arc, + .name = "ip4-local-end-of-arc", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + .sibling_of = "ip4-local", +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc) + +VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = { + .arc_name = "ip4-local", + .node_name = "ip4-local-end-of-arc", + .runs_before = 0, /* not before any other features */ +}; +/* *INDENT-ON* */ + void ip4_register_protocol (u32 protocol, u32 node_index) { diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h index 8da788b4..b2c1fcd4 100644 --- a/src/vnet/ip/ip4_packet.h +++ b/src/vnet/ip/ip4_packet.h @@ -41,7 +41,7 @@ #define included_ip4_packet_h #include /* for ip_csum_t */ -#include /* for tcp_header_t */ +#include /* for tcp_header_t */ #include /* for clib_net_to_host_u16 */ /* IP4 address which can be accessed either as 4 bytes @@ -342,10 +342,10 @@ ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0) ip0->src_address.data_u32 = dst0; ip0->dst_address.data_u32 = src0; - src0 = tcp0->ports.src; - dst0 = tcp0->ports.dst; - tcp0->ports.src = dst0; - tcp0->ports.dst = src0; + src0 = tcp0->src; + dst0 = tcp0->dst; + tcp0->src = dst0; + tcp0->dst = src0; } always_inline void @@ -363,14 +363,14 @@ ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1, ip0->dst_address.data_u32 = src0; ip1->dst_address.data_u32 = src1; - src0 = tcp0->ports.src; - src1 = tcp1->ports.src; - dst0 = tcp0->ports.dst; - dst1 = tcp1->ports.dst; - tcp0->ports.src = dst0; - tcp1->ports.src = dst1; - tcp0->ports.dst = src0; - tcp1->ports.dst = src1; + src0 = tcp0->src; + src1 = tcp1->src; + dst0 = tcp0->dst; + dst1 = tcp1->dst; + tcp0->src = dst0; + tcp1->src = dst1; + tcp0->dst = src0; + tcp1->dst = src1; } #endif /* included_ip4_packet_h */ diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index 5456f0f2..2615fbfa 100644 --- a/src/vnet/ip/ip6.h 
+++ b/src/vnet/ip/ip6.h @@ -461,8 +461,8 @@ ip6_compute_flow_hash (const ip6_header_t * ip, b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2; b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; - t1 = is_tcp_udp ? tcp->ports.src : 0; - t2 = is_tcp_udp ? tcp->ports.dst : 0; + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? tcp->dst : 0; t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0; @@ -497,6 +497,46 @@ int ip6_hbh_register_option (u8 option, int ip6_hbh_unregister_option (u8 option); void ip6_hbh_set_next_override (uword next); +/** + * Push IPv6 header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param src - source IP + * @param dst - destination IP + * @param proto - payload proto + * + * @return - pointer to start of IP header + */ +always_inline void * +vlib_buffer_push_ip6 (vlib_main_t * vm, vlib_buffer_t * b, + ip6_address_t * src, ip6_address_t * dst, int proto) +{ + ip6_header_t *ip6h; + u16 payload_length; + + /* make some room */ + ip6h = vlib_buffer_push_uninit (b, sizeof (ip6_header_t)); + + ip6h->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + /* calculate ip6 payload length */ + payload_length = vlib_buffer_length_in_chain (vm, b); + payload_length -= sizeof (*ip6h); + + ip6h->payload_length = clib_host_to_net_u16 (payload_length); + + ip6h->hop_limit = 0xff; + ip6h->protocol = proto; + clib_memcpy (ip6h->src_address.as_u8, src->as_u8, + sizeof (ip6h->src_address)); + clib_memcpy (ip6h->dst_address.as_u8, dst->as_u8, + sizeof (ip6h->src_address)); + + return ip6h; +} + #endif /* included_ip_ip6_h */ /* diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index 1e551c8b..4fd14b96 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -40,7 +40,7 @@ #ifndef included_ip6_packet_h #define included_ip6_packet_h -#include +#include #include typedef
union @@ -373,10 +373,10 @@ ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0) { u16 src0, dst0; - src0 = tcp0->ports.src; - dst0 = tcp0->ports.dst; - tcp0->ports.src = dst0; - tcp0->ports.dst = src0; + src0 = tcp0->src; + dst0 = tcp0->dst; + tcp0->src = dst0; + tcp0->dst = src0; } } @@ -400,14 +400,14 @@ ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1, { u16 src0, dst0, src1, dst1; - src0 = tcp0->ports.src; - src1 = tcp1->ports.src; - dst0 = tcp0->ports.dst; - dst1 = tcp1->ports.dst; - tcp0->ports.src = dst0; - tcp1->ports.src = dst1; - tcp0->ports.dst = src0; - tcp1->ports.dst = src1; + src0 = tcp0->src; + src1 = tcp1->src; + dst0 = tcp0->dst; + dst1 = tcp1->dst; + tcp0->src = dst0; + tcp1->src = dst1; + tcp0->dst = src0; + tcp1->dst = src1; } } diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index 9c735128..48558401 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -23,7 +23,7 @@ */ #include #include -#include +#include #include #define foreach_punt_next \ diff --git a/src/vnet/ip/tcp_packet.h b/src/vnet/ip/tcp_packet.h deleted file mode 100644 index 93f73e01..00000000 --- a/src/vnet/ip/tcp_packet.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/* - * ip4/tcp_packet.h: TCP packet format (see RFC 793) - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef included_tcp_packet_h -#define included_tcp_packet_h - -/* TCP flags bit 0 first. */ -#define foreach_tcp_flag \ - _ (FIN) \ - _ (SYN) \ - _ (RST) \ - _ (PSH) \ - _ (ACK) \ - _ (URG) \ - _ (ECE) \ - _ (CWR) - -enum -{ -#define _(f) TCP_FLAG_BIT_##f, - foreach_tcp_flag -#undef _ - TCP_N_FLAG_BITS, - -#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f, - foreach_tcp_flag -#undef _ -}; - -typedef struct -{ - /* Source and destination port. */ - union - { - union - { - struct - { - u16 src, dst; - }; - u32 src_and_dst; - } ports; - struct - { - u16 src_port, dst_port; - }; - }; - - /* Sequence and acknowledgment number. */ - u32 seq_number, ack_number; - - /* Size of TCP header in 32-bit units plus 4 reserved bits. 
*/ - u8 tcp_header_u32s_and_reserved; - - /* see foreach_tcp_flag for enumation of tcp flags. */ - u8 flags; - - /* Current window advertised by sender. - This is the number of bytes sender is willing to receive - right now. */ - u16 window; - - /* Checksum of TCP pseudo header and data. */ - u16 checksum; - - u16 urgent_pointer; -} tcp_header_t; - -always_inline int -tcp_header_bytes (tcp_header_t * t) -{ - return (t->tcp_header_u32s_and_reserved >> 4) * sizeof (u32); -} - -/* TCP options. */ -typedef enum tcp_option_type -{ - TCP_OPTION_END = 0, - TCP_OPTION_NOP = 1, - TCP_OPTION_MSS = 2, - TCP_OPTION_WINDOW_SCALE = 3, - TCP_OPTION_SACK_PERMITTED = 4, - TCP_OPTION_SACK_BLOCK = 5, - TCP_OPTION_TIME_STAMP = 8, -} tcp_option_type_t; - -/* All except NOP and END have 1 byte length field. */ -typedef struct -{ - tcp_option_type_t type:8; - - /* Length of this option in bytes. */ - u8 length; -} tcp_option_with_length_t; - -#endif /* included_tcp_packet_h */ - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp.h b/src/vnet/ip/udp.h deleted file mode 100644 index bad58b5d..00000000 --- a/src/vnet/ip/udp.h +++ /dev/null @@ -1,315 +0,0 @@ -/* - * ip/udp.h: udp protocol - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef included_udp_h -#define included_udp_h - -#include -#include -#include -#include -#include -#include -#include - -typedef enum -{ -#define udp_error(n,s) UDP_ERROR_##n, -#include -#undef udp_error - UDP_N_ERROR, -} udp_error_t; - -#define foreach_udp4_dst_port \ -_ (67, dhcp_to_server) \ -_ (68, dhcp_to_client) \ -_ (500, ikev2) \ -_ (3784, bfd4) \ -_ (3785, bfd_echo4) \ -_ (4341, lisp_gpe) \ -_ (4342, lisp_cp) \ -_ (4739, ipfix) \ -_ (4789, vxlan) \ -_ (4789, vxlan6) \ -_ (4790, vxlan_gpe) \ -_ (6633, vpath_3) - - -#define foreach_udp6_dst_port \ -_ (547, dhcpv6_to_server) \ -_ (546, dhcpv6_to_client) \ -_ (3784, bfd6) \ -_ (3785, bfd_echo6) \ -_ (4341, lisp_gpe6) \ -_ (4342, lisp_cp6) \ -_ (4790, vxlan6_gpe) \ -_ (6633, vpath6_3) - -typedef enum -{ -#define _(n,f) UDP_DST_PORT_##f = n, - foreach_udp4_dst_port foreach_udp6_dst_port -#undef _ -} udp_dst_port_t; - -typedef enum -{ -#define _(n,f) UDP6_DST_PORT_##f = n, - foreach_udp6_dst_port -#undef _ -} udp6_dst_port_t; - -typedef struct -{ - /* Name (a c string). */ - char *name; - - /* GRE protocol type in host byte order. */ - udp_dst_port_t dst_port; - - /* Node which handles this type. */ - u32 node_index; - - /* Next index for this type. */ - u32 next_index; -} udp_dst_port_info_t; - -typedef enum -{ - UDP_IP6 = 0, - UDP_IP4, /* the code is full of is_ip4... */ - N_UDP_AF, -} udp_af_t; - -typedef struct -{ - udp_dst_port_info_t *dst_port_infos[N_UDP_AF]; - - /* Hash tables mapping name/protocol to protocol info index. */ - uword *dst_port_info_by_name[N_UDP_AF]; - uword *dst_port_info_by_dst_port[N_UDP_AF]; - - /* convenience */ - vlib_main_t *vlib_main; -} udp_main_t; - -always_inline udp_dst_port_info_t * -udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4) -{ - uword *p = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port); - return p ? 
vec_elt_at_index (um->dst_port_infos[is_ip4], p[0]) : 0; -} - -format_function_t format_udp_header; -format_function_t format_udp_rx_trace; - -unformat_function_t unformat_udp_header; - -void udp_register_dst_port (vlib_main_t * vm, - udp_dst_port_t dst_port, - u32 node_index, u8 is_ip4); - -void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); - -always_inline void -ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) -{ - u16 new_l0; - udp_header_t *udp0; - - if (is_ip4) - { - ip4_header_t *ip0; - ip_csum_t sum0; - u16 old_l0 = 0; - - ip0 = vlib_buffer_get_current (b0); - - /* fix the ing outer-IP checksum */ - sum0 = ip0->checksum; - /* old_l0 always 0, see the rewrite setup */ - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */ ); - ip0->checksum = ip_csum_fold (sum0); - ip0->length = new_l0; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - udp0->length = new_l0; - } - else - { - ip6_header_t *ip0; - int bogus0; - - ip0 = vlib_buffer_get_current (b0); - - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - ip0->payload_length = new_l0; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp0->length = new_l0; - - udp0->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); - ASSERT (bogus0 == 0); - - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - } -} - -always_inline void -ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, - u8 is_ip4) -{ - vlib_buffer_advance (b0, -ec_len); - - if (is_ip4) - { - ip4_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - - /* Apply the encap string. 
*/ - clib_memcpy (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 1); - } - else - { - ip6_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - - /* Apply the encap string. */ - clib_memcpy (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 0); - } -} - -always_inline void -ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, - u8 * ec0, u8 * ec1, word ec_len, u8 is_v4) -{ - u16 new_l0, new_l1; - udp_header_t *udp0, *udp1; - - ASSERT (_vec_len (ec0) == _vec_len (ec1)); - - vlib_buffer_advance (b0, -ec_len); - vlib_buffer_advance (b1, -ec_len); - - if (is_v4) - { - ip4_header_t *ip0, *ip1; - ip_csum_t sum0, sum1; - u16 old_l0 = 0, old_l1 = 0; - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* Apply the encap string */ - clib_memcpy (ip0, ec0, ec_len); - clib_memcpy (ip1, ec1, ec_len); - - /* fix the ing outer-IP checksum */ - sum0 = ip0->checksum; - sum1 = ip1->checksum; - - /* old_l0 always 0, see the rewrite setup */ - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); - - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */ ); - sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, - length /* changed member */ ); - - ip0->checksum = ip_csum_fold (sum0); - ip1->checksum = ip_csum_fold (sum1); - - ip0->length = new_l0; - ip1->length = new_l1; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp1 = (udp_header_t *) (ip1 + 1); - - new_l0 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - new_l1 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - - sizeof (*ip1)); - udp0->length = new_l0; - udp1->length = new_l1; - } - else - { - ip6_header_t *ip0, *ip1; - int bogus0, bogus1; - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* Apply the encap string. 
*/ - clib_memcpy (ip0, ec0, ec_len); - clib_memcpy (ip1, ec1, ec_len); - - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - - sizeof (*ip1)); - ip0->payload_length = new_l0; - ip1->payload_length = new_l1; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp1 = (udp_header_t *) (ip1 + 1); - - udp0->length = new_l0; - udp1->length = new_l1; - - udp0->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); - udp1->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip1, &bogus1); - ASSERT (bogus0 == 0); - ASSERT (bogus1 == 0); - - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - if (udp1->checksum == 0) - udp1->checksum = 0xffff; - } -} - -#endif /* included_udp_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_error.def b/src/vnet/ip/udp_error.def deleted file mode 100644 index bfdae0ac..00000000 --- a/src/vnet/ip/udp_error.def +++ /dev/null @@ -1,21 +0,0 @@ -/* - * udp_error.def: udp errors - * - * Copyright (c) 2013-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -udp_error (NONE, "no error") -udp_error (NO_LISTENER, "no listener for dst port") -udp_error (LENGTH_ERROR, "UDP packets with length errors") -udp_error (PUNT, "no listener punt") diff --git a/src/vnet/ip/udp_format.c b/src/vnet/ip/udp_format.c deleted file mode 100644 index abdf561e..00000000 --- a/src/vnet/ip/udp_format.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip/udp_format.c: udp formatting - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include - -/* Format UDP header. */ -u8 * -format_udp_header (u8 * s, va_list * args) -{ - udp_header_t *udp = va_arg (*args, udp_header_t *); - u32 max_header_bytes = va_arg (*args, u32); - uword indent; - u32 header_bytes = sizeof (udp[0]); - - /* Nothing to do. */ - if (max_header_bytes < sizeof (udp[0])) - return format (s, "UDP header truncated"); - - indent = format_get_indent (s); - indent += 2; - - s = format (s, "UDP: %d -> %d", - clib_net_to_host_u16 (udp->src_port), - clib_net_to_host_u16 (udp->dst_port)); - - s = format (s, "\n%Ulength %d, checksum 0x%04x", - format_white_space, indent, - clib_net_to_host_u16 (udp->length), - clib_net_to_host_u16 (udp->checksum)); - - /* Recurse into next protocol layer. */ - if (max_header_bytes != 0 && header_bytes < max_header_bytes) - { - ip_main_t *im = &ip_main; - tcp_udp_port_info_t *pi; - - pi = ip_get_tcp_udp_port_info (im, udp->dst_port); - - if (pi && pi->format_header) - s = format (s, "\n%U%U", - format_white_space, indent - 2, pi->format_header, - /* next protocol header */ (udp + 1), - max_header_bytes - sizeof (udp[0])); - } - - return s; -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_init.c b/src/vnet/ip/udp_init.c deleted file mode 100644 index 1241ca4a..00000000 --- a/src/vnet/ip/udp_init.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip/udp_init.c: udp initialization - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include - -clib_error_t * -udp_init (vlib_main_t * vm) -{ - ip_main_t *im = &ip_main; - ip_protocol_info_t *pi; - clib_error_t *error; - - error = vlib_call_init_function (vm, ip_main_init); - - if (!error) - { - pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP); - if (pi == 0) - return clib_error_return (0, "UDP protocol info AWOL"); - pi->format_header = format_udp_header; - pi->unformat_pg_edit = unformat_pg_udp_header; - } - - return 0; -} - -VLIB_INIT_FUNCTION (udp_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_local.c b/src/vnet/ip/udp_local.c deleted file mode 100644 index 13ab6e4f..00000000 --- a/src/vnet/ip/udp_local.c +++ /dev/null @@ -1,645 +0,0 @@ -/* - * node.c: udp packet processing - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -udp_main_t udp_main; - -#define foreach_udp_input_next \ - _ (PUNT, "error-punt") \ - _ (DROP, "error-drop") \ - _ (ICMP4_ERROR, "ip4-icmp-error") \ - _ (ICMP6_ERROR, "ip6-icmp-error") - -typedef enum -{ -#define _(s,n) UDP_INPUT_NEXT_##s, - foreach_udp_input_next -#undef _ - UDP_INPUT_N_NEXT, -} udp_input_next_t; - -typedef struct -{ - u16 src_port; - u16 dst_port; - u8 bound; -} udp_rx_trace_t; - -u8 * -format_udp_rx_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - udp_rx_trace_t *t = va_arg (*args, udp_rx_trace_t *); - - s = format (s, "UDP: src-port %d dst-port %d%s", - clib_net_to_host_u16 (t->src_port), - clib_net_to_host_u16 (t->dst_port), - t->bound ? "" : " (no listener)"); - return s; -} - -typedef struct -{ - /* Sparse vector mapping udp dst_port in network byte order - to next index. */ - u16 *next_by_dst_port; - u8 punt_unknown; -} udp_input_runtime_t; - -vlib_node_registration_t udp4_input_node; -vlib_node_registration_t udp6_input_node; - -always_inline uword -udp46_input_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame, int is_ip4) -{ - udp_input_runtime_t *rt = is_ip4 ? 
- (void *) vlib_node_get_runtime_data (vm, udp4_input_node.index) - : (void *) vlib_node_get_runtime_data (vm, udp6_input_node.index); - __attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next; - word n_no_listener = 0; - u8 punt_unknown = rt->punt_unknown; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - udp_header_t *h0 = 0, *h1 = 0; - u32 i0, i1, dst_port0, dst_port1; - u32 advance0, advance1; - u32 error0, next0, error1, next1; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD); - CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* ip4/6_local hands us the ip header, not the udp header */ - if (is_ip4) - { - advance0 = sizeof (ip4_header_t); - advance1 = sizeof (ip4_header_t); - } - else - { - advance0 = sizeof (ip6_header_t); - advance1 = sizeof (ip6_header_t); - } - - if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) - { - error0 = UDP_ERROR_LENGTH_ERROR; - next0 = UDP_INPUT_NEXT_DROP; - } - else - { - vlib_buffer_advance (b0, advance0); - h0 = vlib_buffer_get_current (b0); - error0 = next0 = 0; - if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) > - vlib_buffer_length_in_chain (vm, b0))) - { - error0 = UDP_ERROR_LENGTH_ERROR; - next0 = UDP_INPUT_NEXT_DROP; - } - } - - if 
(PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1))) - { - error1 = UDP_ERROR_LENGTH_ERROR; - next1 = UDP_INPUT_NEXT_DROP; - } - else - { - vlib_buffer_advance (b1, advance1); - h1 = vlib_buffer_get_current (b1); - error1 = next1 = 0; - if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) > - vlib_buffer_length_in_chain (vm, b1))) - { - error1 = UDP_ERROR_LENGTH_ERROR; - next1 = UDP_INPUT_NEXT_DROP; - } - } - - /* Index sparse array with network byte order. */ - dst_port0 = (error0 == 0) ? h0->dst_port : 0; - dst_port1 = (error1 == 0) ? h1->dst_port : 0; - sparse_vec_index2 (rt->next_by_dst_port, dst_port0, dst_port1, - &i0, &i1); - next0 = (error0 == 0) ? vec_elt (rt->next_by_dst_port, i0) : next0; - next1 = (error1 == 0) ? vec_elt (rt->next_by_dst_port, i1) : next1; - - if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b0, -(word) advance0); - - if (PREDICT_FALSE (punt_unknown)) - { - b0->error = node->errors[UDP_ERROR_PUNT]; - next0 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b0, - ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b0, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b0->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b0, sizeof (*h0)); - } - - if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b1, -(word) advance1); - - if (PREDICT_FALSE (punt_unknown)) - { - b1->error = node->errors[UDP_ERROR_PUNT]; - next1 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b1, - 
ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next1 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b1, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next1 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b1->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b1, sizeof (*h1)); - } - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h0 ? h0->src_port : 0; - tr->dst_port = h0 ? h0->dst_port : 0; - tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && - next0 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b1, sizeof (*tr)); - if (b1->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h1 ? h1->src_port : 0; - tr->dst_port = h1 ? 
h1->dst_port : 0; - tr->bound = (next1 != UDP_INPUT_NEXT_ICMP4_ERROR && - next1 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - udp_header_t *h0 = 0; - u32 i0, next0; - u32 advance0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - /* ip4/6_local hands us the ip header, not the udp header */ - if (is_ip4) - advance0 = sizeof (ip4_header_t); - else - advance0 = sizeof (ip6_header_t); - - if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) - { - b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; - next0 = UDP_INPUT_NEXT_DROP; - goto trace_x1; - } - - vlib_buffer_advance (b0, advance0); - - h0 = vlib_buffer_get_current (b0); - - if (PREDICT_TRUE (clib_net_to_host_u16 (h0->length) <= - vlib_buffer_length_in_chain (vm, b0))) - { - i0 = sparse_vec_index (rt->next_by_dst_port, h0->dst_port); - next0 = vec_elt (rt->next_by_dst_port, i0); - - if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b0, -(word) advance0); - - if (PREDICT_FALSE (punt_unknown)) - { - b0->error = node->errors[UDP_ERROR_PUNT]; - next0 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b0, - ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b0, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b0->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b0, sizeof (*h0)); - } - } - 
else - { - b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; - next0 = UDP_INPUT_NEXT_DROP; - } - - trace_x1: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h0->src_port; - tr->dst_port = h0->dst_port; - tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && - next0 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_error_count (vm, node->node_index, UDP_ERROR_NO_LISTENER, - n_no_listener); - return from_frame->n_vectors; -} - -static char *udp_error_strings[] = { -#define udp_error(n,s) s, -#include "udp_error.def" -#undef udp_error -}; - -static uword -udp4_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); -} - -static uword -udp6_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); -} - - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (udp4_input_node) = { - .function = udp4_input, - .name = "ip4-udp-lookup", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - - .runtime_data_bytes = sizeof (udp_input_runtime_t), - - .n_errors = UDP_N_ERROR, - .error_strings = udp_error_strings, - - .n_next_nodes = UDP_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [UDP_INPUT_NEXT_##s] = n, - foreach_udp_input_next -#undef _ - }, - - .format_buffer = format_udp_header, - .format_trace = format_udp_rx_trace, - .unformat_buffer = unformat_udp_header, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (udp4_input_node, udp4_input); - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (udp6_input_node) = { - .function = udp6_input, - .name = "ip6-udp-lookup", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .runtime_data_bytes = sizeof (udp_input_runtime_t), - - .n_errors = UDP_N_ERROR, - .error_strings = udp_error_strings, - - .n_next_nodes = UDP_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [UDP_INPUT_NEXT_##s] = n, - foreach_udp_input_next -#undef _ - }, - - .format_buffer = format_udp_header, - .format_trace = format_udp_rx_trace, - .unformat_buffer = unformat_udp_header, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (udp6_input_node, udp6_input); - -static void -add_dst_port (udp_main_t * um, - udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4) -{ - udp_dst_port_info_t *pi; - u32 i; - - vec_add2 (um->dst_port_infos[is_ip4], pi, 1); - i = pi - um->dst_port_infos[is_ip4]; - - pi->name = dst_port_name; - pi->dst_port = dst_port; - pi->next_index = pi->node_index = ~0; - - hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, i); - - if (pi->name) - hash_set_mem (um->dst_port_info_by_name[is_ip4], pi->name, i); -} - -void -udp_register_dst_port (vlib_main_t * vm, - udp_dst_port_t dst_port, u32 node_index, u8 is_ip4) -{ - udp_main_t *um = &udp_main; - udp_dst_port_info_t *pi; - udp_input_runtime_t *rt; - u16 *n; - - { - clib_error_t *error = vlib_call_init_function (vm, udp_local_init); - if (error) - clib_error_report (error); - } - - pi = udp_get_dst_port_info (um, 
dst_port, is_ip4); - if (!pi) - { - add_dst_port (um, dst_port, 0, is_ip4); - pi = udp_get_dst_port_info (um, dst_port, is_ip4); - ASSERT (pi); - } - - pi->node_index = node_index; - pi->next_index = vlib_node_add_next (vm, - is_ip4 ? udp4_input_node.index - : udp6_input_node.index, node_index); - - /* Setup udp protocol -> next index sparse vector mapping. */ - rt = vlib_node_get_runtime_data - (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); - n = sparse_vec_validate (rt->next_by_dst_port, - clib_host_to_net_u16 (dst_port)); - n[0] = pi->next_index; -} - -void -udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) -{ - udp_input_runtime_t *rt; - - { - clib_error_t *error = vlib_call_init_function (vm, udp_local_init); - if (error) - clib_error_report (error); - } - - rt = vlib_node_get_runtime_data - (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); - - rt->punt_unknown = is_add; -} - -/* Parse a UDP header. */ -uword -unformat_udp_header (unformat_input_t * input, va_list * args) -{ - u8 **result = va_arg (*args, u8 **); - udp_header_t *udp; - __attribute__ ((unused)) int old_length; - u16 src_port, dst_port; - - /* Allocate space for IP header. 
*/ - { - void *p; - - old_length = vec_len (*result); - vec_add2 (*result, p, sizeof (ip4_header_t)); - udp = p; - } - - memset (udp, 0, sizeof (udp[0])); - if (unformat (input, "src-port %d dst-port %d", &src_port, &dst_port)) - { - udp->src_port = clib_host_to_net_u16 (src_port); - udp->dst_port = clib_host_to_net_u16 (dst_port); - return 1; - } - return 0; -} - -static void -udp_setup_node (vlib_main_t * vm, u32 node_index) -{ - vlib_node_t *n = vlib_get_node (vm, node_index); - pg_node_t *pn = pg_get_node (node_index); - - n->format_buffer = format_udp_header; - n->unformat_buffer = unformat_udp_header; - pn->unformat_edit = unformat_pg_udp_header; -} - -clib_error_t * -udp_local_init (vlib_main_t * vm) -{ - udp_input_runtime_t *rt; - udp_main_t *um = &udp_main; - int i; - - { - clib_error_t *error; - error = vlib_call_init_function (vm, udp_init); - if (error) - clib_error_report (error); - } - - - for (i = 0; i < 2; i++) - { - um->dst_port_info_by_name[i] = hash_create_string (0, sizeof (uword)); - um->dst_port_info_by_dst_port[i] = hash_create (0, sizeof (uword)); - } - - udp_setup_node (vm, udp4_input_node.index); - udp_setup_node (vm, udp6_input_node.index); - - rt = vlib_node_get_runtime_data (vm, udp4_input_node.index); - - rt->next_by_dst_port = sparse_vec_new - ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), - /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); - - rt->punt_unknown = 0; - -#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */); - foreach_udp4_dst_port -#undef _ - rt = vlib_node_get_runtime_data (vm, udp6_input_node.index); - - rt->next_by_dst_port = sparse_vec_new - ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), - /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); - - rt->punt_unknown = 0; - -#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */); - foreach_udp6_dst_port -#undef _ - ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index); - /* Note: ip6 differs from ip4, 
UDP is hotwired to ip6-udp-lookup */ - return 0; -} - -VLIB_INIT_FUNCTION (udp_local_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_packet.h b/src/vnet/ip/udp_packet.h deleted file mode 100644 index beea3059..00000000 --- a/src/vnet/ip/udp_packet.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip4/udp_packet.h: UDP packet format - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef included_udp_packet_h -#define included_udp_packet_h - -typedef struct -{ - /* Source and destination port. */ - u16 src_port, dst_port; - - /* Length of UDP header plus payload. */ - u16 length; - - /* Checksum of UDP pseudo-header and data or - zero if checksum is disabled. */ - u16 checksum; -} udp_header_t; - -#endif /* included_udp_packet_h */ - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_pg.c b/src/vnet/ip/udp_pg.c deleted file mode 100644 index c9d8d38c..00000000 --- a/src/vnet/ip/udp_pg.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/* - * ip/udp_pg: UDP packet-generator interface - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include -#include /* for unformat_udp_udp_port */ - -#define UDP_PG_EDIT_LENGTH (1 << 0) -#define UDP_PG_EDIT_CHECKSUM (1 << 1) - -always_inline void -udp_pg_edit_function_inline (pg_main_t * pg, - pg_stream_t * s, - pg_edit_group_t * g, - u32 * packets, u32 n_packets, u32 flags) -{ - vlib_main_t *vm = vlib_get_main (); - u32 ip_offset, udp_offset; - - udp_offset = g->start_byte_offset; - ip_offset = (g - 1)->start_byte_offset; - - while (n_packets >= 1) - { - vlib_buffer_t *p0; - ip4_header_t *ip0; - udp_header_t *udp0; - u32 udp_len0; - - p0 = vlib_get_buffer (vm, packets[0]); - n_packets -= 1; - packets += 1; - - ip0 = (void *) (p0->data + ip_offset); - udp0 = (void *) (p0->data + udp_offset); - udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); - - if (flags & UDP_PG_EDIT_LENGTH) - udp0->length = - clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, p0) - - ip_offset); - - /* Initialize checksum with header. */ - if (flags & UDP_PG_EDIT_CHECKSUM) - { - ip_csum_t sum0; - - sum0 = clib_mem_unaligned (&ip0->src_address, u64); - - sum0 = ip_csum_with_carry - (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16))); - - /* Invalidate possibly old checksum. */ - udp0->checksum = 0; - - sum0 = - ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0, - sum0); - - sum0 = ~ip_csum_fold (sum0); - - /* Zero checksum means checksumming disabled. */ - sum0 = sum0 != 0 ? 
sum0 : 0xffff; - - udp0->checksum = sum0; - } - } -} - -static void -udp_pg_edit_function (pg_main_t * pg, - pg_stream_t * s, - pg_edit_group_t * g, u32 * packets, u32 n_packets) -{ - switch (g->edit_function_opaque) - { - case UDP_PG_EDIT_LENGTH: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_LENGTH); - break; - - case UDP_PG_EDIT_CHECKSUM: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_CHECKSUM); - break; - - case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH); - break; - - default: - ASSERT (0); - break; - } -} - -typedef struct -{ - pg_edit_t src_port, dst_port; - pg_edit_t length; - pg_edit_t checksum; -} pg_udp_header_t; - -static inline void -pg_udp_header_init (pg_udp_header_t * p) -{ - /* Initialize fields that are not bit fields in the IP header. */ -#define _(f) pg_edit_init (&p->f, udp_header_t, f); - _(src_port); - _(dst_port); - _(length); - _(checksum); -#undef _ -} - -uword -unformat_pg_udp_header (unformat_input_t * input, va_list * args) -{ - pg_stream_t *s = va_arg (*args, pg_stream_t *); - pg_udp_header_t *p; - u32 group_index; - - p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t), - &group_index); - pg_udp_header_init (p); - - /* Defaults. */ - p->checksum.type = PG_EDIT_UNSPECIFIED; - p->length.type = PG_EDIT_UNSPECIFIED; - - if (!unformat (input, "UDP: %U -> %U", - unformat_pg_edit, - unformat_tcp_udp_port, &p->src_port, - unformat_pg_edit, unformat_tcp_udp_port, &p->dst_port)) - goto error; - - /* Parse options. */ - while (1) - { - if (unformat (input, "length %U", - unformat_pg_edit, unformat_pg_number, &p->length)) - ; - - else if (unformat (input, "checksum %U", - unformat_pg_edit, unformat_pg_number, &p->checksum)) - ; - - /* Can't parse input: try next protocol level. 
*/ - else - break; - } - - { - ip_main_t *im = &ip_main; - u16 dst_port; - tcp_udp_port_info_t *pi; - - pi = 0; - if (p->dst_port.type == PG_EDIT_FIXED) - { - dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO); - pi = ip_get_tcp_udp_port_info (im, dst_port); - } - - if (pi && pi->unformat_pg_edit - && unformat_user (input, pi->unformat_pg_edit, s)) - ; - - else if (!unformat_user (input, unformat_pg_payload, s)) - goto error; - - p = pg_get_edit_group (s, group_index); - if (p->checksum.type == PG_EDIT_UNSPECIFIED - || p->length.type == PG_EDIT_UNSPECIFIED) - { - pg_edit_group_t *g = pg_stream_get_group (s, group_index); - g->edit_function = udp_pg_edit_function; - g->edit_function_opaque = 0; - if (p->checksum.type == PG_EDIT_UNSPECIFIED) - g->edit_function_opaque |= UDP_PG_EDIT_CHECKSUM; - if (p->length.type == PG_EDIT_UNSPECIFIED) - g->edit_function_opaque |= UDP_PG_EDIT_LENGTH; - } - - return 1; - } - -error: - /* Free up any edits we may have added. */ - pg_free_edit_group (s); - return 0; -} - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ipsec/ikev2.c b/src/vnet/ipsec/ikev2.c index 09209334..2c1074d8 100644 --- a/src/vnet/ipsec/ikev2.c +++ b/src/vnet/ipsec/ikev2.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/ipsec/ikev2_cli.c b/src/vnet/ipsec/ikev2_cli.c index 5c88d8d4..05ed4e60 100644 --- a/src/vnet/ipsec/ikev2_cli.c +++ b/src/vnet/ipsec/ikev2_cli.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/vnet/ipsec/ikev2_crypto.c b/src/vnet/ipsec/ikev2_crypto.c index c201d3eb..ca56158f 100644 --- a/src/vnet/ipsec/ikev2_crypto.c +++ b/src/vnet/ipsec/ikev2_crypto.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/lisp-cp/packets.c b/src/vnet/lisp-cp/packets.c index 3a4f421b..f24024f1 
100644 --- a/src/vnet/lisp-cp/packets.c +++ b/src/vnet/lisp-cp/packets.c @@ -15,7 +15,7 @@ #include #include -#include +#include /* Returns IP ID for the packet */ /* static u16 ip_id = 0; @@ -141,61 +141,6 @@ pkt_push_udp (vlib_main_t * vm, vlib_buffer_t * b, u16 sp, u16 dp) return uh; } -void * -pkt_push_ipv4 (vlib_main_t * vm, vlib_buffer_t * b, ip4_address_t * src, - ip4_address_t * dst, int proto) -{ - ip4_header_t *ih; - - /* make some room */ - ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t)); - - ih->ip_version_and_header_length = 0x45; - ih->tos = 0; - ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); - - /* iph->fragment_id = clib_host_to_net_u16(get_IP_ID ()); */ - - /* TODO: decide if we allow fragments in case of control */ - ih->flags_and_fragment_offset = clib_host_to_net_u16 (IP_DF); - ih->ttl = 255; - ih->protocol = proto; - ih->src_address.as_u32 = src->as_u32; - ih->dst_address.as_u32 = dst->as_u32; - - ih->checksum = ip4_header_checksum (ih); - return ih; -} - -void * -pkt_push_ipv6 (vlib_main_t * vm, vlib_buffer_t * b, ip6_address_t * src, - ip6_address_t * dst, int proto) -{ - ip6_header_t *ip6h; - u16 payload_length; - - /* make some room */ - ip6h = vlib_buffer_push_uninit (b, sizeof (ip6_header_t)); - - ip6h->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0x6 << 28); - - /* calculate ip6 payload length */ - payload_length = vlib_buffer_length_in_chain (vm, b); - payload_length -= sizeof (*ip6h); - - ip6h->payload_length = clib_host_to_net_u16 (payload_length); - - ip6h->hop_limit = 0xff; - ip6h->protocol = proto; - clib_memcpy (ip6h->src_address.as_u8, src->as_u8, - sizeof (ip6h->src_address)); - clib_memcpy (ip6h->dst_address.as_u8, dst->as_u8, - sizeof (ip6h->src_address)); - - return ip6h; -} - void * pkt_push_ip (vlib_main_t * vm, vlib_buffer_t * b, ip_address_t * src, ip_address_t * dst, u32 proto) @@ -210,12 +155,12 @@ pkt_push_ip (vlib_main_t * vm, vlib_buffer_t * b, ip_address_t * 
src, switch (ip_addr_version (src)) { case IP4: - return pkt_push_ipv4 (vm, b, &ip_addr_v4 (src), &ip_addr_v4 (dst), - proto); + return vlib_buffer_push_ip4 (vm, b, &ip_addr_v4 (src), + &ip_addr_v4 (dst), proto); break; case IP6: - return pkt_push_ipv6 (vm, b, &ip_addr_v6 (src), &ip_addr_v6 (dst), - proto); + return vlib_buffer_push_ip6 (vm, b, &ip_addr_v6 (src), + &ip_addr_v6 (dst), proto); break; } diff --git a/src/vnet/lisp-cp/packets.h b/src/vnet/lisp-cp/packets.h index 212a1d78..f6da3bf4 100644 --- a/src/vnet/lisp-cp/packets.h +++ b/src/vnet/lisp-cp/packets.h @@ -26,51 +26,6 @@ void *pkt_push_udp_and_ip (vlib_main_t * vm, vlib_buffer_t * b, u16 sp, void *pkt_push_ecm_hdr (vlib_buffer_t * b); -always_inline u8 * -vlib_buffer_get_tail (vlib_buffer_t * b) -{ - return b->data + b->current_data + b->current_length; -} - -always_inline void * -vlib_buffer_put_uninit (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! */ - void *p = vlib_buffer_get_tail (b); - b->current_length += size; - return p; -} - -always_inline void * -vlib_buffer_push_uninit (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! */ - ASSERT (b->current_data >= size); - b->current_data -= size; - b->current_length += size; - - return vlib_buffer_get_current (b); -} - -always_inline void * -vlib_buffer_make_headroom (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! 
*/ - b->current_data += size; - return vlib_buffer_get_current (b); -} - -always_inline void * -vlib_buffer_pull (vlib_buffer_t * b, u8 size) -{ - if (b->current_length < size) - return 0; - - void *data = vlib_buffer_get_current (b); - vlib_buffer_advance (b, size); - return data; -} - /* *INDENT-ON* */ /* diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index 13359277..292c7e6a 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/lisp-gpe/lisp_gpe.h b/src/vnet/lisp-gpe/lisp_gpe.h index c898a7da..b5a50ec6 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.h +++ b/src/vnet/lisp-gpe/lisp_gpe.h @@ -27,10 +27,12 @@ #include #include #include -#include +#include #include #include #include +#include +#include /** IP4-UDP-LISP encap header */ /* *INDENT-OFF* */ diff --git a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c index 65006b81..dbcf7134 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c +++ b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include /** * Memory pool of all adjacencies diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c new file mode 100644 index 00000000..a561e7d1 --- /dev/null +++ b/src/vnet/session/application.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +/* + * Pool from which we allocate all applications + */ +static application_t *app_pool; + +/* + * Hash table of apps by api client index + */ +static uword *app_by_api_client_index; + +int +application_api_queue_is_full (application_t * app) +{ + unix_shared_memory_queue_t *q; + + /* builtin servers are always OK */ + if (app->api_client_index == ~0) + return 0; + + q = vl_api_client_index_to_input_queue (app->api_client_index); + if (!q) + return 1; + + if (q->cursize == q->maxsize) + return 1; + return 0; +} + +static void +application_table_add (application_t * app) +{ + hash_set (app_by_api_client_index, app->api_client_index, app->index); +} + +static void +application_table_del (application_t * app) +{ + hash_unset (app_by_api_client_index, app->api_client_index); +} + +application_t * +application_lookup (u32 api_client_index) +{ + uword *p; + p = hash_get (app_by_api_client_index, api_client_index); + if (p) + return application_get (p[0]); + + return 0; +} + +void +application_del (application_t * app) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + api_main_t *am = &api_main; + void *oldheap; + session_manager_t *sm; + + if (app->mode == APP_SERVER) + { + sm = session_manager_get (app->session_manager_index); + session_manager_del (smm, sm); + } + + /* Free the event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + if (app->event_queue) + unix_shared_memory_queue_free (app->event_queue); + svm_pop_heap (oldheap); + + application_table_del (app); + + pool_put (app_pool, app); +} + +application_t * +application_new (application_type_t type, session_type_t sst, + u32 api_client_index, u32 flags, session_cb_vft_t * cb_fns) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + api_main_t *am = &api_main; + application_t *app; + void 
*oldheap; + session_manager_t *sm; + + pool_get (app_pool, app); + memset (app, 0, sizeof (*app)); + + /* Allocate event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + + /* Allocate server event queue */ + app->event_queue = + unix_shared_memory_queue_init (128 /* nels $$$$ config */ , + sizeof (session_fifo_event_t), + 0 /* consumer pid */ , + 0 + /* (do not) signal when queue non-empty */ + ); + + svm_pop_heap (oldheap); + + /* If a server, allocate session manager */ + if (type == APP_SERVER) + { + pool_get (smm->session_managers, sm); + memset (sm, 0, sizeof (*sm)); + + app->session_manager_index = sm - smm->session_managers; + } + else if (type == APP_CLIENT) + { + /* Allocate connect session manager if needed */ + if (smm->connect_manager_index[sst] == INVALID_INDEX) + connects_session_manager_init (smm, sst); + app->session_manager_index = smm->connect_manager_index[sst]; + } + + app->mode = type; + app->index = application_get_index (app); + app->session_type = sst; + app->api_client_index = api_client_index; + app->flags = flags; + app->cb_fns = *cb_fns; + + /* Add app to lookup by api_client_index table */ + application_table_add (app); + + return app; +} + +application_t * +application_get (u32 index) +{ + return pool_elt_at_index (app_pool, index); +} + +u32 +application_get_index (application_t * app) +{ + return app - app_pool; +} + +int +application_server_init (application_t * server, u32 segment_size, + u32 add_segment_size, u32 rx_fifo_size, + u32 tx_fifo_size, u8 ** segment_name) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_manager_t *sm; + int rv; + + sm = session_manager_get (server->session_manager_index); + + /* Add first segment */ + if ((rv = session_manager_add_first_segment (smm, sm, segment_size, + segment_name))) + { + return rv; + } + + /* Setup session manager */ + sm->add_segment_size = add_segment_size; + sm->rx_fifo_size = rx_fifo_size; + 
sm->tx_fifo_size = tx_fifo_size;
+  sm->add_segment = sm->add_segment_size != 0;
+  return 0;
+}
+
+/**
+ * Format one "show app server" row.
+ *
+ * va_args: application_t *srv (0 prints the column header instead of a
+ * row), int verbose (non-zero adds segment, API client and cookie columns).
+ */
+u8 *
+format_application_server (u8 * s, va_list * args)
+{
+  application_t *srv = va_arg (*args, application_t *);
+  int verbose = va_arg (*args, int);
+  vl_api_registration_t *regp;
+  stream_session_t *listener;
+  u8 *server_name, *str, *seg_name;
+  u32 segment_size;
+
+  if (srv == 0)
+    {
+      if (verbose)
+        s = format (s, "%-40s%-20s%-15s%-15s%-10s", "Connection", "Server",
+                    "Segment", "API Client", "Cookie");
+      else
+        s = format (s, "%-40s%-20s", "Connection", "Server");
+
+      return s;
+    }
+
+  regp = vl_api_client_index_to_registration (srv->api_client_index);
+  /* The lookup can return NULL, in which case regp must not be
+   * dereferenced: synthesize a NUL-terminated name from the app index. */
+  if (!regp)
+    server_name = format (0, "builtin-%d%c", srv->index, 0);
+  else
+    server_name = regp->name;
+
+  listener = stream_session_listener_get (srv->session_type,
+                                          srv->session_index);
+  str = format (0, "%U", format_stream_session, listener, verbose);
+
+  session_manager_get_segment_info (listener->server_segment_index, &seg_name,
+                                    &segment_size);
+  if (verbose)
+    {
+      s = format (s, "%-40s%-20s%-20s%-10d%-10d", str, server_name,
+                  seg_name, srv->api_client_index, srv->accept_cookie);
+    }
+  else
+    s = format (s, "%-40s%-20s", str, server_name);
+  return s;
+}
+
+/**
+ * Format one "show app client" row.
+ *
+ * va_args: application_t *client (0 prints the column header instead of a
+ * row), int verbose (non-zero adds segment and API client columns).
+ */
+u8 *
+format_application_client (u8 * s, va_list * args)
+{
+  application_t *client = va_arg (*args, application_t *);
+  int verbose = va_arg (*args, int);
+  stream_session_t *session;
+  u8 *str, *seg_name;
+  u32 segment_size;
+
+  if (client == 0)
+    {
+      if (verbose)
+        s =
+          format (s, "%-40s%-20s%-10s", "Connection", "Segment",
+                  "API Client");
+      else
+        s = format (s, "%-40s", "Connection");
+
+      return s;
+    }
+
+  session = stream_session_get (client->session_index, client->thread_index);
+  str = format (0, "%U", format_stream_session, session, verbose);
+
+  session_manager_get_segment_info (session->server_segment_index, &seg_name,
+                                    &segment_size);
+  if (verbose)
+    {
+      /* three directives for three arguments */
+      s = format (s, "%-40s%-20s%-10d", str, seg_name,
+                  client->api_client_index);
+    }
+
else
+    s = format (s, "%-40s", str);
+  return s;
+}
+
+/**
+ * CLI handler for "show app [server|client] [verbose]".
+ *
+ * Walks app_pool and prints one row per application whose mode matches the
+ * requested kind, preceded by a header row. Prints nothing when neither
+ * "server" nor "client" is given.
+ */
+static clib_error_t *
+show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                     vlib_cli_command_t * cmd)
+{
+  application_t *app;
+  int do_server = 0;
+  int do_client = 0;
+  int verbose = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "server"))
+        do_server = 1;
+      else if (unformat (input, "client"))
+        do_client = 1;
+      else if (unformat (input, "verbose"))
+        verbose = 1;
+      else
+        break;
+    }
+
+  if (do_server)
+    {
+      if (pool_elts (app_pool))
+        {
+          vlib_cli_output (vm, "%U", format_application_server,
+                           0 /* header */ ,
+                           verbose);
+          /* *INDENT-OFF* */
+          pool_foreach (app, app_pool,
+          ({
+            if (app->mode == APP_SERVER)
+              vlib_cli_output (vm, "%U", format_application_server, app,
+                               verbose);
+          }));
+          /* *INDENT-ON* */
+        }
+      else
+        vlib_cli_output (vm, "No active server bindings");
+    }
+
+  if (do_client)
+    {
+      if (pool_elts (app_pool))
+        {
+          vlib_cli_output (vm, "%U", format_application_client,
+                           0 /* header */ ,
+                           verbose);
+          /* *INDENT-OFF* */
+          pool_foreach (app, app_pool,
+          ({
+            if (app->mode == APP_CLIENT)
+              vlib_cli_output (vm, "%U", format_application_client, app,
+                               verbose);
+          }));
+          /* *INDENT-ON* */
+        }
+      else
+        vlib_cli_output (vm, "No active client bindings");
+    }
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_app_command, static) =
+{
+.path = "show app",.short_help =
+    "show app [server|client] [verbose]",.function = show_app_command_fn,};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h
new file mode 100644
index 00000000..027d6967
--- /dev/null
+++ b/src/vnet/session/application.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SRC_VNET_SESSION_APPLICATION_H_ +#define SRC_VNET_SESSION_APPLICATION_H_ + +#include +#include + +typedef enum +{ + APP_SERVER, + APP_CLIENT +} application_type_t; + +typedef struct _stream_session_cb_vft +{ + /** Notify server of new segment */ + int (*add_segment_callback) (u32 api_client_index, const u8 * seg_name, + u32 seg_size); + + /** Notify server of newly accepted session */ + int (*session_accept_callback) (stream_session_t * new_session); + + /* Connection request callback */ + int (*session_connected_callback) (u32 api_client_index, + stream_session_t * s, u8 code); + + /** Notify app that session is closing */ + void (*session_disconnect_callback) (stream_session_t * s); + + /** Notify app that session was reset */ + void (*session_reset_callback) (stream_session_t * s); + + /* Direct RX callback, for built-in servers */ + int (*builtin_server_rx_callback) (stream_session_t * session); + + /* Redirect connection to local server */ + int (*redirect_connect_callback) (u32 api_client_index, void *mp); +} session_cb_vft_t; + +typedef struct _application +{ + /** Index in server pool */ + u32 index; + + /** Flags */ + u32 flags; + + /** Binary API connection index, ~0 if internal */ + u32 api_client_index; + + /* */ + u32 api_context; + + /** Application listens for events on this svm queue */ + unix_shared_memory_queue_t *event_queue; + + /** Stream session type */ + u8 session_type; + + /* Stream server mode: accept or connect */ + u8 mode; + + u32 session_manager_index; + + /* + * Bind/Listen specific + */ + + /** 
Accept cookie, for multiple session flavors ($$$ maybe) */ + u32 accept_cookie; + + /** Index of the listen session or connect session */ + u32 session_index; + + /** Session thread index for client connect sessions */ + u32 thread_index; + + /* + * Callbacks: shoulder-taps for the server/client + */ + session_cb_vft_t cb_fns; +} application_t; + +application_t *application_new (application_type_t type, session_type_t sst, + u32 api_client_index, u32 flags, + session_cb_vft_t * cb_fns); +void application_del (application_t * app); +application_t *application_get (u32 index); +application_t *application_lookup (u32 api_client_index); +u32 application_get_index (application_t * app); + +int +application_server_init (application_t * server, u32 segment_size, + u32 add_segment_size, u32 rx_fifo_size, + u32 tx_fifo_size, u8 ** segment_name); +int application_api_queue_is_full (application_t * app); + +#endif /* SRC_VNET_SESSION_APPLICATION_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c new file mode 100644 index 00000000..0ea77fd8 --- /dev/null +++ b/src/vnet/session/application_interface.c @@ -0,0 +1,459 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include
+
+#include
+#include
+#include
+#include
+
+/** @file
+    VPP's application/session API bind/unbind/connect/disconnect calls
+*/
+
+/** Return non-zero when the address (ip4 part if is_ip4, else all 128
+ *  bits) is all zeroes. */
+static u8
+ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4)
+{
+  if (is_ip4)
+    return (ip46_address->ip4.as_u32 == 0);
+  else
+    return (ip46_address->as_u64[0] == 0 && ip46_address->as_u64[1] == 0);
+}
+
+/**
+ * Return 1 when the FIB entry found for the host prefix (/32 or /128) of
+ * the address in table 0 carries FIB_ENTRY_FLAG_LOCAL, 0 otherwise.
+ */
+static u8
+ip_is_local (ip46_address_t * ip46_address, u8 is_ip4)
+{
+  fib_node_index_t fei;
+  fib_entry_flag_t flags;
+  fib_prefix_t prefix;
+
+  /* Check if requester is local */
+  if (is_ip4)
+    {
+      prefix.fp_len = 32;
+      prefix.fp_proto = FIB_PROTOCOL_IP4;
+    }
+  else
+    {
+      prefix.fp_len = 128;
+      prefix.fp_proto = FIB_PROTOCOL_IP6;
+    }
+
+  /* Copy the whole address object; sizeof on the pointer itself would
+   * only cover pointer-size bytes of it */
+  clib_memcpy (&prefix.fp_addr, ip46_address, sizeof (*ip46_address));
+  fei = fib_table_lookup (0, &prefix);
+  flags = fib_entry_get_flags (fei);
+
+  /* Normalise to 0/1 so the masked bit cannot be lost in the u8 return */
+  return ((flags & FIB_ENTRY_FLAG_LOCAL) != 0);
+}
+
+/**
+ * Split a session handle into thread index (low 32 bits) and session index
+ * (high 32 bits) and check both against the per-thread session pools.
+ *
+ * @return 0 on success, VNET_API_ERROR_INVALID_VALUE for an out-of-range
+ *         thread index, VNET_API_ERROR_INVALID_VALUE_2 for a free session
+ *         index. The output parameters are written in all cases.
+ */
+int
+api_parse_session_handle (u64 handle, u32 * session_index, u32 * thread_index)
+{
+  session_manager_main_t *smm = vnet_get_session_manager_main ();
+  stream_session_t *pool;
+
+  *thread_index = handle & 0xFFFFFFFF;
+  *session_index = handle >> 32;
+
+  if (*thread_index >= vec_len (smm->sessions))
+    return VNET_API_ERROR_INVALID_VALUE;
+
+  pool = smm->sessions[*thread_index];
+
+  if (pool_is_free_index (pool, *session_index))
+    return VNET_API_ERROR_INVALID_VALUE_2;
+
+  return 0;
+}
+
+int
+vnet_bind_i (u32 api_client_index, ip46_address_t * ip46, u16 port_host_order,
+             session_type_t sst, u64 * options, session_cb_vft_t * cb_fns,
+             application_t ** app, u32 * len_seg_name, char *seg_name)
+{
+  u8 *segment_name = 0;
+  application_t *server = 0;
+  stream_session_t *listener;
+  u8 is_ip4;
+
+  listener =
+    stream_session_lookup_listener (ip46,
+                                    clib_host_to_net_u16 (port_host_order),
+                                    sst);
+
+  if (listener)
+    return VNET_API_ERROR_ADDRESS_IN_USE;
+
+  if (application_lookup (api_client_index))
+    {
+      clib_warning ("Only one bind supported for now");
+      return VNET_API_ERROR_ADDRESS_IN_USE;
+ } + + is_ip4 = SESSION_TYPE_IP4_UDP == sst || SESSION_TYPE_IP4_TCP == sst; + if (!ip_is_zero (ip46, is_ip4) && !ip_is_local (ip46, is_ip4)) + return VNET_API_ERROR_INVALID_VALUE; + + /* Allocate and initialize stream server */ + server = application_new (APP_SERVER, sst, api_client_index, + options[SESSION_OPTIONS_FLAGS], cb_fns); + + application_server_init (server, options[SESSION_OPTIONS_SEGMENT_SIZE], + options[SESSION_OPTIONS_ADD_SEGMENT_SIZE], + options[SESSION_OPTIONS_RX_FIFO_SIZE], + options[SESSION_OPTIONS_TX_FIFO_SIZE], + &segment_name); + + /* Setup listen path down to transport */ + stream_session_start_listen (server->index, ip46, port_host_order); + + /* + * Return values + */ + + ASSERT (vec_len (segment_name) <= 128); + *len_seg_name = vec_len (segment_name); + memcpy (seg_name, segment_name, *len_seg_name); + *app = server; + + return 0; +} + +int +vnet_unbind_i (u32 api_client_index) +{ + application_t *server; + + /* + * Find the stream_server_t corresponding to the api client + */ + server = application_lookup (api_client_index); + if (!server) + return VNET_API_ERROR_INVALID_VALUE_2; + + /* Clear the listener */ + stream_session_stop_listen (server->index); + application_del (server); + + return 0; +} + +int +vnet_connect_i (u32 api_client_index, u32 api_context, session_type_t sst, + ip46_address_t * ip46, u16 port, u64 * options, void *mp, + session_cb_vft_t * cb_fns) +{ + stream_session_t *listener; + application_t *server, *app; + + /* + * Figure out if connecting to a local server + */ + listener = stream_session_lookup_listener (ip46, + clib_host_to_net_u16 (port), + sst); + if (listener) + { + server = application_get (listener->app_index); + + /* + * Server is willing to have a direct fifo connection created + * instead of going through the state machine, etc. + */ + if (server->flags & SESSION_OPTIONS_FLAGS_USE_FIFO) + return server->cb_fns. 
+      redirect_connect_callback (server->api_client_index, mp);
+    }
+
+  /* Create client app */
+  app = application_new (APP_CLIENT, sst, api_client_index,
+                         options[SESSION_OPTIONS_FLAGS], cb_fns);
+
+  app->api_context = api_context;
+
+  /*
+   * Not connecting to a local server. Create regular session
+   */
+  stream_session_open (sst, ip46, port, app->index);
+
+  return 0;
+}
+
+/**
+ * unformat a vnet URI
+ *
+ * Accepted forms (the port follows a '/'):
+ *
+ * tcp://ip4-or-ip6-addr/port
+ * udp://ip4-or-ip6-addr/port
+ *
+ * ip46_address_t ip46_address;
+ * session_type_t sst;
+ * u16 port_in_host_byte_order;
+ *
+ * if (unformat (input, "%U", unformat_vnet_uri, &ip46_address,
+ *               &sst, &port_in_host_byte_order))
+ *   etc...
+ *
+ * Returns 1 and fills address, sst and port on a match, 0 otherwise.
+ */
+uword
+unformat_vnet_uri (unformat_input_t * input, va_list * args)
+{
+  ip46_address_t *address = va_arg (*args, ip46_address_t *);
+  session_type_t *sst = va_arg (*args, session_type_t *);
+  u16 *port = va_arg (*args, u16 *);
+  /* "%d" stores an int-sized value: parse into a 32-bit local and narrow
+   * afterwards instead of writing past the caller's u16 */
+  u32 port_value = 0;
+
+  if (unformat (input, "tcp://%U/%d", unformat_ip4_address, &address->ip4,
+                &port_value))
+    {
+      *sst = SESSION_TYPE_IP4_TCP;
+      *port = (u16) port_value;
+      return 1;
+    }
+  if (unformat (input, "udp://%U/%d", unformat_ip4_address, &address->ip4,
+                &port_value))
+    {
+      *sst = SESSION_TYPE_IP4_UDP;
+      *port = (u16) port_value;
+      return 1;
+    }
+  if (unformat (input, "udp://%U/%d", unformat_ip6_address, &address->ip6,
+                &port_value))
+    {
+      *sst = SESSION_TYPE_IP6_UDP;
+      *port = (u16) port_value;
+      return 1;
+    }
+  if (unformat (input, "tcp://%U/%d", unformat_ip6_address, &address->ip6,
+                &port_value))
+    {
+      *sst = SESSION_TYPE_IP6_TCP;
+      *port = (u16) port_value;
+      return 1;
+    }
+
+  return 0;
+}
+
+/**
+ * Parse a URI string into session type, address and host-order port.
+ *
+ * @return 0 on success, VNET_API_ERROR_INVALID_VALUE when the string does
+ *         not match any form accepted by unformat_vnet_uri.
+ */
+int
+parse_uri (char *uri, session_type_t * sst, ip46_address_t * addr,
+           u16 * port_number_host_byte_order)
+{
+  unformat_input_t _input, *input = &_input;
+  int rv = 0;
+
+  /* Work on a private, NUL-terminated copy of the caller's string */
+  uri = (char *) format (0, "%s%c", uri, 0);
+
+  /* Parse uri */
+  unformat_init_string (input, uri, strlen (uri));
+  if (!unformat (input, "%U", unformat_vnet_uri, addr, sst,
+                 port_number_host_byte_order))
+    rv = VNET_API_ERROR_INVALID_VALUE;
+
+  unformat_free (input);
+  /* Release the private copy on every path; it was leaked before */
+  vec_free (uri);
+
+  return rv;
+}
+
+int +vnet_bind_uri (vnet_bind_args_t * a) +{ + application_t *server = 0; + u16 port_host_order; + session_type_t sst = SESSION_N_TYPES; + ip46_address_t ip46; + int rv; + + memset (&ip46, 0, sizeof (ip46)); + rv = parse_uri (a->uri, &sst, &ip46, &port_host_order); + if (rv) + return rv; + + if ((rv = vnet_bind_i (a->api_client_index, &ip46, port_host_order, sst, + a->options, a->session_cb_vft, &server, + &a->segment_name_length, a->segment_name))) + return rv; + + a->server_event_queue_address = (u64) server->event_queue; + return 0; +} + +session_type_t +session_type_from_proto_and_ip (session_api_proto_t proto, u8 is_ip4) +{ + if (proto == SESSION_PROTO_TCP) + { + if (is_ip4) + return SESSION_TYPE_IP4_TCP; + else + return SESSION_TYPE_IP6_TCP; + } + else + { + if (is_ip4) + return SESSION_TYPE_IP4_UDP; + else + return SESSION_TYPE_IP6_UDP; + } + + return SESSION_N_TYPES; +} + +int +vnet_unbind_uri (char *uri, u32 api_client_index) +{ + u16 port_number_host_byte_order; + session_type_t sst = SESSION_N_TYPES; + ip46_address_t ip46_address; + stream_session_t *listener; + int rv; + + rv = parse_uri (uri, &sst, &ip46_address, &port_number_host_byte_order); + if (rv) + return rv; + + listener = + stream_session_lookup_listener (&ip46_address, + clib_host_to_net_u16 + (port_number_host_byte_order), sst); + + if (!listener) + return VNET_API_ERROR_ADDRESS_NOT_IN_USE; + + /* External client? 
*/
+  if (api_client_index != ~0)
+    {
+      ASSERT (vl_api_client_index_to_registration (api_client_index));
+    }
+
+  return vnet_unbind_i (api_client_index);
+}
+
+/**
+ * Connect to the endpoint named by a->uri on behalf of a->api_client_index.
+ *
+ * @return VNET_API_ERROR_INVALID_VALUE_2 if the client already has an
+ *         application, the parse_uri error if the URI is malformed,
+ *         otherwise the result of vnet_connect_i.
+ */
+int
+vnet_connect_uri (vnet_connect_args_t * a)
+{
+  ip46_address_t ip46_address;
+  u16 port;
+  session_type_t sst;
+  application_t *app;
+  int rv;
+
+  app = application_lookup (a->api_client_index);
+  if (app)
+    {
+      clib_warning ("Already have a connect from this app");
+      return VNET_API_ERROR_INVALID_VALUE_2;
+    }
+
+  /* Parse uri. Zero the address first, as vnet_bind_uri does, so the
+   * bytes the parser does not write are not stack garbage */
+  memset (&ip46_address, 0, sizeof (ip46_address));
+  rv = parse_uri (a->uri, &sst, &ip46_address, &port);
+  if (rv)
+    return rv;
+
+  return vnet_connect_i (a->api_client_index, a->api_context, sst,
+                         &ip46_address, port, a->options, a->mp,
+                         a->session_cb_vft);
+}
+
+/** Disconnect the session at (session_index, thread_index); client_index
+ *  is not used. Always returns 0. */
+int
+vnet_disconnect_session (u32 client_index, u32 session_index,
+                         u32 thread_index)
+{
+  stream_session_t *session;
+
+  session = stream_session_get (session_index, thread_index);
+  stream_session_disconnect (session);
+
+  return 0;
+}
+
+
+/**
+ * Bind using the transport endpoint and protocol in the args.
+ *
+ * On success fills a->server_event_queue_address and a->handle
+ * (vrf in the high 32 bits, listen session index in the low 32 bits).
+ */
+int
+vnet_bind (vnet_bind_args_t * a)
+{
+  application_t *server = 0;
+  session_type_t sst = SESSION_N_TYPES;
+  int rv;
+
+  sst = session_type_from_proto_and_ip (a->proto, a->tep.is_ip4);
+  if ((rv = vnet_bind_i (a->api_client_index, &a->tep.ip, a->tep.port, sst,
+                         a->options, a->session_cb_vft, &server,
+                         &a->segment_name_length, a->segment_name)))
+    return rv;
+
+  a->server_event_queue_address = (u64) server->event_queue;
+  a->handle = (u64) a->tep.vrf << 32 | (u64) server->session_index;
+  return 0;
+}
+
+/**
+ * Unbind the server application registered for a->api_client_index.
+ *
+ * @return VNET_API_ERROR_INVALID_VALUE_2 when no application is registered
+ *         for the client, otherwise the result of vnet_unbind_i.
+ */
+int
+vnet_unbind (vnet_unbind_args_t * a)
+{
+  application_t *server;
+
+  if (a->api_client_index != ~0)
+    {
+      ASSERT (vl_api_client_index_to_registration (a->api_client_index));
+    }
+
+  /* Make sure this is the right one */
+  server = application_lookup (a->api_client_index);
+  /* Same error vnet_unbind_i reports for an unknown client; checked here
+   * so the ASSERT below never dereferences a NULL lookup result */
+  if (!server)
+    return VNET_API_ERROR_INVALID_VALUE_2;
+  ASSERT (server->session_index == (0xFFFFFFFF & a->handle));
+
+  /* TODO use handle to disambiguate namespaces/vrfs */
+  return vnet_unbind_i (a->api_client_index);
+}
+
+int
+vnet_connect (vnet_connect_args_t * a)
+{
+  session_type_t sst;
+
application_t *app; + + app = application_lookup (a->api_client_index); + if (app) + { + clib_warning ("Already have a connect from this app"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + + sst = session_type_from_proto_and_ip (a->proto, a->tep.is_ip4); + return vnet_connect_i (a->api_client_index, a->api_context, sst, &a->tep.ip, + a->tep.port, a->options, a->mp, a->session_cb_vft); +} + +int +vnet_disconnect (vnet_disconnect_args_t * a) +{ + stream_session_t *session; + u32 session_index, thread_index; + + if (api_parse_session_handle (a->handle, &session_index, &thread_index)) + { + clib_warning ("Invalid handle"); + return -1; + } + + session = stream_session_get (session_index, thread_index); + stream_session_disconnect (session); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h new file mode 100644 index 00000000..8d87c067 --- /dev/null +++ b/src/vnet/session/application_interface.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_uri_h__ +#define __included_uri_h__ + +#include +#include +#include +#include +#include +#include + +typedef enum _session_api_proto +{ + SESSION_PROTO_TCP, + SESSION_PROTO_UDP +} session_api_proto_t; + +typedef struct _vnet_bind_args_t +{ + union + { + char *uri; + struct + { + transport_endpoint_t tep; + session_api_proto_t proto; + }; + }; + + u32 api_client_index; + u64 *options; + session_cb_vft_t *session_cb_vft; + + /* + * Results + */ + char *segment_name; + u32 segment_name_length; + u64 server_event_queue_address; + u64 handle; +} vnet_bind_args_t; + +typedef struct _vnet_unbind_args_t +{ + union + { + char *uri; + u64 handle; + }; + u32 api_client_index; +} vnet_unbind_args_t; + +typedef struct _vnet_connect_args +{ + union + { + char *uri; + struct + { + transport_endpoint_t tep; + session_api_proto_t proto; + }; + }; + u32 api_client_index; + u32 api_context; + u64 *options; + session_cb_vft_t *session_cb_vft; + + /* Used for redirects */ + void *mp; +} vnet_connect_args_t; + +typedef struct _vnet_disconnect_args_t +{ + u64 handle; + u32 api_client_index; +} vnet_disconnect_args_t; + +/* Bind / connect options */ +typedef enum +{ + SESSION_OPTIONS_FLAGS, + SESSION_OPTIONS_SEGMENT_SIZE, + SESSION_OPTIONS_ADD_SEGMENT_SIZE, + SESSION_OPTIONS_RX_FIFO_SIZE, + SESSION_OPTIONS_TX_FIFO_SIZE, + SESSION_OPTIONS_ACCEPT_COOKIE, + SESSION_OPTIONS_N_OPTIONS +} session_options_index_t; + +/** Server can handle delegated connect requests from local clients */ +#define SESSION_OPTIONS_FLAGS_USE_FIFO (1<<0) + +/** Server wants vpp to add segments when out of memory for fifos */ +#define SESSION_OPTIONS_FLAGS_ADD_SEGMENT (1<<1) + +#define VNET_CONNECT_REDIRECTED 123 + +int vnet_bind_uri (vnet_bind_args_t *); +int vnet_unbind_uri (char *uri, u32 api_client_index); +int vnet_connect_uri (vnet_connect_args_t * a); +int +vnet_disconnect_session (u32 client_index, u32 session_index, + u32 thread_index); + +int vnet_bind (vnet_bind_args_t * a); +int 
vnet_connect (vnet_connect_args_t * a); +int vnet_unbind (vnet_unbind_args_t * a); +int vnet_disconnect (vnet_disconnect_args_t * a); + +int +api_parse_session_handle (u64 handle, u32 * session_index, + u32 * thread_index); + +#endif /* __included_uri_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/hashes.c b/src/vnet/session/hashes.c new file mode 100644 index 00000000..1808dd73 --- /dev/null +++ b/src/vnet/session/hashes.c @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Generate typed init functions for multiple hash table styles... */ + +#include +#include + +#include + +#undef __included_bihash_template_h__ + +#include +#include + +#include diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c new file mode 100644 index 00000000..e467f4e9 --- /dev/null +++ b/src/vnet/session/node.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include + +vlib_node_registration_t session_queue_node; + +typedef struct +{ + u32 session_index; + u32 server_thread_index; +} session_queue_trace_t; + +/* packet trace format function */ +static u8 * +format_session_queue_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + session_queue_trace_t *t = va_arg (*args, session_queue_trace_t *); + + s = format (s, "SESSION_QUEUE: session index %d, server thread index %d", + t->session_index, t->server_thread_index); + return s; +} + +vlib_node_registration_t session_queue_node; + +#define foreach_session_queue_error \ +_(TX, "Packets transmitted") \ +_(TIMER, "Timer events") + +typedef enum +{ +#define _(sym,str) SESSION_QUEUE_ERROR_##sym, + foreach_session_queue_error +#undef _ + SESSION_QUEUE_N_ERROR, +} session_queue_error_t; + +static char *session_queue_error_strings[] = { +#define _(sym,string) string, + foreach_session_queue_error +#undef _ +}; + +static u32 session_type_to_next[] = { + SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT, + SESSION_QUEUE_NEXT_IP4_LOOKUP, + SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT, + SESSION_QUEUE_NEXT_IP6_LOOKUP, +}; + +always_inline int +session_fifo_rx_i (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, session_fifo_event_t * e0, + stream_session_t * s0, u32 thread_index, int *n_tx_packets, + u8 peek_data) +{ + u32 
n_trace = vlib_get_trace_count (vm, node); + u32 left_to_snd0, max_len_to_snd0, len_to_deq0, n_bufs, snd_space0; + u32 n_frame_bytes, n_frames_per_evt; + transport_connection_t *tc0; + transport_proto_vft_t *transport_vft; + u32 next_index, next0, *to_next, n_left_to_next, bi0; + vlib_buffer_t *b0; + u32 rx_offset; + u16 snd_mss0; + u8 *data0; + int i; + + next_index = next0 = session_type_to_next[s0->session_type]; + + transport_vft = session_get_transport_vft (s0->session_type); + tc0 = transport_vft->get_connection (s0->connection_index, thread_index); + + /* Make sure we have space to send and there's something to dequeue */ + snd_space0 = transport_vft->send_space (tc0); + snd_mss0 = transport_vft->send_mss (tc0); + + if (snd_space0 == 0 || svm_fifo_max_dequeue (s0->server_tx_fifo) == 0 + || snd_mss0 == 0) + return 0; + + ASSERT (e0->enqueue_length > 0); + + /* Ensure we're not writing more than transport window allows */ + max_len_to_snd0 = clib_min (e0->enqueue_length, snd_space0); + + if (peek_data) + { + /* Offset in rx fifo from where to peek data */ + rx_offset = transport_vft->rx_fifo_offset (tc0); + } + + /* TODO check if transport is willing to send len_to_snd0 + * bytes (Nagle) */ + + n_frame_bytes = snd_mss0 * VLIB_FRAME_SIZE; + n_frames_per_evt = ceil ((double) max_len_to_snd0 / n_frame_bytes); + + n_bufs = vec_len (smm->tx_buffers[thread_index]); + left_to_snd0 = max_len_to_snd0; + for (i = 0; i < n_frames_per_evt; i++) + { + /* Make sure we have at least one full frame of buffers ready */ + if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE)) + { + vec_validate (smm->tx_buffers[thread_index], + n_bufs + VLIB_FRAME_SIZE - 1); + n_bufs += + vlib_buffer_alloc (vm, &smm->tx_buffers[thread_index][n_bufs], + VLIB_FRAME_SIZE); + + /* buffer shortage + * XXX 0.9 because when debugging we might not get a full frame */ + if (PREDICT_FALSE (n_bufs < 0.9 * VLIB_FRAME_SIZE)) + { + /* Keep track of how much we've dequeued and exit */ + e0->enqueue_length -= 
max_len_to_snd0 - left_to_snd0; + return -1; + } + + _vec_len (smm->tx_buffers[thread_index]) = n_bufs; + } + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + while (left_to_snd0 && n_left_to_next) + { + /* Get free buffer */ + n_bufs--; + bi0 = smm->tx_buffers[thread_index][n_bufs]; + _vec_len (smm->tx_buffers[thread_index]) = n_bufs; + + b0 = vlib_get_buffer (vm, bi0); + b0->error = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID + | VNET_BUFFER_LOCALLY_ORIGINATED; + b0->current_data = 0; + + /* RX on the local interface. tx in default fib */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + /* usual speculation, or the enqueue_x1 macro will barf */ + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + if (PREDICT_FALSE (n_trace > 0)) + { + session_queue_trace_t *t0; + vlib_trace_buffer (vm, node, next_index, b0, + 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + t0->session_index = s0->session_index; + t0->server_thread_index = s0->thread_index; + } + + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = "evt-dequeue: id %d length %d",.format_args = + "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->data[0] = e0->event_id; + ed->data[1] = e0->enqueue_length; + } + + len_to_deq0 = (left_to_snd0 < snd_mss0) ? 
left_to_snd0 : snd_mss0; + + /* Make room for headers */ + data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN); + + /* Dequeue the data + * TODO 1) peek instead of dequeue + * 2) buffer chains */ + if (peek_data) + { + int n_bytes_read; + n_bytes_read = svm_fifo_peek (s0->server_tx_fifo, s0->pid, + rx_offset, len_to_deq0, data0); + if (n_bytes_read < 0) + goto dequeue_fail; + + /* Keep track of progress locally, transport is also supposed to + * increment it independently when pushing header */ + rx_offset += n_bytes_read; + } + else + { + if (svm_fifo_dequeue_nowait (s0->server_tx_fifo, s0->pid, + len_to_deq0, data0) < 0) + goto dequeue_fail; + } + + b0->current_length = len_to_deq0; + + /* Ask transport to push header */ + transport_vft->push_header (tc0, b0); + + left_to_snd0 -= len_to_deq0; + *n_tx_packets = *n_tx_packets + 1; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* If we couldn't dequeue all bytes store progress */ + if (max_len_to_snd0 < e0->enqueue_length) + { + e0->enqueue_length -= max_len_to_snd0; + vec_add1 (smm->evts_partially_read[thread_index], *e0); + } + return 0; + +dequeue_fail: + /* Can't read from fifo. 
Store event rx progress, save as partially read, + * return buff to free list, hand the frame back and return */ + e0->enqueue_length -= max_len_to_snd0 - left_to_snd0; + vec_add1 (smm->evts_partially_read[thread_index], *e0); + + to_next -= 1; + n_left_to_next += 1; + _vec_len (smm->tx_buffers[thread_index]) += 1; + vlib_put_next_frame (vm, node, next_index, n_left_to_next); /* commit packets already enqueued in this frame */ + clib_warning ("dequeue fail"); + return 0; +} +/* Peek flavor of the tx path: session_fifo_rx_i with peek_data = 1 */ +int +session_fifo_rx_peek (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, session_fifo_event_t * e0, + stream_session_t * s0, u32 thread_index, int *n_tx_pkts) +{ + return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts, + 1); +} +/* Dequeue flavor of the tx path: session_fifo_rx_i with peek_data = 0 */ +int +session_fifo_rx_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, + session_fifo_event_t * e0, stream_session_t * s0, + u32 thread_index, int *n_tx_pkts) +{ + return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts, + 0); +} +/* Input node: moves events from this thread's vpp event queue into the per-thread event vector and runs the per-session-type rx function for each FIFO_EVENT_SERVER_TX event. Returns the number of packets sent. */ +static uword +session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_fifo_event_t *my_fifo_events, *e; + u32 n_to_dequeue, n_events; + unix_shared_memory_queue_t *q; + int n_tx_packets = 0; + u32 my_thread_index = vm->cpu_index; + int i, rv; + + /* + * Update TCP time + */ + tcp_update_time (vlib_time_now (vm), my_thread_index); + + /* + * Get vpp queue events + */ + q = smm->vpp_event_queues[my_thread_index]; + if (PREDICT_FALSE (q == 0)) + return 0; + + /* min number of events we can dequeue without blocking */ + n_to_dequeue = q->cursize; + my_fifo_events = smm->fifo_events[my_thread_index]; + /* Nothing to do only if the queue is empty and no events are left over */ + if (n_to_dequeue == 0 && vec_len (my_fifo_events) == 0) + return 0; + + /* If we didn't manage to process previous events try going + * over them again without dequeuing new ones.
+ * XXX: Block senders to sessions that can't keep up */ + if (vec_len (my_fifo_events) >= 100) + goto skip_dequeue; + + /* See you in the next life, don't be late */ + if (pthread_mutex_trylock (&q->mutex)) + return 0; + + for (i = 0; i < n_to_dequeue; i++) + { + vec_add2 (my_fifo_events, e, 1); + unix_shared_memory_queue_sub_raw (q, (u8 *) e); + } + + /* The other side of the connection is not polling */ + if (q->cursize < (q->maxsize / 8)) + (void) pthread_cond_broadcast (&q->condvar); + pthread_mutex_unlock (&q->mutex); + + smm->fifo_events[my_thread_index] = my_fifo_events; + +skip_dequeue: + n_events = vec_len (my_fifo_events); /* leftovers plus newly dequeued events, not just q->cursize */ + for (i = 0; i < n_events; i++) + { + svm_fifo_t *f0; /* $$$ prefetch 1 ahead maybe */ + stream_session_t *s0; + u32 server_session_index0, server_thread_index0; + session_fifo_event_t *e0; + + e0 = &my_fifo_events[i]; + f0 = e0->fifo; + server_session_index0 = f0->server_session_index; + server_thread_index0 = f0->server_thread_index; + + /* $$$ add multiple event queues, per vpp worker thread */ + ASSERT (server_thread_index0 == my_thread_index); + + s0 = pool_elt_at_index (smm->sessions[my_thread_index], + server_session_index0); + + ASSERT (s0->thread_index == my_thread_index); + + switch (e0->event_type) + { + case FIFO_EVENT_SERVER_TX: + /* Spray packets in per session type frames, since they go to + * different nodes */ + rv = (smm->session_rx_fns[s0->session_type]) (vm, node, smm, e0, s0, + my_thread_index, + &n_tx_packets); + if (rv < 0) + goto done; + + break; + + default: + clib_warning ("unhandled event type %d", e0->event_type); + } + } + +done: + + /* Couldn't process all events.
Probably out of buffers */ + if (PREDICT_FALSE (i < n_events)) + { + session_fifo_event_t *partially_read = + smm->evts_partially_read[my_thread_index]; + vec_add (partially_read, &my_fifo_events[i], n_events - i); + vec_free (my_fifo_events); + smm->fifo_events[my_thread_index] = partially_read; + smm->evts_partially_read[my_thread_index] = 0; + } + else + { + vec_free (smm->fifo_events[my_thread_index]); + smm->fifo_events[my_thread_index] = + smm->evts_partially_read[my_thread_index]; + smm->evts_partially_read[my_thread_index] = 0; + } + + vlib_node_increment_counter (vm, session_queue_node.index, + SESSION_QUEUE_ERROR_TX, n_tx_packets); + + return n_tx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (session_queue_node) = +{ + .function = session_queue_node_fn, + .name = "session-queue", + .format_trace = format_session_queue_trace, + .type = VLIB_NODE_TYPE_INPUT, + .n_errors = ARRAY_LEN (session_queue_error_strings), + .error_strings = session_queue_error_strings, + .n_next_nodes = SESSION_QUEUE_N_NEXT, + /* .state = VLIB_NODE_STATE_DISABLED, enable on-demand? */ + /* edit / add dispositions here */ + .next_nodes = + { + [SESSION_QUEUE_NEXT_DROP] = "error-drop", + [SESSION_QUEUE_NEXT_IP4_LOOKUP] = "ip4-lookup", + [SESSION_QUEUE_NEXT_IP6_LOOKUP] = "ip6-lookup", + [SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT] = "tcp4-output", + [SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT] = "tcp6-output", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api new file mode 100644 index 00000000..a7b28c1d --- /dev/null +++ b/src/vnet/session/session.api @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** \brief Bind to a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param accept_cookie - sender accept cookie, to identify this bind flavor + @param initial_segment_size - presumably the size of the first fifo segment created for this bind; confirm against the handler + @param uri - a URI, e.g. "tcp://0.0.0.0/0/80" [ipv4], "tcp://::/0/80" [ipv6], etc. + @param options - socket options, fifo sizes, etc. +*/ +define bind_uri { + u32 client_index; + u32 context; + u32 accept_cookie; + u32 initial_segment_size; + u8 uri[128]; + u64 options[16]; +}; + +/** \brief Unbind a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param uri - a URI, e.g. "tcp://0.0.0.0/0/80" [ipv4] + "tcp://::/0/80" [ipv6], etc. + Note: unlike bind_uri, this message carries no options array. +*/ +define unbind_uri { + u32 client_index; + u32 context; + u8 uri[128]; +}; + +/** \brief Connect to a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param uri - a URI, e.g. "tcp4://0.0.0.0/0/80" [ipv4] + "tcp6://::/0/80" [ipv6], etc. + @param client_queue_address - client's API queue address, non-zero when used to perform redirects (wording taken from connect_sock) + @param options - socket options, fifo sizes, etc.
+*/ +define connect_uri { + u32 client_index; + u32 context; + u8 uri[128]; + u64 client_queue_address; + u64 options[16]; +}; + +/** \brief Bind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param event_queue_address - vpp event queue address or 0 if this + connection shouldn't send events + @param segment_name_length - length of segment name + @param segment_name - name of segment client needs to attach to +*/ +define bind_uri_reply { + u32 context; + i32 retval; + u64 server_event_queue_address; + u8 segment_name_length; + u32 segment_size; + u8 segment_name[128]; +}; + +/** \brief unbind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define unbind_uri_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp->client, connect reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param server_rx_fifo - rx (vpp -> vpp-client) fifo address + @param server_tx_fifo - tx (vpp-client -> vpp) fifo address + @param session_index - session index; + @param session_thread_index - session thread index + @param session_type - session thread type + @param vpp_event_queue_address - vpp's event queue address + @param client_event_queue_address - client's event queue address + @param segment_name_length - non-zero if the client needs to attach to + the fifo segment + @param segment_name - set if the client needs to attach to the segment +*/ +define connect_uri_reply { + u32 context; + i32 retval; + u64 server_rx_fifo; + u64 server_tx_fifo; + u32 session_index; + u32 session_thread_index; + u8 session_type; + u64 client_event_queue_address; + u64 vpp_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief vpp->client, please map an additional shared memory segment + @param context - sender context, to match reply w/ request + 
@param segment_name - +*/ +define map_another_segment { + u32 client_index; + u32 context; + u32 segment_size; + u8 segment_name[128]; +}; + +/** \brief client->vpp + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define map_another_segment_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp->client, accept this session + @param context - sender context, to match reply w/ request + @param accept_cookie - tells client which bind flavor just occurred + @param rx_fifo_address - rx (vpp -> vpp-client) fifo address + @param tx_fifo_address - tx (vpp-client -> vpp) fifo address + @param session_index - index of new session + @param session_thread_index - thread index of new session + @param vpp_event_queue_address - vpp's event queue address + @param session_type - type of session + +*/ +define accept_session { + u32 client_index; + u32 context; + u32 accept_cookie; + u64 server_rx_fifo; + u64 server_tx_fifo; + u32 session_index; + u32 session_thread_index; + u64 vpp_event_queue_address; + u8 session_type; +}; + +/** \brief client->vpp, reply to an accept message + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define accept_session_reply { + u32 context; + i32 retval; + u8 session_type; + u8 session_thread_index; + u32 session_index; +}; + +/** \brief bidirectional disconnect API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param session_index - cookie #1 from accept_session / connect_reply + @param session_thread_index - cookie #2 +*/ +define disconnect_session { + u32 client_index; + u32 context; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief 
bidirectional disconnect reply API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define disconnect_session_reply { + u32 client_index; + u32 context; + i32 retval; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief vpp->client reset session API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define reset_session { + u32 client_index; + u32 context; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief client->vpp reset session reply + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define reset_session_reply { + u32 client_index; + u32 context; + i32 retval; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief Bind to an ip:port pair for a given transport protocol + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf - bind namespace + @param is_ip4 - flag that is 1 if ip address family is IPv4 + @param ip - ip address + @param port - port + @param proto - protocol 0 - TCP 1 - UDP + @param options - socket options, fifo sizes, etc. 
+*/ +define bind_sock { + u32 client_index; + u32 context; + u32 vrf; + u8 is_ip4; + u8 ip[16]; + u16 port; + u8 proto; + u64 options[16]; +}; + +/** \brief Unbind + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param handle - bind handle obtained from bind reply +*/ +define unbind_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief Connect to a remote peer + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf - connection namespace + @param is_ip4 - flag that is 1 if ip address family is IPv4 + @param ip - ip address + @param port - port + @param proto - protocol 0 - TCP 1 - UDP + @param client_queue_address - client's API queue address. Non-zero when + used to perform redirects + @param options - socket options, fifo sizes, etc. +*/ +define connect_sock { + u32 client_index; + u32 context; + u32 vrf; + u8 is_ip4; + u8 ip[16]; + u16 port; + u8 proto; + u64 client_queue_address; + u64 options[16]; +}; + +/** \brief Bind reply + @param context - sender context, to match reply w/ request + @param handle - bind handle + @param retval - return code for the request + @param event_queue_address - vpp event queue address or 0 if this + connection shouldn't send events + @param segment_name_length - length of segment name + @param segment_name - name of segment client needs to attach to +*/ +define bind_sock_reply { + u32 context; + u64 handle; + i32 retval; + u64 server_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief unbind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define unbind_sock_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp/server->client, connect reply + @param context - sender context, to match reply w/ request + @param retval - return code 
for the request + @param handle - connection handle + @param server_rx_fifo - rx (vpp -> vpp-client) fifo address + @param server_tx_fifo - tx (vpp-client -> vpp) fifo address + @param vpp_event_queue_address - vpp's event queue address + @param client_event_queue_address - client's event queue address + @param segment_name_length - non-zero if the client needs to attach to + the fifo segment + @param segment_name - set if the client needs to attach to the segment +*/ +define connect_sock_reply { + u32 context; + i32 retval; + u64 handle; + u64 server_rx_fifo; + u64 server_tx_fifo; + u64 client_event_queue_address; + u64 vpp_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief bidirectional disconnect API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define disconnect_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief bidirectional disconnect reply API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param client_context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define disconnect_sock_reply { + u32 client_index; + u32 context; + i32 retval; + u64 handle; +}; + +/** \brief vpp->client, accept this session + @param context - sender context, to match reply w/ request + @param accept_cookie - tells client which bind flavor just occurred + @param handle - session handle obtained through accept/connect + @param rx_fifo_address - rx (vpp -> vpp-client) fifo address + @param tx_fifo_address - tx (vpp-client -> vpp) fifo address + @param vpp_event_queue_address - vpp's event queue address +*/ +define accept_sock { + u32 client_index; + u32 context; + u32 accept_cookie; + u64 handle; + u64 server_rx_fifo; + 
u64 server_tx_fifo; + u64 vpp_event_queue_address; +}; + +/** \brief client->vpp, reply to an accept message + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param handle - session handle obtained through accept/connect +*/ +define accept_sock_reply { + u32 context; + i32 retval; + u64 handle; +}; + +/** \brief vpp->client reset session API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define reset_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief client->vpp reset session reply + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define reset_sock_reply { + u32 client_index; + u32 context; + i32 retval; + u64 handle; +}; +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ \ No newline at end of file diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c new file mode 100644 index 00000000..539da613 --- /dev/null +++ b/src/vnet/session/session.c @@ -0,0 +1,1286 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief Session and session manager + */ + +#include +#include +#include +#include +#include + +/** + * Per-type vector of transport protocol virtual function tables + */ +static transport_proto_vft_t *tp_vfts; + +session_manager_main_t session_manager_main; + +/* + * Session lookup key; (src-ip, dst-ip, src-port, dst-port, session-type) + * Value: (owner thread index << 32 | session_index); + */ +static void +stream_session_table_add_for_tc (u8 sst, transport_connection_t * tc, + u64 value) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + kv4.value = value; + clib_bihash_add_del_16_8 (&smm->v4_session_hash, &kv4, 1 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + kv6.value = value; + clib_bihash_add_del_48_8 (&smm->v6_session_hash, &kv6, 1 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +void +stream_session_table_add (session_manager_main_t * smm, stream_session_t * s, + u64 value) +{ + transport_connection_t *tc; + + tc = tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + stream_session_table_add_for_tc (s->session_type, tc, value); +} + +static void +stream_session_half_open_table_add (u8 sst, transport_connection_t * tc, + u64 value) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + kv4.value = value; + clib_bihash_add_del_16_8 (&smm->v4_half_open_hash, &kv4, + 1 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + kv6.value = value; + clib_bihash_add_del_48_8 
(&smm->v6_half_open_hash, &kv6, + 1 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +static int +stream_session_table_del_for_tc (session_manager_main_t * smm, u8 sst, + transport_connection_t * tc) +{ + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + return clib_bihash_add_del_16_8 (&smm->v4_session_hash, &kv4, + 0 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + return clib_bihash_add_del_48_8 (&smm->v6_session_hash, &kv6, + 0 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } + + return 0; +} + +static int +stream_session_table_del (session_manager_main_t * smm, stream_session_t * s) +{ + transport_connection_t *ts; + + ts = tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + return stream_session_table_del_for_tc (smm, s->session_type, ts); +} + +static void +stream_session_half_open_table_del (session_manager_main_t * smm, u8 sst, + transport_connection_t * tc) +{ + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + clib_bihash_add_del_16_8 (&smm->v4_half_open_hash, &kv4, + 0 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + clib_bihash_add_del_48_8 (&smm->v6_half_open_hash, &kv6, + 0 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +stream_session_t * +stream_session_lookup_listener4 (ip4_address_t * lcl, u16 lcl_port, u8 proto) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + int rv; + + make_v4_listener_kv (&kv4, lcl, lcl_port, proto); + rv = clib_bihash_search_inline_16_8 
(&smm->v4_session_hash, &kv4); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], (u32) kv4.value); + + /* Zero out the lcl ip */ + kv4.key[0] = 0; + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv4.value); + + return 0; +} + +/** Looks up a session based on the 5-tuple passed as argument. + * + * First it tries to find an established session, if this fails, it tries + * finding a listener session if this fails, it tries a lookup with a + * wildcarded local source (listener bound to all interfaces) + */ +stream_session_t * +stream_session_lookup4 (ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + int rv; + + /* Lookup session amongst established ones */ + make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + return stream_session_get_tsi (kv4.value, my_thread_index); + + /* If nothing is found, check if any listener is available */ + return stream_session_lookup_listener4 (lcl, lcl_port, proto); +} + +stream_session_t * +stream_session_lookup_listener6 (ip6_address_t * lcl, u16 lcl_port, u8 proto) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv6_t kv6; + int rv; + + make_v6_listener_kv (&kv6, lcl, lcl_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv6.value); + + /* Zero out the lcl ip */ + kv6.key[0] = kv6.key[1] = 0; + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv6.value); + + return 0; +} + +/* Looks up a session based on the 5-tuple passed as argument. 
+ * First it tries to find an established session, if this fails, it tries + * finding a listener session if this fails, it tries a lookup with a + * wildcarded local source (listener bound to all interfaces) */ +stream_session_t * +stream_session_lookup6 (ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_kv6_t kv6; + int rv; + + make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return stream_session_get_tsi (kv6.value, my_thread_index); + + /* If nothing is found, check if any listener is available */ + return stream_session_lookup_listener6 (lcl, lcl_port, proto); +} + +stream_session_t * +stream_session_lookup_listener (ip46_address_t * lcl, u16 lcl_port, u8 proto) +{ + switch (proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + return stream_session_lookup_listener4 (&lcl->ip4, lcl_port, proto); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + return stream_session_lookup_listener6 (&lcl->ip6, lcl_port, proto); + break; + } + return 0; +} + +static u64 +stream_session_half_open_lookup (session_manager_main_t * smm, + ip46_address_t * lcl, ip46_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + session_kv4_t kv4; + session_kv6_t kv6; + int rv; + + switch (proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv (&kv4, &lcl->ip4, &rmt->ip4, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); + + if (rv == 0) + return kv4.value; + + return (u64) ~ 0; + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv (&kv6, &lcl->ip6, &rmt->ip6, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); + + if (rv == 0) + return kv6.value; + + return (u64) ~ 0; + 
break; + } + return 0; +} +/* Find the transport connection for an IPv4 5-tuple: established session first, then a listener, then the half-open table. Returns 0 if nothing matches. */ +transport_connection_t * +stream_session_lookup_transport4 (session_manager_main_t * smm, + ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_kv4_t kv4; + stream_session_t *s; + int rv; + + /* Lookup session amongst established ones */ + make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + { + s = stream_session_get_tsi (kv4.value, my_thread_index); + + return tp_vfts[s->session_type].get_connection (s->connection_index, + my_thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener4 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); + if (rv == 0) + return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF); + + return 0; +} +/* IPv6 counterpart of stream_session_lookup_transport4: established session, then listener, then half-open table. Returns 0 if nothing matches. */ +transport_connection_t * +stream_session_lookup_transport6 (session_manager_main_t * smm, + ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + stream_session_t *s; + session_kv6_t kv6; + int rv; + + make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + { + s = stream_session_get_tsi (kv6.value, my_thread_index); + + return tp_vfts[s->session_type].get_connection (s->connection_index, + my_thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener6 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections. s is NULL at this point, so the vft is indexed by proto, as in the v4 variant */ + rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); + if (rv == 0) + return
tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF); + + return 0; +} + +/** + * Allocate vpp event queue (once) per worker thread + */ +void +vpp_session_event_queue_allocate (session_manager_main_t * smm, + u32 thread_index) +{ + api_main_t *am = &api_main; + void *oldheap; + + if (smm->vpp_event_queues[thread_index] == 0) + { + /* Allocate event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + + smm->vpp_event_queues[thread_index] = + unix_shared_memory_queue_init (2048 /* nels $$$$ config */ , + sizeof (session_fifo_event_t), + 0 /* consumer pid */ , + 0 + /* (do not) send signal when queue non-empty */ + ); + + svm_pop_heap (oldheap); + } +} +/* Report the name and size of the fifo segment with the given index */ +void +session_manager_get_segment_info (u32 index, u8 ** name, u32 * size) +{ + svm_fifo_segment_private_t *s; + s = svm_fifo_get_segment (index); + *name = s->h->segment_name; + *size = s->ssvm.ssvm_size; +} +/* Create a fifo segment with the given name and size and record its index in sm->segment_indices. Returns 0 on success; on failure frees segment_name and returns -1. */ +always_inline int +session_manager_add_segment_i (session_manager_main_t * smm, + session_manager_t * sm, + u32 segment_size, u8 * segment_name) +{ + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + int rv; + + memset (ca, 0, sizeof (*ca)); + + ca->segment_name = (char *) segment_name; + ca->segment_size = segment_size; + + rv = svm_fifo_segment_create (ca); + if (rv) + { + clib_warning ("svm_fifo_segment_create ('%s', %d) failed", + ca->segment_name, ca->segment_size); + vec_free (segment_name); + return -1; + } + + vec_add1 (sm->segment_indices, ca->new_segment_index); + + return 0; +} + +static int +session_manager_add_segment (session_manager_main_t * smm, + session_manager_t * sm) +{ + u8 *segment_name; + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + u32 add_segment_size; + u32 default_segment_size = 128 << 10; + + memset (ca, 0, sizeof (*ca)); + segment_name = format (0, "%d-%d%c", getpid (), + smm->unique_segment_name_counter++, 0); + add_segment_size = + sm->add_segment_size ?
sm->add_segment_size : default_segment_size; + + return session_manager_add_segment_i (smm, sm, add_segment_size, + segment_name); +} + +int +session_manager_add_first_segment (session_manager_main_t * smm, + session_manager_t * sm, u32 segment_size, + u8 ** segment_name) +{ + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + memset (ca, 0, sizeof (*ca)); + *segment_name = format (0, "%d-%d%c", getpid (), + smm->unique_segment_name_counter++, 0); + return session_manager_add_segment_i (smm, sm, segment_size, *segment_name); +} + +void +session_manager_del (session_manager_main_t * smm, session_manager_t * sm) +{ + u32 *deleted_sessions = 0; + u32 *deleted_thread_indices = 0; + int i, j; + + /* Across all fifo segments used by the server */ + for (j = 0; j < vec_len (sm->segment_indices); j++) + { + svm_fifo_segment_private_t *fifo_segment; + svm_fifo_t **fifos; + /* Vector of fifos allocated in the segment */ + fifo_segment = svm_fifo_get_segment (sm->segment_indices[j]); + fifos = (svm_fifo_t **) fifo_segment->h->fifos; + + /* + * Remove any residual sessions from the session lookup table + * Don't bother deleting the individual fifos, we're going to + * throw away the fifo segment in a minute. + */ + for (i = 0; i < vec_len (fifos); i++) + { + svm_fifo_t *fifo; + u32 session_index, thread_index; + stream_session_t *session; + + fifo = fifos[i]; + session_index = fifo->server_session_index; + thread_index = fifo->server_thread_index; + + session = pool_elt_at_index (smm->sessions[thread_index], + session_index); + + /* Add to the deleted_sessions vector (once!) 
*/ + if (!session->is_deleted) + { + session->is_deleted = 1; + vec_add1 (deleted_sessions, + session - smm->sessions[thread_index]); + vec_add1 (deleted_thread_indices, thread_index); + } + } + + for (i = 0; i < vec_len (deleted_sessions); i++) + { + stream_session_t *session; + + session = + pool_elt_at_index (smm->sessions[deleted_thread_indices[i]], + deleted_sessions[i]); + + /* Instead of directly removing the session call disconnect */ + stream_session_disconnect (session); + + /* + stream_session_table_del (smm, session); + pool_put(smm->sessions[deleted_thread_indices[i]], session); + */ + } + + vec_reset_length (deleted_sessions); + vec_reset_length (deleted_thread_indices); + + /* Instead of removing the segment, test when removing the session if + * the segment can be removed + */ + /* svm_fifo_segment_delete (fifo_segment); */ + } + + vec_free (deleted_sessions); + vec_free (deleted_thread_indices); +} + +int +session_manager_allocate_session_fifos (session_manager_main_t * smm, + session_manager_t * sm, + svm_fifo_t ** server_rx_fifo, + svm_fifo_t ** server_tx_fifo, + u32 * fifo_segment_index, + u8 * added_a_segment) +{ + svm_fifo_segment_private_t *fifo_segment; + u32 fifo_size, default_fifo_size = 8192 /* TODO config */ ; + int i; + + *added_a_segment = 0; + + /* Allocate svm fifos */ + ASSERT (vec_len (sm->segment_indices)); + +again: + for (i = 0; i < vec_len (sm->segment_indices); i++) + { + *fifo_segment_index = sm->segment_indices[i]; + fifo_segment = svm_fifo_get_segment (*fifo_segment_index); + + fifo_size = sm->rx_fifo_size; + fifo_size = (fifo_size == 0) ? default_fifo_size : fifo_size; + *server_rx_fifo = svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size); + + fifo_size = sm->tx_fifo_size; + fifo_size = (fifo_size == 0) ? default_fifo_size : fifo_size; + *server_tx_fifo = svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size); + + if (*server_rx_fifo == 0) + { + /* This would be very odd, but handle it... 
*/ + if (*server_tx_fifo != 0) + { + svm_fifo_segment_free_fifo (fifo_segment, *server_tx_fifo); + *server_tx_fifo = 0; + } + continue; + } + if (*server_tx_fifo == 0) + { + if (*server_rx_fifo != 0) + { + svm_fifo_segment_free_fifo (fifo_segment, *server_rx_fifo); + *server_rx_fifo = 0; + } + continue; + } + break; + } + + /* See if we're supposed to create another segment */ + if (*server_rx_fifo == 0) + { + if (sm->add_segment) + { + if (*added_a_segment) + { + clib_warning ("added a segment, still cant allocate a fifo"); + return SESSION_ERROR_NEW_SEG_NO_SPACE; + } + + if (session_manager_add_segment (smm, sm)) + return VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + + *added_a_segment = 1; + goto again; + } + else + return SESSION_ERROR_NO_SPACE; + } + return 0; +} + +int +stream_session_create_i (session_manager_main_t * smm, application_t * app, + transport_connection_t * tc, + stream_session_t ** ret_s) +{ + int rv; + svm_fifo_t *server_rx_fifo = 0, *server_tx_fifo = 0; + u32 fifo_segment_index; + u32 pool_index, seg_size; + stream_session_t *s; + u64 value; + u32 thread_index = tc->thread_index; + session_manager_t *sm; + u8 segment_added; + u8 *seg_name; + + sm = session_manager_get (app->session_manager_index); + + /* Check the API queue */ + if (app->mode == APP_SERVER && application_api_queue_is_full (app)) + return SESSION_ERROR_API_QUEUE_FULL; + + if ((rv = session_manager_allocate_session_fifos (smm, sm, &server_rx_fifo, + &server_tx_fifo, + &fifo_segment_index, + &segment_added))) + return rv; + + if (segment_added && app->mode == APP_SERVER) + { + /* Send an API message to the external server, to map new segment */ + ASSERT (app->cb_fns.add_segment_callback); + + session_manager_get_segment_info (fifo_segment_index, &seg_name, + &seg_size); + if (app->cb_fns.add_segment_callback (app->api_client_index, seg_name, + seg_size)) + return VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + } + + /* Create the session */ + pool_get (smm->sessions[thread_index], s); + 
memset (s, 0, sizeof (*s)); + + /* Initialize backpointers */ + pool_index = s - smm->sessions[thread_index]; + server_rx_fifo->server_session_index = pool_index; + server_rx_fifo->server_thread_index = thread_index; + + server_tx_fifo->server_session_index = pool_index; + server_tx_fifo->server_thread_index = thread_index; + + s->server_rx_fifo = server_rx_fifo; + s->server_tx_fifo = server_tx_fifo; + + /* Initialize state machine, such as it is... */ + s->session_type = app->session_type; + s->session_state = SESSION_STATE_CONNECTING; + s->app_index = application_get_index (app); + s->server_segment_index = fifo_segment_index; + s->thread_index = thread_index; + s->session_index = pool_index; + + /* Attach transport to session */ + s->connection_index = tc->c_index; + + /* Attach session to transport */ + tc->s_index = s->session_index; + + /* Add to the main lookup table */ + value = (((u64) thread_index) << 32) | (u64) s->session_index; + stream_session_table_add_for_tc (app->session_type, tc, value); + + *ret_s = s; + + return 0; +} + +/* + * Enqueue data for delivery to session peer. Does not notify peer of enqueue + * event but on request can queue notification events for later delivery by + * calling stream_server_flush_enqueue_events(). + * + * @param tc Transport connection which is to be enqueued data + * @param data Data to be enqueued + * @param len Length of data to be enqueued + * @param queue_event Flag to indicate if peer is to be notified or if event + * is to be queued. The former is useful when more data is + * enqueued and only one event is to be generated. + * @return Number of bytes enqueued or a negative value if enqueueing failed. + */ +int +stream_session_enqueue_data (transport_connection_t * tc, u8 * data, u16 len, + u8 queue_event) +{ + stream_session_t *s; + int enqueued; + + s = stream_session_get (tc->s_index, tc->thread_index); + + /* Make sure there's enough space left. 
We might've filled the pipes */ + if (PREDICT_FALSE (len > svm_fifo_max_enqueue (s->server_rx_fifo))) + return -1; + + enqueued = svm_fifo_enqueue_nowait (s->server_rx_fifo, s->pid, len, data); + + if (queue_event) + { + /* Queue RX event on this fifo. Eventually these will need to be flushed + * by calling stream_server_flush_enqueue_events () */ + session_manager_main_t *smm = vnet_get_session_manager_main (); + u32 thread_index = s->thread_index; + u32 my_enqueue_epoch = smm->current_enqueue_epoch[thread_index]; + + if (s->enqueue_epoch != my_enqueue_epoch) + { + s->enqueue_epoch = my_enqueue_epoch; + vec_add1 (smm->session_indices_to_enqueue_by_thread[thread_index], + s - smm->sessions[thread_index]); + } + } + + return enqueued; +} + +/** Check if we have space in rx fifo to push more bytes */ +u8 +stream_session_no_space (transport_connection_t * tc, u32 thread_index, + u16 data_len) +{ + stream_session_t *s = stream_session_get (tc->c_index, thread_index); + + if (PREDICT_FALSE (s->session_state != SESSION_STATE_READY)) + return 1; + + if (data_len > svm_fifo_max_enqueue (s->server_rx_fifo)) + return 1; + + return 0; +} + +u32 +stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer, + u32 offset, u32 max_bytes) +{ + stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index); + return svm_fifo_peek (s->server_tx_fifo, s->pid, offset, max_bytes, buffer); +} + +u32 +stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes) +{ + stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index); + return svm_fifo_dequeue_drop (s->server_tx_fifo, s->pid, max_bytes); +} + +/** + * Notify session peer that new data has been enqueued. + * + * @param s Stream session for which the event is to be generated. + * @param block Flag to indicate if call should block if event queue is full. + * + * @return 0 on succes or negative number if failed to send notification. 
+ */ +static int +stream_session_enqueue_notify (stream_session_t * s, u8 block) +{ + application_t *app; + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + static u32 serial_number; + + if (PREDICT_FALSE (s->session_state == SESSION_STATE_CLOSED)) + return 0; + + /* Get session's server */ + app = application_get (s->app_index); + + /* Fabricate event */ + evt.fifo = s->server_rx_fifo; + evt.event_type = FIFO_EVENT_SERVER_RX; + evt.event_id = serial_number++; + evt.enqueue_length = svm_fifo_max_dequeue (s->server_rx_fifo); + + /* Add event to server's event queue */ + q = app->event_queue; + + /* Based on request block (or not) for lack of space */ + if (block || PREDICT_TRUE (q->cursize < q->maxsize)) + unix_shared_memory_queue_add (app->event_queue, (u8 *) & evt, + 0 /* do wait for mutex */ ); + else + return -1; + + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = "evt-enqueue: id %d length %d",.format_args = "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->data[0] = evt.event_id; + ed->data[1] = evt.enqueue_length; + } + + return 0; +} + +/** + * Flushes queue of sessions that are to be notified of new data + * enqueued events. + * + * @param thread_index Thread index for which the flush is to be performed. + * @return 0 on success or a positive number indicating the number of + * failures due to API queue being full. 
+ */ +int +session_manager_flush_enqueue_events (u32 thread_index) +{ + session_manager_main_t *smm = &session_manager_main; + u32 *session_indices_to_enqueue; + int i, errors = 0; + + session_indices_to_enqueue = + smm->session_indices_to_enqueue_by_thread[thread_index]; + + for (i = 0; i < vec_len (session_indices_to_enqueue); i++) + { + stream_session_t *s0; + + /* Get session */ + s0 = stream_session_get (session_indices_to_enqueue[i], thread_index); + if (stream_session_enqueue_notify (s0, 0 /* don't block */ )) + { + errors++; + } + } + + vec_reset_length (session_indices_to_enqueue); + + smm->session_indices_to_enqueue_by_thread[thread_index] = + session_indices_to_enqueue; + + /* Increment enqueue epoch for next round */ + smm->current_enqueue_epoch[thread_index]++; + + return errors; +} + +/* + * Start listening on server's ip/port pair for requested transport. + * + * Creates a 'dummy' stream session with state LISTENING to be used in session + * lookups, prior to establishing connection. Requests transport to build + * it's own specific listening connection. 
+ */ +int +stream_session_start_listen (u32 server_index, ip46_address_t * ip, u16 port) +{ + session_manager_main_t *smm = &session_manager_main; + stream_session_t *s; + transport_connection_t *tc; + application_t *srv; + u32 tci; + + srv = application_get (server_index); + + pool_get (smm->listen_sessions[srv->session_type], s); + memset (s, 0, sizeof (*s)); + + s->session_type = srv->session_type; + s->session_state = SESSION_STATE_LISTENING; + s->session_index = s - smm->listen_sessions[srv->session_type]; + s->app_index = srv->index; + + /* Transport bind/listen */ + tci = tp_vfts[srv->session_type].bind (smm->vlib_main, s->session_index, ip, + port); + + /* Attach transport to session */ + s->connection_index = tci; + tc = tp_vfts[srv->session_type].get_listener (tci); + + srv->session_index = s->session_index; + + /* Add to the main lookup table */ + stream_session_table_add_for_tc (s->session_type, tc, s->session_index); + + return 0; +} + +void +stream_session_stop_listen (u32 server_index) +{ + session_manager_main_t *smm = &session_manager_main; + stream_session_t *listener; + transport_connection_t *tc; + application_t *srv; + + srv = application_get (server_index); + listener = pool_elt_at_index (smm->listen_sessions[srv->session_type], + srv->session_index); + + tc = tp_vfts[srv->session_type].get_listener (listener->connection_index); + stream_session_table_del_for_tc (smm, listener->session_type, tc); + + tp_vfts[srv->session_type].unbind (smm->vlib_main, + listener->connection_index); + pool_put (smm->listen_sessions[srv->session_type], listener); +} + +int +connect_server_add_segment_cb (application_t * ss, char *segment_name, + u32 segment_size) +{ + /* Does exactly nothing, but die */ + ASSERT (0); + return 0; +} + +void +connects_session_manager_init (session_manager_main_t * smm, u8 session_type) +{ + session_manager_t *sm; + u32 connect_fifo_size = 8 << 10; /* Config? 
*/ + u32 default_segment_size = 1 << 20; + + pool_get (smm->session_managers, sm); + memset (sm, 0, sizeof (*sm)); + + sm->add_segment_size = default_segment_size; + sm->rx_fifo_size = connect_fifo_size; + sm->tx_fifo_size = connect_fifo_size; + sm->add_segment = 1; + + session_manager_add_segment (smm, sm); + smm->connect_manager_index[session_type] = sm - smm->session_managers; +} + +void +stream_session_connect_notify (transport_connection_t * tc, u8 sst, + u8 is_fail) +{ + session_manager_main_t *smm = &session_manager_main; + application_t *app; + stream_session_t *new_s = 0; + u64 value; + + value = stream_session_half_open_lookup (smm, &tc->lcl_ip, &tc->rmt_ip, + tc->lcl_port, tc->rmt_port, + tc->proto); + if (value == HALF_OPEN_LOOKUP_INVALID_VALUE) + { + clib_warning ("This can't be good!"); + return; + } + + app = application_get (value >> 32); + + if (!is_fail) + { + /* Create new session (server segments are allocated if needed) */ + if (stream_session_create_i (smm, app, tc, &new_s)) + return; + + app->session_index = stream_session_get_index (new_s); + app->thread_index = new_s->thread_index; + + /* Allocate vpp event queue for this thread if needed */ + vpp_session_event_queue_allocate (smm, tc->thread_index); + } + + /* Notify client */ + app->cb_fns.session_connected_callback (app->api_client_index, new_s, + is_fail); + + /* Cleanup session lookup */ + stream_session_half_open_table_del (smm, sst, tc); +} + +void +stream_session_accept_notify (transport_connection_t * tc) +{ + application_t *server; + stream_session_t *s; + + s = stream_session_get (tc->s_index, tc->thread_index); + server = application_get (s->app_index); + server->cb_fns.session_accept_callback (s); +} + +/** + * Notification from transport that connection is being closed. + * + * A disconnect is sent to application but state is not removed. Once + * disconnect is acknowledged by application, session disconnect is called. 
+ * Ultimately this leads to close being called on transport (passive close). + */ +void +stream_session_disconnect_notify (transport_connection_t * tc) +{ + application_t *server; + stream_session_t *s; + + s = stream_session_get (tc->s_index, tc->thread_index); + server = application_get (s->app_index); + server->cb_fns.session_disconnect_callback (s); +} + +/** + * Cleans up session and associated app if needed. + */ +void +stream_session_delete (stream_session_t * s) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + svm_fifo_segment_private_t *fifo_segment; + application_t *app; + int rv; + + /* delete from the main lookup table */ + rv = stream_session_table_del (smm, s); + + if (rv) + clib_warning ("hash delete error, rv %d", rv); + + /* Cleanup fifo segments */ + fifo_segment = svm_fifo_get_segment (s->server_segment_index); + svm_fifo_segment_free_fifo (fifo_segment, s->server_rx_fifo); + svm_fifo_segment_free_fifo (fifo_segment, s->server_tx_fifo); + + /* Cleanup app if client */ + app = application_get (s->app_index); + if (app->mode == APP_CLIENT) + { + application_del (app); + } + else if (app->mode == APP_SERVER) + { + session_manager_t *sm; + svm_fifo_segment_private_t *fifo_segment; + svm_fifo_t **fifos; + u32 fifo_index; + + sm = session_manager_get (app->session_manager_index); + + /* Delete fifo */ + fifo_segment = svm_fifo_get_segment (s->server_segment_index); + fifos = (svm_fifo_t **) fifo_segment->h->fifos; + + fifo_index = svm_fifo_segment_index (fifo_segment); + + /* Remove segment only if it holds no fifos and not the first */ + if (sm->segment_indices[0] != fifo_index && vec_len (fifos) == 0) + svm_fifo_segment_delete (fifo_segment); + } + + pool_put (smm->sessions[s->thread_index], s); +} + +/** + * Notification from transport that connection is being deleted + * + * This should be called only on previously fully established sessions. 
For + * instance failed connects should call stream_session_connect_notify and + * indicate that the connect has failed. + */ +void +stream_session_delete_notify (transport_connection_t * tc) +{ + stream_session_t *s; + + s = stream_session_get_if_valid (tc->s_index, tc->thread_index); + if (!s) + { + clib_warning ("Surprised!"); + return; + } + stream_session_delete (s); +} + +/** + * Notify application that connection has been reset. + */ +void +stream_session_reset_notify (transport_connection_t * tc) +{ + stream_session_t *s; + application_t *app; + s = stream_session_get (tc->s_index, tc->thread_index); + + app = application_get (s->app_index); + app->cb_fns.session_reset_callback (s); +} + +/** + * Accept a stream session. Optionally ping the server by callback. + */ +int +stream_session_accept (transport_connection_t * tc, u32 listener_index, + u8 sst, u8 notify) +{ + session_manager_main_t *smm = &session_manager_main; + application_t *server; + stream_session_t *s, *listener; + + int rv; + + /* Find the server */ + listener = pool_elt_at_index (smm->listen_sessions[sst], listener_index); + server = application_get (listener->app_index); + + if ((rv = stream_session_create_i (smm, server, tc, &s))) + return rv; + + /* Allocate vpp event queue for this thread if needed */ + vpp_session_event_queue_allocate (smm, tc->thread_index); + + /* Shoulder-tap the server */ + if (notify) + { + server->cb_fns.session_accept_callback (s); + } + + return 0; +} + +void +stream_session_open (u8 sst, ip46_address_t * addr, u16 port_host_byte_order, + u32 app_index) +{ + transport_connection_t *tc; + u32 tci; + u64 value; + + /* Ask transport to open connection */ + tci = tp_vfts[sst].open (addr, port_host_byte_order); + + /* Get transport connection */ + tc = tp_vfts[sst].get_half_open (tci); + + /* Store api_client_index and transport connection index */ + value = (((u64) app_index) << 32) | (u64) tc->c_index; + + /* Add to the half-open lookup table */ + 
stream_session_half_open_table_add (sst, tc, value); +} + +/** + * Disconnect session and propagate to transport. This should eventually + * result in a delete notification that allows us to cleanup session state. + * Called for both active/passive disconnects. + */ +void +stream_session_disconnect (stream_session_t * s) +{ + tp_vfts[s->session_type].close (s->connection_index, s->thread_index); + s->session_state = SESSION_STATE_CLOSED; +} + +/** + * Cleanup transport and session state. + */ +void +stream_session_cleanup (stream_session_t * s) +{ + tp_vfts[s->session_type].cleanup (s->connection_index, s->thread_index); + stream_session_delete (s); +} + +void +session_register_transport (u8 type, const transport_proto_vft_t * vft) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + + vec_validate (tp_vfts, type); + tp_vfts[type] = *vft; + + /* If an offset function is provided, then peek instead of dequeue */ + smm->session_rx_fns[type] = + (vft->rx_fifo_offset) ? session_fifo_rx_peek : session_fifo_rx_dequeue; +} + +transport_proto_vft_t * +session_get_transport_vft (u8 type) +{ + if (type >= vec_len (tp_vfts)) + return 0; + return &tp_vfts[type]; +} + +static clib_error_t * +session_manager_main_init (vlib_main_t * vm) +{ + u32 num_threads; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + session_manager_main_t *smm = &session_manager_main; + int i; + + smm->vlib_main = vm; + smm->vnet_main = vnet_get_main (); + + num_threads = 1 /* main thread */ + vtm->n_threads; + + if (num_threads < 1) + return clib_error_return (0, "n_thread_stacks not set"); + + /* $$$ config parameters */ + svm_fifo_segment_init (0x200000000ULL /* first segment base VA */ , + 20 /* timeout in seconds */ ); + + /* configure per-thread ** vectors */ + vec_validate (smm->sessions, num_threads - 1); + vec_validate (smm->session_indices_to_enqueue_by_thread, num_threads - 1); + vec_validate (smm->tx_buffers, num_threads - 1); + vec_validate (smm->fifo_events, 
num_threads - 1); + vec_validate (smm->evts_partially_read, num_threads - 1); + vec_validate (smm->current_enqueue_epoch, num_threads - 1); + vec_validate (smm->vpp_event_queues, num_threads - 1); + + /* $$$$ preallocate hack config parameter */ + for (i = 0; i < 200000; i++) + { + stream_session_t *ss; + pool_get (smm->sessions[0], ss); + memset (ss, 0, sizeof (*ss)); + } + + for (i = 0; i < 200000; i++) + pool_put_index (smm->sessions[0], i); + + clib_bihash_init_16_8 (&smm->v4_session_hash, "v4 session table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + clib_bihash_init_48_8 (&smm->v6_session_hash, "v6 session table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + clib_bihash_init_16_8 (&smm->v4_half_open_hash, "v4 half-open table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + clib_bihash_init_48_8 (&smm->v6_half_open_hash, "v6 half-open table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + for (i = 0; i < SESSION_N_TYPES; i++) + smm->connect_manager_index[i] = INVALID_INDEX; + + return 0; +} + +VLIB_INIT_FUNCTION (session_manager_main_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h new file mode 100644 index 00000000..cf14cca9 --- /dev/null +++ b/src/vnet/session/session.h @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_session_h__ +#define __included_session_h__ + +#include +#include +#include +#include +#include + +#define HALF_OPEN_LOOKUP_INVALID_VALUE ((u64)~0) +#define INVALID_INDEX ((u32)~0) + +/* TODO decide how much since we have pre-data as well */ +#define MAX_HDRS_LEN 100 /* Max number of bytes for headers */ + /* Values stored in the event_type field of session_fifo_event_t */ +typedef enum +{ + FIFO_EVENT_SERVER_RX, + FIFO_EVENT_SERVER_TX, + FIFO_EVENT_TIMEOUT, + FIFO_EVENT_SERVER_EXIT, +} fifo_event_type_t; + /* X-macro list of (symbol, description) pairs; expanded below into the SESSION_ERROR_<symbol> enumerators */ +#define foreach_session_input_error \ +_(NO_SESSION, "No session drops") \ +_(NO_LISTENER, "No listener for dst port drops") \ +_(ENQUEUED, "Packets pushed into rx fifo") \ +_(NOT_READY, "Session not ready packets") \ +_(FIFO_FULL, "Packets dropped for lack of rx fifo space") \ +_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") \ +_(API_QUEUE_FULL, "Sessions not created for lack of API queue space") \ +_(NEW_SEG_NO_SPACE, "Created segment, couldn't allocate a fifo pair") \ +_(NO_SPACE, "Couldn't allocate a fifo pair") + /* One SESSION_ERROR_<symbol> per entry of foreach_session_input_error, followed by the SESSION_N_ERROR count */ +typedef enum +{ +#define _(sym,str) SESSION_ERROR_##sym, + foreach_session_input_error +#undef _ + SESSION_N_ERROR, +} session_error_t; + +/* Event queue input node static next indices */ +typedef enum +{ + SESSION_QUEUE_NEXT_DROP, + SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT, + SESSION_QUEUE_NEXT_IP4_LOOKUP, + SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT, + SESSION_QUEUE_NEXT_IP6_LOOKUP, + SESSION_QUEUE_N_NEXT, +} session_queue_next_t; + /* X-macro list of (UPPER, lower) session type names; the UPPER names become SESSION_TYPE_<UPPER> enumerators */ +#define foreach_session_type \ + _(IP4_TCP, ip4_tcp) \ + _(IP4_UDP, ip4_udp) \ + _(IP6_TCP, ip6_tcp) \ + _(IP6_UDP, ip6_udp) + 
+typedef enum +{ +#define _(A, a) SESSION_TYPE_##A, + foreach_session_type +#undef _ + SESSION_N_TYPES, +} session_type_t; + +/* + * Application session state + */ +typedef enum +{ + SESSION_STATE_LISTENING, + SESSION_STATE_CONNECTING, + SESSION_STATE_READY, + SESSION_STATE_CLOSED, + SESSION_STATE_N_STATES, +} stream_session_state_t; + /* Event record added to event queues; event_type takes fifo_event_type_t values */ +typedef CLIB_PACKED (struct + { + svm_fifo_t * fifo; + u8 event_type; + /* $$$$ for event logging */ + u16 event_id; + u32 enqueue_length; + }) session_fifo_event_t; + +typedef struct _stream_session_t +{ + /** Type */ + u8 session_type; + + /** State */ + u8 session_state; + + /** Session index in per_thread pool */ + u32 session_index; + + /** Transport specific */ + u32 connection_index; + + /** Thread whose pool in session_manager_main.sessions holds this session */ u8 thread_index; + + /** Application specific */ + u32 pid; + + /** fifo pointers. Once allocated, these do not move */ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; + + /** To avoid n**2 "one event per frame" check */ + u8 enqueue_epoch; + + /** used during unbind processing */ + u8 is_deleted; + + /** stream server pool index */ + u32 app_index; + + /** svm segment index */ + u32 server_segment_index; +} stream_session_t; + +typedef struct _session_manager +{ + /** segments mapped by this server */ + u32 *segment_indices; + + /** Session fifo sizes. 
They are provided for binds and take default + * values for connects */ + u32 rx_fifo_size; + u32 tx_fifo_size; + + /** Configured additional segment size */ + u32 add_segment_size; + + /** Flag that indicates if additional segments should be created */ + u8 add_segment; +} session_manager_t; + +/* Forward definition */ +typedef struct _session_manager_main session_manager_main_t; + /* Signature of the per-transport rx functions stored in the session_rx_fns array of session_manager_main_t */ +typedef int + (session_fifo_rx_fn) (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, + session_fifo_event_t * e0, stream_session_t * s0, + u32 thread_index, int *n_tx_pkts); + +extern session_fifo_rx_fn session_fifo_rx_peek; +extern session_fifo_rx_fn session_fifo_rx_dequeue; + +struct _session_manager_main +{ + /** Lookup tables for established sessions and listeners */ + clib_bihash_16_8_t v4_session_hash; + clib_bihash_48_8_t v6_session_hash; + + /** Lookup tables for half-open sessions */ + clib_bihash_16_8_t v4_half_open_hash; + clib_bihash_48_8_t v6_half_open_hash; + + /** Per worker thread session pools */ + stream_session_t **sessions; + + /** Pool of listen sessions. 
Same type as stream sessions to ease lookups */ + stream_session_t *listen_sessions[SESSION_N_TYPES]; + + /** Sparse vector to map dst port to stream server */ + u16 *stream_server_by_dst_port[SESSION_N_TYPES]; + + /** per-worker enqueue epoch counters */ + u8 *current_enqueue_epoch; + + /** Per-worker thread vector of sessions to enqueue */ + u32 **session_indices_to_enqueue_by_thread; + + /** per-worker tx buffer free lists */ + u32 **tx_buffers; + + /** Per worker-thread vector of partially read events */ + session_fifo_event_t **evts_partially_read; + + /** per-worker active event vectors */ + session_fifo_event_t **fifo_events; + + /** vpp fifo event queue */ + unix_shared_memory_queue_t **vpp_event_queues; + + /** Unique segment name counter */ + u32 unique_segment_name_counter; + + /* Connection manager used by incoming connects */ + u32 connect_manager_index[SESSION_N_TYPES]; + + /** Pool of session managers; session_manager_get() indexes into it */ session_manager_t *session_managers; + + /** Per transport rx function that can either dequeue or peek */ + session_fifo_rx_fn *session_rx_fns[SESSION_N_TYPES]; + + /* Convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +}; + +extern session_manager_main_t session_manager_main; + +/* + * Session manager function + */ +always_inline session_manager_main_t * +vnet_get_session_manager_main () +{ + return &session_manager_main; +} + +always_inline session_manager_t * +session_manager_get (u32 index) +{ + return pool_elt_at_index (session_manager_main.session_managers, index); +} + +always_inline unix_shared_memory_queue_t * +session_manager_get_vpp_event_queue (u32 thread_index) +{ + return session_manager_main.vpp_event_queues[thread_index]; +} + /* Session manager registered in connect_manager_index for the given session type */ +always_inline session_manager_t * +connects_session_manager_get (session_manager_main_t * smm, + session_type_t session_type) +{ + return pool_elt_at_index (smm->session_managers, + smm->connect_manager_index[session_type]); +} + +void session_manager_get_segment_info (u32 index, u8 ** name, u32 * size); +int 
session_manager_flush_enqueue_events (u32 thread_index); +int +session_manager_add_first_segment (session_manager_main_t * smm, + session_manager_t * sm, u32 segment_size, + u8 ** segment_name); +void +session_manager_del (session_manager_main_t * smm, session_manager_t * sm); +void +connects_session_manager_init (session_manager_main_t * smm, u8 session_type); + +/* + * Stream session functions + */ + +stream_session_t *stream_session_lookup_listener4 (ip4_address_t * lcl, + u16 lcl_port, u8 proto); +stream_session_t *stream_session_lookup4 (ip4_address_t * lcl, + ip4_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto, + u32 thread_index); +stream_session_t *stream_session_lookup_listener6 (ip6_address_t * lcl, + u16 lcl_port, u8 proto); +stream_session_t *stream_session_lookup6 (ip6_address_t * lcl, + ip6_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8, u32 thread_index); +transport_connection_t + * stream_session_lookup_transport4 (session_manager_main_t * smm, + ip4_address_t * lcl, + ip4_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto, + u32 thread_index); +transport_connection_t + * stream_session_lookup_transport6 (session_manager_main_t * smm, + ip6_address_t * lcl, + ip6_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto, + u32 thread_index); +stream_session_t *stream_session_lookup_listener (ip46_address_t * lcl, + u16 lcl_port, u8 proto); + /* Get a session from a combined handle: thread index in the upper 32 bits (asserted to equal thread_index), pool index in the lower 32 bits */ +always_inline stream_session_t * +stream_session_get_tsi (u64 ti_and_si, u32 thread_index) +{ + ASSERT ((u32) (ti_and_si >> 32) == thread_index); + return pool_elt_at_index (session_manager_main.sessions[thread_index], + ti_and_si & 0xFFFFFFFFULL); +} + /* Get a session by pool index from the given thread's pool; the index is not checked here */ +always_inline stream_session_t * +stream_session_get (u64 si, u32 thread_index) +{ + return pool_elt_at_index (session_manager_main.sessions[thread_index], si); +} + /* Like stream_session_get() but returns 0 for an out-of-range thread index or a free pool index */ +always_inline stream_session_t * +stream_session_get_if_valid (u64 si, u32 thread_index) +{ + if (thread_index >= vec_len (session_manager_main.sessions)) + return 0; + + if 
(pool_is_free_index (session_manager_main.sessions[thread_index], si)) + return 0; + + return pool_elt_at_index (session_manager_main.sessions[thread_index], si); +} + +always_inline stream_session_t * +stream_session_listener_get (u8 sst, u64 si) +{ + return pool_elt_at_index (session_manager_main.listen_sessions[sst], si); +} + /* Pool index of a session: taken from the listen pool of its type for LISTENING sessions, from its thread's session pool otherwise */ +always_inline u32 +stream_session_get_index (stream_session_t * s) +{ + if (s->session_state == SESSION_STATE_LISTENING) + return s - session_manager_main.listen_sessions[s->session_type]; + + return s - session_manager_main.sessions[s->thread_index]; +} + +always_inline u32 +stream_session_max_enqueue (transport_connection_t * tc) +{ + stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index); + return svm_fifo_max_enqueue (s->server_rx_fifo); +} + +int +stream_session_enqueue_data (transport_connection_t * tc, u8 * data, u16 len, + u8 queue_event); +u32 +stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer, + u32 offset, u32 max_bytes); +u32 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes); + +void +stream_session_connect_notify (transport_connection_t * tc, u8 sst, + u8 is_fail); +void stream_session_accept_notify (transport_connection_t * tc); +void stream_session_disconnect_notify (transport_connection_t * tc); +void stream_session_delete_notify (transport_connection_t * tc); +void stream_session_reset_notify (transport_connection_t * tc); +int +stream_session_accept (transport_connection_t * tc, u32 listener_index, + u8 sst, u8 notify); +void stream_session_open (u8 sst, ip46_address_t * addr, + u16 port_host_byte_order, u32 api_client_index); +void stream_session_disconnect (stream_session_t * s); +void stream_session_cleanup (stream_session_t * s); +int +stream_session_start_listen (u32 server_index, ip46_address_t * ip, u16 port); +void stream_session_stop_listen (u32 server_index); + +u8 *format_stream_session (u8 * s, va_list * args); + +void session_register_transport (u8 
type, const transport_proto_vft_t * vft); +transport_proto_vft_t *session_get_transport_vft (u8 type); + +#endif /* __included_session_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c new file mode 100644 index 00000000..9d068684 --- /dev/null +++ b/src/vnet/session/session_api.c @@ -0,0 +1,821 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include "application_interface.h" + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_session_api_msg \ +_(MAP_ANOTHER_SEGMENT_REPLY, map_another_segment_reply) \ +_(BIND_URI, bind_uri) \ +_(UNBIND_URI, unbind_uri) \ +_(CONNECT_URI, connect_uri) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(DISCONNECT_SESSION_REPLY, disconnect_session_reply) \ +_(ACCEPT_SESSION_REPLY, accept_session_reply) \ +_(RESET_SESSION_REPLY, reset_session_reply) \ +_(BIND_SOCK, bind_sock) \ +_(UNBIND_SOCK, unbind_sock) \ +_(CONNECT_SOCK, connect_sock) \ +_(DISCONNECT_SOCK, disconnect_sock) \ +_(DISCONNECT_SOCK_REPLY, disconnect_sock_reply) \ +_(ACCEPT_SOCK_REPLY, accept_sock_reply) \ +_(RESET_SOCK_REPLY, reset_sock_reply) \ + /* Send a MAP_ANOTHER_SEGMENT message carrying the segment name and size to the client's input queue; returns -1 if the client has no input queue, 0 otherwise */ +static int +send_add_segment_callback (u32 api_client_index, const u8 * segment_name, + u32 segment_size) +{ + vl_api_map_another_segment_t *mp; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (api_client_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_MAP_ANOTHER_SEGMENT); + mp->segment_size = segment_size; + /* mp was zeroed above and at most size - 1 bytes are copied, so the name stays NUL terminated */ strncpy ((char *) mp->segment_name, (char *) segment_name, + sizeof (mp->segment_name) - 1); + + vl_msg_api_send_shmem (q, (u8 *) & mp); + + return 0; +} + +static int +send_session_accept_uri_callback (stream_session_t * s) +{ + vl_api_accept_session_t *mp; + unix_shared_memory_queue_t *q, *vpp_queue; + application_t *server = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (server->api_client_index); + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_ACCEPT_SESSION); + + /* Note: session_type is the first octet in all types of sessions */ + + mp->accept_cookie = server->accept_cookie; + mp->server_rx_fifo = (u64) s->server_rx_fifo; + 
mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->session_thread_index = s->thread_index; + mp->session_index = s->session_index; + mp->session_type = s->session_type; + mp->vpp_event_queue_address = (u64) vpp_queue; + vl_msg_api_send_shmem (q, (u8 *) & mp); + + return 0; +} + +static void +send_session_disconnect_uri_callback (stream_session_t * s) +{ + vl_api_disconnect_session_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_DISCONNECT_SESSION); + + mp->session_thread_index = s->thread_index; + mp->session_index = s->session_index; + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static int +send_session_connected_uri_callback (u32 api_client_index, + stream_session_t * s, u8 is_fail) +{ + vl_api_connect_uri_reply_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_lookup (api_client_index); + u8 *seg_name; + unix_shared_memory_queue_t *vpp_queue; + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_URI_REPLY); + mp->context = app->api_context; + mp->retval = is_fail; + if (!is_fail) + { + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + mp->server_rx_fifo = (u64) s->server_rx_fifo; + mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->session_thread_index = s->thread_index; + mp->session_index = s->session_index; + mp->session_type = s->session_type; + mp->vpp_event_queue_address = (u64) vpp_queue; + mp->client_event_queue_address = (u64) app->event_queue; + + session_manager_get_segment_info (s->server_segment_index, &seg_name, + &mp->segment_size); + mp->segment_name_length = vec_len (seg_name); + if (mp->segment_name_length) + 
clib_memcpy (mp->segment_name, seg_name, mp->segment_name_length); + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); + + /* Remove client if connect failed */ + if (is_fail) + application_del (app); + + return 0; +} + +/** + * Redirect a connect_uri message to the indicated server. + * Only sent if the server has bound the related port with + * URI_OPTIONS_FLAGS_USE_FIFO + */ +static int +redirect_connect_uri_callback (u32 server_api_client_index, void *mp_arg) +{ + vl_api_connect_uri_t *mp = mp_arg; + unix_shared_memory_queue_t *server_q, *client_q; + vlib_main_t *vm = vlib_get_main (); + f64 timeout = vlib_time_now (vm) + 0.5; + int rv = 0; + + server_q = vl_api_client_index_to_input_queue (server_api_client_index); + + if (!server_q) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + client_q = vl_api_client_index_to_input_queue (mp->client_index); + if (!client_q) + { + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto out; + } + + /* Tell the server the client's API queue address, so it can reply */ + mp->client_queue_address = (u64) client_q; + + /* + * Bounce message handlers MUST NOT block the data-plane. 
+ * Spin waiting for the queue lock, but + */ + + while (vlib_time_now (vm) < timeout) + { + rv = + unix_shared_memory_queue_add (server_q, (u8 *) & mp, 1 /*nowait */ ); + switch (rv) + { + /* correctly enqueued */ + case 0: + return VNET_CONNECT_REDIRECTED; + + /* continue spinning, wait for pthread_mutex_trylock to work */ + case -1: + continue; + + /* queue stuffed, drop the msg */ + case -2: + rv = VNET_API_ERROR_QUEUE_FULL; + goto out; + } + } +out: + /* Dispose of the message */ + vl_msg_api_free (mp); + return rv; +} + +static u64 +make_session_handle (stream_session_t * s) +{ + return (u64) s->session_index << 32 | (u64) s->thread_index; +} + +static int +send_session_accept_callback (stream_session_t * s) +{ + vl_api_accept_sock_t *mp; + unix_shared_memory_queue_t *q, *vpp_queue; + application_t *server = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (server->api_client_index); + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_ACCEPT_SOCK); + + /* Note: session_type is the first octet in all types of sessions */ + + mp->accept_cookie = server->accept_cookie; + mp->server_rx_fifo = (u64) s->server_rx_fifo; + mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->handle = make_session_handle (s); + mp->vpp_event_queue_address = (u64) vpp_queue; + vl_msg_api_send_shmem (q, (u8 *) & mp); + + return 0; +} + +static int +send_session_connected_callback (u32 api_client_index, stream_session_t * s, + u8 is_fail) +{ + vl_api_connect_sock_reply_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_lookup (api_client_index); + u8 *seg_name; + unix_shared_memory_queue_t *vpp_queue; + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_SOCK_REPLY); + 
mp->context = app->api_context; + mp->retval = is_fail; + if (!is_fail) + { + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + mp->server_rx_fifo = (u64) s->server_rx_fifo; + mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->handle = make_session_handle (s); + mp->vpp_event_queue_address = (u64) vpp_queue; + mp->client_event_queue_address = (u64) app->event_queue; + + session_manager_get_segment_info (s->server_segment_index, &seg_name, + &mp->segment_size); + mp->segment_name_length = vec_len (seg_name); + if (mp->segment_name_length) + clib_memcpy (mp->segment_name, seg_name, mp->segment_name_length); + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); + + /* Remove client if connect failed */ + if (is_fail) + application_del (app); + + return 0; +} + +static void +send_session_disconnect_callback (stream_session_t * s) +{ + vl_api_disconnect_sock_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_DISCONNECT_SOCK); + + mp->handle = make_session_handle (s); + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +/** + * Redirect a connect_uri message to the indicated server. 
+ * Only sent if the server has bound the related port with + * URI_OPTIONS_FLAGS_USE_FIFO + */ +static int +redirect_connect_callback (u32 server_api_client_index, void *mp_arg) +{ + vl_api_connect_sock_t *mp = mp_arg; + unix_shared_memory_queue_t *server_q, *client_q; + vlib_main_t *vm = vlib_get_main (); + f64 timeout = vlib_time_now (vm) + 0.5; + int rv = 0; + + server_q = vl_api_client_index_to_input_queue (server_api_client_index); + + if (!server_q) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + client_q = vl_api_client_index_to_input_queue (mp->client_index); + if (!client_q) + { + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto out; + } + + /* Tell the server the client's API queue address, so it can reply */ + mp->client_queue_address = (u64) client_q; + + /* + * Bounce message handlers MUST NOT block the data-plane. + * Spin waiting for the queue lock, but + */ + + while (vlib_time_now (vm) < timeout) + { + rv = + unix_shared_memory_queue_add (server_q, (u8 *) & mp, 1 /*nowait */ ); + switch (rv) + { + /* correctly enqueued */ + case 0: + return VNET_CONNECT_REDIRECTED; + + /* continue spinning, wait for pthread_mutex_trylock to work */ + case -1: + continue; + + /* queue stuffed, drop the msg */ + case -2: + rv = VNET_API_ERROR_QUEUE_FULL; + goto out; + } + } +out: + /* Dispose of the message */ + vl_msg_api_free (mp); + return rv; +} + +static session_cb_vft_t uri_session_cb_vft = { + .session_accept_callback = send_session_accept_uri_callback, + .session_disconnect_callback = send_session_disconnect_uri_callback, + .session_connected_callback = send_session_connected_uri_callback, + .add_segment_callback = send_add_segment_callback, + .redirect_connect_callback = redirect_connect_uri_callback +}; + +static session_cb_vft_t session_cb_vft = { + .session_accept_callback = send_session_accept_callback, + .session_disconnect_callback = send_session_disconnect_callback, + .session_connected_callback = send_session_connected_callback, + 
.add_segment_callback = send_add_segment_callback, + .redirect_connect_callback = redirect_connect_callback +}; + +static int +api_session_not_valid (u32 session_index, u32 thread_index) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + stream_session_t *pool; + + if (thread_index >= vec_len (smm->sessions)) + return VNET_API_ERROR_INVALID_VALUE; + + pool = smm->sessions[thread_index]; + + if (pool_is_free_index (pool, session_index)) + return VNET_API_ERROR_INVALID_VALUE_2; + + return 0; +} + +static void +vl_api_bind_uri_t_handler (vl_api_bind_uri_t * mp) +{ + vl_api_bind_uri_reply_t *rmp; + vnet_bind_args_t _a, *a = &_a; + char segment_name[128]; + u32 segment_name_length; + int rv; + + _Static_assert (sizeof (u64) * SESSION_OPTIONS_N_OPTIONS <= + sizeof (mp->options), + "Out of options, fix api message definition"); + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + + a->uri = (char *) mp->uri; + a->api_client_index = mp->client_index; + a->options = mp->options; + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &uri_session_cb_vft; + + a->options[SESSION_OPTIONS_SEGMENT_SIZE] = mp->initial_segment_size; + a->options[SESSION_OPTIONS_ACCEPT_COOKIE] = mp->accept_cookie; + rv = vnet_bind_uri (a); + + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_BIND_URI_REPLY, ({ + rmp->retval = rv; + if (!rv) + { + rmp->segment_name_length = 0; + /* $$$$ policy? 
*/ + rmp->segment_size = mp->initial_segment_size; + if (segment_name_length) + { + memcpy (rmp->segment_name, segment_name, segment_name_length); + rmp->segment_name_length = segment_name_length; + } + rmp->server_event_queue_address = a->server_event_queue_address; + } + })); + /* *INDENT-ON* */ + +} + +static void +vl_api_unbind_uri_t_handler (vl_api_unbind_uri_t * mp) +{ + vl_api_unbind_uri_reply_t *rmp; + int rv; + + rv = vnet_unbind_uri ((char *) mp->uri, mp->client_index); + + REPLY_MACRO (VL_API_UNBIND_URI_REPLY); +} + +static void +vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) +{ + vnet_connect_args_t _a, *a = &_a; + + a->uri = (char *) mp->uri; + a->api_client_index = mp->client_index; + a->api_context = mp->context; + a->options = mp->options; + a->session_cb_vft = &uri_session_cb_vft; + a->mp = mp; + vnet_connect_uri (a); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + vl_api_disconnect_session_reply_t *rmp; + int rv; + + rv = api_session_not_valid (mp->session_index, mp->session_thread_index); + if (!rv) + rv = vnet_disconnect_session (mp->client_index, mp->session_index, + mp->session_thread_index); + + REPLY_MACRO (VL_API_DISCONNECT_SESSION_REPLY); +} + +static void +vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t * + mp) +{ + if (api_session_not_valid (mp->session_index, mp->session_thread_index)) + { + clib_warning ("Invalid session!"); + return; + } + + /* Client objected to disconnecting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + /* Disconnect has been confirmed. 
Confirm close to transport */ + vnet_disconnect_session (mp->client_index, mp->session_index, + mp->session_thread_index); +} + +static void +vl_api_reset_session_reply_t_handler (vl_api_reset_session_reply_t * mp) +{ + stream_session_t *s; + + if (api_session_not_valid (mp->session_index, mp->session_thread_index)) + { + clib_warning ("Invalid session!"); + return; + } + + /* Client objected to resetting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + s = stream_session_get (mp->session_index, mp->session_thread_index); + + /* This comes as a response to a reset, transport only waiting for + * confirmation to remove connection state, no need to disconnect */ + stream_session_cleanup (s); +} + +static void +vl_api_accept_session_reply_t_handler (vl_api_accept_session_reply_t * mp) +{ + stream_session_t *s; + int rv; + + if (api_session_not_valid (mp->session_index, mp->session_thread_index)) + return; + + s = stream_session_get (mp->session_index, mp->session_thread_index); + rv = mp->retval; + + if (rv) + { + /* Server isn't interested, kill the session */ + stream_session_disconnect (s); + return; + } + + s->session_state = SESSION_STATE_READY; +} + +static void +vl_api_map_another_segment_reply_t_handler (vl_api_map_another_segment_reply_t + * mp) +{ + clib_warning ("not implemented"); +} + +static void +vl_api_bind_sock_t_handler (vl_api_bind_sock_t * mp) +{ + vl_api_bind_sock_reply_t *rmp; + vnet_bind_args_t _a, *a = &_a; + char segment_name[128]; + u32 segment_name_length; + int rv; + + STATIC_ASSERT (sizeof (u64) * SESSION_OPTIONS_N_OPTIONS <= + sizeof (mp->options), + "Out of options, fix api message definition"); + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + + clib_memcpy (&a->tep.ip, mp->ip, + (mp->is_ip4 ? 
sizeof (ip4_address_t) : + sizeof (ip6_address_t))); + a->tep.is_ip4 = mp->is_ip4; + a->tep.port = mp->port; + a->tep.vrf = mp->vrf; + + a->api_client_index = mp->client_index; + a->options = mp->options; + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &session_cb_vft; + + rv = vnet_bind_uri (a); + + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_BIND_SOCK_REPLY, ({ + rmp->retval = rv; + if (!rv) + { + rmp->segment_name_length = 0; + rmp->segment_size = mp->options[SESSION_OPTIONS_SEGMENT_SIZE]; + if (segment_name_length) + { + memcpy(rmp->segment_name, segment_name, segment_name_length); + rmp->segment_name_length = segment_name_length; + } + rmp->server_event_queue_address = a->server_event_queue_address; + } + })); + /* *INDENT-ON* */ +} + +static void +vl_api_unbind_sock_t_handler (vl_api_unbind_sock_t * mp) +{ + vl_api_unbind_sock_reply_t *rmp; + vnet_unbind_args_t _a, *a = &_a; + int rv; + + a->api_client_index = mp->client_index; + a->handle = mp->handle; + + rv = vnet_unbind (a); + + REPLY_MACRO (VL_API_UNBIND_SOCK_REPLY); +} + +static void +vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp) +{ + vnet_connect_args_t _a, *a = &_a; + + clib_memcpy (&a->tep.ip, mp->ip, + (mp->is_ip4 ? 
sizeof (ip4_address_t) : + sizeof (ip6_address_t))); + a->tep.is_ip4 = mp->is_ip4; + a->tep.port = mp->port; + a->tep.vrf = mp->vrf; + a->options = mp->options; + a->session_cb_vft = &session_cb_vft; + a->api_context = mp->context; + a->mp = mp; + + vnet_connect (a); +} + +static void +vl_api_disconnect_sock_t_handler (vl_api_disconnect_sock_t * mp) +{ + vnet_disconnect_args_t _a, *a = &_a; + vl_api_disconnect_sock_reply_t *rmp; + int rv; + + a->api_client_index = mp->client_index; + a->handle = mp->handle; + rv = vnet_disconnect (a); + + REPLY_MACRO (VL_API_DISCONNECT_SOCK_REPLY); +} + +static void +vl_api_disconnect_sock_reply_t_handler (vl_api_disconnect_sock_reply_t * mp) +{ + vnet_disconnect_args_t _a, *a = &_a; + + /* Client objected to disconnecting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + a->api_client_index = mp->client_index; + a->handle = mp->handle; + + vnet_disconnect (a); +} + +static void +vl_api_reset_sock_reply_t_handler (vl_api_reset_sock_reply_t * mp) +{ + stream_session_t *s; + u32 session_index, thread_index; + + /* Client objected to resetting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + if (api_parse_session_handle (mp->handle, &session_index, &thread_index)) + { + clib_warning ("Invalid handle"); + return; + } + + s = stream_session_get (session_index, thread_index); + + /* This comes as a response to a reset, transport only waiting for + * confirmation to remove connection state, no need to disconnect */ + stream_session_cleanup (s); +} + +static void +vl_api_accept_sock_reply_t_handler (vl_api_accept_sock_reply_t * mp) +{ + stream_session_t *s; + u32 session_index, thread_index; + + if (api_parse_session_handle (mp->handle, &session_index, &thread_index)) + { + clib_warning ("Invalid handle"); + return; + } + s = stream_session_get (session_index, thread_index); + + if (mp->retval) + 
{ + /* Server isn't interested, kill the session */ + stream_session_disconnect (s); + return; + } + + s->session_state = SESSION_STATE_READY; +} + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_session; +#undef _ +} + +/* + * session_api_hookup + * Add uri's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../open-repo/vlib/memclnt_vlib.c:memclnt_process() + */ +static clib_error_t * +session_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_session_api_msg; +#undef _ + + /* + * Messages which bounce off the data-plane to + * an API client. Simply tells the message handling infra not + * to free the message. + * + * Bounced message handlers MUST NOT block the data plane + */ + am->message_bounce[VL_API_CONNECT_URI] = 1; + am->message_bounce[VL_API_CONNECT_SOCK] = 1; + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (session_api_hookup); +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c new file mode 100644 index 00000000..b2943a1c --- /dev/null +++ b/src/vnet/session/session_cli.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include + +/** + * Format stream session as per the following format + * + * verbose: + * "Connection", "Rx fifo", "Tx fifo", "Session Index" + * non-verbose: + * "Connection" + */ +u8 * +format_stream_session (u8 * s, va_list * args) +{ + stream_session_t *ss = va_arg (*args, stream_session_t *); + int verbose = va_arg (*args, int); + transport_proto_vft_t *tp_vft; + u8 *str = 0; + + tp_vft = session_get_transport_vft (ss->session_type); + + if (verbose) + str = format (0, "%-20llp%-20llp%-15lld", ss->server_rx_fifo, + ss->server_tx_fifo, stream_session_get_index (ss)); + + if (ss->session_state == SESSION_STATE_READY) + { + s = format (s, "%-40U%v", tp_vft->format_connection, + ss->connection_index, ss->thread_index, str); + } + else if (ss->session_state == SESSION_STATE_LISTENING) + { + s = format (s, "%-40U%v", tp_vft->format_listener, ss->connection_index, + str); + } + else if (ss->session_state == SESSION_STATE_READY) + { + s = + format (s, "%-40U%v", tp_vft->format_half_open, ss->connection_index, + str); + } + else if (ss->session_state == SESSION_STATE_CLOSED) + { + s = format (s, "[CL] %-40U%v", tp_vft->format_connection, + ss->connection_index, ss->thread_index, str); + } + else + { + clib_warning ("Session in unknown state!"); + } + + vec_free (str); + + return s; +} + +static clib_error_t * +show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + session_manager_main_t *smm = &session_manager_main; + int verbose = 0, i; + stream_session_t *pool; + stream_session_t *s; + 
u8 *str = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + else + break; + } + + for (i = 0; i < vec_len (smm->sessions); i++) + { + u32 once_per_pool; + pool = smm->sessions[i]; + + once_per_pool = 1; + + if (pool_elts (pool)) + { + + vlib_cli_output (vm, "Thread %d: %d active sessions", + i, pool_elts (pool)); + if (verbose) + { + if (once_per_pool) + { + str = format (str, "%-40s%-20s%-20s%-15s", + "Connection", "Rx fifo", "Tx fifo", + "Session Index"); + vlib_cli_output (vm, "%v", str); + vec_reset_length (str); + once_per_pool = 0; + } + + /* *INDENT-OFF* */ + pool_foreach (s, pool, + ({ + vlib_cli_output (vm, "%U", format_stream_session, s, verbose); + })); + /* *INDENT-ON* */ + } + } + else + vlib_cli_output (vm, "Thread %d: no active sessions", i); + } + vec_free (str); + + return 0; +} + +VLIB_CLI_COMMAND (show_uri_command, static) = +{ +.path = "show session",.short_help = "show session [verbose]",.function = + show_session_command_fn,}; + + +static clib_error_t * +clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + session_manager_main_t *smm = &session_manager_main; + u32 thread_index = 0; + u32 session_index = ~0; + stream_session_t *pool, *session; + application_t *server; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "thread %d", &thread_index)) + ; + else if (unformat (input, "session %d", &session_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (session_index == ~0) + return clib_error_return (0, "session required, but not set."); + + if (thread_index > vec_len (smm->sessions)) + return clib_error_return (0, "thread %d out of range [0-%d]", + thread_index, vec_len (smm->sessions)); + + pool = smm->sessions[thread_index]; + + if (pool_is_free_index (pool, session_index)) + return clib_error_return (0, "session 
%d not active", session_index); + + session = pool_elt_at_index (pool, session_index); + server = application_get (session->app_index); + + /* Disconnect both app and transport */ + server->cb_fns.session_disconnect_callback (session); + + return 0; +} + +VLIB_CLI_COMMAND (clear_uri_session_command, static) = +{ +.path = "clear session",.short_help = + "clear session thread session ",.function = + clear_session_command_fn,}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c new file mode 100644 index 00000000..abd94ba4 --- /dev/null +++ b/src/vnet/session/transport.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +u32 +transport_endpoint_lookup (transport_endpoint_table_t *ht, ip46_address_t *ip, + u16 port) +{ + clib_bihash_kv_24_8_t kv; + int rv; + + kv.key[0] = ip->as_u64[0]; + kv.key[1] = ip->as_u64[1]; + kv.key[2] = port; + + rv = clib_bihash_search_inline_24_8 (ht, &kv); + if (rv == 0) + return kv.value; + + return TRANSPORT_ENDPOINT_INVALID_INDEX; +} + +void +transport_endpoint_table_add (transport_endpoint_table_t *ht, + transport_endpoint_t *te, u32 value) +{ + clib_bihash_kv_24_8_t kv; + + kv.key[0] = te->ip.as_u64[0]; + kv.key[1] = te->ip.as_u64[1]; + kv.key[2] = te->port; + kv.value = value; + + clib_bihash_add_del_24_8 (ht, &kv, 1); +} + +void +transport_endpoint_table_del (transport_endpoint_table_t *ht, + transport_endpoint_t *te) +{ + clib_bihash_kv_24_8_t kv; + + kv.key[0] = te->ip.as_u64[0]; + kv.key[1] = te->ip.as_u64[1]; + kv.key[2] = te->port; + + clib_bihash_add_del_24_8 (ht, &kv, 0); +} + + + diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h new file mode 100644 index 00000000..2d4415ba --- /dev/null +++ b/src/vnet/session/transport.h @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef VNET_VNET_URI_TRANSPORT_H_ +#define VNET_VNET_URI_TRANSPORT_H_ + +#include +#include +#include +#include + +/* + * Protocol independent transport properties associated to a session + */ +typedef struct _transport_connection +{ + ip46_address_t rmt_ip; /**< Remote IP */ + ip46_address_t lcl_ip; /**< Local IP */ + u16 lcl_port; /**< Local port */ + u16 rmt_port; /**< Remote port */ + u8 proto; /**< Transport protocol id */ + + u32 s_index; /**< Parent session index */ + u32 c_index; /**< Connection index in transport pool */ + u8 is_ip4; /**< Flag if IP4 connection */ + u32 thread_index; /**< Worker-thread index */ + + /** Macros for 'derived classes' where base is named "connection" */ +#define c_lcl_ip connection.lcl_ip +#define c_rmt_ip connection.rmt_ip +#define c_lcl_ip4 connection.lcl_ip.ip4 +#define c_rmt_ip4 connection.rmt_ip.ip4 +#define c_lcl_ip6 connection.lcl_ip.ip6 +#define c_rmt_ip6 connection.rmt_ip.ip6 +#define c_lcl_port connection.lcl_port +#define c_rmt_port connection.rmt_port +#define c_proto connection.proto +#define c_state connection.state +#define c_s_index connection.s_index +#define c_c_index connection.c_index +#define c_is_ip4 connection.is_ip4 +#define c_thread_index connection.thread_index +} transport_connection_t; + +/* + * Transport protocol virtual function table + */ +typedef struct _transport_proto_vft +{ + /* + * Setup + */ + u32 (*bind) (vlib_main_t *, u32, ip46_address_t *, u16); + u32 (*unbind) (vlib_main_t *, u32); + int (*open) (ip46_address_t * addr, u16 port_host_byte_order); + void (*close) (u32 conn_index, u32 thread_index); + void (*cleanup) (u32 conn_index, u32 thread_index); + + /* + * Transmission + */ + u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b); + u16 (*send_mss) (transport_connection_t * tc); + u32 (*send_space) (transport_connection_t * tc); + u32 (*rx_fifo_offset) (transport_connection_t * tc); + + /* + * Connection retrieval + */ + transport_connection_t 
*(*get_connection) (u32 conn_idx, u32 thread_idx); + transport_connection_t *(*get_listener) (u32 conn_index); + transport_connection_t *(*get_half_open) (u32 conn_index); + + /* + * Format + */ + u8 *(*format_connection) (u8 * s, va_list * args); + u8 *(*format_listener) (u8 * s, va_list * args); + u8 *(*format_half_open) (u8 * s, va_list * args); + +} transport_proto_vft_t; + +/* 16 octets */ +typedef CLIB_PACKED (struct + { + union + { + struct + { + ip4_address_t src; ip4_address_t dst; + u16 src_port; + u16 dst_port; + /* align by making this 4 octets even though its a 1-bit field + * NOTE: avoid key overlap with other transports that use 5 tuples for + * session identification. + */ + u32 proto; + }; + u64 as_u64[2]; + }; + }) v4_connection_key_t; + +typedef CLIB_PACKED (struct + { + union + { + struct + { + /* 48 octets */ + ip6_address_t src; ip6_address_t dst; + u16 src_port; + u16 dst_port; u32 proto; u8 unused_for_now[8]; + }; u64 as_u64[6]; + }; + }) v6_connection_key_t; + +typedef clib_bihash_kv_16_8_t session_kv4_t; +typedef clib_bihash_kv_48_8_t session_kv6_t; + +always_inline void +make_v4_ss_kv (session_kv4_t * kv, ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + v4_connection_key_t key; + memset (&key, 0, sizeof (v4_connection_key_t)); + + key.src.as_u32 = lcl->as_u32; + key.dst.as_u32 = rmt->as_u32; + key.src_port = lcl_port; + key.dst_port = rmt_port; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v4_listener_kv (session_kv4_t * kv, ip4_address_t * lcl, u16 lcl_port, + u8 proto) +{ + v4_connection_key_t key; + memset (&key, 0, sizeof (v4_connection_key_t)); + + key.src.as_u32 = lcl->as_u32; + key.dst.as_u32 = 0; + key.src_port = lcl_port; + key.dst_port = 0; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v4_ss_kv_from_tc (session_kv4_t 
* kv, transport_connection_t * t) +{ + return make_v4_ss_kv (kv, &t->lcl_ip.ip4, &t->rmt_ip.ip4, t->lcl_port, + t->rmt_port, t->proto); +} + +/** + * Fill an IPv6 session lookup key/value pair from a 5-tuple. + * + * v6_connection_key_t is 48 octets (six u64 words), so all six words are + * copied into kv->key. Copying only the first two words would key the entry + * on the local address alone and ignore remote address, ports and proto. + * kv->value is set to ~0ULL. + */ +always_inline void +make_v6_ss_kv (session_kv6_t * kv, ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + v6_connection_key_t key; + memset (&key, 0, sizeof (v6_connection_key_t)); + + key.src.as_u64[0] = lcl->as_u64[0]; + key.src.as_u64[1] = lcl->as_u64[1]; + key.dst.as_u64[0] = rmt->as_u64[0]; + key.dst.as_u64[1] = rmt->as_u64[1]; + key.src_port = lcl_port; + key.dst_port = rmt_port; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->key[2] = key.as_u64[2]; + kv->key[3] = key.as_u64[3]; + kv->key[4] = key.as_u64[4]; + kv->key[5] = key.as_u64[5]; + kv->value = ~0ULL; +} + +/** + * Fill an IPv6 listener lookup key/value pair: same layout as the session + * key, with remote address and remote port left as zero. All six key words + * are copied so that local port and proto take part in the lookup. + */ +always_inline void +make_v6_listener_kv (session_kv6_t * kv, ip6_address_t * lcl, u16 lcl_port, + u8 proto) +{ + v6_connection_key_t key; + memset (&key, 0, sizeof (v6_connection_key_t)); + + key.src.as_u64[0] = lcl->as_u64[0]; + key.src.as_u64[1] = lcl->as_u64[1]; + key.dst.as_u64[0] = 0; + key.dst.as_u64[1] = 0; + key.src_port = lcl_port; + key.dst_port = 0; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->key[2] = key.as_u64[2]; + kv->key[3] = key.as_u64[3]; + kv->key[4] = key.as_u64[4]; + kv->key[5] = key.as_u64[5]; + kv->value = ~0ULL; +} + +always_inline void +make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * t) +{ + make_v6_ss_kv (kv, &t->lcl_ip.ip6, &t->rmt_ip.ip6, t->lcl_port, + t->rmt_port, t->proto); +} + +typedef struct _transport_endpoint +{ + ip46_address_t ip; + u16 port; + u8 is_ip4; + u32 vrf; +} transport_endpoint_t; + +typedef clib_bihash_24_8_t transport_endpoint_table_t; + +#define TRANSPORT_ENDPOINT_INVALID_INDEX ((u32)~0) + +u32 +transport_endpoint_lookup (transport_endpoint_table_t * ht, + ip46_address_t * ip, u16 port); +void transport_endpoint_table_add (transport_endpoint_table_t * ht, + transport_endpoint_t * te, u32 value); +void transport_endpoint_table_del (transport_endpoint_table_t * ht, + transport_endpoint_t * te); + +#endif /* VNET_VNET_URI_TRANSPORT_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local 
Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c new file mode 100644 index 00000000..0f9b7097 --- /dev/null +++ b/src/vnet/tcp/tcp.c @@ -0,0 +1,708 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +tcp_main_t tcp_main; + +/** + * Allocate a listener from the listener pool and bind it to a local + * address and port. + * + * @param vm - vlib main (unused here) + * @param session_index - index stored in the listener as c_s_index + * @param ip - local address; the ip4 or ip6 member is read per is_ip4 + * @param port_host_byte_order - local port, converted to network order + * @param is_ip4 - non-zero for an IPv4 listener, zero for IPv6 + * @return index of the new listener in tm->listener_pool + */ +static u32 +tcp_connection_bind (vlib_main_t * vm, u32 session_index, ip46_address_t * ip, + u16 port_host_byte_order, u8 is_ip4) +{ + tcp_main_t *tm = &tcp_main; + tcp_connection_t *listener; + + pool_get (tm->listener_pool, listener); + memset (listener, 0, sizeof (*listener)); + + listener->c_c_index = listener - tm->listener_pool; + listener->c_lcl_port = clib_host_to_net_u16 (port_host_byte_order); + + if (is_ip4) + listener->c_lcl_ip4.as_u32 = ip->ip4.as_u32; + else + clib_memcpy (&listener->c_lcl_ip6, &ip->ip6, sizeof (ip6_address_t)); + + listener->c_s_index = session_index; + /* Tag the listener with the requested address family, not always IPv4 */ + listener->c_proto = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + listener->state = TCP_STATE_LISTEN; + listener->c_is_ip4 = is_ip4; + + return listener->c_c_index; +} + +u32 +tcp_session_bind_ip4 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_host_byte_order) +{ + return tcp_connection_bind (vm, session_index, ip, port_host_byte_order, 1); +} + +u32 +tcp_session_bind_ip6 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_host_byte_order) +{ + return tcp_connection_bind (vm, 
session_index, ip, port_host_byte_order, 0); + +} + +static void +tcp_session_unbind (u32 listener_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + pool_put_index (tm->listener_pool, listener_index); +} + +u32 +tcp_session_unbind_ip4 (vlib_main_t * vm, u32 listener_index) +{ + tcp_session_unbind (listener_index); + return 0; +} + +u32 +tcp_session_unbind_ip6 (vlib_main_t * vm, u32 listener_index) +{ + tcp_session_unbind (listener_index); + return 0; +} + +transport_connection_t * +tcp_session_get_listener (u32 listener_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tcp_connection_t *tc; + tc = pool_elt_at_index (tm->listener_pool, listener_index); + return &tc->connection; +} + +/** + * Cleans up connection state. + * + * No notifications. + */ +void +tcp_connection_cleanup (tcp_connection_t * tc) +{ + tcp_main_t *tm = &tcp_main; + u32 tepi; + transport_endpoint_t *tep; + + /* Cleanup local endpoint if this was an active connect */ + tepi = transport_endpoint_lookup (&tm->local_endpoints_table, &tc->c_lcl_ip, + tc->c_lcl_port); + + /*XXX lock */ + if (tepi != TRANSPORT_ENDPOINT_INVALID_INDEX) + { + tep = pool_elt_at_index (tm->local_endpoints, tepi); + transport_endpoint_table_del (&tm->local_endpoints_table, tep); + pool_put (tm->local_endpoints, tep); + } + + /* Make sure all timers are cleared */ + tcp_connection_timers_reset (tc); + + /* Check if half-open */ + if (tc->state == TCP_STATE_SYN_SENT) + pool_put (tm->half_open_connections, tc); + else + pool_put (tm->connections[tc->c_thread_index], tc); +} + +/** + * Connection removal. + * + * This should be called only once connection enters CLOSED state. Note + * that it notifies the session of the removal event, so if the goal is to + * just remove the connection, call tcp_connection_cleanup instead. + */ +void +tcp_connection_del (tcp_connection_t * tc) +{ + stream_session_delete_notify (&tc->connection); + tcp_connection_cleanup (tc); +} + +/** + * Begin connection closing procedure. 
+ * + * If at the end the connection is not in CLOSED state, it is not removed. + * Instead, we rely on on TCP to advance through state machine to either + * 1) LAST_ACK (passive close) whereby when the last ACK is received + * tcp_connection_del is called. This notifies session of the delete and + * calls cleanup. + * 2) TIME_WAIT (active close) whereby after 2MSL the 2MSL timer triggers + * and cleanup is called. + */ +void +tcp_connection_close (tcp_connection_t * tc) +{ + /* Send FIN if needed */ + if (tc->state == TCP_STATE_ESTABLISHED || tc->state == TCP_STATE_SYN_RCVD + || tc->state == TCP_STATE_CLOSE_WAIT) + tcp_send_fin (tc); + + /* Switch state */ + if (tc->state == TCP_STATE_ESTABLISHED || tc->state == TCP_STATE_SYN_RCVD) + tc->state = TCP_STATE_FIN_WAIT_1; + else if (tc->state == TCP_STATE_SYN_SENT) + tc->state = TCP_STATE_CLOSED; + else if (tc->state == TCP_STATE_CLOSE_WAIT) + tc->state = TCP_STATE_LAST_ACK; + + /* Half-close connections are not supported XXX */ + + if (tc->state == TCP_STATE_CLOSED) + tcp_connection_del (tc); +} + +void +tcp_session_close (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc; + tc = tcp_connection_get (conn_index, thread_index); + tcp_connection_close (tc); +} + +void +tcp_session_cleanup (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc; + tc = tcp_connection_get (conn_index, thread_index); + tcp_connection_cleanup (tc); +} + +void * +ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4) +{ + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_interface_address_t *ia = 0; + + if (is_ip4) + { + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ , + ({ + return ip_interface_address_get_address (lm4, ia); + })); + /* *INDENT-ON* */ + } + else + { + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ , + ({ + return ip_interface_address_get_address (lm6, ia); + })); + /* 
*INDENT-ON* */ + } + + return 0; +} + +/** + * Allocate a local port for the given local address and, if successful, add + * an entry to the local endpoint table to mark the pair as used. + * + * @param tm - tcp main; owns the local endpoint pool and lookup table + * @param ip - local address the port is allocated for + * @return the allocated port (the caller converts it with + * clib_host_to_net_u16), or 0 if no free port was found. 0 is below + * the allocation range, so it cannot be confused with a valid port. + */ +u16 +tcp_allocate_local_port (tcp_main_t * tm, ip46_address_t * ip) +{ + u8 unique = 0; + transport_endpoint_t *tep; + u32 time_now, tei; + u16 min = 1024, max = 65535, tries; /* XXX configurable ? */ + + tries = max - min; + time_now = tcp_time_now (); + + /* Start at random point or max */ + pool_get (tm->local_endpoints, tep); + /* Zero the new element, as the other pool_get call sites in this file do */ + memset (tep, 0, sizeof (*tep)); + clib_memcpy (&tep->ip, ip, sizeof (*ip)); + /* Keep the upper 16 bits: a left shift by 16 stored in a u16 is always 0 */ + tep->port = random_u32 (&time_now) >> 16; + tep->port = tep->port < min ? max : tep->port; + + /* Search for first free slot */ + while (tries) + { + tei = transport_endpoint_lookup (&tm->local_endpoints_table, &tep->ip, + tep->port); + if (tei == TRANSPORT_ENDPOINT_INVALID_INDEX) + { + unique = 1; + break; + } + + tep->port--; + + if (tep->port < min) + tep->port = max; + + tries--; + } + + if (unique) + { + transport_endpoint_table_add (&tm->local_endpoints_table, tep, + tep - tm->local_endpoints); + + return tep->port; + } + + /* Failed: -1 would become 65535 in a u16, which is a valid port */ + pool_put (tm->local_endpoints, tep); + return 0; +} + +/** + * Initialize all connection timers as invalid + */ +void +tcp_connection_timers_init (tcp_connection_t * tc) +{ + int i; + + /* Set all to invalid */ + for (i = 0; i < TCP_N_TIMERS; i++) + { + tc->timers[i] = TCP_TIMER_HANDLE_INVALID; + } + + tc->rto = TCP_RTO_INIT; +} + +/** + * Stop all connection timers + */ +void +tcp_connection_timers_reset (tcp_connection_t * tc) +{ + int i; + for (i = 0; i < TCP_N_TIMERS; i++) + { + tcp_timer_reset (tc, i); + } +} + +/** Initialize tcp connection variables + * + * Should be called after having received a msg from the peer, i.e., a SYN or + * a SYNACK, such that connection options have already been exchanged. 
*/ +void +tcp_connection_init_vars (tcp_connection_t * tc) +{ + tcp_connection_timers_init (tc); + tcp_set_snd_mss (tc); + tc->sack_sb.head = TCP_INVALID_SACK_HOLE_INDEX; + tcp_cc_init (tc); +} + +/** + * Open an active connection: pick a local address and port, allocate a + * half-open connection and send a SYN. + * + * @param rmt_addr - remote address + * @param rmt_port - remote port, converted with clib_host_to_net_u16 + * @param is_ip4 - non-zero for IPv4, zero for IPv6 + * @return index of the half-open connection, or -1 if there is no route to + * the destination, no resolving interface, no local address on that + * interface, or no local port could be allocated + */ +int +tcp_connection_open (ip46_address_t * rmt_addr, u16 rmt_port, u8 is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tcp_connection_t *tc; + fib_prefix_t prefix; + u32 fei, sw_if_index; + ip46_address_t lcl_addr; + u16 lcl_port; + + /* + * Find the local address and allocate port + */ + memset (&lcl_addr, 0, sizeof (lcl_addr)); + + /* Find a FIB path to the destination */ + clib_memcpy (&prefix.fp_addr, rmt_addr, sizeof (*rmt_addr)); + prefix.fp_proto = is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; + prefix.fp_len = is_ip4 ? 32 : 128; + + fei = fib_table_lookup (0, &prefix); + + /* Couldn't find route to destination. Bail out. */ + if (fei == FIB_NODE_INDEX_INVALID) + return -1; + + sw_if_index = fib_entry_get_resolving_interface (fei); + + if (sw_if_index == (u32) ~ 0) + return -1; + + if (is_ip4) + { + ip4_address_t *ip4; + ip4 = ip_interface_get_first_ip (sw_if_index, 1); + /* ip_interface_get_first_ip returns 0 if no address is configured */ + if (ip4 == 0) + return -1; + lcl_addr.ip4.as_u32 = ip4->as_u32; + } + else + { + ip6_address_t *ip6; + ip6 = ip_interface_get_first_ip (sw_if_index, 0); + /* ip_interface_get_first_ip returns 0 if no address is configured */ + if (ip6 == 0) + return -1; + clib_memcpy (&lcl_addr.ip6, ip6, sizeof (*ip6)); + } + + /* Allocate source port */ + lcl_port = tcp_allocate_local_port (tm, &lcl_addr); + if (lcl_port < 1) + return -1; + + /* + * Create connection and send SYN + */ + + pool_get (tm->half_open_connections, tc); + memset (tc, 0, sizeof (*tc)); + + clib_memcpy (&tc->c_rmt_ip, rmt_addr, sizeof (ip46_address_t)); + clib_memcpy (&tc->c_lcl_ip, &lcl_addr, sizeof (ip46_address_t)); + tc->c_rmt_port = clib_host_to_net_u16 (rmt_port); + tc->c_lcl_port = clib_host_to_net_u16 (lcl_port); + tc->c_c_index = tc - tm->half_open_connections; + tc->c_is_ip4 = is_ip4; + + /* The other connection vars will be initialized after SYN ACK */ + tcp_connection_timers_init (tc); + + tcp_send_syn (tc); + + tc->state = TCP_STATE_SYN_SENT; + + 
return tc->c_c_index; +} + +int +tcp_session_open_ip4 (ip46_address_t * addr, u16 port) +{ + return tcp_connection_open (addr, port, 1); +} + +int +tcp_session_open_ip6 (ip46_address_t * addr, u16 port) +{ + return tcp_connection_open (addr, port, 0); +} + +u8 * +format_tcp_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + tcp_connection_t *tc; + + tc = tcp_connection_get (tci, thread_index); + + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + + return s; +} + +u8 * +format_tcp_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + tcp_connection_t *tc = tcp_connection_get (tci, thread_index); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_listener_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_listener_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_half_open_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_half_open_connection_get 
(tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_half_open_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_half_open_connection_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +transport_connection_t * +tcp_session_get_transport (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index); + return &tc->connection; +} + +transport_connection_t * +tcp_half_open_session_get_transport (u32 conn_index) +{ + tcp_connection_t *tc = tcp_half_open_connection_get (conn_index); + return &tc->connection; +} + +u16 +tcp_session_send_mss (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return tc->snd_mss; +} + +u32 +tcp_session_send_space (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return tcp_available_snd_space (tc); +} + +u32 +tcp_session_rx_fifo_offset (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return (tc->snd_una_max - tc->snd_una); +} + +/* *INDENT-OFF* */ +const static transport_proto_vft_t tcp4_proto = { + .bind = tcp_session_bind_ip4, + .unbind = tcp_session_unbind_ip4, + .push_header = tcp_push_header, + .get_connection = tcp_session_get_transport, + .get_listener = tcp_session_get_listener, + .get_half_open = tcp_half_open_session_get_transport, + .open = tcp_session_open_ip4, + .close = tcp_session_close, + .cleanup = tcp_session_cleanup, + .send_mss = tcp_session_send_mss, + .send_space = tcp_session_send_space, + .rx_fifo_offset = 
tcp_session_rx_fifo_offset, + .format_connection = format_tcp_session_ip4, + .format_listener = format_tcp_listener_session_ip4, + .format_half_open = format_tcp_half_open_session_ip4 +}; + +const static transport_proto_vft_t tcp6_proto = { + .bind = tcp_session_bind_ip6, + .unbind = tcp_session_unbind_ip6, + .push_header = tcp_push_header, + .get_connection = tcp_session_get_transport, + .get_listener = tcp_session_get_listener, + .get_half_open = tcp_half_open_session_get_transport, + .open = tcp_session_open_ip6, + .close = tcp_session_close, + .cleanup = tcp_session_cleanup, + .send_mss = tcp_session_send_mss, + .send_space = tcp_session_send_space, + .rx_fifo_offset = tcp_session_rx_fifo_offset, + .format_connection = format_tcp_session_ip6, + .format_listener = format_tcp_listener_session_ip6, + .format_half_open = format_tcp_half_open_session_ip6 +}; +/* *INDENT-ON* */ + +void +tcp_timer_keep_handler (u32 conn_index) +{ + u32 cpu_index = os_get_cpu_number (); + tcp_connection_t *tc; + + tc = tcp_connection_get (conn_index, cpu_index); + tc->timers[TCP_TIMER_KEEP] = TCP_TIMER_HANDLE_INVALID; + + tcp_connection_close (tc); +} + +void +tcp_timer_establish_handler (u32 conn_index) +{ + tcp_connection_t *tc; + u8 sst; + + tc = tcp_half_open_connection_get (conn_index); + tc->timers[TCP_TIMER_ESTABLISH] = TCP_TIMER_HANDLE_INVALID; + + ASSERT (tc->state == TCP_STATE_SYN_SENT); + + sst = tc->c_is_ip4 ? 
SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + stream_session_connect_notify (&tc->connection, sst, 1 /* fail */ ); + + tcp_connection_cleanup (tc); +} + +void +tcp_timer_2msl_handler (u32 conn_index) +{ + u32 cpu_index = os_get_cpu_number (); + tcp_connection_t *tc; + + tc = tcp_connection_get (conn_index, cpu_index); + tc->timers[TCP_TIMER_2MSL] = TCP_TIMER_HANDLE_INVALID; + + tcp_connection_del (tc); +} + +/* *INDENT-OFF* */ +static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] = +{ + tcp_timer_retransmit_handler, + tcp_timer_delack_handler, + 0, + tcp_timer_keep_handler, + tcp_timer_2msl_handler, + tcp_timer_retransmit_syn_handler, + tcp_timer_establish_handler +}; +/* *INDENT-ON* */ + +static void +tcp_expired_timers_dispatch (u32 * expired_timers) +{ + int i; + u32 connection_index, timer_id; + + for (i = 0; i < vec_len (expired_timers); i++) + { + /* Get session index and timer id */ + connection_index = expired_timers[i] & 0x0FFFFFFF; + timer_id = expired_timers[i] >> 28; + + /* Handle expiration */ + (*timer_expiration_handlers[timer_id]) (connection_index); + } +} + +void +tcp_initialize_timer_wheels (tcp_main_t * tm) +{ + tw_timer_wheel_16t_2w_512sl_t *tw; + vec_foreach (tw, tm->timer_wheels) + { + tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch, + 100e-3 /* timer period 100ms */ , ~0); + tw->last_run_time = vlib_time_now (tm->vlib_main); + } +} + +clib_error_t * +tcp_init (vlib_main_t * vm) +{ + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_thread_main_t *vtm = vlib_get_thread_main (); + clib_error_t *error = 0; + u32 num_threads; + + tm->vlib_main = vm; + tm->vnet_main = vnet_get_main (); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + /* + * Registrations + */ + + /* 
Register with IP */ + pi = ip_get_protocol_info (im, IP_PROTOCOL_TCP); + if (pi == 0) + return clib_error_return (0, "TCP protocol info AWOL"); + pi->format_header = format_tcp_header; + pi->unformat_pg_edit = unformat_pg_tcp_header; + + ip4_register_protocol (IP_PROTOCOL_TCP, tcp4_input_node.index); + + /* Register as transport with URI */ + session_register_transport (SESSION_TYPE_IP4_TCP, &tcp4_proto); + session_register_transport (SESSION_TYPE_IP6_TCP, &tcp6_proto); + + /* + * Initialize data structures + */ + + num_threads = 1 /* main thread */ + vtm->n_threads; + vec_validate (tm->connections, num_threads - 1); + + /* Initialize per worker thread tx buffers (used for control messages) */ + vec_validate (tm->tx_buffers, num_threads - 1); + + /* Initialize timer wheels */ + vec_validate (tm->timer_wheels, num_threads - 1); + tcp_initialize_timer_wheels (tm); + + vec_validate (tm->delack_connections, num_threads - 1); + + /* Initialize clocks per tick for TCP timestamp. Used to compute + * monotonically increasing timestamps. */ + tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock + / TCP_TSTAMP_RESOLUTION; + + clib_bihash_init_24_8 (&tm->local_endpoints_table, "local endpoint table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + return error; +} + +VLIB_INIT_FUNCTION (tcp_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h new file mode 100644 index 00000000..22f00a63 --- /dev/null +++ b/src/vnet/tcp/tcp.h @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _vnet_tcp_h_ +#define _vnet_tcp_h_ + +#include +#include +#include +#include +#include +#include + +#define TCP_TICK 10e-3 /**< TCP tick period (s) */ +#define THZ 1/TCP_TICK /**< TCP tick frequency */ +#define TCP_TSTAMP_RESOLUTION TCP_TICK /**< Time stamp resolution */ +#define TCP_PAWS_IDLE 24 * 24 * 60 * 60 * THZ /**< 24 days */ +#define TCP_MAX_OPTION_SPACE 40 + +#define TCP_DUPACK_THRESHOLD 3 +#define TCP_DEFAULT_RX_FIFO_SIZE 64 << 10 + +/** TCP FSM state definitions as per RFC793. */ +#define foreach_tcp_fsm_state \ + _(CLOSED, "CLOSED") \ + _(LISTEN, "LISTEN") \ + _(SYN_SENT, "SYN_SENT") \ + _(SYN_RCVD, "SYN_RCVD") \ + _(ESTABLISHED, "ESTABLISHED") \ + _(CLOSE_WAIT, "CLOSE_WAIT") \ + _(FIN_WAIT_1, "FIN_WAIT_1") \ + _(LAST_ACK, "LAST_ACK") \ + _(CLOSING, "CLOSING") \ + _(FIN_WAIT_2, "FIN_WAIT_2") \ + _(TIME_WAIT, "TIME_WAIT") + +typedef enum _tcp_state +{ +#define _(sym, str) TCP_STATE_##sym, + foreach_tcp_fsm_state +#undef _ + TCP_N_STATES +} tcp_state_t; + +format_function_t format_tcp_state; + +/** TCP timers */ +#define foreach_tcp_timer \ + _(RETRANSMIT, "RETRANSMIT") \ + _(DELACK, "DELAYED ACK") \ + _(PERSIST, "PERSIST") \ + _(KEEP, "KEEP") \ + _(2MSL, "2MSL") \ + _(RETRANSMIT_SYN, "RETRANSMIT_SYN") \ + _(ESTABLISH, "ESTABLISH") + +typedef enum _tcp_timers +{ +#define _(sym, str) TCP_TIMER_##sym, + foreach_tcp_timer +#undef _ + TCP_N_TIMERS +} tcp_timers_e; + +typedef void (timer_expiration_handler) (u32 index); + +extern timer_expiration_handler tcp_timer_delack_handler; +extern timer_expiration_handler 
tcp_timer_retransmit_handler; +extern timer_expiration_handler tcp_timer_retransmit_syn_handler; + +#define TCP_TIMER_HANDLE_INVALID ((u32) ~0) + +/* Timer delays as multiples of 100ms */ +#define TCP_TO_TIMER_TICK TCP_TICK*10 /* Period for converting from TCP + * ticks to timer units */ +#define TCP_DELACK_TIME 1 /* 0.1s */ +#define TCP_ESTABLISH_TIME 750 /* 75s */ +#define TCP_2MSL_TIME 300 /* 30s */ + +#define TCP_RTO_MAX 60 * THZ /* Min max RTO (60s) as per RFC6298 */ +#define TCP_RTT_MAX 30 * THZ /* 30s (probably too much) */ +#define TCP_RTO_SYN_RETRIES 3 /* SYN retries without doubling RTO */ +#define TCP_RTO_INIT 1 * THZ /* Initial retransmit timer */ + +void tcp_update_time (f64 now, u32 thread_index); + +/** TCP connection flags */ +#define foreach_tcp_connection_flag \ + _(DELACK, "Delay ACK") \ + _(SNDACK, "Send ACK") \ + _(BURSTACK, "Burst ACK set") \ + _(SENT_RCV_WND0, "Sent 0 receive window") \ + _(RECOVERY, "Recovery on") \ + _(FAST_RECOVERY, "Fast Recovery on") + +typedef enum _tcp_connection_flag_bits +{ +#define _(sym, str) TCP_CONN_##sym##_BIT, + foreach_tcp_connection_flag +#undef _ + TCP_CONN_N_FLAG_BITS +} tcp_connection_flag_bits_e; + +typedef enum _tcp_connection_flag +{ +#define _(sym, str) TCP_CONN_##sym = 1 << TCP_CONN_##sym##_BIT, + foreach_tcp_connection_flag +#undef _ + TCP_CONN_N_FLAGS +} tcp_connection_flags_e; + +/** TCP buffer flags */ +#define foreach_tcp_buf_flag \ + _ (ACK) /**< Sending ACK. */ \ + _ (DUPACK) /**< Sending DUPACK. 
*/ \ + +enum +{ +#define _(f) TCP_BUF_BIT_##f, + foreach_tcp_buf_flag +#undef _ + TCP_N_BUF_BITS, +}; + +enum +{ +#define _(f) TCP_BUF_FLAG_##f = 1 << TCP_BUF_BIT_##f, + foreach_tcp_buf_flag +#undef _ +}; + +#define TCP_MAX_SACK_BLOCKS 5 /**< Max number of SACK blocks stored */ +#define TCP_INVALID_SACK_HOLE_INDEX ((u32)~0) + +typedef struct _sack_scoreboard_hole +{ + u32 next; /**< Index for next entry in linked list */ + u32 prev; /**< Index for previous entry in linked list */ + u32 start; /**< Start sequence number */ + u32 end; /**< End sequence number */ +} sack_scoreboard_hole_t; + +typedef struct _sack_scoreboard +{ + sack_scoreboard_hole_t *holes; /**< Pool of holes */ + u32 head; /**< Index to first entry */ + u32 sacked_bytes; /**< Number of bytes sacked in sb */ +} sack_scoreboard_t; + +typedef enum _tcp_cc_algorithm_type +{ + TCP_CC_NEWRENO, +} tcp_cc_algorithm_type_e; + +typedef struct _tcp_cc_algorithm tcp_cc_algorithm_t; + +typedef enum _tcp_cc_ack_t +{ + TCP_CC_ACK, + TCP_CC_DUPACK, + TCP_CC_PARTIALACK +} tcp_cc_ack_t; + +typedef struct _tcp_connection +{ + transport_connection_t connection; /**< Common transport data. First! 
*/ + + u8 state; /**< TCP state as per tcp_state_t */ + u16 flags; /**< Connection flags (see tcp_conn_flags_e) */ + u32 timers[TCP_N_TIMERS]; /**< Timer handles into timer wheel */ + + /* TODO RFC4898 */ + + /** Send sequence variables RFC793 */ + u32 snd_una; /**< oldest unacknowledged sequence number */ + u32 snd_una_max; /**< newest unacknowledged sequence number + 1*/ + u32 snd_wnd; /**< send window */ + u32 snd_wl1; /**< seq number used for last snd.wnd update */ + u32 snd_wl2; /**< ack number used for last snd.wnd update */ + u32 snd_nxt; /**< next seq number to be sent */ + + /** Receive sequence variables RFC793 */ + u32 rcv_nxt; /**< next sequence number expected */ + u32 rcv_wnd; /**< receive window we expect */ + + u32 rcv_las; /**< rcv_nxt at last ack sent/rcv_wnd update */ + u32 iss; /**< initial sent sequence */ + u32 irs; /**< initial remote sequence */ + + /* Options */ + tcp_options_t opt; /**< TCP connection options parsed */ + u8 rcv_wscale; /**< Window scale to advertise to peer */ + u8 snd_wscale; /**< Window scale to use when sending */ + u32 tsval_recent; /**< Last timestamp received */ + u32 tsval_recent_age; /**< When last updated tstamp_recent*/ + + sack_block_t *snd_sacks; /**< Vector of SACKs to send. XXX Fixed size? 
*/ + sack_scoreboard_t sack_sb; /**< SACK "scoreboard" that tracks holes */ + + u8 rcv_dupacks; /**< Number of DUPACKs received */ + u8 snt_dupacks; /**< Number of DUPACKs sent in a burst */ + + /* Congestion control */ + u32 cwnd; /**< Congestion window */ + u32 ssthresh; /**< Slow-start threshold */ + u32 prev_ssthresh; /**< ssthresh before congestion */ + u32 bytes_acked; /**< Bytes acknowledged by current segment */ + u32 rtx_bytes; /**< Retransmitted bytes */ + u32 tsecr_last_ack; /**< Timestamp echoed to us in last health ACK */ + tcp_cc_algorithm_t *cc_algo; /**< Congestion control algorithm */ + + /* RTT and RTO */ + u32 rto; /**< Retransmission timeout */ + u32 rto_boff; /**< Index for RTO backoff */ + u32 srtt; /**< Smoothed RTT */ + u32 rttvar; /**< Smoothed mean RTT difference. Approximates variance */ + u32 rtt_ts; /**< Timestamp for tracked ACK */ + u32 rtt_seq; /**< Sequence number for tracked ACK */ + + u16 snd_mss; /**< Send MSS */ +} tcp_connection_t; + +struct _tcp_cc_algorithm +{ + void (*rcv_ack) (tcp_connection_t * tc); + void (*rcv_cong_ack) (tcp_connection_t * tc, tcp_cc_ack_t ack); + void (*congestion) (tcp_connection_t * tc); + void (*recovered) (tcp_connection_t * tc); + void (*init) (tcp_connection_t * tc); +}; + +#define tcp_fastrecovery_on(tc) (tc)->flags |= TCP_CONN_FAST_RECOVERY +#define tcp_fastrecovery_off(tc) (tc)->flags &= ~TCP_CONN_FAST_RECOVERY +#define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY) +#define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY)) +#define tcp_recovery_off(tc) ((tc)->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY)) +#define tcp_in_slowstart(tc) (tc->cwnd < tc->ssthresh) + +typedef enum +{ + TCP_IP4, + TCP_IP6, + TCP_N_AF, +} tcp_af_t; + +typedef enum _tcp_error +{ +#define tcp_error(n,s) TCP_ERROR_##n, +#include +#undef tcp_error + TCP_N_ERROR, +} tcp_error_t; + +typedef struct _tcp_lookup_dispatch +{ + u8 next, error; +} tcp_lookup_dispatch_t; + 
+typedef struct _tcp_main +{ + /* Per-worker thread tcp connection pools */ + tcp_connection_t **connections; + + /* Pool of listeners. */ + tcp_connection_t *listener_pool; + + /** Dispatch table by state and flags */ + tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64]; + + u8 log2_tstamp_clocks_per_tick; + f64 tstamp_ticks_per_clock; + + /** per-worker tx buffer free lists */ + u32 **tx_buffers; + + /* Per worker-thread timer wheel for connections timers */ + tw_timer_wheel_16t_2w_512sl_t *timer_wheels; + + /* Convenience per worker-thread vector of connections to DELACK */ + u32 **delack_connections; + + /* Pool of half-open connections on which we've sent a SYN */ + tcp_connection_t *half_open_connections; + + /* Pool of local TCP endpoints */ + transport_endpoint_t *local_endpoints; + + /* Local endpoints lookup table */ + transport_endpoint_table_t local_endpoints_table; + + /* Congestion control algorithms registered */ + tcp_cc_algorithm_t *cc_algos; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ip4_main_t *ip4_main; + ip6_main_t *ip6_main; +} tcp_main_t; + +extern tcp_main_t tcp_main; +extern vlib_node_registration_t tcp4_input_node; +extern vlib_node_registration_t tcp6_input_node; +extern vlib_node_registration_t tcp4_output_node; +extern vlib_node_registration_t tcp6_output_node; + +always_inline tcp_main_t * +vnet_get_tcp_main () +{ + return &tcp_main; +} + +always_inline tcp_connection_t * +tcp_connection_get (u32 conn_index, u32 thread_index) +{ + return pool_elt_at_index (tcp_main.connections[thread_index], conn_index); +} + +always_inline tcp_connection_t * +tcp_connection_get_if_valid (u32 conn_index, u32 thread_index) +{ + if (tcp_main.connections[thread_index] == 0) + return 0; + if (pool_is_free_index (tcp_main.connections[thread_index], conn_index)) + return 0; + return pool_elt_at_index (tcp_main.connections[thread_index], conn_index); +} + +void tcp_connection_close (tcp_connection_t * tc); +void 
tcp_connection_cleanup (tcp_connection_t * tc); +void tcp_connection_del (tcp_connection_t * tc); + +always_inline tcp_connection_t * +tcp_listener_get (u32 tli) +{ + return pool_elt_at_index (tcp_main.listener_pool, tli); +} + +always_inline tcp_connection_t * +tcp_half_open_connection_get (u32 conn_index) +{ + return pool_elt_at_index (tcp_main.half_open_connections, conn_index); +} + +void tcp_make_ack (tcp_connection_t * ts, vlib_buffer_t * b); +void tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b); +void tcp_make_synack (tcp_connection_t * ts, vlib_buffer_t * b); +void tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4); +void tcp_send_syn (tcp_connection_t * tc); +void tcp_send_fin (tcp_connection_t * tc); +void tcp_set_snd_mss (tcp_connection_t * tc); + +always_inline u32 +tcp_end_seq (tcp_header_t * th, u32 len) +{ + return th->seq_number + tcp_is_syn (th) + tcp_is_fin (th) + len; +} + +/* Modulo arithmetic for TCP sequence numbers */ +#define seq_lt(_s1, _s2) ((i32)((_s1)-(_s2)) < 0) +#define seq_leq(_s1, _s2) ((i32)((_s1)-(_s2)) <= 0) +#define seq_gt(_s1, _s2) ((i32)((_s1)-(_s2)) > 0) +#define seq_geq(_s1, _s2) ((i32)((_s1)-(_s2)) >= 0) + +/* Modulo arithmetic for timestamps */ +#define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0) +#define timestamp_leq(_t1, _t2) ((i32)((_t1)-(_t2)) <= 0) + +always_inline u32 +tcp_flight_size (const tcp_connection_t * tc) +{ + return tc->snd_una_max - tc->snd_una - tc->sack_sb.sacked_bytes + + tc->rtx_bytes; +} + +/** + * Initial cwnd as per RFC5681 + */ +always_inline u32 +tcp_initial_cwnd (const tcp_connection_t * tc) +{ + if (tc->snd_mss > 2190) + return 2 * tc->snd_mss; + else if (tc->snd_mss > 1095) + return 3 * tc->snd_mss; + else + return 4 * tc->snd_mss; +} + +always_inline u32 +tcp_loss_wnd (const tcp_connection_t * tc) +{ + return tc->snd_mss; +} + +always_inline u32 +tcp_available_wnd (const tcp_connection_t * tc) +{ + return clib_min (tc->cwnd, tc->snd_wnd); +} + +always_inline u32 
+tcp_available_snd_space (const tcp_connection_t * tc) +{ + u32 available_wnd = tcp_available_wnd (tc); + u32 flight_size = tcp_flight_size (tc); + + if (available_wnd <= flight_size) + return 0; + + return available_wnd - flight_size; +} + +void tcp_retransmit_first_unacked (tcp_connection_t * tc); + +void tcp_fast_retransmit (tcp_connection_t * tc); + +always_inline u32 +tcp_time_now (void) +{ + return clib_cpu_time_now () * tcp_main.tstamp_ticks_per_clock; +} + +u32 tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b); + +u32 +tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, + u32 max_bytes); + +void tcp_connection_timers_init (tcp_connection_t * tc); +void tcp_connection_timers_reset (tcp_connection_t * tc); + +void tcp_connection_init_vars (tcp_connection_t * tc); + +always_inline void +tcp_connection_force_ack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + /* Reset flags, make sure ack is sent */ + tc->flags = TCP_CONN_SNDACK; + vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; +} + +always_inline void +tcp_timer_set (tcp_connection_t * tc, u8 timer_id, u32 interval) +{ + tc->timers[timer_id] + = tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->c_c_index, timer_id, interval); +} + +always_inline void +tcp_retransmit_timer_set (tcp_main_t * tm, tcp_connection_t * tc) +{ + /* XXX Switch to faster TW */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT, + clib_max (tc->rto * TCP_TO_TIMER_TICK, 1)); +} + +always_inline void +tcp_timer_reset (tcp_connection_t * tc, u8 timer_id) +{ + if (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID) + return; + + tw_timer_stop_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->timers[timer_id]); + tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID; +} + +always_inline void +tcp_timer_update (tcp_connection_t * tc, u8 timer_id, u32 interval) +{ + if (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID) + tw_timer_stop_16t_2w_512sl 
(&tcp_main.timer_wheels[tc->c_thread_index], + tc->timers[timer_id]); + tc->timers[timer_id] = + tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->c_c_index, timer_id, interval); +} + +always_inline u8 +tcp_timer_is_active (tcp_connection_t * tc, tcp_timers_e timer) +{ + return tc->timers[timer] != TCP_TIMER_HANDLE_INVALID; +} + +void +scoreboard_remove_hole (sack_scoreboard_t * sb, + sack_scoreboard_hole_t * hole); + +always_inline sack_scoreboard_hole_t * +scoreboard_next_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole) +{ + if (hole->next != TCP_INVALID_SACK_HOLE_INDEX) + return pool_elt_at_index (sb->holes, hole->next); + return 0; +} + +always_inline sack_scoreboard_hole_t * +scoreboard_first_hole (sack_scoreboard_t * sb) +{ + if (sb->head != TCP_INVALID_SACK_HOLE_INDEX) + return pool_elt_at_index (sb->holes, sb->head); + return 0; +} + +always_inline void +scoreboard_clear (sack_scoreboard_t * sb) +{ + sack_scoreboard_hole_t *hole = scoreboard_first_hole (sb); + while ((hole = scoreboard_first_hole (sb))) + { + scoreboard_remove_hole (sb, hole); + } +} + +always_inline u32 +scoreboard_hole_bytes (sack_scoreboard_hole_t * hole) +{ + return hole->end - hole->start; +} + +always_inline void +tcp_cc_algo_register (tcp_cc_algorithm_type_e type, + const tcp_cc_algorithm_t * vft) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vec_validate (tm->cc_algos, type); + + tm->cc_algos[type] = *vft; +} + +always_inline tcp_cc_algorithm_t * +tcp_cc_algo_get (tcp_cc_algorithm_type_e type) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + return &tm->cc_algos[type]; +} + +void tcp_cc_init (tcp_connection_t * tc); + +/** + * Push TCP header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param seq - sequence number net order + * @param ack - ack number net order + * @param tcp_hdr_opts_len - header and options 
length in bytes + * @param flags - header flags + * @param wnd - window size + * + * @return - pointer to start of TCP header + */ +always_inline void * +vlib_buffer_push_tcp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, u32 seq, + u32 ack, u8 tcp_hdr_opts_len, u8 flags, + u16 wnd) +{ + tcp_header_t *th; + + th = vlib_buffer_push_uninit (b, tcp_hdr_opts_len); + + th->src_port = sp; + th->dst_port = dp; + th->seq_number = seq; + th->ack_number = ack; + th->data_offset_and_reserved = (tcp_hdr_opts_len >> 2) << 4; + th->flags = flags; + th->window = wnd; + th->checksum = 0; + th->urgent_pointer = 0; + return th; +} + +/** + * Push TCP header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param seq - sequence number host order + * @param ack - ack number host order + * @param tcp_hdr_opts_len - header and options length in bytes + * @param flags - header flags + * @param wnd - window size + * + * @return - pointer to start of TCP header + */ +always_inline void * +vlib_buffer_push_tcp (vlib_buffer_t * b, u16 sp_net, u16 dp_net, u32 seq, + u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd) +{ + return vlib_buffer_push_tcp_net_order (b, sp_net, dp_net, + clib_host_to_net_u32 (seq), + clib_host_to_net_u32 (ack), + tcp_hdr_opts_len, flags, + clib_host_to_net_u16 (wnd)); +} + +#endif /* _vnet_tcp_h_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def new file mode 100644 index 00000000..cff5ec13 --- /dev/null +++ b/src/vnet/tcp/tcp_error.def @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +tcp_error (NONE, "no error") +tcp_error (NO_LISTENER, "no listener for dst port") +tcp_error (LOOKUP_DROPS, "lookup drops") +tcp_error (DISPATCH, "Dispatch error") +tcp_error (ENQUEUED, "Packets pushed into rx fifo") +tcp_error (PURE_ACK, "Pure acks") +tcp_error (SYNS_RCVD, "SYNs received") +tcp_error (SYN_ACKS_RCVD, "SYN-ACKs received") +tcp_error (NOT_READY, "Session not ready for packets") +tcp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space") +tcp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") +tcp_error (API_QUEUE_FULL, "Sessions not created for lack of API queue space") +tcp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated") +tcp_error (SEGMENT_INVALID, "Invalid segment") +tcp_error (ACK_INVALID, "Invalid ACK") +tcp_error (ACK_DUP, "Duplicate ACK") +tcp_error (ACK_OLD, "Old ACK") +tcp_error (PKTS_SENT, "Packets sent") +tcp_error (FILTERED_DUPACKS, "Filtered duplicate ACKs") +tcp_error (RST_SENT, "Resets sent") \ No newline at end of file diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c new file mode 100644 index 00000000..7136741d --- /dev/null +++ b/src/vnet/tcp/tcp_format.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/**
 * Format the flags of a TCP header as a list of flag names.
 *
 * va_args: int flags.
 *
 * For each flag name f listed by foreach_tcp_flag whose TCP_FLAG_<f> bit
 * is set in flags, the text "<f>, " is appended to s (so the output ends
 * with a trailing ", " when at least one flag is set).
 *
 * @return the (possibly extended) format vector s
 */
static u8 *
format_tcp_flags (u8 * s, va_list * args)
{
  int flags = va_arg (*args, int);

  /* presumably foreach_tcp_flag invokes _(f) once per known flag name;
   * its definition is not in this file */
#define _(f) if (flags & TCP_FLAG_##f) s = format (s, "%s, ", #f);
  foreach_tcp_flag
#undef _
  return s;
}
/**
 * Format a TCP header.
 *
 * va_args: tcp_header_t *tcp, u32 max_header_bytes (number of bytes
 * available starting at the header).
 *
 * Prints ports, sequence/ack numbers, flags, header length, window and
 * checksum, each on its own line indented two columns past the current
 * indent of s. If bytes remain after the TCP header and a formatter is
 * registered for the destination port, the payload is formatted with it.
 *
 * @return the extended format vector s
 */
u8 *
format_tcp_header (u8 * s, va_list * args)
{
  tcp_header_t *tcp = va_arg (*args, tcp_header_t *);
  u32 max_header_bytes = va_arg (*args, u32);
  u32 header_bytes;
  uword indent;

  /* Fewer bytes than a fixed TCP header: report truncation and stop. */
  if (max_header_bytes < sizeof (tcp[0]))
    return format (s, "TCP header truncated");

  indent = format_get_indent (s);
  indent += 2;
  header_bytes = tcp_header_bytes (tcp);

  s = format (s, "TCP: %d -> %d", clib_net_to_host_u16 (tcp->src),
	      clib_net_to_host_u16 (tcp->dst));

  s = format (s, "\n%Useq. 0x%08x ack 0x%08x", format_white_space, indent,
	      clib_net_to_host_u32 (tcp->seq_number),
	      clib_net_to_host_u32 (tcp->ack_number));

  s = format (s, "\n%Uflags %U, tcp header: %d bytes", format_white_space,
	      indent, format_tcp_flags, tcp->flags, header_bytes);

  s = format (s, "\n%Uwindow %d, checksum 0x%04x", format_white_space, indent,
	      clib_net_to_host_u16 (tcp->window),
	      clib_net_to_host_u16 (tcp->checksum));


  /* Option formatting is compiled out; the loop below is unfinished (it
   * never advances o for options other than END). */
#if 0
  /* Format TCP options. */
  {
    u8 *o;
    u8 *option_start = (void *) (tcp + 1);
    u8 *option_end = (void *) tcp + header_bytes;

    for (o = option_start; o < option_end;)
      {
	u32 length = o[1];
	switch (o[0])
	  {
	  case TCP_OPTION_END:
	    length = 1;
	    o = option_end;
	    break;

	  case TCP_OPTION_NOOP:
	    length = 1;
	    break;

	  }
      }
  }
#endif

  /* Recurse into next protocol layer. */
  if (max_header_bytes != 0 && header_bytes < max_header_bytes)
    {
      ip_main_t *im = &ip_main;
      tcp_udp_port_info_t *pi;

      /* Payload formatter is selected by destination port only */
      pi = ip_get_tcp_udp_port_info (im, tcp->dst);

      if (pi && pi->format_header)
	s = format (s, "\n%U%U", format_white_space, indent - 2,
		    pi->format_header,
		    /* next protocol header */ (void *) tcp + header_bytes,
		    max_header_bytes - header_bytes);
    }

  return s;
}
+ */ + +#include +#include +#include +#include +#include + +static char *tcp_error_strings[] = { +#define tcp_error(n,s) s, +#include +#undef tcp_error +}; + +/* All TCP nodes have the same outgoing arcs */ +#define foreach_tcp_state_next \ + _ (DROP, "error-drop") \ + _ (TCP4_OUTPUT, "tcp4-output") \ + _ (TCP6_OUTPUT, "tcp6-output") + +typedef enum _tcp_established_next +{ +#define _(s,n) TCP_ESTABLISHED_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_ESTABLISHED_N_NEXT, +} tcp_established_next_t; + +typedef enum _tcp_rcv_process_next +{ +#define _(s,n) TCP_RCV_PROCESS_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_RCV_PROCESS_N_NEXT, +} tcp_rcv_process_next_t; + +typedef enum _tcp_syn_sent_next +{ +#define _(s,n) TCP_SYN_SENT_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_SYN_SENT_N_NEXT, +} tcp_syn_sent_next_t; + +typedef enum _tcp_listen_next +{ +#define _(s,n) TCP_LISTEN_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_LISTEN_N_NEXT, +} tcp_listen_next_t; + +/* Generic, state independent indices */ +typedef enum _tcp_state_next +{ +#define _(s,n) TCP_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_STATE_N_NEXT, +} tcp_state_next_t; + +#define tcp_next_output(is_ip4) (is_ip4 ? TCP_NEXT_TCP4_OUTPUT \ + : TCP_NEXT_TCP6_OUTPUT) + +vlib_node_registration_t tcp4_established_node; +vlib_node_registration_t tcp6_established_node; + +/** + * Validate segment sequence number. As per RFC793: + * + * Segment Receive Test + * Length Window + * ------- ------- ------------------------------------------- + * 0 0 SEG.SEQ = RCV.NXT + * 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + * >0 0 not acceptable + * >0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + * or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND + * + * This ultimately consists in checking if segment falls within the window. 
+ * The one important difference compared to RFC793 is that we use rcv_las, + * or the rcv_nxt at last ack sent instead of rcv_nxt since that's the + * peer's reference when computing our receive window. + * + * This accepts only segments within the window. + */ +always_inline u8 +tcp_segment_in_rcv_wnd (tcp_connection_t * tc, u32 seq, u32 end_seq) +{ + return seq_leq (end_seq, tc->rcv_las + tc->rcv_wnd) + && seq_geq (seq, tc->rcv_nxt); +} + +void +tcp_options_parse (tcp_header_t * th, tcp_options_t * to) +{ + const u8 *data; + u8 opt_len, opts_len, kind; + int j; + sack_block_t b; + + opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t); + data = (const u8 *) (th + 1); + + /* Zero out all flags but those set in SYN */ + to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE); + + for (; opts_len > 0; opts_len -= opt_len, data += opt_len) + { + kind = data[0]; + + /* Get options length */ + if (kind == TCP_OPTION_EOL) + break; + else if (kind == TCP_OPTION_NOOP) + opt_len = 1; + else + { + /* broken options */ + if (opts_len < 2) + break; + opt_len = data[1]; + + /* weird option length */ + if (opt_len < 2 || opt_len > opts_len) + break; + } + + /* Parse options */ + switch (kind) + { + case TCP_OPTION_MSS: + if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th)) + { + to->flags |= TCP_OPTS_FLAG_MSS; + to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2)); + } + break; + case TCP_OPTION_WINDOW_SCALE: + if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th)) + { + to->flags |= TCP_OPTS_FLAG_WSCALE; + to->wscale = data[2]; + if (to->wscale > TCP_MAX_WND_SCALE) + { + clib_warning ("Illegal window scaling value: %d", + to->wscale); + to->wscale = TCP_MAX_WND_SCALE; + } + } + break; + case TCP_OPTION_TIMESTAMP: + if (opt_len == TCP_OPTION_LEN_TIMESTAMP) + { + to->flags |= TCP_OPTS_FLAG_TSTAMP; + to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2)); + to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6)); + } + break; + case TCP_OPTION_SACK_PERMITTED: 
+ if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th)) + to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + break; + case TCP_OPTION_SACK_BLOCK: + /* If SACK permitted was not advertised or a SYN, break */ + if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th)) + break; + + /* If too short or not correctly formatted, break */ + if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK)) + break; + + to->flags |= TCP_OPTS_FLAG_SACK; + to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK; + vec_reset_length (to->sacks); + for (j = 0; j < to->n_sack_blocks; j++) + { + b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 4 * j)); + b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 4 * j)); + vec_add1 (to->sacks, b); + } + break; + default: + /* Nothing to see here */ + continue; + } + } +} + +always_inline int +tcp_segment_check_paws (tcp_connection_t * tc) +{ + /* XXX normally test for timestamp should be lt instead of leq, but for + * local testing this is not enough */ + return tcp_opts_tstamp (&tc->opt) && tc->tsval_recent + && timestamp_lt (tc->opt.tsval, tc->tsval_recent); +} + +/** + * Validate incoming segment as per RFC793 p. 69 and RFC1323 p. 19 + * + * It first verifies if segment has a wrapped sequence number (PAWS) and then + * does the processing associated to the first four steps (ignoring security + * and precedence): sequence number, rst bit and syn bit checks. + * + * @return 0 if segments passes validation. + */ +static int +tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, + vlib_buffer_t * b0, tcp_header_t * th0, u32 * next0) +{ + u8 paws_failed; + + if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0))) + return -1; + + tcp_options_parse (th0, &tc0->opt); + + /* RFC1323: Check against wrapped sequence numbers (PAWS). 
If we have + * timestamp to echo and it's less than tsval_recent, drop segment + * but still send an ACK in order to retain TCP's mechanism for detecting + * and recovering from half-open connections */ + paws_failed = tcp_segment_check_paws (tc0); + if (paws_failed) + { + clib_warning ("paws failed"); + + /* If it just so happens that a segment updates tsval_recent for a + * segment over 24 days old, invalidate tsval_recent. */ + if (timestamp_lt (tc0->tsval_recent_age + TCP_PAWS_IDLE, + tcp_time_now ())) + { + /* Age isn't reset until we get a valid tsval (bsd inspired) */ + tc0->tsval_recent = 0; + } + else + { + /* Drop after ack if not rst */ + if (!tcp_rst (th0)) + { + tcp_make_ack (tc0, b0); + *next0 = tcp_next_output (tc0->c_is_ip4); + return -1; + } + } + } + + /* 1st: check sequence number */ + if (!tcp_segment_in_rcv_wnd (tc0, vnet_buffer (b0)->tcp.seq_number, + vnet_buffer (b0)->tcp.seq_end)) + { + if (!tcp_rst (th0)) + { + /* Send dup ack */ + tcp_make_ack (tc0, b0); + *next0 = tcp_next_output (tc0->c_is_ip4); + } + return -1; + } + + /* 2nd: check the RST bit */ + if (tcp_rst (th0)) + { + /* Notify session that connection has been reset. Switch + * state to closed and await for session to do the cleanup. 
*/ + stream_session_reset_notify (&tc0->connection); + tc0->state = TCP_STATE_CLOSED; + return -1; + } + + /* 3rd: check security and precedence (skip) */ + + /* 4th: check the SYN bit */ + if (tcp_syn (th0)) + { + tcp_send_reset (b0, tc0->c_is_ip4); + return -1; + } + + /* If PAWS passed and segment in window, save timestamp */ + if (!paws_failed) + { + tc0->tsval_recent = tc0->opt.tsval; + tc0->tsval_recent_age = tcp_time_now (); + } + + return 0; +} + +always_inline int +tcp_rcv_ack_is_acceptable (tcp_connection_t * tc0, vlib_buffer_t * tb0) +{ + /* SND.UNA =< SEG.ACK =< SND.NXT */ + return (seq_leq (tc0->snd_una, vnet_buffer (tb0)->tcp.ack_number) + && seq_leq (vnet_buffer (tb0)->tcp.ack_number, tc0->snd_nxt)); +} + +/** + * Compute smoothed RTT as per VJ's '88 SIGCOMM and RFC6298 + * + * Note that although the original article, srtt and rttvar are scaled + * to minimize round-off errors, here we don't. Instead, we rely on + * better precision time measurements. + * + * TODO support us rtt resolution + */ +static void +tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt) +{ + int err; + + if (tc->srtt != 0) + { + err = mrtt - tc->srtt; + tc->srtt += err >> 3; + + /* XXX Drop in RTT results in RTTVAR increase and bigger RTO. + * The increase should be bound */ + tc->rttvar += (clib_abs (err) - tc->rttvar) >> 2; + } + else + { + /* First measurement. */ + tc->srtt = mrtt; + tc->rttvar = mrtt << 1; + } +} + +/** Update RTT estimate and RTO timer + * + * Measure RTT: We have two sources of RTT measurements: TSOPT and ACK + * timing. Middle boxes are known to fiddle with TCP options so we + * should give higher priority to ACK timing. + * + * return 1 if valid rtt 0 otherwise + */ +static int +tcp_update_rtt (tcp_connection_t * tc, u32 ack) +{ + u32 mrtt = 0; + + /* Karn's rule, part 1. Don't use retransmitted segments to estimate + * RTT because they're ambiguous. 
*/ + if (tc->rtt_seq && seq_gt (ack, tc->rtt_seq) && !tc->rto_boff) + { + mrtt = tcp_time_now () - tc->rtt_ts; + tc->rtt_seq = 0; + } + + /* As per RFC7323 TSecr can be used for RTTM only if the segment advances + * snd_una, i.e., the left side of the send window: + * seq_lt (tc->snd_una, ack). Note: last condition could be dropped, we don't + * try to update rtt for dupacks */ + else if (tcp_opts_tstamp (&tc->opt) && tc->opt.tsecr && tc->bytes_acked) + { + mrtt = tcp_time_now () - tc->opt.tsecr; + } + + /* Ignore dubious measurements */ + if (mrtt == 0 || mrtt > TCP_RTT_MAX) + return 0; + + tcp_estimate_rtt (tc, mrtt); + + tc->rto = clib_min (tc->srtt + (tc->rttvar << 2), TCP_RTO_MAX); + + return 1; +} + +/** + * Dequeue bytes that have been acked and while at it update RTT estimates. + */ +static void +tcp_dequeue_acked (tcp_connection_t * tc, u32 ack) +{ + /* Dequeue the newly ACKed bytes */ + stream_session_dequeue_drop (&tc->connection, tc->bytes_acked); + + /* Update rtt and rto */ + if (tcp_update_rtt (tc, ack)) + { + /* Good ACK received and valid RTT, make sure retransmit backoff is 0 */ + tc->rto_boff = 0; + } +} + +/** Check if dupack as per RFC5681 Sec. 
2 */ +always_inline u8 +tcp_ack_is_dupack (tcp_connection_t * tc, vlib_buffer_t * b, u32 new_snd_wnd) +{ + return ((vnet_buffer (b)->tcp.ack_number == tc->snd_una) + && seq_gt (tc->snd_una_max, tc->snd_una) + && (vnet_buffer (b)->tcp.seq_end == vnet_buffer (b)->tcp.seq_number) + && (new_snd_wnd == tc->snd_wnd)); +} + +void +scoreboard_remove_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole) +{ + sack_scoreboard_hole_t *next, *prev; + + if (hole->next != TCP_INVALID_SACK_HOLE_INDEX) + { + next = pool_elt_at_index (sb->holes, hole->next); + next->prev = hole->prev; + } + + if (hole->prev != TCP_INVALID_SACK_HOLE_INDEX) + { + prev = pool_elt_at_index (sb->holes, hole->prev); + prev->next = hole->next; + } + else + { + sb->head = hole->next; + } + + pool_put (sb->holes, hole); +} + +sack_scoreboard_hole_t * +scoreboard_insert_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * prev, + u32 start, u32 end) +{ + sack_scoreboard_hole_t *hole, *next; + u32 hole_index; + + pool_get (sb->holes, hole); + memset (hole, 0, sizeof (*hole)); + + hole->start = start; + hole->end = end; + hole_index = hole - sb->holes; + + if (prev) + { + hole->prev = prev - sb->holes; + hole->next = prev->next; + + if ((next = scoreboard_next_hole (sb, hole))) + next->prev = hole_index; + + prev->next = hole_index; + } + else + { + sb->head = hole_index; + hole->prev = TCP_INVALID_SACK_HOLE_INDEX; + hole->next = TCP_INVALID_SACK_HOLE_INDEX; + } + + return hole; +} + +static void +tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) +{ + sack_scoreboard_t *sb = &tc->sack_sb; + sack_block_t *blk, tmp; + sack_scoreboard_hole_t *hole, *next_hole; + u32 blk_index = 0; + int i, j; + + if (!tcp_opts_sack (tc) && sb->head == TCP_INVALID_SACK_HOLE_INDEX) + return; + + /* Remove invalid blocks */ + vec_foreach (blk, tc->opt.sacks) + { + if (seq_lt (blk->start, blk->end) + && seq_gt (blk->start, tc->snd_una) + && seq_gt (blk->start, ack) && seq_lt (blk->end, tc->snd_nxt)) + continue; + + vec_del1 
(tc->opt.sacks, blk - tc->opt.sacks); + } + + /* Add block for cumulative ack */ + if (seq_gt (ack, tc->snd_una)) + { + tmp.start = tc->snd_una; + tmp.end = ack; + vec_add1 (tc->opt.sacks, tmp); + } + + if (vec_len (tc->opt.sacks) == 0) + return; + + /* Make sure blocks are ordered */ + for (i = 0; i < vec_len (tc->opt.sacks); i++) + for (j = i; j < vec_len (tc->opt.sacks); j++) + if (seq_lt (tc->opt.sacks[j].start, tc->opt.sacks[i].start)) + { + tmp = tc->opt.sacks[i]; + tc->opt.sacks[i] = tc->opt.sacks[j]; + tc->opt.sacks[j] = tmp; + } + + /* If no holes, insert the first that covers all outstanding bytes */ + if (sb->head == TCP_INVALID_SACK_HOLE_INDEX) + { + scoreboard_insert_hole (sb, 0, tc->snd_una, tc->snd_una_max); + } + + /* Walk the holes with the SACK blocks */ + hole = pool_elt_at_index (sb->holes, sb->head); + while (hole && blk_index < vec_len (tc->opt.sacks)) + { + blk = &tc->opt.sacks[blk_index]; + + if (seq_leq (blk->start, hole->start)) + { + /* Block covers hole. Remove hole */ + if (seq_geq (blk->end, hole->end)) + { + next_hole = scoreboard_next_hole (sb, hole); + + /* Byte accounting */ + if (seq_lt (hole->end, ack)) + { + /* Bytes lost because snd wnd left edge advances */ + if (seq_lt (next_hole->start, ack)) + sb->sacked_bytes -= next_hole->start - hole->end; + else + sb->sacked_bytes -= ack - hole->end; + } + else + { + sb->sacked_bytes += scoreboard_hole_bytes (hole); + } + + scoreboard_remove_hole (sb, hole); + hole = next_hole; + } + /* Partial overlap */ + else + { + sb->sacked_bytes += blk->end - hole->start; + hole->start = blk->end; + blk_index++; + } + } + else + { + /* Hole must be split */ + if (seq_leq (blk->end, hole->end)) + { + sb->sacked_bytes += blk->end - blk->start; + scoreboard_insert_hole (sb, hole, blk->end, hole->end); + hole->end = blk->start - 1; + blk_index++; + } + else + { + sb->sacked_bytes += hole->end - blk->start + 1; + hole->end = blk->start - 1; + hole = scoreboard_next_hole (sb, hole); + } + } + } +} + 
+/** Update snd_wnd + * + * If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and SND.WL2 =< SEG.ACK)), set + * SND.WND <- SEG.WND, set SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK */ +static void +tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd) +{ + if (tc->snd_wl1 < seq || (tc->snd_wl1 == seq && tc->snd_wl2 <= ack)) + { + tc->snd_wnd = snd_wnd; + tc->snd_wl1 = seq; + tc->snd_wl2 = ack; + } +} + +static void +tcp_cc_congestion (tcp_connection_t * tc) +{ + tc->cc_algo->congestion (tc); +} + +static void +tcp_cc_recover (tcp_connection_t * tc) +{ + if (tcp_in_fastrecovery (tc)) + { + tc->cc_algo->recovered (tc); + tcp_recovery_off (tc); + } + else if (tcp_in_recovery (tc)) + { + tcp_recovery_off (tc); + tc->cwnd = tcp_loss_wnd (tc); + } +} + +static void +tcp_cc_rcv_ack (tcp_connection_t * tc) +{ + u8 partial_ack; + + if (tcp_in_recovery (tc)) + { + partial_ack = seq_lt (tc->snd_una, tc->snd_una_max); + if (!partial_ack) + { + /* Clear retransmitted bytes. */ + tc->rtx_bytes = 0; + tcp_cc_recover (tc); + } + else + { + /* Clear retransmitted bytes. XXX should we clear all? 
*/ + tc->rtx_bytes = 0; + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_PARTIALACK); + + /* Retransmit first unacked segment */ + tcp_retransmit_first_unacked (tc); + } + } + else + { + tc->cc_algo->rcv_ack (tc); + } + + tc->rcv_dupacks = 0; + tc->tsecr_last_ack = tc->opt.tsecr; +} + +static void +tcp_cc_rcv_dupack (tcp_connection_t * tc, u32 ack) +{ + ASSERT (tc->snd_una == ack); + + tc->rcv_dupacks++; + if (tc->rcv_dupacks == TCP_DUPACK_THRESHOLD) + { + /* RFC6582 NewReno heuristic to avoid multiple fast retransmits */ + if (tc->opt.tsecr != tc->tsecr_last_ack) + { + tc->rcv_dupacks = 0; + return; + } + + tcp_fastrecovery_on (tc); + + /* Handle congestion and dupack */ + tcp_cc_congestion (tc); + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); + + tcp_fast_retransmit (tc); + + /* Post retransmit update cwnd to ssthresh and account for the + * three segments that have left the network and should've been + * buffered at the receiver */ + tc->cwnd = tc->ssthresh + TCP_DUPACK_THRESHOLD * tc->snd_mss; + } + else if (tc->rcv_dupacks > TCP_DUPACK_THRESHOLD) + { + ASSERT (tcp_in_fastrecovery (tc)); + + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); + } +} + +void +tcp_cc_init (tcp_connection_t * tc) +{ + tc->cc_algo = tcp_cc_algo_get (TCP_CC_NEWRENO); + tc->cc_algo->init (tc); +} + +static int +tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, + tcp_header_t * th, u32 * next, u32 * error) +{ + u32 new_snd_wnd; + + /* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) then send an + * ACK, drop the segment, and return */ + if (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)) + { + tcp_make_ack (tc, b); + *next = tcp_next_output (tc->c_is_ip4); + *error = TCP_ERROR_ACK_INVALID; + return -1; + } + + /* If old ACK, discard */ + if (seq_lt (vnet_buffer (b)->tcp.ack_number, tc->snd_una)) + { + *error = TCP_ERROR_ACK_OLD; + return -1; + } + + if (tcp_opts_sack_permitted (&tc->opt)) + tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number); + + new_snd_wnd = 
clib_net_to_host_u32 (th->window) << tc->snd_wscale; + + if (tcp_ack_is_dupack (tc, b, new_snd_wnd)) + { + tcp_cc_rcv_dupack (tc, vnet_buffer (b)->tcp.ack_number); + *error = TCP_ERROR_ACK_DUP; + return -1; + } + + /* Valid ACK */ + tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una; + tc->snd_una = vnet_buffer (b)->tcp.ack_number; + + /* Dequeue ACKed packet and update RTT */ + tcp_dequeue_acked (tc, vnet_buffer (b)->tcp.ack_number); + + tcp_update_snd_wnd (tc, vnet_buffer (b)->tcp.seq_number, + vnet_buffer (b)->tcp.ack_number, new_snd_wnd); + + /* Updates congestion control (slow start/congestion avoidance) */ + tcp_cc_rcv_ack (tc); + + /* If everything has been acked, stop retransmit timer + * otherwise update */ + if (tc->snd_una == tc->snd_una_max) + tcp_timer_reset (tc, TCP_TIMER_RETRANSMIT); + else + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, tc->rto); + + return 0; +} + +/** + * Build SACK list as per RFC2018. + * + * Makes sure the first block contains the segment that generated the current + * ACK and the following ones are the ones most recently reported in SACK + * blocks. + * + * @param tc TCP connection for which the SACK list is updated + * @param start Start sequence number of the newest SACK block + * @param end End sequence of the newest SACK block + */ +static void +tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end) +{ + sack_block_t *new_list = 0, block; + u32 n_elts; + int i; + u8 new_head = 0; + + /* If the first segment is ooo add it to the list. Last write might've moved + * rcv_nxt over the first segment. */ + if (seq_lt (tc->rcv_nxt, start)) + { + block.start = start; + block.end = end; + vec_add1 (new_list, block); + new_head = 1; + } + + /* Find the blocks still worth keeping. 
/**
 * Enqueue in-order data for delivery to application
 *
 * @param tc       connection the segment belongs to
 * @param b        buffer whose current data is the segment payload
 * @param data_len number of payload bytes to enqueue
 *
 * @return TCP_ERROR_PURE_ACK if there is no payload, TCP_ERROR_ENQUEUED if
 *         the payload was enqueued, TCP_ERROR_FIFO_FULL if fewer bytes
 *         than data_len were reported written
 */
always_inline u32
tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b,
			  u16 data_len)
{
  int written;

  /* Pure ACK. Update rcv_nxt and be done. */
  if (PREDICT_FALSE (data_len == 0))
    {
      tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end;
      return TCP_ERROR_PURE_ACK;
    }

  written = stream_session_enqueue_data (&tc->connection,
					 vlib_buffer_get_current (b),
					 data_len, 1 /* queue event */ );

  /* Exactly the payload was written: rcv_nxt moves to the segment end */
  if (PREDICT_TRUE (written == data_len))
    {
      tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end;
    }
  /* If more data written than expected, account for out-of-order bytes. */
  else if (written > data_len)
    {
      /* The surplus is added on top of the segment end */
      tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end + written - data_len;

      /* Send ACK confirming the update */
      tc->flags |= TCP_CONN_SNDACK;

      /* Update SACK list if need be */
      if (tcp_opts_sack_permitted (&tc->opt))
	{
	  /* Remove SACK blocks that have been delivered */
	  tcp_update_sack_list (tc, tc->rcv_nxt, tc->rcv_nxt);
	}
    }
  else
    {
      /* Fewer bytes written than offered (this includes any negative
       * value of written). This path asserts before returning.
       * NOTE(review): looks like a full rx fifo would trip the ASSERT in
       * debug builds - confirm that is intended. */
      ASSERT (0);
      return TCP_ERROR_FIFO_FULL;
    }

  return TCP_ERROR_ENQUEUED;
}
*/ + if (!tcp_timer_is_active (tc, TCP_TIMER_DELACK) + && (tc->flags & TCP_CONN_SENT_RCV_WND0) == 0 + && (tc->flags & TCP_CONN_SNDACK) == 0) + return 1; + + return 0; +} + +static int +tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b, + u16 n_data_bytes, u32 * next0) +{ + u32 error = 0; + + /* Handle out-of-order data */ + if (PREDICT_FALSE (vnet_buffer (b)->tcp.seq_number != tc->rcv_nxt)) + { + error = tcp_session_enqueue_ooo (tc, b, n_data_bytes); + + /* Don't send more than 3 dupacks per burst + * XXX decide if this is good */ + if (tc->snt_dupacks < 3) + { + /* RFC2581: Send DUPACK for fast retransmit */ + tcp_make_ack (tc, b); + *next0 = tcp_next_output (tc->c_is_ip4); + + /* Mark as DUPACK. We may filter these in output if + * the burst fills the holes. */ + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_DUPACK; + + tc->snt_dupacks++; + } + + goto done; + } + + /* In order data, enqueue. Fifo figures out by itself if any out-of-order + * segments can be enqueued after fifo tail offset changes. */ + error = tcp_session_enqueue_data (tc, b, n_data_bytes); + + /* Check if ACK can be delayed */ + if (tcp_can_delack (tc)) + { + /* Nothing to do for pure ACKs */ + if (n_data_bytes == 0) + goto done; + + /* If connection has not been previously marked for delay ack + * add it to the list and flag it */ + if (!tc->flags & TCP_CONN_DELACK) + { + vec_add1 (tm->delack_connections[tc->c_thread_index], + tc->c_c_index); + tc->flags |= TCP_CONN_DELACK; + } + } + else + { + /* Check if a packet has already been enqueued to output for burst. 
+ * If yes, then drop this one, otherwise, let it pass through to + * output */ + if ((tc->flags & TCP_CONN_BURSTACK) == 0) + { + *next0 = tcp_next_output (tc->c_is_ip4); + tcp_make_ack (tc, b); + error = TCP_ERROR_ENQUEUED; + + /* TODO: maybe add counter to ensure N acks will be sent/burst */ + tc->flags |= TCP_CONN_BURSTACK; + } + } + +done: + return error; +} + +void +delack_timers_init (tcp_main_t * tm, u32 thread_index) +{ + tcp_connection_t *tc; + u32 i, *conns; + tw_timer_wheel_16t_2w_512sl_t *tw; + + tw = &tm->timer_wheels[thread_index]; + conns = tm->delack_connections[thread_index]; + for (i = 0; i < vec_len (conns); i++) + { + tc = pool_elt_at_index (tm->connections[thread_index], conns[i]); + ASSERT (0 != tc); + + tc->timers[TCP_TIMER_DELACK] + = tw_timer_start_16t_2w_512sl (tw, conns[i], + TCP_TIMER_DELACK, TCP_DELACK_TIME); + } + vec_reset_length (tm->delack_connections[thread_index]); +} + +always_inline uword +tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + tcp_main_t *tm = vnet_get_tcp_main (); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *th0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + u32 next0 = TCP_ESTABLISHED_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); + + /* Checksum 
computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (th0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + } + else + { + ip60 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (th0); + n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number + + tcp_is_syn (th0) + tcp_is_fin (th0) + n_data_bytes0; + + /* TODO header prediction fast path */ + + /* 1-4: check SEQ, RST, SYN */ + if (PREDICT_FALSE (tcp_segment_validate (vm, tc0, b0, th0, &next0))) + { + error0 = TCP_ERROR_SEGMENT_INVALID; + goto drop; + } + + /* 5: check the ACK field */ + if (tcp_rcv_ack (tc0, b0, th0, &next0, &error0)) + { + goto drop; + } + + /* 6: check the URG bit TODO */ + + /* 7: process the segment text */ + vlib_buffer_advance (b0, n_advance_bytes0); + error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0); + + /* 8: check the FIN bit */ + if (tcp_fin (th0)) + { + /* Send ACK and enter CLOSE-WAIT */ + tcp_make_ack (tc0, b0); + tcp_connection_force_ack (tc0, b0); + next0 = tcp_next_output (tc0->c_is_ip4); + tc0->state = TCP_STATE_CLOSE_WAIT; + stream_session_disconnect_notify (&tc0->connection); + } + + drop: + b0->error = node->errors[error0]; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + 
TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + delack_timers_init (tm, my_thread_index); + + return from_frame->n_vectors; +} + +static uword +tcp4_established (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_established_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_established (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_established_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_established_node) = +{ + .function = tcp4_established, + .name = "tcp4-established", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR,.error_strings = tcp_error_strings, + .n_next_nodes = TCP_ESTABLISHED_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_established_node, tcp4_established); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_established_node) = +{ + .function = tcp6_established, + .name = "tcp6-established", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_ESTABLISHED_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_established_node, tcp6_established); + +vlib_node_registration_t tcp4_syn_sent_node; +vlib_node_registration_t tcp6_syn_sent_node; + +always_inline uword +tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, ack0, seq0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + tcp_connection_t *new_tc0; + u32 next0 = TCP_SYN_SENT_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = + tcp_half_open_connection_get (vnet_buffer (b0)-> + tcp.connection_index); + + ack0 = vnet_buffer (b0)->tcp.ack_number; + seq0 = vnet_buffer (b0)->tcp.seq_number; + + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (tcp0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + 
} + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (tcp0); + n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + if (PREDICT_FALSE + (!tcp_ack (tcp0) && !tcp_rst (tcp0) && !tcp_syn (tcp0))) + goto drop; + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = seq0 + tcp_is_syn (tcp0) + + tcp_is_fin (tcp0) + n_data_bytes0; + + /* + * 1. check the ACK bit + */ + + /* + * If the ACK bit is set + * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset (unless + * the RST bit is set, if so drop the segment and return) + * + * and discard the segment. Return. + * If SND.UNA =< SEG.ACK =< SND.NXT then the ACK is acceptable. + */ + if (tcp_ack (tcp0)) + { + if (seq_leq (ack0, tc0->iss) || seq_lt (tc0->snd_nxt, ack0)) + { + if (!tcp_rst (tcp0)) + tcp_send_reset (b0, is_ip4); + + goto drop; + } + + /* Make sure ACK is valid (wrap-safe sequence compare) */ + if (seq_lt (ack0, tc0->snd_una)) + goto drop; + } + + /* + * 2. check the RST bit + */ + + if (tcp_rst (tcp0)) + { + /* If ACK is acceptable, signal client that peer is not + * willing to accept connection and drop connection*/ + if (tcp_ack (tcp0)) + { + stream_session_connect_notify (&tc0->connection, sst, + 1 /* fail */ ); + tcp_connection_cleanup (tc0); + } + goto drop; + } + + /* + * 3. check the security and precedence (skipped) + */ + + /* + * 4. check the SYN bit + */ + + /* No SYN flag. Drop. */ + if (!tcp_syn (tcp0)) + goto drop; + + /* Stop connection establishment and retransmit timers */ + tcp_timer_reset (tc0, TCP_TIMER_ESTABLISH); + tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN); + + /* Valid SYN or SYN-ACK. Move connection from half-open pool to + * current thread pool. 
*/ + pool_get (tm->connections[my_thread_index], new_tc0); + clib_memcpy (new_tc0, tc0, sizeof (*new_tc0)); + + new_tc0->c_thread_index = my_thread_index; + + /* Cleanup half-open connection XXX lock */ + pool_put (tm->half_open_connections, tc0); + + new_tc0->rcv_nxt = vnet_buffer (b0)->tcp.seq_end; + new_tc0->irs = seq0; + + /* Parse options */ + tcp_options_parse (tcp0, &new_tc0->opt); + tcp_connection_init_vars (new_tc0); + + if (tcp_opts_tstamp (&new_tc0->opt)) + { + new_tc0->tsval_recent = new_tc0->opt.tsval; + new_tc0->tsval_recent_age = tcp_time_now (); + } + + if (tcp_opts_wscale (&new_tc0->opt)) + new_tc0->snd_wscale = new_tc0->opt.wscale; + + new_tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window) + << new_tc0->snd_wscale; + new_tc0->snd_wl1 = seq0; + new_tc0->snd_wl2 = ack0; + + /* SYN-ACK: See if we can switch to ESTABLISHED state */ + if (tcp_ack (tcp0)) + { + /* Our SYN is ACKed: we have iss < ack = snd_una */ + + /* TODO Dequeue acknowledged segments if we support Fast Open */ + new_tc0->snd_una = ack0; + new_tc0->state = TCP_STATE_ESTABLISHED; + + /* Notify app that we have connection */ + stream_session_connect_notify (&new_tc0->connection, sst, 0); + + /* Make sure after data segment processing ACK is sent */ + new_tc0->flags |= TCP_CONN_SNDACK; + } + /* SYN: Simultaneous open. Change state to SYN-RCVD and send SYN-ACK */ + else + { + new_tc0->state = TCP_STATE_SYN_RCVD; + + /* Notify app that we have connection XXX */ + stream_session_connect_notify (&new_tc0->connection, sst, 0); + + tcp_make_synack (new_tc0, b0); + next0 = tcp_next_output (is_ip4); + + goto drop; + } + + /* Read data, if any */ + if (n_data_bytes0) + { + error0 = + tcp_segment_rcv (tm, new_tc0, b0, n_data_bytes0, &next0); + if (error0 == TCP_ERROR_PURE_ACK) + error0 = TCP_ERROR_SYN_ACKS_RCVD; + } + else + { + tcp_make_ack (new_tc0, b0); + next0 = tcp_next_output (new_tc0->c_is_ip4); + } + + drop: + + b0->error = error0 ? 
node->errors[error0] : 0; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_syn_sent (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_syn_sent_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_syn_sent_rcv (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_syn_sent_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_syn_sent_node) = +{ + .function = tcp4_syn_sent, + .name = "tcp4-syn-sent", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_SYN_SENT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_syn_sent_node, tcp4_syn_sent); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_syn_sent_node) = +{ + .function = tcp6_syn_sent_rcv, + .name = "tcp6-syn-sent", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_SYN_SENT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + } +,}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv); +/** + * Handles reception for all states except LISTEN, SYN-SEND and ESTABLISHED + * as per RFC793 p. 64 + */ +always_inline uword +tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + u32 next0 = TCP_RCV_PROCESS_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); + + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (tcp0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + } + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (tcp0); + n_data_bytes0 = clib_net_to_host_u16 
(ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number + + tcp_is_syn (tcp0) + tcp_is_fin (tcp0) + n_data_bytes0; + + /* + * Special treatment for CLOSED + */ + switch (tc0->state) + { + case TCP_STATE_CLOSED: + goto drop; + break; + } + + /* + * For all other states (except LISTEN) + */ + + /* 1-4: check SEQ, RST, SYN */ + if (PREDICT_FALSE + (tcp_segment_validate (vm, tc0, b0, tcp0, &next0))) + { + error0 = TCP_ERROR_SEGMENT_INVALID; + goto drop; + } + + /* 5: check the ACK field */ + switch (tc0->state) + { + case TCP_STATE_SYN_RCVD: + /* + * If the segment acknowledgment is not acceptable, form a + * reset segment, + * + * and send it. + */ + if (!tcp_rcv_ack_is_acceptable (tc0, b0)) + { + tcp_send_reset (b0, is_ip4); + goto drop; + } + /* Switch state to ESTABLISHED */ + tc0->state = TCP_STATE_ESTABLISHED; + + /* Initialize session variables */ + tc0->snd_una = vnet_buffer (b0)->tcp.ack_number; + tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window) + << tc0->opt.wscale; + tc0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number; + tc0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number; + + /* Shoulder tap the server */ + stream_session_accept_notify (&tc0->connection); + + tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN); + break; + case TCP_STATE_ESTABLISHED: + /* We can get packets in established state here because they + * were enqueued before state change */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + + break; + case TCP_STATE_FIN_WAIT_1: + /* In addition to the processing for the ESTABLISHED state, if + * our FIN is now acknowledged then enter FIN-WAIT-2 and + * continue processing in that state. 
*/ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + tc0->state = TCP_STATE_FIN_WAIT_2; + /* Stop all timers, 2MSL will be set lower */ + tcp_connection_timers_reset (tc0); + break; + case TCP_STATE_FIN_WAIT_2: + /* In addition to the processing for the ESTABLISHED state, if + * the retransmission queue is empty, the user's CLOSE can be + * acknowledged ("ok") but do not delete the TCB. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + /* check if rtx queue is empty and ack CLOSE TODO */ + break; + case TCP_STATE_CLOSE_WAIT: + /* Do the same processing as for the ESTABLISHED state. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + break; + case TCP_STATE_CLOSING: + /* In addition to the processing for the ESTABLISHED state, if + * the ACK acknowledges our FIN then enter the TIME-WAIT state, + * otherwise ignore the segment. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + + /* XXX test that send queue empty */ + tc0->state = TCP_STATE_TIME_WAIT; + goto drop; + + break; + case TCP_STATE_LAST_ACK: + /* The only thing that can arrive in this state is an + * acknowledgment of our FIN. If our FIN is now acknowledged, + * delete the TCB, enter the CLOSED state, and return. */ + + if (!tcp_rcv_ack_is_acceptable (tc0, b0)) + goto drop; + + tcp_connection_del (tc0); + goto drop; + + break; + case TCP_STATE_TIME_WAIT: + /* The only thing that can arrive in this state is a + * retransmission of the remote FIN. Acknowledge it, and restart + * the 2 MSL timeout. 
*/ + + /* TODO */ + goto drop; + break; + default: + ASSERT (0); + } + + /* 6: check the URG bit TODO */ + + /* 7: process the segment text */ + switch (tc0->state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_FIN_WAIT_1: + case TCP_STATE_FIN_WAIT_2: + error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0); + break; + case TCP_STATE_CLOSE_WAIT: + case TCP_STATE_CLOSING: + case TCP_STATE_LAST_ACK: + case TCP_STATE_TIME_WAIT: + /* This should not occur, since a FIN has been received from the + * remote side. Ignore the segment text. */ + break; + } + + /* 8: check the FIN bit */ + if (!tcp_fin (tcp0)) + goto drop; + + switch (tc0->state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_SYN_RCVD: + /* Send FIN-ACK notify app and enter CLOSE-WAIT */ + tcp_connection_timers_reset (tc0); + tcp_make_finack (tc0, b0); + next0 = tcp_next_output (tc0->c_is_ip4); + stream_session_disconnect_notify (&tc0->connection); + tc0->state = TCP_STATE_CLOSE_WAIT; + break; + case TCP_STATE_CLOSE_WAIT: + case TCP_STATE_CLOSING: + case TCP_STATE_LAST_ACK: + /* move along .. */ + break; + case TCP_STATE_FIN_WAIT_1: + tc0->state = TCP_STATE_TIME_WAIT; + tcp_connection_timers_reset (tc0); + tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + break; + case TCP_STATE_FIN_WAIT_2: + /* Got FIN, send ACK! */ + tc0->state = TCP_STATE_TIME_WAIT; + tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + tcp_make_ack (tc0, b0); + next0 = tcp_next_output (is_ip4); + break; + case TCP_STATE_TIME_WAIT: + /* Remain in the TIME-WAIT state. Restart the 2 MSL time-wait + * timeout. + */ + tcp_timer_update (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + break; + } + + b0->error = error0 ? 
node->errors[error0] : 0; + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_rcv_process (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_rcv_process_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_rcv_process (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_rcv_process_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_rcv_process_node) = +{ + .function = tcp4_rcv_process, + .name = "tcp4-rcv-process", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RCV_PROCESS_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_rcv_process_node, tcp4_rcv_process); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_rcv_process_node) = +{ + .function = tcp6_rcv_process, + .name = "tcp6-rcv-process", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RCV_PROCESS_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_rcv_process_node, tcp6_rcv_process); + +vlib_node_registration_t tcp4_listen_node; +vlib_node_registration_t tcp6_listen_node; + +/** + * LISTEN state processing as per RFC 793 p. 65 + */ +always_inline uword +tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + tcp_main_t *tm = vnet_get_tcp_main (); + u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *th0 = 0; + tcp_connection_t *lc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + tcp_connection_t *child0; + u32 error0 = TCP_ERROR_SYNS_RCVD, next0 = TCP_LISTEN_NEXT_DROP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + lc0 = tcp_listener_get (vnet_buffer (b0)->tcp.connection_index); + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ip40); + } + else + { + ip60 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ip60); + } + + /* Create child session. For syn-flood protection use filter */ + + /* 1. first check for an RST */ + if (tcp_rst (th0)) + goto drop; + + /* 2. 
second check for an ACK */ + if (tcp_ack (th0)) + { + tcp_send_reset (b0, is_ip4); + goto drop; + } + + /* 3. check for a SYN (did that already) */ + + /* Create child session and send SYN-ACK */ + pool_get (tm->connections[my_thread_index], child0); + memset (child0, 0, sizeof (*child0)); + + child0->c_c_index = child0 - tm->connections[my_thread_index]; + child0->c_lcl_port = lc0->c_lcl_port; + child0->c_rmt_port = th0->src_port; + child0->c_is_ip4 = is_ip4; + child0->c_thread_index = my_thread_index; + + if (is_ip4) + { + child0->c_lcl_ip4.as_u32 = ip40->dst_address.as_u32; + child0->c_rmt_ip4.as_u32 = ip40->src_address.as_u32; + } + else + { + clib_memcpy (&child0->c_lcl_ip6, &ip60->dst_address, + sizeof (ip6_address_t)); + clib_memcpy (&child0->c_rmt_ip6, &ip60->src_address, + sizeof (ip6_address_t)); + } + + if (stream_session_accept (&child0->connection, lc0->c_s_index, sst, + 0 /* notify */ )) + { + error0 = TCP_ERROR_CREATE_SESSION_FAIL; + goto drop; + } + + tcp_options_parse (th0, &child0->opt); + tcp_connection_init_vars (child0); + + child0->irs = vnet_buffer (b0)->tcp.seq_number; + child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1; + child0->state = TCP_STATE_SYN_RCVD; + + /* RFC1323: TSval timestamps sent on {SYN} and {SYN,ACK} + * segments are used to initialize PAWS. */ + if (tcp_opts_tstamp (&child0->opt)) + { + child0->tsval_recent = child0->opt.tsval; + child0->tsval_recent_age = tcp_time_now (); + } + + /* Reuse buffer to make syn-ack and send */ + tcp_make_synack (child0, b0); + next0 = tcp_next_output (is_ip4); + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + b0->error = error0 ? 
node->errors[error0] : 0; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static uword +tcp4_listen (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_listen_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_listen (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_listen_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_listen_node) = +{ + .function = tcp4_listen, + .name = "tcp4-listen", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_LISTEN_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_LISTEN_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_listen_node, tcp4_listen); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_listen_node) = +{ + .function = tcp6_listen, + .name = "tcp6-listen", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_LISTEN_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_LISTEN_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_listen_node, tcp6_listen); + +vlib_node_registration_t tcp4_input_node; +vlib_node_registration_t tcp6_input_node; + +typedef enum _tcp_input_next +{ + TCP_INPUT_NEXT_DROP, + TCP_INPUT_NEXT_LISTEN, + TCP_INPUT_NEXT_RCV_PROCESS, + TCP_INPUT_NEXT_SYN_SENT, + TCP_INPUT_NEXT_ESTABLISHED, + TCP_INPUT_NEXT_RESET, + TCP_INPUT_N_NEXT +} tcp_input_next_t; + +#define foreach_tcp4_input_next \ + _ (DROP, "error-drop") \ + _ (LISTEN, "tcp4-listen") \ + _ (RCV_PROCESS, "tcp4-rcv-process") \ + _ (SYN_SENT, "tcp4-syn-sent") \ + _ (ESTABLISHED, "tcp4-established") \ + _ (RESET, "tcp4-reset") + +#define foreach_tcp6_input_next \ + _ (DROP, "error-drop") \ + _ (LISTEN, "tcp6-listen") \ + _ (RCV_PROCESS, "tcp6-rcv-process") \ + _ (SYN_SENT, "tcp6-syn-sent") \ + _ (ESTABLISHED, "tcp6-established") \ + _ (RESET, "tcp6-reset") + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 state; +} tcp_rx_trace_t; + +const char *tcp_fsm_states[] = { +#define _(sym, str) str, + foreach_tcp_fsm_state +#undef _ +}; + +u8 * +format_tcp_state (u8 * s, va_list * args) +{ + tcp_state_t *state = va_arg (*args, tcp_state_t *); + + if (state[0] < TCP_N_STATES) + s = format (s, "%s", tcp_fsm_states[state[0]]); + else + s = format (s, "UNKNOWN"); + + return s; +} + +u8 * +format_tcp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *); + + s = format (s, "TCP: src-port %d dst-port %U%s\n", + clib_net_to_host_u16 (t->src_port), + clib_net_to_host_u16 (t->dst_port), format_tcp_state, t->state); + + return s; +} + +#define filter_flags 
(TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN) + +always_inline uword +tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + tcp_main_t *tm = vnet_get_tcp_main (); + session_manager_main_t *ssm = vnet_get_session_manager_main (); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 error0 = TCP_ERROR_NO_LISTENER, next0 = TCP_INPUT_NEXT_DROP; + u8 flags0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + + /* lookup session */ + tc0 = + (tcp_connection_t *) stream_session_lookup_transport4 (ssm, + &ip40->dst_address, + &ip40->src_address, + tcp0->dst_port, + tcp0->src_port, + SESSION_TYPE_IP4_TCP, + my_thread_index); + } + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + + /* lookup session, same argument order as the ip4 lookup above. + * NOTE(review): confirm against the prototype of + * stream_session_lookup_transport6. */ + tc0 = + (tcp_connection_t *) stream_session_lookup_transport6 (ssm, + &ip60->dst_address, + &ip60->src_address, + tcp0->dst_port, + tcp0->src_port, + SESSION_TYPE_IP6_TCP, + my_thread_index); + } + + /* Session exists */ + if (PREDICT_TRUE (0 != tc0)) + { + /* Save connection index */ + vnet_buffer (b0)->tcp.connection_index = tc0->c_c_index; + vnet_buffer (b0)->tcp.seq_number = + clib_net_to_host_u32 (tcp0->seq_number); + vnet_buffer (b0)->tcp.ack_number = + clib_net_to_host_u32 (tcp0->ack_number); + + flags0 = tcp0->flags & filter_flags; + next0 
= tm->dispatch_table[tc0->state][flags0].next; + error0 = tm->dispatch_table[tc0->state][flags0].error; + + if (PREDICT_FALSE (error0 == TCP_ERROR_DISPATCH)) + { + /* Overload tcp flags to store state */ + vnet_buffer (b0)->tcp.flags = tc0->state; + } + } + else + { + /* Send reset */ + next0 = TCP_INPUT_NEXT_RESET; + error0 = TCP_ERROR_NO_LISTENER; + vnet_buffer (b0)->tcp.flags = 0; + } + + b0->error = error0 ? node->errors[error0] : 0; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_input (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_input (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_input_node) = +{ + .function = tcp4_input, + .name = "tcp4-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_INPUT_NEXT_##s] = n, + foreach_tcp4_input_next +#undef _ + }, + .format_buffer = format_tcp_header, + .format_trace = format_tcp_rx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_input_node, tcp4_input); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_input_node) = +{ + .function = tcp6_input, + .name = "tcp6-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_INPUT_NEXT_##s] = n, + foreach_tcp6_input_next +#undef _ + }, + .format_buffer = format_tcp_header, + .format_trace = format_tcp_rx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_input_node, tcp6_input); +void +tcp_update_time (f64 now, u32 thread_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tw_timer_expire_timers_16t_2w_512sl (&tm->timer_wheels[thread_index], now); +} + +static void +tcp_dispatch_table_init (tcp_main_t * tm) +{ + int i, j; + for (i = 0; i < ARRAY_LEN (tm->dispatch_table); i++) + for (j = 0; j < ARRAY_LEN (tm->dispatch_table[i]); j++) + { + tm->dispatch_table[i][j].next = TCP_INPUT_NEXT_DROP; + tm->dispatch_table[i][j].error = TCP_ERROR_DISPATCH; + } + +#define _(t,f,n,e) \ +do { \ + tm->dispatch_table[TCP_STATE_##t][f].next = (n); \ + tm->dispatch_table[TCP_STATE_##t][f].error = (e); \ +} while (0) + + /* SYNs for new connections -> tcp-listen. */ + _(LISTEN, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE); + /* ACK for for a SYN-ACK -> tcp-rcv-process. */ + _(SYN_RCVD, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + /* SYN-ACK for a SYN */ + _(SYN_SENT, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, + TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_RST, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, + TCP_ERROR_NONE); + /* ACK for for established connection -> tcp-established. */ + _(ESTABLISHED, TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); + /* FIN for for established connection -> tcp-established. 
*/ + _(ESTABLISHED, TCP_FLAG_FIN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); + _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, + TCP_ERROR_NONE); + /* ACK or FIN-ACK to our FIN */ + _(FIN_WAIT_1, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(FIN_WAIT_1, TCP_FLAG_ACK | TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); + /* FIN in reply to our FIN from the other side */ + _(FIN_WAIT_1, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + /* FIN confirming that the peer (app) has closed */ + _(FIN_WAIT_2, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(FIN_WAIT_2, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); + _(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); +#undef _ +} + +clib_error_t * +tcp_input_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + tcp_main_t *tm = vnet_get_tcp_main (); + + if ((error = vlib_call_init_function (vm, tcp_init))) + return error; + + /* Initialize dispatch table. */ + tcp_dispatch_table_init (tm); + + return error; +} + +VLIB_INIT_FUNCTION (tcp_input_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c new file mode 100644 index 00000000..856dffe4 --- /dev/null +++ b/src/vnet/tcp/tcp_newreno.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +void +newreno_congestion (tcp_connection_t * tc) +{ + tc->prev_ssthresh = tc->ssthresh; + tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss); +} + +void +newreno_recovered (tcp_connection_t * tc) +{ + tc->cwnd = tc->ssthresh; +} + +void +newreno_rcv_ack (tcp_connection_t * tc) +{ + if (tcp_in_slowstart (tc)) + { + tc->cwnd += clib_min (tc->snd_mss, tc->bytes_acked); + } + else + { + /* Round up to 1 if needed */ + tc->cwnd += clib_max (tc->snd_mss * tc->snd_mss / tc->cwnd, 1); + } +} + +void +newreno_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type) +{ + if (ack_type == TCP_CC_DUPACK) + { + tc->cwnd += tc->snd_mss; + } + else if (ack_type == TCP_CC_PARTIALACK) + { + tc->cwnd -= tc->bytes_acked; + if (tc->bytes_acked > tc->snd_mss) + tc->bytes_acked += tc->snd_mss; + } +} + +void +newreno_conn_init (tcp_connection_t * tc) +{ + tc->ssthresh = tc->snd_wnd; + tc->cwnd = tcp_initial_cwnd (tc); +} + +const static tcp_cc_algorithm_t tcp_newreno = { + .congestion = newreno_congestion, + .recovered = newreno_recovered, + .rcv_ack = newreno_rcv_ack, + .rcv_cong_ack = newreno_rcv_cong_ack, + .init = newreno_conn_init +}; + +clib_error_t * +newreno_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + + tcp_cc_algo_register (TCP_CC_NEWRENO, &tcp_newreno); + + return error; +} + +VLIB_INIT_FUNCTION (newreno_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c new file mode 100644 index 00000000..dbcf1f74 --- /dev/null +++ b/src/vnet/tcp/tcp_output.c @@ -0,0 +1,1412 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +vlib_node_registration_t tcp4_output_node; +vlib_node_registration_t tcp6_output_node; + +typedef enum _tcp_output_nect +{ + TCP_OUTPUT_NEXT_DROP, + TCP_OUTPUT_NEXT_IP_LOOKUP, + TCP_OUTPUT_N_NEXT +} tcp_output_next_t; + +#define foreach_tcp4_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip4-lookup") + +#define foreach_tcp6_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip6-lookup") + +static char *tcp_error_strings[] = { +#define tcp_error(n,s) s, +#include +#undef tcp_error +}; + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 state; +} tcp_tx_trace_t; + +u16 dummy_mtu = 400; + +u8 * +format_tcp_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + + s = format (s, "TBD\n"); + + return s; +} + +void +tcp_set_snd_mss (tcp_connection_t * tc) +{ + u16 snd_mss; + + /* TODO find our iface MTU */ + snd_mss = dummy_mtu; + + /* TODO cache mss and consider PMTU discovery */ + snd_mss = tc->opt.mss < snd_mss ? tc->opt.mss : snd_mss; + + tc->snd_mss = snd_mss; + + if (tc->snd_mss == 0) + { + clib_warning ("snd mss is 0"); + tc->snd_mss = dummy_mtu; + } +} + +static u8 +tcp_window_compute_scale (u32 available_space) +{ + u8 wnd_scale = 0; + while (wnd_scale < TCP_MAX_WND_SCALE + && (available_space >> wnd_scale) > TCP_WND_MAX) + wnd_scale++; + return wnd_scale; +} + +/** + * Compute initial window and scale factor. 
As per RFC1323, window field in + * SYN and SYN-ACK segments is never scaled. + */ +u32 +tcp_initial_window_to_advertise (tcp_connection_t * tc) +{ + u32 available_space; + + /* Initial wnd for SYN. Fifos are not allocated yet. + * Use some predefined value */ + if (tc->state != TCP_STATE_SYN_RCVD) + { + return TCP_DEFAULT_RX_FIFO_SIZE; + } + + available_space = stream_session_max_enqueue (&tc->connection); + tc->rcv_wscale = tcp_window_compute_scale (available_space); + tc->rcv_wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale); + + return clib_min (tc->rcv_wnd, TCP_WND_MAX); +} + +/** + * Compute and return window to advertise, scaled as per RFC1323 + */ +u32 +tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state) +{ + u32 available_space, wnd, scaled_space; + + if (state != TCP_STATE_ESTABLISHED) + return tcp_initial_window_to_advertise (tc); + + available_space = stream_session_max_enqueue (&tc->connection); + scaled_space = available_space >> tc->rcv_wscale; + + /* Need to update scale */ + if (PREDICT_FALSE ((scaled_space == 0 && available_space != 0)) + || (scaled_space >= TCP_WND_MAX)) + tc->rcv_wscale = tcp_window_compute_scale (available_space); + + wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale); + tc->rcv_wnd = wnd; + + return wnd >> tc->rcv_wscale; +} + +/** + * Write TCP options to segment. 
+ */ +u32 +tcp_options_write (u8 * data, tcp_options_t * opts) +{ + u32 opts_len = 0; + u32 buf, seq_len = 4; + + if (tcp_opts_mss (opts)) + { + *data++ = TCP_OPTION_MSS; + *data++ = TCP_OPTION_LEN_MSS; + buf = clib_host_to_net_u16 (opts->mss); + clib_memcpy (data, &buf, sizeof (opts->mss)); + data += sizeof (opts->mss); + opts_len += TCP_OPTION_LEN_MSS; + } + + if (tcp_opts_wscale (opts)) + { + *data++ = TCP_OPTION_WINDOW_SCALE; + *data++ = TCP_OPTION_LEN_WINDOW_SCALE; + *data++ = opts->wscale; + opts_len += TCP_OPTION_LEN_WINDOW_SCALE; + } + + if (tcp_opts_sack_permitted (opts)) + { + *data++ = TCP_OPTION_SACK_PERMITTED; + *data++ = TCP_OPTION_LEN_SACK_PERMITTED; + opts_len += TCP_OPTION_LEN_SACK_PERMITTED; + } + + if (tcp_opts_tstamp (opts)) + { + *data++ = TCP_OPTION_TIMESTAMP; + *data++ = TCP_OPTION_LEN_TIMESTAMP; + buf = clib_host_to_net_u32 (opts->tsval); + clib_memcpy (data, &buf, sizeof (opts->tsval)); + data += sizeof (opts->tsval); + buf = clib_host_to_net_u32 (opts->tsecr); + clib_memcpy (data, &buf, sizeof (opts->tsecr)); + data += sizeof (opts->tsecr); + opts_len += TCP_OPTION_LEN_TIMESTAMP; + } + + if (tcp_opts_sack (opts)) + { + int i; + u32 n_sack_blocks = clib_min (vec_len (opts->sacks), + TCP_OPTS_MAX_SACK_BLOCKS); + + if (n_sack_blocks != 0) + { + *data++ = TCP_OPTION_SACK_BLOCK; + *data++ = 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; + for (i = 0; i < n_sack_blocks; i++) + { + buf = clib_host_to_net_u32 (opts->sacks[i].start); + clib_memcpy (data, &buf, seq_len); + data += seq_len; + buf = clib_host_to_net_u32 (opts->sacks[i].end); + clib_memcpy (data, &buf, seq_len); + data += seq_len; + } + opts_len += 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; + } + } + + /* Terminate TCP options */ + if (opts_len % 4) + { + *data++ = TCP_OPTION_EOL; + opts_len += TCP_OPTION_LEN_EOL; + } + + /* Pad with zeroes to a u32 boundary */ + while (opts_len % 4) + { + *data++ = TCP_OPTION_NOOP; + opts_len += TCP_OPTION_LEN_NOOP; + } + return opts_len; +} + 
+always_inline int +tcp_make_syn_options (tcp_options_t * opts, u32 initial_wnd) +{ + u8 len = 0; + + opts->flags |= TCP_OPTS_FLAG_MSS; + opts->mss = dummy_mtu; /*XXX discover that */ + len += TCP_OPTION_LEN_MSS; + + opts->flags |= TCP_OPTS_FLAG_WSCALE; + opts->wscale = tcp_window_compute_scale (initial_wnd); + len += TCP_OPTION_LEN_WINDOW_SCALE; + + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = 0; + len += TCP_OPTION_LEN_TIMESTAMP; + + opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + len += TCP_OPTION_LEN_SACK_PERMITTED; + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts) +{ + u8 len = 0; + + opts->flags |= TCP_OPTS_FLAG_MSS; + opts->mss = dummy_mtu; /*XXX discover that */ + len += TCP_OPTION_LEN_MSS; + + if (tcp_opts_wscale (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_WSCALE; + opts->wscale = tc->rcv_wscale; + len += TCP_OPTION_LEN_WINDOW_SCALE; + } + + if (tcp_opts_tstamp (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = tc->tsval_recent; + len += TCP_OPTION_LEN_TIMESTAMP; + } + + if (tcp_opts_sack_permitted (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + len += TCP_OPTION_LEN_SACK_PERMITTED; + } + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_established_options (tcp_connection_t * tc, tcp_options_t * opts) +{ + u8 len = 0; + + opts->flags = 0; + + if (tcp_opts_tstamp (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = tc->tsval_recent; + len += TCP_OPTION_LEN_TIMESTAMP; + } + if (tcp_opts_sack_permitted (&tc->opt)) + { + if (vec_len (tc->snd_sacks)) + { + opts->flags |= TCP_OPTS_FLAG_SACK; + opts->sacks = tc->snd_sacks; + opts->n_sack_blocks = 
vec_len (tc->snd_sacks); + len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks; + } + } + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, + tcp_state_t state) +{ + switch (state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_FIN_WAIT_1: + return tcp_make_established_options (tc, opts); + case TCP_STATE_SYN_RCVD: + return tcp_make_synack_options (tc, opts); + case TCP_STATE_SYN_SENT: + return tcp_make_syn_options (opts, + tcp_initial_window_to_advertise (tc)); + default: + clib_warning ("Not handled!"); + return 0; + } +} + +#define tcp_get_free_buffer_index(tm, bidx) \ +do { \ + u32 *my_tx_buffers, n_free_buffers; \ + u32 cpu_index = tm->vlib_main->cpu_index; \ + my_tx_buffers = tm->tx_buffers[cpu_index]; \ + if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0)) \ + { \ + n_free_buffers = 32; /* TODO config or macro */ \ + vec_validate (my_tx_buffers, n_free_buffers - 1); \ + _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list ( \ + tm->vlib_main, my_tx_buffers, n_free_buffers, \ + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); \ + tm->tx_buffers[cpu_index] = my_tx_buffers; \ + } \ + /* buffer shortage */ \ + if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0)) \ + return; \ + *bidx = my_tx_buffers[_vec_len (my_tx_buffers)-1]; \ + _vec_len (my_tx_buffers) -= 1; \ +} while (0) + +always_inline void +tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) +{ + vlib_buffer_t *it = b; + do + { + it->current_data = 0; + it->current_length = 0; + it->total_length_not_including_first_buffer = 0; + } + while ((it->flags & VLIB_BUFFER_NEXT_PRESENT) + && (it = vlib_get_buffer (vm, it->next_buffer))); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); +} + +/** + * Prepare ACK + */ +void +tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state, + u8 flags) +{ + 
tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + u8 tcp_opts_len, tcp_hdr_opts_len; + tcp_header_t *th; + u16 wnd; + + wnd = tcp_window_to_advertise (tc, state); + + /* Make and write options */ + tcp_opts_len = tcp_make_established_options (tc, snd_opts); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt, + tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd); + + tcp_options_write ((u8 *) (th + 1), snd_opts); + + /* Mark as ACK */ + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; +} + +/** + * Convert buffer to ACK + */ +void +tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_reuse_buffer (vm, b); + tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK); + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; +} + +/** + * Convert buffer to FIN-ACK + */ +void +tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_reuse_buffer (vm, b); + tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK | TCP_FLAG_FIN); + + /* Reset flags, make sure ack is sent */ + tc->flags = TCP_CONN_SNDACK; + vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; + + tc->snd_nxt += 1; +} + +/** + * Convert buffer to SYN-ACK + */ +void +tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + u8 tcp_opts_len, tcp_hdr_opts_len; + tcp_header_t *th; + u16 initial_wnd; + u32 time_now; + + memset (snd_opts, 0, sizeof (*snd_opts)); + + tcp_reuse_buffer (vm, b); + + /* Set random initial sequence */ + time_now = tcp_time_now (); + + tc->iss = random_u32 (&time_now); + tc->snd_una = tc->iss; + tc->snd_nxt = tc->iss + 1; + tc->snd_una_max = tc->snd_nxt; + + initial_wnd = tcp_initial_window_to_advertise 
(tc); + + /* Make and write options */ + tcp_opts_len = tcp_make_synack_options (tc, snd_opts); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss, + tc->rcv_nxt, tcp_hdr_opts_len, + TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd); + + tcp_options_write ((u8 *) (th + 1), snd_opts); + + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; + + /* Init retransmit timer */ + tcp_retransmit_timer_set (tm, tc); +} + +always_inline void +tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4) +{ + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->error = 0; + + /* Default FIB for now */ + vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; + + /* Send to IP lookup */ + next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index; + f = vlib_get_frame_to_node (vm, next_index); + + /* Enqueue the packet */ + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, next_index, f); +} + +int +tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0, + tcp_state_t state, u32 my_thread_index, u8 is_ip4) +{ + u8 tcp_hdr_len = sizeof (tcp_header_t); + ip4_header_t *ih4; + ip6_header_t *ih6; + tcp_header_t *th0; + ip4_address_t src_ip40; + ip6_address_t src_ip60; + u16 src_port0; + u32 tmp; + + /* Find IP and TCP headers */ + if (is_ip4) + { + ih4 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ih4); + } + else + { + ih6 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ih6); + } + + /* Swap src and dst ip */ + if (is_ip4) + { + ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40); + src_ip40.as_u32 = ih4->src_address.as_u32; + ih4->src_address.as_u32 = ih4->dst_address.as_u32; + ih4->dst_address.as_u32 = src_ip40.as_u32; + + /* Chop the end of the pkt */ + b0->current_length += ip4_header_bytes (ih4) + 
tcp_hdr_len; + } + else + { + ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60); + clib_memcpy (&src_ip60, &ih6->src_address, sizeof (ip6_address_t)); + clib_memcpy (&ih6->src_address, &ih6->dst_address, + sizeof (ip6_address_t)); + clib_memcpy (&ih6->dst_address, &src_ip60, sizeof (ip6_address_t)); + + /* Chop the end of the pkt */ + b0->current_length += sizeof (ip6_header_t) + tcp_hdr_len; + } + + /* Try to determine what/why we're actually resetting and swap + * src and dst ports */ + if (state == TCP_STATE_CLOSED) + { + if (!tcp_syn (th0)) + return -1; + + tmp = clib_net_to_host_u32 (th0->seq_number); + + /* Got a SYN for no listener. */ + th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK; + th0->ack_number = clib_host_to_net_u32 (tmp + 1); + th0->seq_number = 0; + + } + else if (state >= TCP_STATE_SYN_SENT) + { + th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK; + th0->seq_number = th0->ack_number; + th0->ack_number = 0; + } + + src_port0 = th0->src_port; + th0->src_port = th0->dst_port; + th0->dst_port = src_port0; + th0->window = 0; + th0->data_offset_and_reserved = (tcp_hdr_len >> 2) << 4; + th0->urgent_pointer = 0; + + /* Compute checksum */ + if (is_ip4) + { + th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih4); + } + else + { + int bogus = ~0; + th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih6, &bogus); + ASSERT (!bogus); + } + + return 0; +} + +/** + * Send reset without reusing existing buffer + */ +void +tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u8 tcp_hdr_len, flags = 0; + tcp_header_t *th, *pkt_th; + u32 seq, ack; + ip4_header_t *ih4, *pkt_ih4; + ip6_header_t *ih6, *pkt_ih6; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + /* Make and write options */ + tcp_hdr_len = sizeof (tcp_header_t); + + 
if (is_ip4) + { + pkt_ih4 = vlib_buffer_get_current (pkt); + pkt_th = ip4_next_header (pkt_ih4); + } + else + { + pkt_ih6 = vlib_buffer_get_current (pkt); + pkt_th = ip6_next_header (pkt_ih6); + } + + if (tcp_ack (pkt_th)) + { + flags = TCP_FLAG_RST; + seq = pkt_th->ack_number; + ack = 0; + } + else + { + flags = TCP_FLAG_RST | TCP_FLAG_ACK; + seq = 0; + ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end); + } + + th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port, + seq, ack, tcp_hdr_len, flags, 0); + + /* Swap src and dst ip */ + if (is_ip4) + { + ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40); + ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address, + &pkt_ih4->src_address, IP_PROTOCOL_TCP); + th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4); + } + else + { + int bogus = ~0; + pkt_ih6 = (ip6_header_t *) (pkt_th - 1); + ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) == + 0x60); + ih6 = + vlib_buffer_push_ip6 (vm, b, &pkt_ih6->dst_address, + &pkt_ih6->src_address, IP_PROTOCOL_TCP); + th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus); + ASSERT (!bogus); + } + + tcp_enqueue_to_ip_lookup (vm, b, bi, is_ip4); +} + +void +tcp_push_ip_hdr (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_header_t *th = vlib_buffer_get_current (b); + + if (tc->c_is_ip4) + { + ip4_header_t *ih; + ih = vlib_buffer_push_ip4 (tm->vlib_main, b, &tc->c_lcl_ip4, + &tc->c_rmt_ip4, IP_PROTOCOL_TCP); + th->checksum = ip4_tcp_udp_compute_checksum (tm->vlib_main, b, ih); + } + else + { + ip6_header_t *ih; + int bogus = ~0; + + ih = vlib_buffer_push_ip6 (tm->vlib_main, b, &tc->c_lcl_ip6, + &tc->c_rmt_ip6, IP_PROTOCOL_TCP); + th->checksum = ip6_tcp_udp_icmp_compute_checksum (tm->vlib_main, b, ih, + &bogus); + ASSERT (!bogus); + } +} + +/** + * Send SYN + * + * Builds a SYN packet for a half-open connection and sends it to ipx_lookup. 
+ * The packet is not forwarded through tcpx_output to avoid doing lookups + * in the half_open pool. + */ +void +tcp_send_syn (tcp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u8 tcp_hdr_opts_len, tcp_opts_len; + tcp_header_t *th; + u32 time_now; + u16 initial_wnd; + tcp_options_t snd_opts; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + /* Set random initial sequence */ + time_now = tcp_time_now (); + + tc->iss = random_u32 (&time_now); + tc->snd_una = tc->iss; + tc->snd_una_max = tc->snd_nxt = tc->iss + 1; + + initial_wnd = tcp_initial_window_to_advertise (tc); + + /* Make and write options */ + memset (&snd_opts, 0, sizeof (snd_opts)); + tcp_opts_len = tcp_make_syn_options (&snd_opts, initial_wnd); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss, + tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN, + initial_wnd); + + tcp_options_write ((u8 *) (th + 1), &snd_opts); + + /* Measure RTT with this */ + tc->rtt_ts = tcp_time_now (); + tc->rtt_seq = tc->snd_nxt; + + /* Start retransmit trimer */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, tc->rto * TCP_TO_TIMER_TICK); + tc->rto_boff = 0; + + /* Set the connection establishment timer */ + tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME); + + tcp_push_ip_hdr (tm, tc, b); + tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); +} + +always_inline void +tcp_enqueue_to_output (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, u8 is_ip4) +{ + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->error = 0; + + /* Decide where to send the packet */ + next_index = is_ip4 ? 
tcp4_output_node.index : tcp6_output_node.index; + f = vlib_get_frame_to_node (vm, next_index); + + /* Enqueue the packet */ + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, next_index, f); +} + +/** + * Send FIN + */ +void +tcp_send_fin (tcp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + tcp_make_finack (tc, b); + + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); +} + +always_inline u8 +tcp_make_state_flags (tcp_state_t next_state) +{ + switch (next_state) + { + case TCP_STATE_ESTABLISHED: + return TCP_FLAG_ACK; + case TCP_STATE_SYN_RCVD: + return TCP_FLAG_SYN | TCP_FLAG_ACK; + case TCP_STATE_SYN_SENT: + return TCP_FLAG_SYN; + case TCP_STATE_LAST_ACK: + case TCP_STATE_FIN_WAIT_1: + return TCP_FLAG_FIN; + default: + clib_warning ("Shouldn't be here!"); + } + return 0; +} + +/** + * Push TCP header and update connection variables + */ +static void +tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, + tcp_state_t next_state) +{ + u32 advertise_wnd, data_len; + u8 tcp_opts_len, tcp_hdr_opts_len, opts_write_len, flags; + tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + tcp_header_t *th; + + data_len = b->current_length; + vnet_buffer (b)->tcp.flags = 0; + + /* Make and write options */ + memset (snd_opts, 0, sizeof (*snd_opts)); + tcp_opts_len = tcp_make_options (tc, snd_opts, next_state); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + /* Get rcv window to advertise */ + advertise_wnd = tcp_window_to_advertise (tc, next_state); + flags = tcp_make_state_flags (next_state); + + /* Push header and options */ + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt, + tc->rcv_nxt, tcp_hdr_opts_len, flags, + advertise_wnd); + + 
opts_write_len = tcp_options_write ((u8 *) (th + 1), snd_opts); + + ASSERT (opts_write_len == tcp_opts_len); + + /* Tag the buffer with the connection index */ + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + + tc->snd_nxt += data_len; +} + +/* Send delayed ACK when timer expires */ +void +tcp_timer_delack_handler (u32 index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 thread_index = os_get_cpu_number (); + tcp_connection_t *tc; + vlib_buffer_t *b; + u32 bi; + + tc = tcp_connection_get (index, thread_index); + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Fill in the ACK */ + tcp_make_ack (tc, b); + + tc->timers[TCP_TIMER_DELACK] = TCP_TIMER_HANDLE_INVALID; + tc->flags &= ~TCP_CONN_DELACK; + + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); +} + +/** Build a retransmit segment + * + * @return the number of bytes in the segment or 0 if there's nothing to + * retransmit + * */ +u32 +tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, + u32 max_bytes) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 n_bytes, offset = 0; + sack_scoreboard_hole_t *hole; + u32 hole_size; + + tcp_reuse_buffer (vm, b); + + ASSERT (tc->state == TCP_STATE_ESTABLISHED); + ASSERT (max_bytes != 0); + + if (tcp_opts_sack_permitted (&tc->opt)) + { + /* XXX get first hole not retransmitted yet */ + hole = scoreboard_first_hole (&tc->sack_sb); + if (!hole) + return 0; + + offset = hole->start - tc->snd_una; + hole_size = hole->end - hole->start; + + ASSERT (hole_size); + + if (hole_size < max_bytes) + max_bytes = hole_size; + } + else + { + if (seq_geq (tc->snd_nxt, tc->snd_una_max)) + return 0; + } + + n_bytes = stream_session_peek_bytes (&tc->connection, + vlib_buffer_get_current (b), offset, + max_bytes); + ASSERT (n_bytes != 0); + + tc->snd_nxt += n_bytes; + tcp_push_hdr_i (tc, b, tc->state); + + return n_bytes; +} + +static void 
+tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 thread_index = os_get_cpu_number (); + tcp_connection_t *tc; + vlib_buffer_t *b; + u32 bi, max_bytes, snd_space; + + if (is_syn) + { + tc = tcp_half_open_connection_get (index); + } + else + { + tc = tcp_connection_get (index, thread_index); + } + + /* Make sure timer handle is set to invalid */ + tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID; + + /* Increment RTO backoff (also equal to number of retries) */ + tc->rto_boff += 1; + + /* Go back to first un-acked byte */ + tc->snd_nxt = tc->snd_una; + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + if (tc->state == TCP_STATE_ESTABLISHED) + { + tcp_fastrecovery_off (tc); + + /* Exponential backoff */ + tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + + /* Figure out what and how many bytes we can send */ + snd_space = tcp_available_snd_space (tc); + max_bytes = clib_min (tc->snd_mss, snd_space); + tcp_prepare_retransmit_segment (tc, b, max_bytes); + + tc->rtx_bytes += max_bytes; + + /* No fancy recovery for now! */ + scoreboard_clear (&tc->sack_sb); + } + else + { + /* Retransmit for SYN/SYNACK */ + ASSERT (tc->state == TCP_STATE_SYN_RCVD + || tc->state == TCP_STATE_SYN_SENT); + + /* Try without increasing RTO a number of times. 
If this fails, + * start growing RTO exponentially */ + if (tc->rto_boff > TCP_RTO_SYN_RETRIES) + tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + tcp_push_hdr_i (tc, b, tc->state); + } + + if (!is_syn) + { + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); + + /* Re-enable retransmit timer */ + tcp_retransmit_timer_set (tm, tc); + } + else + { + ASSERT (tc->state == TCP_STATE_SYN_SENT); + + /* This goes straight to ipx_lookup */ + tcp_push_ip_hdr (tm, tc, b); + tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); + + /* Re-enable retransmit timer */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, + tc->rto * TCP_TO_TIMER_TICK); + } +} + +void +tcp_timer_retransmit_handler (u32 index) +{ + tcp_timer_retransmit_handler_i (index, 0); +} + +void +tcp_timer_retransmit_syn_handler (u32 index) +{ + tcp_timer_retransmit_handler_i (index, 1); +} + +/** + * Retansmit first unacked segment */ +void +tcp_retransmit_first_unacked (tcp_connection_t * tc) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 snd_nxt = tc->snd_nxt; + vlib_buffer_t *b; + u32 bi; + + tc->snd_nxt = tc->snd_una; + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (tm->vlib_main, bi); + + tcp_prepare_retransmit_segment (tc, b, tc->snd_mss); + tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4); + + tc->snd_nxt = snd_nxt; + tc->rtx_bytes += tc->snd_mss; +} + +void +tcp_fast_retransmit (tcp_connection_t * tc) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 snd_space, max_bytes, n_bytes, bi; + vlib_buffer_t *b; + + ASSERT (tcp_in_fastrecovery (tc)); + + clib_warning ("fast retransmit!"); + + /* Start resending from first un-acked segment */ + tc->snd_nxt = tc->snd_una; + + snd_space = tcp_available_snd_space (tc); + + while (snd_space) + { + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (tm->vlib_main, bi); + + max_bytes = clib_min (tc->snd_mss, snd_space); + n_bytes = tcp_prepare_retransmit_segment (tc, b, 
max_bytes); + + /* Nothing left to retransmit */ + if (n_bytes == 0) + return; + + tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4); + + snd_space -= n_bytes; + } + + /* If window allows, send new data */ + tc->snd_nxt = tc->snd_una_max; +} + +always_inline u32 +tcp_session_has_ooo_data (tcp_connection_t * tc) +{ + stream_session_t *s = + stream_session_get (tc->c_s_index, tc->c_thread_index); + return svm_fifo_has_ooo_data (s->server_rx_fifo); +} + +always_inline uword +tcp46_output_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_connection_t *tc0; + tcp_header_t *th0; + u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); + th0 = vlib_buffer_get_current (b0); + + if (is_ip4) + { + ip4_header_t *ih0; + ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, + &tc0->c_rmt_ip4, IP_PROTOCOL_TCP); + th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih0); + } + else + { + ip6_header_t *ih0; + int bogus = ~0; + + ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, + &tc0->c_rmt_ip6, IP_PROTOCOL_TCP); + th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih0, + &bogus); + ASSERT (!bogus); + } + + /* Filter out DUPACKs if there are no OOO segments left */ + if (PREDICT_FALSE + (vnet_buffer 
(b0)->tcp.flags & TCP_BUF_FLAG_DUPACK)) + { + tc0->snt_dupacks--; + ASSERT (tc0->snt_dupacks >= 0); + if (!tcp_session_has_ooo_data (tc0)) + { + error0 = TCP_ERROR_FILTERED_DUPACKS; + next0 = TCP_OUTPUT_NEXT_DROP; + goto done; + } + } + + /* Retransmitted SYNs do reach this but it should be harmless */ + tc0->rcv_las = tc0->rcv_nxt; + + /* Stop DELACK timer and fix flags */ + tc0->flags &= + ~(TCP_CONN_SNDACK | TCP_CONN_DELACK | TCP_CONN_BURSTACK); + if (tcp_timer_is_active (tc0, TCP_TIMER_DELACK)) + { + tcp_timer_reset (tc0, TCP_TIMER_DELACK); + } + + /* If not retransmitting + * 1) update snd_una_max (SYN, SYNACK, new data, FIN) + * 2) If we're not tracking an ACK, start tracking */ + if (seq_lt (tc0->snd_una_max, tc0->snd_nxt)) + { + tc0->snd_una_max = tc0->snd_nxt; + if (tc0->rtt_ts == 0) + { + tc0->rtt_ts = tcp_time_now (); + tc0->rtt_seq = tc0->snd_nxt; + } + } + + /* Set the retransmit timer if not set already and not + * doing a pure ACK */ + if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT) + && tc0->snd_nxt != tc0->snd_una) + { + tcp_retransmit_timer_set (tm, tc0); + tc0->rto_boff = 0; + } + + /* set fib index to default and lookup node */ + /* XXX network virtualization (vrf/vni) */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + + done: + b0->error = error0 != 0 ? 
node->errors[error0] : 0; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_output (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_output (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +VLIB_REGISTER_NODE (tcp4_output_node) = +{ + .function = tcp4_output,.name = "tcp4-output", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32),.n_errors = TCP_N_ERROR,.error_strings = + tcp_error_strings,.n_next_nodes = TCP_OUTPUT_N_NEXT,.next_nodes = + { +#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, + foreach_tcp4_output_next +#undef _ + } +,.format_buffer = format_tcp_header,.format_trace = format_tcp_tx_trace,}; + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_output_node, tcp4_output) +VLIB_REGISTER_NODE (tcp6_output_node) = +{ + .function = tcp6_output,.name = "tcp6-output", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32),.n_errors = TCP_N_ERROR,.error_strings = + tcp_error_strings,.n_next_nodes = TCP_OUTPUT_N_NEXT,.next_nodes = + { +#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, + foreach_tcp6_output_next +#undef _ + } +,.format_buffer = format_tcp_header,.format_trace = format_tcp_tx_trace,}; + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_output_node, tcp6_output) u32 +tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +{ + tcp_connection_t *tc; + + tc = (tcp_connection_t *) tconn; + tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED); + return 0; +} + +typedef enum _tcp_reset_next +{ + TCP_RESET_NEXT_DROP, + TCP_RESET_NEXT_IP_LOOKUP, + TCP_RESET_N_NEXT +} tcp_reset_next_t; + +#define foreach_tcp4_reset_next \ + _(DROP, "error-drop") \ + _(IP_LOOKUP, "ip4-lookup") + +#define foreach_tcp6_reset_next \ + _(DROP, "error-drop") \ + _(IP_LOOKUP, "ip6-lookup") + +static uword +tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, u8 is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (tcp_make_reset_in_place (vm, b0, vnet_buffer (b0)->tcp.flags, + my_thread_index, is_ip4)) + { + error0 = TCP_ERROR_LOOKUP_DROPS; + next0 = TCP_RESET_NEXT_DROP; + goto done; + } + + /* Prepare to send to IP lookup */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; + next0 = TCP_RESET_NEXT_IP_LOOKUP; + + done: + b0->error = error0 != 0 
? node->errors[error0] : 0; + b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static uword +tcp4_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_send_reset_inline (vm, node, from_frame, 1); +} + +static uword +tcp6_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_send_reset_inline (vm, node, from_frame, 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_reset_node) = { + .function = tcp4_send_reset, + .name = "tcp4-reset", + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RESET_N_NEXT, + .next_nodes = { +#define _(s,n) [TCP_RESET_NEXT_##s] = n, + foreach_tcp4_reset_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_reset_node) = { + .function = tcp6_send_reset, + .name = "tcp6-reset", + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RESET_N_NEXT, + .next_nodes = { +#define _(s,n) [TCP_RESET_NEXT_##s] = n, + foreach_tcp6_reset_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_packet.h b/src/vnet/tcp/tcp_packet.h new file mode 100644 index 00000000..866c5fd6 --- /dev/null +++ b/src/vnet/tcp/tcp_packet.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_tcp_packet_h +#define included_tcp_packet_h + +#include + +/* TCP flags bit 0 first. */ +#define foreach_tcp_flag \ + _ (FIN) /**< No more data from sender. */ \ + _ (SYN) /**< Synchronize sequence numbers. */ \ + _ (RST) /**< Reset the connection. */ \ + _ (PSH) /**< Push function. */ \ + _ (ACK) /**< Ack field significant. */ \ + _ (URG) /**< Urgent pointer field significant. */ \ + _ (ECE) /**< ECN-echo. Receiver got CE packet */ \ + _ (CWR) /**< Sender reduced congestion window */ + +enum +{ +#define _(f) TCP_FLAG_BIT_##f, + foreach_tcp_flag +#undef _ + TCP_N_FLAG_BITS, +}; + +enum +{ +#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f, + foreach_tcp_flag +#undef _ +}; + +typedef struct _tcp_header +{ + union + { + struct + { + u16 src_port; /**< Source port. */ + u16 dst_port; /**< Destination port. */ + }; + struct + { + u16 src, dst; + }; + }; + + u32 seq_number; /**< Sequence number of the first data octet in this + * segment, except when SYN is present. If SYN + * is present the seq number is is the ISN and the + * first data octet is ISN+1 */ + u32 ack_number; /**< Acknowledgement number if ACK is set. It contains + * the value of the next sequence number the sender + * of the segment is expecting to receive. */ + u8 data_offset_and_reserved; + u8 flags; /**< Flags: see the macro above */ + u16 window; /**< Number of bytes sender is willing to receive. */ + + u16 checksum; /**< Checksum of TCP pseudo header and data. */ + u16 urgent_pointer; /**< Seq number of the byte after the urgent data. 
*/ +} __attribute__ ((packed)) tcp_header_t; + +/* Flag tests that return 0 or !0 */ +#define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4) +#define tcp_fin(_th) ((_th)->flags & TCP_FLAG_FIN) +#define tcp_syn(_th) ((_th)->flags & TCP_FLAG_SYN) +#define tcp_rst(_th) ((_th)->flags & TCP_FLAG_RST) +#define tcp_psh(_th) ((_th)->flags & TCP_FLAG_PSH) +#define tcp_ack(_th) ((_th)->flags & TCP_FLAG_ACK) +#define tcp_urg(_th) ((_th)->flags & TCP_FLAG_URG) +#define tcp_ece(_th) ((_th)->flags & TCP_FLAG_ECE) +#define tcp_cwr(_th) ((_th)->flags & TCP_FLAG_CWR) + +/* Flag tests that return 0 or 1 */ +#define tcp_is_syn(_th) !!((_th)->flags & TCP_FLAG_SYN) +#define tcp_is_fin(_th) !!((_th)->flags & TCP_FLAG_FIN) + +always_inline int +tcp_header_bytes (tcp_header_t * t) +{ + return tcp_doff (t) * sizeof (u32); +} + +/* + * TCP options. + */ + +typedef enum tcp_option_type +{ + TCP_OPTION_EOL = 0, /**< End of options. */ + TCP_OPTION_NOOP = 1, /**< No operation. */ + TCP_OPTION_MSS = 2, /**< Limit MSS. */ + TCP_OPTION_WINDOW_SCALE = 3, /**< Window scale. */ + TCP_OPTION_SACK_PERMITTED = 4, /**< Selective Ack permitted. */ + TCP_OPTION_SACK_BLOCK = 5, /**< Selective Ack block. */ + TCP_OPTION_TIMESTAMP = 8, /**< Timestamps. */ + TCP_OPTION_UTO = 28, /**< User timeout. */ + TCP_OPTION_AO = 29, /**< Authentication Option. 
*/ +} tcp_option_type_t; + +#define foreach_tcp_options_flag \ + _ (MSS) /**< MSS advertised in SYN */ \ + _ (TSTAMP) /**< Timestamp capability advertised in SYN */ \ + _ (WSCALE) /**< Wnd scale capability advertised in SYN */ \ + _ (SACK_PERMITTED) /**< SACK capability advertised in SYN */ \ + _ (SACK) /**< SACK present */ + +enum +{ +#define _(f) TCP_OPTS_FLAG_BIT_##f, + foreach_tcp_options_flag +#undef _ + TCP_OPTIONS_N_FLAG_BITS, +}; + +enum +{ +#define _(f) TCP_OPTS_FLAG_##f = 1 << TCP_OPTS_FLAG_BIT_##f, + foreach_tcp_options_flag +#undef _ +}; + +typedef struct _sack_block +{ + u32 start; /**< Start sequence number */ + u32 end; /**< End sequence number */ +} sack_block_t; + +typedef struct +{ + u8 flags; /** Option flags, see above */ + + /* Received options */ + u16 mss; /**< Maximum segment size advertised by peer */ + u8 wscale; /**< Window scale advertised by peer */ + u32 tsval; /**< Peer's timestamp value */ + u32 tsecr; /**< Echoed/reflected time stamp */ + sack_block_t *sacks; /**< SACK blocks received */ + u8 n_sack_blocks; /**< Number of SACKs blocks */ +} tcp_options_t; + +/* Flag tests that return 0 or !0 */ +#define tcp_opts_mss(_to) ((_to)->flags & TCP_OPTS_FLAG_MSS) +#define tcp_opts_tstamp(_to) ((_to)->flags & TCP_OPTS_FLAG_TSTAMP) +#define tcp_opts_wscale(_to) ((_to)->flags & TCP_OPTS_FLAG_WSCALE) +#define tcp_opts_sack(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK) +#define tcp_opts_sack_permitted(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK_PERMITTED) + +/* TCP option lengths */ +#define TCP_OPTION_LEN_EOL 1 +#define TCP_OPTION_LEN_NOOP 1 +#define TCP_OPTION_LEN_MSS 4 +#define TCP_OPTION_LEN_WINDOW_SCALE 3 +#define TCP_OPTION_LEN_SACK_PERMITTED 2 +#define TCP_OPTION_LEN_TIMESTAMP 10 +#define TCP_OPTION_LEN_SACK_BLOCK 8 + +#define TCP_WND_MAX 65535U +#define TCP_MAX_WND_SCALE 14 /* See RFC 1323 */ +#define TCP_OPTS_ALIGN 4 +#define TCP_OPTS_MAX_SACK_BLOCKS 3 +#endif /* included_tcp_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + 
* Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_pg.c b/src/vnet/tcp/tcp_pg.c new file mode 100644 index 00000000..dc324049 --- /dev/null +++ b/src/vnet/tcp/tcp_pg.c @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/tcp_pg: TCP packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +/* TCP flags bit 0 first. */ +#define foreach_tcp_flag \ + _ (FIN) \ + _ (SYN) \ + _ (RST) \ + _ (PSH) \ + _ (ACK) \ + _ (URG) \ + _ (ECE) \ + _ (CWR) + +static void +tcp_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, + u32 n_packets) +{ + vlib_main_t * vm = vlib_get_main(); + u32 ip_offset, tcp_offset; + + tcp_offset = g->start_byte_offset; + ip_offset = (g-1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + tcp_header_t * tcp0; + ip_csum_t sum0; + u32 tcp_len0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ASSERT (p0->current_data == 0); + ip0 = (void *) (p0->data + ip_offset); + tcp0 = (void *) (p0->data + tcp_offset); + tcp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); + + /* Initialize checksum with header. */ + if (BITS (sum0) == 32) + { + sum0 = clib_mem_unaligned (&ip0->src_address, u32); + sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32)); + } + else + sum0 = clib_mem_unaligned (&ip0->src_address, u64); + + sum0 = ip_csum_with_carry + (sum0, clib_host_to_net_u32 (tcp_len0 + (ip0->protocol << 16))); + + /* Invalidate possibly old checksum. 
*/ + tcp0->checksum = 0; + + sum0 = ip_incremental_checksum_buffer (vm, p0, tcp_offset, tcp_len0, sum0); + + tcp0->checksum = ~ ip_csum_fold (sum0); + } +} + +typedef struct { + pg_edit_t src, dst; + pg_edit_t seq_number, ack_number; + pg_edit_t data_offset_and_reserved; +#define _(f) pg_edit_t f##_flag; + foreach_tcp_flag +#undef _ + pg_edit_t window; + pg_edit_t checksum; + pg_edit_t urgent_pointer; +} pg_tcp_header_t; + +static inline void +pg_tcp_header_init (pg_tcp_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, tcp_header_t, f); + _ (src); + _ (dst); + _ (seq_number); + _ (ack_number); + _ (window); + _ (checksum); + _ (urgent_pointer); +#undef _ + + /* Initialize bit fields. */ +#define _(f) \ + pg_edit_init_bitfield (&p->f##_flag, tcp_header_t, \ + flags, \ + TCP_FLAG_BIT_##f, 1); + + foreach_tcp_flag +#undef _ + + pg_edit_init_bitfield (&p->data_offset_and_reserved, tcp_header_t, + data_offset_and_reserved, + 4, 4); +} + +uword +unformat_pg_tcp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_tcp_header_t * p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t), + &group_index); + pg_tcp_header_init (p); + + /* Defaults. */ + pg_edit_set_fixed (&p->seq_number, 0); + pg_edit_set_fixed (&p->ack_number, 0); + + pg_edit_set_fixed (&p->data_offset_and_reserved, + sizeof (tcp_header_t) / sizeof (u32)); + + pg_edit_set_fixed (&p->window, 4096); + pg_edit_set_fixed (&p->urgent_pointer, 0); + +#define _(f) pg_edit_set_fixed (&p->f##_flag, 0); + foreach_tcp_flag +#undef _ + + p->checksum.type = PG_EDIT_UNSPECIFIED; + + if (! unformat (input, "TCP: %U -> %U", + unformat_pg_edit, + unformat_tcp_udp_port, &p->src, + unformat_pg_edit, + unformat_tcp_udp_port, &p->dst)) + goto error; + + /* Parse options. 
*/ + while (1) + { + if (unformat (input, "window %U", + unformat_pg_edit, + unformat_pg_number, &p->window)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, + unformat_pg_number, &p->checksum)) + ; + + /* Flags. */ +#define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1); + foreach_tcp_flag +#undef _ + + /* Can't parse input: try next protocol level. */ + else + break; + } + + { + ip_main_t * im = &ip_main; + u16 dst_port; + tcp_udp_port_info_t * pi; + + pi = 0; + if (p->dst.type == PG_EDIT_FIXED) + { + dst_port = pg_edit_get_value (&p->dst, PG_EDIT_LO); + pi = ip_get_tcp_udp_port_info (im, dst_port); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + g->edit_function = tcp_pg_edit_function; + g->edit_function_opaque = 0; + } + + return 1; + } + + error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + diff --git a/src/vnet/tcp/tcp_syn_filter4.c b/src/vnet/tcp/tcp_syn_filter4.c new file mode 100644 index 00000000..c7605a30 --- /dev/null +++ b/src/vnet/tcp/tcp_syn_filter4.c @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +typedef struct +{ + f64 next_reset; + f64 reset_interval; + u8 *syn_counts; +} syn_filter4_runtime_t; + +typedef struct +{ + u32 next_index; + int not_a_syn; + u8 filter_value; +} syn_filter4_trace_t; + +/* packet trace format function */ +static u8 * +format_syn_filter4_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + syn_filter4_trace_t *t = va_arg (*args, syn_filter4_trace_t *); + + s = format (s, "SYN_FILTER4: next index %d, %s", + t->next_index, t->not_a_syn ? "not a syn" : "syn"); + if (t->not_a_syn == 0) + s = format (s, ", filter value %d\n", t->filter_value); + else + s = format (s, "\n"); + return s; +} + +static vlib_node_registration_t syn_filter4_node; + +#define foreach_syn_filter_error \ +_(THROTTLED, "TCP SYN packet throttle drops") \ +_(OK, "TCP SYN packets passed") + +typedef enum +{ +#define _(sym,str) SYN_FILTER_ERROR_##sym, + foreach_syn_filter_error +#undef _ + SYN_FILTER_N_ERROR, +} syn_filter_error_t; + +static char *syn_filter4_error_strings[] = { +#define _(sym,string) string, + foreach_syn_filter_error +#undef _ +}; + +typedef enum +{ + SYN_FILTER_NEXT_DROP, + SYN_FILTER_N_NEXT, +} syn_filter_next_t; + +extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local; + +static uword +syn_filter4_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + syn_filter_next_t next_index; + u32 ok_syn_packets = 0; + vnet_feature_main_t *fm = &feature_main; + u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; + vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index]; + syn_filter4_runtime_t *rt = (syn_filter4_runtime_t *) node->runtime_data; + f64 now = vlib_time_now (vm); + /* Shut up spurious gcc warnings. 
*/ + u8 *c0 = 0, *c1 = 0, *c2 = 0, *c3 = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (now > rt->next_reset) + { + memset (rt->syn_counts, 0, vec_len (rt->syn_counts)); + rt->next_reset = now + rt->reset_interval; + } + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + ip4_header_t *ip0, *ip1, *ip2, *ip3; + tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3; + u32 not_a_syn0 = 1, not_a_syn1 = 1, not_a_syn2 = 1, not_a_syn3 = 1; + u64 hash0, hash1, hash2, hash3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + vnet_get_config_data + (&cm->config_main, &b0->current_config_index, + &next0, 0 /* sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b1->current_config_index, + &next1, 0 /* 
sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b2->current_config_index, + &next2, 0 /* sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b3->current_config_index, + &next3, 0 /* sizeof (c0[0]) */ ); + + /* Not TCP? */ + ip0 = vlib_buffer_get_current (b0); + if (ip0->protocol != IP_PROTOCOL_TCP) + goto trace00; + + tcp0 = ip4_next_header (ip0); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp0->flags & 0x2))) + goto trace00; + + not_a_syn0 = 0; + hash0 = clib_xxhash ((u64) ip0->src_address.as_u32); + c0 = &rt->syn_counts[hash0 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c0 >= 0x80)) + { + next0 = SYN_FILTER_NEXT_DROP; + b0->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace00; + } + *c0 += 1; + ok_syn_packets++; + + trace00: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->not_a_syn = not_a_syn0; + t->next_index = next0; + t->filter_value = not_a_syn0 ? 0 : *c0; + } + + /* Not TCP? */ + ip1 = vlib_buffer_get_current (b1); + if (ip1->protocol != IP_PROTOCOL_TCP) + goto trace01; + + tcp1 = ip4_next_header (ip1); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. 
+ */ + if (PREDICT_TRUE (!(tcp1->flags & 0x2))) + goto trace01; + + not_a_syn1 = 0; + hash1 = clib_xxhash ((u64) ip1->src_address.as_u32); + c1 = &rt->syn_counts[hash1 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c1 >= 0x80)) + { + next1 = SYN_FILTER_NEXT_DROP; + b1->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace01; + } + *c1 += 1; + ok_syn_packets++; + + trace01: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->not_a_syn = not_a_syn1; + t->next_index = next1; + t->filter_value = not_a_syn1 ? 0 : *c1; + } + + /* Not TCP? */ + ip2 = vlib_buffer_get_current (b2); + if (ip2->protocol != IP_PROTOCOL_TCP) + goto trace02; + + tcp2 = ip4_next_header (ip2); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp2->flags & 0x2))) + goto trace02; + + not_a_syn2 = 0; + hash2 = clib_xxhash ((u64) ip2->src_address.as_u32); + c2 = &rt->syn_counts[hash2 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c2 >= 0x80)) + { + next2 = SYN_FILTER_NEXT_DROP; + b2->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace02; + } + *c2 += 1; + ok_syn_packets++; + + trace02: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b2->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b2, sizeof (*t)); + t->not_a_syn = not_a_syn2; + t->next_index = next2; + t->filter_value = not_a_syn2 ? 0 : *c2; + } + + /* Not TCP? */ + ip3 = vlib_buffer_get_current (b3); + if (ip3->protocol != IP_PROTOCOL_TCP) + goto trace03; + + tcp3 = ip4_next_header (ip3); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. 
+ */ + if (PREDICT_TRUE (!(tcp3->flags & 0x2))) + goto trace03; + + not_a_syn3 = 0; + hash3 = clib_xxhash ((u64) ip3->src_address.as_u32); + c3 = &rt->syn_counts[hash3 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c3 >= 0x80)) + { + next3 = SYN_FILTER_NEXT_DROP; + b3->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace03; + } + *c3 += 1; + ok_syn_packets++; + + trace03: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b3->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b3, sizeof (*t)); + t->not_a_syn = not_a_syn3; + t->next_index = next3; + t->filter_value = not_a_syn3 ? 0 : *c3; + } + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + ip4_header_t *ip0; + tcp_header_t *tcp0; + u32 not_a_syn0 = 1; + u32 hash0; + u8 *c0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + vnet_get_config_data + (&cm->config_main, &b0->current_config_index, + &next0, 0 /* sizeof (c0[0]) */ ); + + /* Not TCP? */ + ip0 = vlib_buffer_get_current (b0); + if (ip0->protocol != IP_PROTOCOL_TCP) + goto trace0; + + tcp0 = ip4_next_header (ip0); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. 
+ */ + if (PREDICT_TRUE (!(tcp0->flags & 0x2))) + goto trace0; + + not_a_syn0 = 0; + hash0 = clib_xxhash ((u64) ip0->src_address.as_u32); + c0 = &rt->syn_counts[hash0 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c0 >= 0x80)) + { + next0 = SYN_FILTER_NEXT_DROP; + b0->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace0; + } + *c0 += 1; + ok_syn_packets++; + + trace0: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->not_a_syn = not_a_syn0; + t->next_index = next0; + t->filter_value = not_a_syn0 ? 0 : *c0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, syn_filter4_node.index, + SYN_FILTER_ERROR_OK, ok_syn_packets); + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (syn_filter4_node, static) = +{ + .function = syn_filter4_node_fn, + .name = "syn-filter-4", + .vector_size = sizeof (u32), + .format_trace = format_syn_filter4_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .runtime_data_bytes = sizeof (syn_filter4_runtime_t), + .n_errors = ARRAY_LEN(syn_filter4_error_strings), + .error_strings = syn_filter4_error_strings, + + .n_next_nodes = SYN_FILTER_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SYN_FILTER_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (syn_filter4_node, syn_filter4_node_fn); + +/* *INDENT-OFF* */ +VNET_FEATURE_INIT (syn_filter_4, static) = +{ + .arc_name = "ip4-local", + .node_name = "syn-filter-4", + .runs_before = VNET_FEATURES("ip4-local-end-of-arc"), +}; +/* *INDENT-ON* */ + +int +syn_filter_enable_disable (u32 sw_if_index, int enable_disable) +{ + vnet_main_t *vnm = vnet_get_main 
(); + vnet_sw_interface_t *sw; + int rv = 0; + + /* Utterly wrong? */ + if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + /* Not a physical port? */ + sw = vnet_get_sw_interface (vnm, sw_if_index); + if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (enable_disable) + { + vlib_main_t *vm = vlib_get_main (); + syn_filter4_runtime_t *rt; + + rt = vlib_node_get_runtime_data (vm, syn_filter4_node.index); + vec_validate (rt->syn_counts, 1023); + /* + * Given perfect dispersion / optimal hashing results: + * Allow 128k (successful) syns/sec: 1024 buckets, each of which + * absorbs 128 syns before filtering. Reset table once a second. + * Reality bites, let's try resetting once every 100ms. + */ + rt->reset_interval = 0.1; /* reset interval in seconds */ + } + + rv = vnet_feature_enable_disable ("ip4-local", "syn-filter-4", + sw_if_index, enable_disable, 0, 0); + + return rv; +} + +static clib_error_t * +syn_filter_enable_disable_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + u32 sw_if_index = ~0; + int enable_disable = 1; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "disable")) + enable_disable = 0; + else if (unformat (input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "Please specify an interface..."); + + rv = syn_filter_enable_disable (sw_if_index, enable_disable); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return + (0, "Invalid interface, only works on physical ports"); + break; + + case VNET_API_ERROR_UNIMPLEMENTED: + return clib_error_return (0, + "Device driver doesn't support redirection"); + break; + + case VNET_API_ERROR_INVALID_VALUE: + return
clib_error_return (0, "feature arc not found"); + + case VNET_API_ERROR_INVALID_VALUE_2: + return clib_error_return (0, "feature node not found"); + + default: + return clib_error_return (0, "syn_filter_enable_disable returned %d", + rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_content_command, static) = +{ + .path = "ip syn filter", + .short_help = "ip syn filter [disable]", + .function = syn_filter_enable_disable_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_timer.h b/src/vnet/tcp/tcp_timer.h new file mode 100644 index 00000000..fa25268c --- /dev/null +++ b/src/vnet/tcp/tcp_timer.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_tcp_timer_h__ +#define __included_tcp_timer_h__ + +#include +#include + +#endif /* __included_tcp_timer_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/builtin_server.c b/src/vnet/udp/builtin_server.c new file mode 100644 index 00000000..afa66ba4 --- /dev/null +++ b/src/vnet/udp/builtin_server.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** @file + udp builtin server +*/ + +#include +#include +#include + +/** per-worker built-in server copy buffers */ +u8 **copy_buffers; + +static int +builtin_session_create_callback (stream_session_t * s) +{ + /* Simple version: declare session ready-to-go... */ + s->session_state = SESSION_STATE_READY; + return 0; +} + +static void +builtin_session_disconnect_callback (stream_session_t * s) +{ + stream_session_disconnect (s); +} + +static int +builtin_server_rx_callback (stream_session_t * s) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + u32 this_transfer; + int actual_transfer; + u8 *my_copy_buffer; + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + + my_copy_buffer = copy_buffers[s->thread_index]; + rx_fifo = s->server_rx_fifo; + tx_fifo = s->server_tx_fifo; + + this_transfer = svm_fifo_max_enqueue (tx_fifo) + < svm_fifo_max_dequeue (rx_fifo) ? 
+ svm_fifo_max_enqueue (tx_fifo) : svm_fifo_max_dequeue (rx_fifo); + + vec_validate (my_copy_buffer, this_transfer - 1); + _vec_len (my_copy_buffer) = this_transfer; + + actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, 0, this_transfer, + my_copy_buffer); + ASSERT (actual_transfer == this_transfer); + actual_transfer = svm_fifo_enqueue_nowait (tx_fifo, 0, this_transfer, + my_copy_buffer); + + copy_buffers[s->thread_index] = my_copy_buffer; + + /* Fabricate TX event, send to ourselves */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = actual_transfer; + evt.event_id = 0; + q = session_manager_get_vpp_event_queue (s->thread_index); + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); + + return 0; +} + +/* *INDENT-OFF* */ +static session_cb_vft_t builtin_server = { + .session_accept_callback = builtin_session_create_callback, + .session_disconnect_callback = builtin_session_disconnect_callback, + .builtin_server_rx_callback = builtin_server_rx_callback +}; +/* *INDENT-ON* */ + +static int +bind_builtin_uri_server (u8 * uri) +{ + vnet_bind_args_t _a, *a = &_a; + char segment_name[128]; + u32 segment_name_length; + int rv; + u64 options[16]; + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + memset (options, 0, sizeof (options)); + + a->uri = (char *) uri; + a->api_client_index = ~0; /* built-in server */ + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &builtin_server; + + options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678; + options[SESSION_OPTIONS_SEGMENT_SIZE] = (2 << 30); /*$$$$ config / arg */ + a->options = options; + + rv = vnet_bind_uri (a); + + return rv; +} + +static int +unbind_builtin_uri_server (u8 * uri) +{ + int rv; + + rv = vnet_unbind_uri ((char *) uri, ~0 /* client_index */ ); + + return rv; +} + +static clib_error_t * +builtin_server_init (vlib_main_t * vm) +{ + 
vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + + num_threads = 1 /* main thread */ + vtm->n_threads; + + vec_validate (copy_buffers, num_threads - 1); + return 0; +} + +VLIB_INIT_FUNCTION (builtin_server_init); + +static clib_error_t * +builtin_uri_bind_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *uri = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "uri %s", &uri)) + ; + else + break; + } + + if (uri == 0) + return clib_error_return (0, "uri to bind not specified..."); + + rv = bind_builtin_uri_server (uri); + + vec_free (uri); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "bind_uri_server returned %d", rv); + break; + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (builtin_uri_bind_command, static) = +{ + .path = "builtin uri bind", + .short_help = "builtin uri bind", + .function = builtin_uri_bind_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +builtin_uri_unbind_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *uri = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "uri %s", &uri)) + ; + else + break; + } + + if (uri == 0) + return clib_error_return (0, "uri to unbind not specified..."); + + rv = unbind_builtin_uri_server (uri); + + vec_free (uri); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "unbind_uri_server returned %d", rv); + break; + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (builtin_uri_unbind_command, static) = +{ + .path = "builtin uri unbind", + .short_help = "builtin uri unbind", + .function = builtin_uri_unbind_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp.c 
b/src/vnet/udp/udp.c new file mode 100644 index 00000000..9e740466 --- /dev/null +++ b/src/vnet/udp/udp.c @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** @file + udp state machine, etc. +*/ + +#include +#include +#include +#include + +udp_uri_main_t udp_uri_main; + +u32 +udp_session_bind_ip4 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_number_host_byte_order) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + udp_connection_t *listener; + + pool_get (um->udp_listeners, listener); + memset (listener, 0, sizeof (udp_connection_t)); + listener->c_lcl_port = clib_host_to_net_u16 (port_number_host_byte_order); + listener->c_lcl_ip4.as_u32 = ip->ip4.as_u32; + listener->c_proto = SESSION_TYPE_IP4_UDP; + udp_register_dst_port (um->vlib_main, port_number_host_byte_order, + udp4_uri_input_node.index, 1 /* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_bind_ip6 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_number_host_byte_order) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + udp_connection_t *listener; + + pool_get (um->udp_listeners, listener); + listener->c_lcl_port = clib_host_to_net_u16 (port_number_host_byte_order); + clib_memcpy (&listener->c_lcl_ip6, &ip->ip6, sizeof (ip6_address_t)); + listener->c_proto = SESSION_TYPE_IP6_UDP; + udp_register_dst_port (um->vlib_main, port_number_host_byte_order, + udp4_uri_input_node.index, 0 
/* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_unbind_ip4 (vlib_main_t * vm, u32 listener_index) +{ + udp_connection_t *listener; + listener = udp_listener_get (listener_index); + + /* deregister the udp_local mapping */ + udp_unregister_dst_port (vm, listener->c_lcl_port, 1 /* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_unbind_ip6 (vlib_main_t * vm, u32 listener_index) +{ + udp_connection_t *listener; + + listener = udp_listener_get (listener_index); + + /* deregister the udp_local mapping */ + udp_unregister_dst_port (vm, listener->c_lcl_port, 0 /* is_ipv4 */ ); + return 0; +} + +transport_connection_t * +udp_session_get_listener (u32 listener_index) +{ + udp_connection_t *us; + + us = udp_listener_get (listener_index); + return &us->connection; +} + +u32 +udp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +{ + udp_connection_t *us; + u8 *data; + udp_header_t *udp; + + us = (udp_connection_t *) tconn; + + if (tconn->is_ip4) + { + ip4_header_t *ip; + + data = vlib_buffer_get_current (b); + udp = (udp_header_t *) (data - sizeof (udp_header_t)); + ip = (ip4_header_t *) ((u8 *) udp - sizeof (ip4_header_t)); + + /* Build packet header, swap rx key src + dst fields */ + ip->src_address.as_u32 = us->c_lcl_ip4.as_u32; + ip->dst_address.as_u32 = us->c_rmt_ip4.as_u32; + ip->ip_version_and_header_length = 0x45; + ip->ttl = 254; + ip->protocol = IP_PROTOCOL_UDP; + ip->length = clib_host_to_net_u16 (b->current_length + sizeof (*udp)); + ip->checksum = ip4_header_checksum (ip); + + udp->src_port = us->c_lcl_port; + udp->dst_port = us->c_rmt_port; + udp->length = clib_host_to_net_u16 (b->current_length); + udp->checksum = 0; + + b->current_length = sizeof (*ip) + sizeof (*udp); + return SESSION_QUEUE_NEXT_IP4_LOOKUP; + } + else + { + vlib_main_t *vm = vlib_get_main (); + ip6_header_t *ip; + u16 payload_length; + int bogus = ~0; + + data = vlib_buffer_get_current (b); + udp = (udp_header_t *) (data - sizeof (udp_header_t)); + ip = (ip6_header_t *) ((u8 
*) udp - sizeof (ip6_header_t)); + + /* Build packet header, swap rx key src + dst fields */ + clib_memcpy (&ip->src_address, &us->c_lcl_ip6, sizeof (ip6_address_t)); + clib_memcpy (&ip->dst_address, &us->c_rmt_ip6, sizeof (ip6_address_t)); + + ip->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + ip->hop_limit = 0xff; + ip->protocol = IP_PROTOCOL_UDP; + + payload_length = vlib_buffer_length_in_chain (vm, b); + payload_length -= sizeof (*ip); + + ip->payload_length = clib_host_to_net_u16 (payload_length); + + udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip, &bogus); + ASSERT (!bogus); + + udp->src_port = us->c_lcl_port; + udp->dst_port = us->c_rmt_port; + udp->length = clib_host_to_net_u16 (b->current_length); + udp->checksum = 0; + + b->current_length = sizeof (*ip) + sizeof (*udp); + + return SESSION_QUEUE_NEXT_IP6_LOOKUP; + } +} + +transport_connection_t * +udp_session_get (u32 connection_index, u32 my_thread_index) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + + udp_connection_t *us; + us = + pool_elt_at_index (um->udp_sessions[my_thread_index], connection_index); + return &us->connection; +} + +void +udp_session_close (u32 connection_index, u32 my_thread_index) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + pool_put_index (um->udp_sessions[my_thread_index], connection_index); +} + +u8 * +format_udp_session_ip4 (u8 * s, va_list * args) +{ + u32 uci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + udp_connection_t *u4; + + u4 = udp_connection_get (uci, thread_index); + + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip4_address, + &u4->c_lcl_ip4, clib_net_to_host_u16 (u4->c_lcl_port), + format_ip4_address, &u4->c_rmt_ip4, + clib_net_to_host_u16 (u4->c_rmt_port)); + return s; +} + +u8 * +format_udp_session_ip6 (u8 * s, va_list * args) +{ + u32 uci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + udp_connection_t *tc = udp_connection_get (uci, thread_index); + s = format 
(s, "[%s] %U:%d->%U:%d", "udp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_udp_listener_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + udp_connection_t *tc = udp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_udp_listener_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + udp_connection_t *tc = udp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u16 +udp_send_mss_uri (transport_connection_t * t) +{ + /* TODO figure out MTU of output interface */ + return 400; +} + +u32 +udp_send_space_uri (transport_connection_t * t) +{ + /* No constraint on TX window */ + return ~0; +} + +int +udp_open_connection (ip46_address_t * addr, u16 port) +{ + clib_warning ("Not implemented"); + return 0; +} + +/* *INDENT-OFF* */ +const static transport_proto_vft_t udp4_proto = { + .bind = udp_session_bind_ip4, + .open = udp_open_connection, + .unbind = udp_session_unbind_ip4, + .push_header = udp_push_header, + .get_connection = udp_session_get, + .get_listener = udp_session_get_listener, + .close = udp_session_close, + .send_mss = udp_send_mss_uri, + .send_space = udp_send_space_uri, + .format_connection = format_udp_session_ip4, + .format_listener = format_udp_listener_session_ip4 +}; + +const static transport_proto_vft_t udp6_proto = { + .bind = udp_session_bind_ip6, + .open = udp_open_connection, + .unbind = udp_session_unbind_ip6, + .push_header = udp_push_header, + .get_connection = udp_session_get, 
+ .get_listener = udp_session_get_listener, + .close = udp_session_close, + .send_mss = udp_send_mss_uri, + .send_space = udp_send_space_uri, + .format_connection = format_udp_session_ip6, + .format_listener = format_udp_listener_session_ip6 +}; +/* *INDENT-ON* */ + +static clib_error_t * +udp_init (vlib_main_t * vm) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + ip_main_t *im = &ip_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 num_threads; + clib_error_t *error = 0; + ip_protocol_info_t *pi; + + um->vlib_main = vm; + um->vnet_main = vnet_get_main (); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + /* + * Registrations + */ + + /* IP registration */ + pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP); + if (pi == 0) + return clib_error_return (0, "UDP protocol info AWOL"); + pi->format_header = format_udp_header; + pi->unformat_pg_edit = unformat_pg_udp_header; + + + /* Register as transport with URI */ + session_register_transport (SESSION_TYPE_IP4_UDP, &udp4_proto); + session_register_transport (SESSION_TYPE_IP6_UDP, &udp6_proto); + + /* + * Initialize data structures + */ + + num_threads = 1 /* main thread */ + tm->n_threads; + vec_validate (um->udp_sessions, num_threads - 1); + + return error; +} + +VLIB_INIT_FUNCTION (udp_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h new file mode 100644 index 00000000..7ab26ce9 --- /dev/null +++ b/src/vnet/udp/udp.h @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_udp_h__ +#define __included_udp_h__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +typedef struct +{ + transport_connection_t connection; /** must be first */ + + /** ersatz MTU to limit fifo pushes to test data size */ + u32 mtu; +} udp_connection_t; + +typedef struct _udp_uri_main +{ + /* Per-worker thread udp connection pools */ + udp_connection_t **udp_sessions; + udp_connection_t *udp_listeners; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ip4_main_t *ip4_main; + ip6_main_t *ip6_main; +} udp_uri_main_t; + +extern udp_uri_main_t udp_uri_main; +extern vlib_node_registration_t udp4_uri_input_node; + +always_inline udp_uri_main_t * +vnet_get_udp_main () +{ + return &udp_uri_main; +} + +always_inline udp_connection_t * +udp_connection_get (u32 conn_index, u32 thread_index) +{ + return pool_elt_at_index (udp_uri_main.udp_sessions[thread_index], + conn_index); +} + +always_inline udp_connection_t * +udp_listener_get (u32 conn_index) +{ + return pool_elt_at_index (udp_uri_main.udp_listeners, conn_index); +} + +typedef enum +{ +#define udp_error(n,s) UDP_ERROR_##n, +#include +#undef udp_error + UDP_N_ERROR, +} udp_error_t; + +#define foreach_udp4_dst_port \ +_ (67, dhcp_to_server) \ +_ (68, dhcp_to_client) \ +_ (500, ikev2) \ +_ (3784, bfd4) \ +_ (3785, bfd_echo4) \ +_ (4341, lisp_gpe) \ +_ (4342, lisp_cp) \ +_ (4739, ipfix) \ +_ (4789, vxlan) \ +_ (4789, vxlan6) \ +_ (4790, vxlan_gpe) \ +_ (6633, vpath_3) + + +#define foreach_udp6_dst_port \ +_ 
(547, dhcpv6_to_server) \ +_ (546, dhcpv6_to_client) \ +_ (3784, bfd6) \ +_ (3785, bfd_echo6) \ +_ (4341, lisp_gpe6) \ +_ (4342, lisp_cp6) \ +_ (4790, vxlan6_gpe) \ +_ (6633, vpath6_3) + +typedef enum +{ +#define _(n,f) UDP_DST_PORT_##f = n, + foreach_udp4_dst_port foreach_udp6_dst_port +#undef _ +} udp_dst_port_t; + +typedef enum +{ +#define _(n,f) UDP6_DST_PORT_##f = n, + foreach_udp6_dst_port +#undef _ +} udp6_dst_port_t; + +typedef struct +{ + /* Name (a c string). */ + char *name; + + /* UDP destination port in host byte order. */ + udp_dst_port_t dst_port; + + /* Node which handles this destination port. */ + u32 node_index; + + /* Next index for this destination port. */ + u32 next_index; +} udp_dst_port_info_t; + +typedef enum +{ + UDP_IP6 = 0, + UDP_IP4, /* the code is full of is_ip4... */ + N_UDP_AF, +} udp_af_t; + +typedef struct +{ + udp_dst_port_info_t *dst_port_infos[N_UDP_AF]; + + /* Hash tables mapping name/dst port to dst port info index. */ + uword *dst_port_info_by_name[N_UDP_AF]; + uword *dst_port_info_by_dst_port[N_UDP_AF]; + + /* convenience */ + vlib_main_t *vlib_main; +} udp_main_t; + +always_inline udp_dst_port_info_t * +udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4) +{ + uword *p = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port); + return p ?
vec_elt_at_index (um->dst_port_infos[is_ip4], p[0]) : 0; +} + +format_function_t format_udp_header; +format_function_t format_udp_rx_trace; + +unformat_function_t unformat_udp_header; + +void udp_register_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, + u32 node_index, u8 is_ip4); + +void +udp_unregister_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, u8 is_ip4); + +void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); + +always_inline void +ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) +{ + u16 new_l0; + udp_header_t *udp0; + + if (is_ip4) + { + ip4_header_t *ip0; + ip_csum_t sum0; + u16 old_l0 = 0; + + ip0 = vlib_buffer_get_current (b0); + + /* fix the outer-IP checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + udp0->length = new_l0; + } + else + { + ip6_header_t *ip0; + int bogus0; + + ip0 = vlib_buffer_get_current (b0); + + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + ip0->payload_length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp0->length = new_l0; + + udp0->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); + ASSERT (bogus0 == 0); + + if (udp0->checksum == 0) + udp0->checksum = 0xffff; + } +} + +always_inline void +ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, + u8 is_ip4) +{ + vlib_buffer_advance (b0, -ec_len); + + if (is_ip4) + { + ip4_header_t *ip0; + + ip0 = vlib_buffer_get_current (b0); + + /* Apply the encap string.
*/ + clib_memcpy (ip0, ec0, ec_len); + ip_udp_fixup_one (vm, b0, 1); + } + else + { + ip6_header_t *ip0; + + ip0 = vlib_buffer_get_current (b0); + + /* Apply the encap string. */ + clib_memcpy (ip0, ec0, ec_len); + ip_udp_fixup_one (vm, b0, 0); + } +} + +always_inline void +ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, + u8 * ec0, u8 * ec1, word ec_len, u8 is_v4) +{ + u16 new_l0, new_l1; + udp_header_t *udp0, *udp1; + + ASSERT (_vec_len (ec0) == _vec_len (ec1)); + + vlib_buffer_advance (b0, -ec_len); + vlib_buffer_advance (b1, -ec_len); + + if (is_v4) + { + ip4_header_t *ip0, *ip1; + ip_csum_t sum0, sum1; + u16 old_l0 = 0, old_l1 = 0; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Apply the encap string */ + clib_memcpy (ip0, ec0, ec_len); + clib_memcpy (ip1, ec1, ec_len); + + /* fix the ing outer-IP checksum */ + sum0 = ip0->checksum; + sum1 = ip1->checksum; + + /* old_l0 always 0, see the rewrite setup */ + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */ ); + sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, + length /* changed member */ ); + + ip0->checksum = ip_csum_fold (sum0); + ip1->checksum = ip_csum_fold (sum1); + + ip0->length = new_l0; + ip1->length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp1 = (udp_header_t *) (ip1 + 1); + + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - + sizeof (*ip0)); + new_l1 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - + sizeof (*ip1)); + udp0->length = new_l0; + udp1->length = new_l1; + } + else + { + ip6_header_t *ip0, *ip1; + int bogus0, bogus1; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Apply the encap string. 
*/ + clib_memcpy (ip0, ec0, ec_len); + clib_memcpy (ip1, ec1, ec_len); + + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) + - sizeof (*ip1)); + ip0->payload_length = new_l0; + ip1->payload_length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp1 = (udp_header_t *) (ip1 + 1); + + udp0->length = new_l0; + udp1->length = new_l1; + + udp0->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); + udp1->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip1, &bogus1); + ASSERT (bogus0 == 0); + ASSERT (bogus1 == 0); + + if (udp0->checksum == 0) + udp0->checksum = 0xffff; + if (udp1->checksum == 0) + udp1->checksum = 0xffff; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif /* __included_udp_h__ */ diff --git a/src/vnet/udp/udp_error.def b/src/vnet/udp/udp_error.def new file mode 100644 index 00000000..bfdae0ac --- /dev/null +++ b/src/vnet/udp/udp_error.def @@ -0,0 +1,21 @@ +/* + * udp_error.def: udp errors + * + * Copyright (c) 2013-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +udp_error (NONE, "no error") +udp_error (NO_LISTENER, "no listener for dst port") +udp_error (LENGTH_ERROR, "UDP packets with length errors") +udp_error (PUNT, "no listener punt") diff --git a/src/vnet/udp/udp_format.c b/src/vnet/udp/udp_format.c new file mode 100644 index 00000000..abdf561e --- /dev/null +++ b/src/vnet/udp/udp_format.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/udp_format.c: udp formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +/* Format UDP header. */ +u8 * +format_udp_header (u8 * s, va_list * args) +{ + udp_header_t *udp = va_arg (*args, udp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + uword indent; + u32 header_bytes = sizeof (udp[0]); + + /* Nothing to do. */ + if (max_header_bytes < sizeof (udp[0])) + return format (s, "UDP header truncated"); + + indent = format_get_indent (s); + indent += 2; + + s = format (s, "UDP: %d -> %d", + clib_net_to_host_u16 (udp->src_port), + clib_net_to_host_u16 (udp->dst_port)); + + s = format (s, "\n%Ulength %d, checksum 0x%04x", + format_white_space, indent, + clib_net_to_host_u16 (udp->length), + clib_net_to_host_u16 (udp->checksum)); + + /* Recurse into next protocol layer. */ + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ip_main_t *im = &ip_main; + tcp_udp_port_info_t *pi; + + pi = ip_get_tcp_udp_port_info (im, udp->dst_port); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", + format_white_space, indent - 2, pi->format_header, + /* next protocol header */ (udp + 1), + max_header_bytes - sizeof (udp[0])); + } + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c new file mode 100644 index 00000000..4d509335 --- /dev/null +++ b/src/vnet/udp/udp_input.c @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include "../session/application_interface.h"
+
+vlib_node_registration_t udp4_uri_input_node;
+
+/* Per-packet trace record captured by udp4-uri-input. */
+typedef struct
+{
+  u32 session;			/* session pool index, or ~0 when no session matched */
+  u32 disposition;		/* the error0 value chosen for the packet */
+  u32 thread_index;		/* thread that processed the packet */
+} udp4_uri_input_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_udp4_uri_input_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  udp4_uri_input_trace_t *t = va_arg (*args, udp4_uri_input_trace_t *);
+
+  s = format (s, "UDP4_URI_INPUT: session %d, disposition %d, thread %d",
+              t->session, t->disposition, t->thread_index);
+  return s;
+}
+
+/* Next-node indices; every packet leaves through the drop arc. */
+typedef enum
+{
+  UDP4_URI_INPUT_NEXT_DROP,
+  UDP4_URI_INPUT_N_NEXT,
+} udp4_uri_input_next_t;
+
+/* Counter strings, generated from the session-layer error list. */
+static char *udp4_uri_input_error_strings[] = {
+#define _(sym,string) string,
+  foreach_session_input_error
+#undef _
+};
+
+/*
+ * udp4-uri-input node function.
+ *
+ * For each buffer: locate the UDP and IPv4 headers behind the current
+ * data pointer, look up a session for the 4-tuple, and then either
+ *   - copy the UDP payload into the session's rx fifo (state READY), or
+ *   - create a udp transport session and ask the session layer to accept
+ *     it (state LISTENING).
+ * All buffers are sent to the drop next node; b0->error records the
+ * disposition.  After the frame is processed, one rx notification per
+ * session that received data is delivered, either through the builtin
+ * rx callback or through the server's event queue.
+ *
+ * Returns the number of buffers in the input frame.
+ */
+static uword
+udp4_uri_input_node_fn (vlib_main_t * vm,
+                        vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  udp4_uri_input_next_t next_index;
+  udp_uri_main_t *um = vnet_get_udp_main ();
+  session_manager_main_t *smm = vnet_get_session_manager_main ();
+  u32 my_thread_index = vm->cpu_index;
+  u8 my_enqueue_epoch;
+  u32 *session_indices_to_enqueue;
+  static u32 serial_number;
+  int i;
+
+  /* A new epoch per frame lets us queue each session at most once below. */
+  my_enqueue_epoch = ++smm->current_enqueue_epoch[my_thread_index];
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t *b0;
+          u32 next0 = UDP4_URI_INPUT_NEXT_DROP;
+          u32 error0 = SESSION_ERROR_ENQUEUED;
+          udp_header_t *udp0;
+          ip4_header_t *ip0;
+          stream_session_t *s0;
+          svm_fifo_t *f0;
+          u16 udp_len0;
+          u8 *data0;
+
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          /* udp_local hands us a pointer to the udp data */
+
+          data0 = vlib_buffer_get_current (b0);
+          udp0 = (udp_header_t *) (data0 - sizeof (*udp0));
+
+          /* $$$$ fixme: udp_local doesn't do ip options correctly anyhow */
+          ip0 = (ip4_header_t *) (((u8 *) udp0) - sizeof (*ip0));
+          s0 = 0;
+
+          /* lookup session */
+          s0 = stream_session_lookup4 (&ip0->dst_address, &ip0->src_address,
+                                       udp0->dst_port, udp0->src_port,
+                                       SESSION_TYPE_IP4_UDP, my_thread_index);
+
+          /* no listener */
+          if (PREDICT_FALSE (s0 == 0))
+            {
+              error0 = SESSION_ERROR_NO_LISTENER;
+              goto trace0;
+            }
+
+          f0 = s0->server_rx_fifo;
+
+          /* established hit */
+          if (PREDICT_TRUE (s0->session_state == SESSION_STATE_READY))
+            {
+              udp_len0 = clib_net_to_host_u16 (udp0->length);
+
+              if (PREDICT_FALSE (udp_len0 > svm_fifo_max_enqueue (f0)))
+                {
+                  error0 = SESSION_ERROR_FIFO_FULL;
+                  goto trace0;
+                }
+
+              /*
+               * NOTE(review): udp_len0 is taken straight from the packet.
+               * If it is smaller than sizeof (*udp0) the subtraction below
+               * wraps; confirm that an upstream node rejects such packets
+               * or add a lower-bound check here.
+               */
+              svm_fifo_enqueue_nowait (f0, 0 /* pid */ ,
+                                       udp_len0 - sizeof (*udp0),
+                                       (u8 *) (udp0 + 1));
+
+              /* Overwritten again at trace0 with the same value (error0). */
+              b0->error = node->errors[SESSION_ERROR_ENQUEUED];
+
+              /* We need to send an RX event on this fifo */
+              if (s0->enqueue_epoch != my_enqueue_epoch)
+                {
+                  s0->enqueue_epoch = my_enqueue_epoch;
+
+                  vec_add1 (smm->session_indices_to_enqueue_by_thread
+                            [my_thread_index],
+                            s0 - smm->sessions[my_thread_index]);
+                }
+            }
+          /* listener hit */
+          else if (s0->session_state == SESSION_STATE_LISTENING)
+            {
+              udp_connection_t *us;
+              int rv;
+
+              /* The triggering packet itself is not queued to any fifo. */
+              error0 = SESSION_ERROR_NOT_READY;
+
+              /*
+               * create udp transport session
+               */
+              pool_get (um->udp_sessions[my_thread_index], us);
+
+              us->mtu = 1024;	/* $$$$ policy */
+
+              us->c_lcl_ip4.as_u32 = ip0->dst_address.as_u32;
+              us->c_rmt_ip4.as_u32 = ip0->src_address.as_u32;
+              us->c_lcl_port = udp0->dst_port;
+              us->c_rmt_port = udp0->src_port;
+              us->c_proto = SESSION_TYPE_IP4_UDP;
+              us->c_c_index = us - um->udp_sessions[my_thread_index];
+
+              /*
+               * create stream session and attach the udp session to it
+               */
+              rv = stream_session_accept (&us->connection, s0->session_index,
+                                          SESSION_TYPE_IP4_UDP,
+                                          1 /*notify */ );
+              /*
+               * NOTE(review): rv is used directly as an index into
+               * node->errors at trace0; presumably accept returns a
+               * SESSION_ERROR_* value -- confirm.  The pool element
+               * obtained above is not returned on failure.
+               */
+              if (rv)
+                error0 = rv;
+
+            }
+          else
+            {
+
+              error0 = SESSION_ERROR_NOT_READY;
+              goto trace0;
+            }
+
+        trace0:
+          b0->error = node->errors[error0];
+
+          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              udp4_uri_input_trace_t *t =
+                vlib_add_trace (vm, node, b0, sizeof (*t));
+
+              t->session = ~0;
+              if (s0)
+                t->session = s0 - smm->sessions[my_thread_index];
+              t->disposition = error0;
+              t->thread_index = my_thread_index;
+            }
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Send enqueue events */
+
+  session_indices_to_enqueue =
+    smm->session_indices_to_enqueue_by_thread[my_thread_index];
+
+  for (i = 0; i < vec_len (session_indices_to_enqueue); i++)
+    {
+      session_fifo_event_t evt;
+      unix_shared_memory_queue_t *q;
+      stream_session_t *s0;
+      application_t *server0;
+
+      /* Get session */
+      s0 = pool_elt_at_index (smm->sessions[my_thread_index],
+                              session_indices_to_enqueue[i]);
+
+      /* Get session's server */
+      server0 = application_get (s0->app_index);
+
+      /* Built-in server? Deliver the goods... */
+      if (server0->cb_fns.builtin_server_rx_callback)
+        {
+          server0->cb_fns.builtin_server_rx_callback (s0);
+          continue;
+        }
+
+      /* Fabricate event */
+      evt.fifo = s0->server_rx_fifo;
+      evt.event_type = FIFO_EVENT_SERVER_RX;
+      evt.event_id = serial_number++;
+      evt.enqueue_length = svm_fifo_max_dequeue (s0->server_rx_fifo);
+
+      /* Add event to server's event queue */
+      q = server0->event_queue;
+
+      /* Don't block for lack of space */
+      if (PREDICT_TRUE (q->cursize < q->maxsize))
+        unix_shared_memory_queue_add (server0->event_queue, (u8 *) & evt,
+                                      0 /* do wait for mutex */ );
+      else
+        {
+          /*
+           * NOTE(review): a full event queue is counted under
+           * SESSION_ERROR_FIFO_FULL, the same counter used for a full
+           * rx fifo above.
+           */
+          vlib_node_increment_counter (vm, udp4_uri_input_node.index,
+                                       SESSION_ERROR_FIFO_FULL, 1);
+        }
+      /* Event-log record; also written when the event was not queued. */
+      if (1)
+        {
+          ELOG_TYPE_DECLARE (e) =
+          {
+          .format = "evt-enqueue: id %d length %d",.format_args = "i4i4",};
+          struct
+          {
+            u32 data[2];
+          } *ed;
+          ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+          ed->data[0] = evt.event_id;
+          ed->data[1] = evt.enqueue_length;
+        }
+    }
+
+  /* Empty the pending list but keep its storage for the next frame. */
+  vec_reset_length (session_indices_to_enqueue);
+
+  smm->session_indices_to_enqueue_by_thread[my_thread_index] =
+    session_indices_to_enqueue;
+
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (udp4_uri_input_node) =
+{
+  .function = udp4_uri_input_node_fn,.name = "udp4-uri-input",.vector_size =
+    sizeof (u32),.format_trace = format_udp4_uri_input_trace,.type =
+    VLIB_NODE_TYPE_INTERNAL,.n_errors =
+    ARRAY_LEN (udp4_uri_input_error_strings),.error_strings =
+    udp4_uri_input_error_strings,.n_next_nodes = UDP4_URI_INPUT_N_NEXT,
+    /* edit / add dispositions here */
+    .next_nodes =
+  {
+  [UDP4_URI_INPUT_NEXT_DROP] = "error-drop",}
+,};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c
new file mode 100644
index 00000000..6b239f73
--- /dev/null
+++ b/src/vnet/udp/udp_local.c
@@ -0,0 +1,666 @@
+/*
+ * node.c: udp packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +udp_main_t udp_main; + +#define foreach_udp_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") \ + _ (ICMP4_ERROR, "ip4-icmp-error") \ + _ (ICMP6_ERROR, "ip6-icmp-error") + +typedef enum +{ +#define _(s,n) UDP_INPUT_NEXT_##s, + foreach_udp_input_next +#undef _ + UDP_INPUT_N_NEXT, +} udp_input_next_t; + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 bound; +} udp_rx_trace_t; + +u8 * +format_udp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + udp_rx_trace_t *t = va_arg (*args, udp_rx_trace_t *); + + s = format (s, "UDP: src-port %d dst-port %d%s", + clib_net_to_host_u16 (t->src_port), + clib_net_to_host_u16 (t->dst_port), + t->bound ? "" : " (no listener)"); + return s; +} + +typedef struct +{ + /* Sparse vector mapping udp dst_port in network byte order + to next index. */ + u16 *next_by_dst_port; + u8 punt_unknown; +} udp_input_runtime_t; + +vlib_node_registration_t udp4_input_node; +vlib_node_registration_t udp6_input_node; + +always_inline uword +udp46_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + udp_input_runtime_t *rt = is_ip4 ? 
+ (void *) vlib_node_get_runtime_data (vm, udp4_input_node.index) + : (void *) vlib_node_get_runtime_data (vm, udp6_input_node.index); + __attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next; + word n_no_listener = 0; + u8 punt_unknown = rt->punt_unknown; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + udp_header_t *h0 = 0, *h1 = 0; + u32 i0, i1, dst_port0, dst_port1; + u32 advance0, advance1; + u32 error0, next0, error1, next1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* ip4/6_local hands us the ip header, not the udp header */ + if (is_ip4) + { + advance0 = sizeof (ip4_header_t); + advance1 = sizeof (ip4_header_t); + } + else + { + advance0 = sizeof (ip6_header_t); + advance1 = sizeof (ip6_header_t); + } + + if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) + { + error0 = UDP_ERROR_LENGTH_ERROR; + next0 = UDP_INPUT_NEXT_DROP; + } + else + { + vlib_buffer_advance (b0, advance0); + h0 = vlib_buffer_get_current (b0); + error0 = next0 = 0; + if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) > + vlib_buffer_length_in_chain (vm, b0))) + { + error0 = UDP_ERROR_LENGTH_ERROR; + next0 = UDP_INPUT_NEXT_DROP; + } + } + + if 
(PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1))) + { + error1 = UDP_ERROR_LENGTH_ERROR; + next1 = UDP_INPUT_NEXT_DROP; + } + else + { + vlib_buffer_advance (b1, advance1); + h1 = vlib_buffer_get_current (b1); + error1 = next1 = 0; + if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) > + vlib_buffer_length_in_chain (vm, b1))) + { + error1 = UDP_ERROR_LENGTH_ERROR; + next1 = UDP_INPUT_NEXT_DROP; + } + } + + /* Index sparse array with network byte order. */ + dst_port0 = (error0 == 0) ? h0->dst_port : 0; + dst_port1 = (error1 == 0) ? h1->dst_port : 0; + sparse_vec_index2 (rt->next_by_dst_port, dst_port0, dst_port1, + &i0, &i1); + next0 = (error0 == 0) ? vec_elt (rt->next_by_dst_port, i0) : next0; + next1 = (error1 == 0) ? vec_elt (rt->next_by_dst_port, i1) : next1; + + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b0, -(word) advance0); + + if (PREDICT_FALSE (punt_unknown)) + { + b0->error = node->errors[UDP_ERROR_PUNT]; + next0 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b0, + ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b0, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } + + if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b1, -(word) advance1); + + if (PREDICT_FALSE (punt_unknown)) + { + b1->error = node->errors[UDP_ERROR_PUNT]; + next1 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b1, + 
ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next1 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b1, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next1 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b1->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b1, sizeof (*h1)); + } + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h0 ? h0->src_port : 0; + tr->dst_port = h0 ? h0->dst_port : 0; + tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && + next0 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + if (b1->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h1 ? h1->src_port : 0; + tr->dst_port = h1 ? 
h1->dst_port : 0; + tr->bound = (next1 != UDP_INPUT_NEXT_ICMP4_ERROR && + next1 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + udp_header_t *h0 = 0; + u32 i0, next0; + u32 advance0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* ip4/6_local hands us the ip header, not the udp header */ + if (is_ip4) + advance0 = sizeof (ip4_header_t); + else + advance0 = sizeof (ip6_header_t); + + if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) + { + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; + next0 = UDP_INPUT_NEXT_DROP; + goto trace_x1; + } + + vlib_buffer_advance (b0, advance0); + + h0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (clib_net_to_host_u16 (h0->length) <= + vlib_buffer_length_in_chain (vm, b0))) + { + i0 = sparse_vec_index (rt->next_by_dst_port, h0->dst_port); + next0 = vec_elt (rt->next_by_dst_port, i0); + + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b0, -(word) advance0); + + if (PREDICT_FALSE (punt_unknown)) + { + b0->error = node->errors[UDP_ERROR_PUNT]; + next0 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b0, + ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b0, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } + } + 
else + { + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; + next0 = UDP_INPUT_NEXT_DROP; + } + + trace_x1: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h0->src_port; + tr->dst_port = h0->dst_port; + tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && + next0 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_error_count (vm, node->node_index, UDP_ERROR_NO_LISTENER, + n_no_listener); + return from_frame->n_vectors; +} + +static char *udp_error_strings[] = { +#define udp_error(n,s) s, +#include "udp_error.def" +#undef udp_error +}; + +static uword +udp4_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +udp6_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp4_input_node) = { + .function = udp4_input, + .name = "ip4-udp-lookup", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (udp_input_runtime_t), + + .n_errors = UDP_N_ERROR, + .error_strings = udp_error_strings, + + .n_next_nodes = UDP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [UDP_INPUT_NEXT_##s] = n, + foreach_udp_input_next +#undef _ + }, + + .format_buffer = format_udp_header, + .format_trace = format_udp_rx_trace, + .unformat_buffer = unformat_udp_header, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (udp4_input_node, udp4_input); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp6_input_node) = { + .function = udp6_input, + .name = "ip6-udp-lookup", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (udp_input_runtime_t), + + .n_errors = UDP_N_ERROR, + .error_strings = udp_error_strings, + + .n_next_nodes = UDP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [UDP_INPUT_NEXT_##s] = n, + foreach_udp_input_next +#undef _ + }, + + .format_buffer = format_udp_header, + .format_trace = format_udp_rx_trace, + .unformat_buffer = unformat_udp_header, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (udp6_input_node, udp6_input); + +static void +add_dst_port (udp_main_t * um, + udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4) +{ + udp_dst_port_info_t *pi; + u32 i; + + vec_add2 (um->dst_port_infos[is_ip4], pi, 1); + i = pi - um->dst_port_infos[is_ip4]; + + pi->name = dst_port_name; + pi->dst_port = dst_port; + pi->next_index = pi->node_index = ~0; + + hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, i); + + if (pi->name) + hash_set_mem (um->dst_port_info_by_name[is_ip4], pi->name, i); +} + +void +udp_register_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, u32 node_index, u8 is_ip4) +{ + udp_main_t *um = &udp_main; + udp_dst_port_info_t *pi; + udp_input_runtime_t *rt; + u16 *n; + + { + clib_error_t *error = vlib_call_init_function (vm, udp_local_init); + if (error) + clib_error_report (error); + } + + pi = udp_get_dst_port_info (um, 
dst_port, is_ip4); + if (!pi) + { + add_dst_port (um, dst_port, 0, is_ip4); + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + ASSERT (pi); + } + + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, + is_ip4 ? udp4_input_node.index + : udp6_input_node.index, node_index); + + /* Setup udp protocol -> next index sparse vector mapping. */ + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + n = sparse_vec_validate (rt->next_by_dst_port, + clib_host_to_net_u16 (dst_port)); + n[0] = pi->next_index; +} + +void +udp_unregister_dst_port (vlib_main_t * vm, udp_dst_port_t dst_port, u8 is_ip4) +{ + udp_main_t *um = &udp_main; + udp_dst_port_info_t *pi; + udp_input_runtime_t *rt; + u16 *n; + + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + /* Not registered? Fagedaboudit */ + if (!pi) + return; + + /* Kill the mapping. Don't bother killing the pi, it may be back. */ + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + n = sparse_vec_validate (rt->next_by_dst_port, + clib_host_to_net_u16 (dst_port)); + n[0] = SPARSE_VEC_INVALID_INDEX; +} + +void +udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) +{ + udp_input_runtime_t *rt; + + { + clib_error_t *error = vlib_call_init_function (vm, udp_local_init); + if (error) + clib_error_report (error); + } + + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + + rt->punt_unknown = is_add; +} + +/* Parse a UDP header. */ +uword +unformat_udp_header (unformat_input_t * input, va_list * args) +{ + u8 **result = va_arg (*args, u8 **); + udp_header_t *udp; + __attribute__ ((unused)) int old_length; + u16 src_port, dst_port; + + /* Allocate space for IP header. 
*/ + { + void *p; + + old_length = vec_len (*result); + vec_add2 (*result, p, sizeof (ip4_header_t)); + udp = p; + } + + memset (udp, 0, sizeof (udp[0])); + if (unformat (input, "src-port %d dst-port %d", &src_port, &dst_port)) + { + udp->src_port = clib_host_to_net_u16 (src_port); + udp->dst_port = clib_host_to_net_u16 (dst_port); + return 1; + } + return 0; +} + +static void +udp_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t *n = vlib_get_node (vm, node_index); + pg_node_t *pn = pg_get_node (node_index); + + n->format_buffer = format_udp_header; + n->unformat_buffer = unformat_udp_header; + pn->unformat_edit = unformat_pg_udp_header; +} + +clib_error_t * +udp_local_init (vlib_main_t * vm) +{ + udp_input_runtime_t *rt; + udp_main_t *um = &udp_main; + int i; + + { + clib_error_t *error; + error = vlib_call_init_function (vm, udp_init); + if (error) + clib_error_report (error); + } + + + for (i = 0; i < 2; i++) + { + um->dst_port_info_by_name[i] = hash_create_string (0, sizeof (uword)); + um->dst_port_info_by_dst_port[i] = hash_create (0, sizeof (uword)); + } + + udp_setup_node (vm, udp4_input_node.index); + udp_setup_node (vm, udp6_input_node.index); + + rt = vlib_node_get_runtime_data (vm, udp4_input_node.index); + + rt->next_by_dst_port = sparse_vec_new + ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + rt->punt_unknown = 0; + +#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */); + foreach_udp4_dst_port +#undef _ + rt = vlib_node_get_runtime_data (vm, udp6_input_node.index); + + rt->next_by_dst_port = sparse_vec_new + ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + rt->punt_unknown = 0; + +#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */); + foreach_udp6_dst_port +#undef _ + ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index); + /* Note: ip6 differs from ip4, 
UDP is hotwired to ip6-udp-lookup */ + return 0; +} + +VLIB_INIT_FUNCTION (udp_local_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_packet.h b/src/vnet/udp/udp_packet.h new file mode 100644 index 00000000..beea3059 --- /dev/null +++ b/src/vnet/udp/udp_packet.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip4/udp_packet.h: UDP packet format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_udp_packet_h +#define included_udp_packet_h + +typedef struct +{ + /* Source and destination port. */ + u16 src_port, dst_port; + + /* Length of UDP header plus payload. */ + u16 length; + + /* Checksum of UDP pseudo-header and data or + zero if checksum is disabled. */ + u16 checksum; +} udp_header_t; + +#endif /* included_udp_packet_h */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_pg.c b/src/vnet/udp/udp_pg.c new file mode 100644 index 00000000..c9d8d38c --- /dev/null +++ b/src/vnet/udp/udp_pg.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/udp_pg: UDP packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include /* for unformat_tcp_udp_port */ + +#define UDP_PG_EDIT_LENGTH (1 << 0) +#define UDP_PG_EDIT_CHECKSUM (1 << 1) +/* Per-packet fixup for generated packets: fills in the UDP length and/or checksum selected by flags (UDP_PG_EDIT_*). */ +always_inline void +udp_pg_edit_function_inline (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, u32 n_packets, u32 flags) +{ + vlib_main_t *vm = vlib_get_main (); + u32 ip_offset, udp_offset; + + udp_offset = g->start_byte_offset; + ip_offset = (g - 1)->start_byte_offset; /* previous edit group; read as an IPv4 header below */ + + while (n_packets >= 1) + { + vlib_buffer_t *p0; + ip4_header_t *ip0; + udp_header_t *udp0; + u32 udp_len0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ip0 = (void *) (p0->data + ip_offset); + udp0 = (void *) (p0->data + udp_offset); + udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); /* checksum length; NOTE(review): assumes ip0->length is already final and the IPv4 header has no options - confirm */ + /* The UDP length field counts the UDP header plus payload, so measure from udp_offset and store it in network byte order. */ + if (flags & UDP_PG_EDIT_LENGTH) + udp0->length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) + - udp_offset); + + /* Initialize checksum with header. */ + if (flags & UDP_PG_EDIT_CHECKSUM) + { + ip_csum_t sum0; + + sum0 = clib_mem_unaligned (&ip0->src_address, u64); + + sum0 = ip_csum_with_carry + (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16))); + + /* Invalidate possibly old checksum. */ + udp0->checksum = 0; + + sum0 = + ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0, + sum0); + + sum0 = ~ip_csum_fold (sum0); + + /* Zero checksum means checksumming disabled. */ + sum0 = sum0 != 0 ? 
sum0 : 0xffff; + + udp0->checksum = sum0; + } + } +} +/* Edit-group callback: dispatches on edit_function_opaque so the inline worker is always called with constant flags. */ +static void +udp_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, u32 * packets, u32 n_packets) +{ + switch (g->edit_function_opaque) + { + case UDP_PG_EDIT_LENGTH: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_LENGTH); + break; + + case UDP_PG_EDIT_CHECKSUM: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_CHECKSUM); + break; + + case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH); + break; + + default: + ASSERT (0); + break; + } +} + +typedef struct +{ + pg_edit_t src_port, dst_port; + pg_edit_t length; + pg_edit_t checksum; +} pg_udp_header_t; + +static inline void +pg_udp_header_init (pg_udp_header_t * p) +{ + /* Initialize one edit per UDP header field (none of them are bit fields). */ +#define _(f) pg_edit_init (&p->f, udp_header_t, f); + _(src_port); + _(dst_port); + _(length); + _(checksum); +#undef _ +} +/* Parse a UDP header description (UDP: src -> dst, then optional length N and checksum N) into a new edit group of stream s; returns 1 on success, 0 on failure. */ +uword +unformat_pg_udp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t *s = va_arg (*args, pg_stream_t *); + pg_udp_header_t *p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t), + &group_index); + pg_udp_header_init (p); + + /* Defaults. */ + p->checksum.type = PG_EDIT_UNSPECIFIED; + p->length.type = PG_EDIT_UNSPECIFIED; + + if (!unformat (input, "UDP: %U -> %U", + unformat_pg_edit, + unformat_tcp_udp_port, &p->src_port, + unformat_pg_edit, unformat_tcp_udp_port, &p->dst_port)) + goto error; + + /* Parse options. */ + while (1) + { + if (unformat (input, "length %U", + unformat_pg_edit, unformat_pg_number, &p->length)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, unformat_pg_number, &p->checksum)) + ; + + /* Can't parse input: try next protocol level. 
*/ + else + break; + } + + { + ip_main_t *im = &ip_main; + u16 dst_port; + tcp_udp_port_info_t *pi; + + pi = 0; + if (p->dst_port.type == PG_EDIT_FIXED) + { + dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO); + pi = ip_get_tcp_udp_port_info (im, dst_port); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (!unformat_user (input, unformat_pg_payload, s)) + goto error; + + p = pg_get_edit_group (s, group_index); + if (p->checksum.type == PG_EDIT_UNSPECIFIED + || p->length.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t *g = pg_stream_get_group (s, group_index); + g->edit_function = udp_pg_edit_function; + g->edit_function_opaque = 0; + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= UDP_PG_EDIT_CHECKSUM; + if (p->length.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= UDP_PG_EDIT_LENGTH; + } + + return 1; + } + +error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 142acedc..c4075db6 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/vnet/vxlan-gpe/vxlan_gpe.h index 1b4bc44e..e768d230 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.h +++ b/src/vnet/vxlan-gpe/vxlan_gpe.h @@ -29,7 +29,7 @@ #include #include #include -#include +#include /** * @brief VXLAN GPE header struct diff --git a/src/vnet/vxlan/vxlan.h b/src/vnet/vxlan/vxlan.h index adfa3a8e..dca1cd12 100644 --- a/src/vnet/vxlan/vxlan.h +++ b/src/vnet/vxlan/vxlan.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 24f48293..2d6e4f37 100644 --- 
a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -38,6 +38,7 @@ * IPSEC-GRE APIs: see .../src/vnet/ipsec-gre/{ipsec_gre.api, ipsec_gre_api.c} * LISP APIs: see .../src/vnet/lisp/{lisp.api, lisp_api.c} * LISP-GPE APIs: see .../src/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c} + * SESSION APIs: see .../src/vnet/session/{session.api, session_api.c} * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c} * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c} * DPDK APIs: see ... /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} diff --git a/src/vppinfra.am b/src/vppinfra.am index 8d375958..4b9f0c29 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -157,7 +157,9 @@ nobase_include_HEADERS = \ vppinfra/asm_mips.h \ vppinfra/asm_x86.h \ vppinfra/bihash_8_8.h \ + vppinfra/bihash_16_8.h \ vppinfra/bihash_24_8.h \ + vppinfra/bihash_48_8.h \ vppinfra/bihash_template.h \ vppinfra/bihash_template.c \ vppinfra/bitmap.h \ @@ -206,6 +208,7 @@ nobase_include_HEADERS = \ vppinfra/timer.h \ vppinfra/tw_timer_2t_1w_2048sl.h \ vppinfra/tw_timer_16t_2w_512sl.h \ + vppinfra/tw_timer_16t_1w_2048sl.h \ vppinfra/tw_timer_template.h \ vppinfra/tw_timer_template.c \ vppinfra/types.h \ @@ -261,6 +264,8 @@ CLIB_CORE = \ vppinfra/tw_timer_2t_1w_2048sl.c \ vppinfra/tw_timer_16t_2w_512sl.h \ vppinfra/tw_timer_16t_2w_512sl.c \ + vppinfra/tw_timer_16t_1w_2048sl.h \ + vppinfra/tw_timer_16t_1w_2048sl.c \ vppinfra/unformat.c \ vppinfra/vec.c \ vppinfra/vector.c \ diff --git a/src/vppinfra/bihash_16_8.h b/src/vppinfra/bihash_16_8.h new file mode 100644 index 00000000..ce80f70e --- /dev/null +++ b/src/vppinfra/bihash_16_8.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#undef BIHASH_TYPE + +#define BIHASH_TYPE _16_8 +#define BIHASH_KVP_PER_PAGE 4 + +#ifndef __included_bihash_16_8_h__ +#define __included_bihash_16_8_h__ + +#include +#include +#include +#include +/* Key/value pair: two u64 key words (16 bytes) and one u64 value (8 bytes). */ +typedef struct +{ + u64 key[2]; + u64 value; +} clib_bihash_kv_16_8_t; +/* Return 1 when key[0] and value are both all-ones (slot treated as free), else 0. */ +static inline int +clib_bihash_is_free_16_8 (clib_bihash_kv_16_8_t * v) +{ + /* Free values are memset to 0xff, check a bit... */ + if (v->key[0] == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +#if __SSE4_2__ +#ifndef __defined_crc_u32__ +#define __defined_crc_u32__ +static inline u32 +crc_u32 (u32 data, u32 value) +{ /* fold data into the running value with the x86 crc32l instruction */ + __asm__ volatile ("crc32l %[data], %[value];":[value] "+r" (value):[data] + "rm" (data)); + return value; +} +#endif /* __defined_crc_u32__ */ +/* SSE4.2 build: hash the key by chaining its four 32-bit words through crc_u32, starting from 0. */ +static inline u64 +clib_bihash_hash_16_8 (clib_bihash_kv_16_8_t * v) +{ + u32 *dp = (u32 *) & v->key[0]; + u32 value = 0; + + value = crc_u32 (dp[0], value); + value = crc_u32 (dp[1], value); + value = crc_u32 (dp[2], value); + value = crc_u32 (dp[3], value); + + return value; +} +#else /* no SSE4.2: XOR the two key words and hash the result with clib_xxhash */ +static inline u64 +clib_bihash_hash_16_8 (clib_bihash_kv_16_8_t * v) +{ + u64 tmp = v->key[0] ^ v->key[1]; + return clib_xxhash (tmp); +} +#endif +/* format helper: takes a clib_bihash_kv_16_8_t pointer from args and formats both key words and the value with %llu. */ +static inline u8 * +format_bihash_kvp_16_8 (u8 * s, va_list * args) +{ + clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *); + + s = format (s, "key %llu %llu value %llu", v->key[0], v->key[1], v->value); + return s; +} +/* Return 1 when the two 16-byte keys are identical, 0 otherwise. */ +static inline int +clib_bihash_key_compare_16_8 (u64 * a, u64 * b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1])) == 0; +} + +#undef 
__included_bihash_template_h__ /* clears the template include guard before the #include that follows; presumably so the template expands again for this BIHASH_TYPE - TODO confirm */ +#include + +#endif /* __included_bihash_16_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h new file mode 100644 index 00000000..1a6e7691 --- /dev/null +++ b/src/vppinfra/bihash_48_8.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#undef BIHASH_TYPE + +#define BIHASH_TYPE _48_8 +#define BIHASH_KVP_PER_PAGE 4 + +#ifndef __included_bihash_48_8_h__ +#define __included_bihash_48_8_h__ + +#include +#include +#include +#include +/* Key/value pair: six u64 key words (48 bytes) and one u64 value (8 bytes). */ +typedef struct +{ + u64 key[6]; + u64 value; +} clib_bihash_kv_48_8_t; +/* Return 1 when key[0] and value are both all-ones (slot treated as free), else 0. */ +static inline int +clib_bihash_is_free_48_8 (const clib_bihash_kv_48_8_t * v) +{ + /* Free values are memset to 0xff, check a bit... 
*/ + if (v->key[0] == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +#if __SSE4_2__ +#ifndef __defined_crc_u32__ +#define __defined_crc_u32__ +static inline u32 +crc_u32 (u32 data, u32 value) +{ /* fold data into the running value with the x86 crc32l instruction */ + __asm__ volatile ("crc32l %[data], %[value];":[value] "+r" (value):[data] + "rm" (data)); + return value; +} +#endif /* __defined_crc_u32__ */ +/* SSE4.2 build: hash the key by chaining its twelve 32-bit words through crc_u32, starting from 0. */ +static inline u64 +clib_bihash_hash_48_8 (const clib_bihash_kv_48_8_t * v) +{ + const u32 *dp = (const u32 *) &v->key[0]; + u32 value = 0; + + value = crc_u32 (dp[0], value); + value = crc_u32 (dp[1], value); + value = crc_u32 (dp[2], value); + value = crc_u32 (dp[3], value); + value = crc_u32 (dp[4], value); + value = crc_u32 (dp[5], value); + value = crc_u32 (dp[6], value); + value = crc_u32 (dp[7], value); + value = crc_u32 (dp[8], value); + value = crc_u32 (dp[9], value); + value = crc_u32 (dp[10], value); + value = crc_u32 (dp[11], value); + + return value; +} +#else /* no SSE4.2: XOR the six key words and hash the result with clib_xxhash */ +static inline u64 +clib_bihash_hash_48_8 (const clib_bihash_kv_48_8_t * v) +{ + u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2] ^ v->key[3] ^ v->key[4] + ^ v->key[5]; + return clib_xxhash (tmp); +} +#endif +/* format helper: takes a clib_bihash_kv_48_8_t pointer from args and formats all six key words and the value with %llu. */ +static inline u8 * +format_bihash_kvp_48_8 (u8 * s, va_list * args) +{ + clib_bihash_kv_48_8_t *v = va_arg (*args, clib_bihash_kv_48_8_t *); + + s = format (s, "key %llu %llu %llu %llu %llu %llu value %llu", v->key[0], + v->key[1], v->key[2], v->key[3], v->key[4], v->key[5], + v->value); + return s; +} +/* Return 1 when the two 48-byte keys are identical, 0 otherwise. */ +static inline int +clib_bihash_key_compare_48_8 (const u64 * a, const u64 * b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) + | (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0; +} + +#undef __included_bihash_template_h__ +#include + +#endif /* __included_bihash_48_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.c b/src/vppinfra/tw_timer_16t_1w_2048sl.c new file mode 100644 index 00000000..3f342045 --- 
/dev/null +++ b/src/vppinfra/tw_timer_16t_1w_2048sl.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "tw_timer_16t_1w_2048sl.h" +#include "tw_timer_template.c" + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.h b/src/vppinfra/tw_timer_16t_1w_2048sl.h new file mode 100644 index 00000000..685ac31e --- /dev/null +++ b/src/vppinfra/tw_timer_16t_1w_2048sl.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_tw_timer_16t_1w_2048sl_h__ +#define __included_tw_timer_16t_1w_2048sl_h__ +/* The guard is named after this geometry (16 timers, 1 wheel, 2048 slots) so it cannot mask another tw_timer geometry header. */ +/* ... 
So that a client app can create multiple wheel geometries */ +#undef TW_TIMER_WHEELS +#undef TW_SLOTS_PER_RING +#undef TW_RING_SHIFT +#undef TW_RING_MASK +#undef TW_TIMERS_PER_OBJECT +#undef LOG2_TW_TIMERS_PER_OBJECT +#undef TW_SUFFIX + +#define TW_TIMER_WHEELS 1 +#define TW_SLOTS_PER_RING 2048 +#define TW_RING_SHIFT 11 /* log2 (TW_SLOTS_PER_RING) */ +#define TW_RING_MASK (TW_SLOTS_PER_RING -1) +#define TW_TIMERS_PER_OBJECT 16 +#define LOG2_TW_TIMERS_PER_OBJECT 4 /* log2 (TW_TIMERS_PER_OBJECT) */ +#define TW_SUFFIX _16t_1w_2048sl + +#include + +#endif /* __included_tw_timer_16t_1w_2048sl_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From a1a093d4e46e38503332a97ad216f80053a15f2b Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Thu, 2 Mar 2017 13:13:23 -0500 Subject: Clean up binary api message handler registration issues Removed a fair number of "BUG" message handlers, due to conflicts with actual message handlers in api_format.c. Vpp itself had no business receiving certain messages, up to the point where we started building in relevant code from vpp_api_test. Eliminated all but one duplicate registration complaint. That one needs attention from the vxlan team since the duplicated handlers have diverged. 
Change-Id: Iafce5429d2f906270643b4ea5f0130e20beb4d1d Signed-off-by: Dave Barach --- src/vat/api_format.c | 43 ++++++- src/vlib/unix/input.c | 31 ++++- src/vlibapi/api.h | 15 +++ src/vlibapi/api_shared.c | 4 + src/vnet/classify/classify_api.c | 8 -- src/vnet/devices/virtio/vhost_user_api.c | 10 +- src/vnet/dhcp/dhcp_api.c | 8 -- src/vnet/interface_api.c | 7 - src/vnet/ip/ip_api.c | 83 ------------ src/vnet/l2/l2_api.c | 22 ---- src/vnet/mpls/mpls_api.c | 28 +--- src/vpp/api/api.c | 211 ------------------------------- src/vpp/api/api_main.c | 1 - src/vpp/stats/stats.c | 7 - 14 files changed, 81 insertions(+), 397 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 1321bade..52436917 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -944,6 +944,7 @@ static void vl_api_sw_interface_details_t_handler_json } } +#if VPP_API_TEST_BUILTIN == 0 static void vl_api_sw_interface_set_flags_t_handler (vl_api_sw_interface_set_flags_t * mp) { @@ -954,6 +955,7 @@ static void vl_api_sw_interface_set_flags_t_handler mp->admin_up_down ? "admin-up" : "admin-down", mp->link_up_down ? 
"link-up" : "link-down"); } +#endif static void vl_api_sw_interface_set_flags_t_handler_json (vl_api_sw_interface_set_flags_t * mp) @@ -4009,7 +4011,6 @@ foreach_standard_reply_retval_handler; #define foreach_vpe_api_reply_msg \ _(CREATE_LOOPBACK_REPLY, create_loopback_reply) \ _(SW_INTERFACE_DETAILS, sw_interface_details) \ -_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ _(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply) \ _(CONTROL_PING_REPLY, control_ping_reply) \ _(CLI_REPLY, cli_reply) \ @@ -4126,11 +4127,6 @@ _(IKEV2_INITIATE_REKEY_CHILD_SA_REPLY, ikev2_initiate_rekey_child_sa_reply) \ _(DELETE_LOOPBACK_REPLY, delete_loopback_reply) \ _(BD_IP_MAC_ADD_DEL_REPLY, bd_ip_mac_add_del_reply) \ _(DHCP_COMPL_EVENT, dhcp_compl_event) \ -_(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ -_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ -_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ -_(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \ -_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) \ _(MAP_ADD_DOMAIN_REPLY, map_add_domain_reply) \ _(MAP_DEL_DOMAIN_REPLY, map_del_domain_reply) \ _(MAP_ADD_DEL_RULE_REPLY, map_add_del_rule_reply) \ @@ -4232,6 +4228,14 @@ _(SW_INTERFACE_SET_MTU_REPLY, sw_interface_set_mtu_reply) \ _(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) +#define foreach_standalone_reply_msg \ +_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ +_(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ +_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ +_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ +_(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \ +_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) + typedef struct { u8 *name; @@ -15425,7 +15429,15 @@ api_af_packet_create (vat_main_t * vam) vec_free (host_if_name); S (mp); - W2 (ret, fprintf (vam->ofp, " new sw_if_index = %d ", vam->sw_if_index)); + + /* *INDENT-OFF* */ + W2 (ret, + ({ + if (ret == 0) + fprintf (vam->ofp ? 
vam->ofp : stderr, + " new sw_if_index = %d\n", vam->sw_if_index); + })); + /* *INDENT-ON* */ return ret; } @@ -18417,6 +18429,9 @@ _(unset, "usage: unset ") } \ } foreach_vpe_api_reply_msg; +#if VPP_API_TEST_BUILTIN == 0 +foreach_standalone_reply_msg; +#endif #undef _ void @@ -18430,6 +18445,9 @@ vat_api_hookup (vat_main_t * vam) vl_api_##n##_t_print, \ sizeof(vl_api_##n##_t), 1); foreach_vpe_api_reply_msg; +#if VPP_API_TEST_BUILTIN == 0 + foreach_standalone_reply_msg; +#endif #undef _ #if (VPP_API_TEST_BUILTIN==0) @@ -18463,6 +18481,17 @@ vat_api_hookup (vat_main_t * vam) #undef _ } +#if VPP_API_TEST_BUILTIN +static clib_error_t * +vat_api_hookup_shim (vlib_main_t * vm) +{ + vat_api_hookup (&vat_main); + return 0; +} + +VLIB_API_INIT_FUNCTION (vat_api_hookup_shim); +#endif + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index 07096ed2..7b4183a4 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -66,6 +66,7 @@ linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) unix_main_t *um = &unix_main; linux_epoll_main_t *em = &linux_epoll_main; struct epoll_event e; + int op; memset (&e, 0, sizeof (e)); @@ -76,13 +77,29 @@ linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) e.events |= EPOLLET; e.data.u32 = f - um->file_pool; - if (epoll_ctl (em->epoll_fd, - (update_type == UNIX_FILE_UPDATE_ADD - ? EPOLL_CTL_ADD - : (update_type == UNIX_FILE_UPDATE_MODIFY - ? 
EPOLL_CTL_MOD - : EPOLL_CTL_DEL)), f->file_descriptor, &e) < 0) - clib_warning ("epoll_ctl"); + op = -1; + + switch (update_type) + { + case UNIX_FILE_UPDATE_ADD: + op = EPOLL_CTL_ADD; + break; + + case UNIX_FILE_UPDATE_MODIFY: + op = EPOLL_CTL_MOD; + break; + + case UNIX_FILE_UPDATE_DELETE: + op = EPOLL_CTL_DEL; + break; + + default: + clib_warning ("unknown update_type %d", update_type); + return; + } + + if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0) + clib_unix_warning ("epoll_ctl"); } static uword diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index fcb101d7..b40ece15 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -271,6 +271,21 @@ vlib_node_t **vlib_node_unserialize (u8 * vector); #define VLIB_API_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,api_init) +/* Call given init function: used for init function dependencies. */ +#define vlib_call_api_init_function(vm, x) \ + ({ \ + extern vlib_init_function_t * _VLIB_INIT_FUNCTION_SYMBOL (x,api_init); \ + vlib_init_function_t * _f = _VLIB_INIT_FUNCTION_SYMBOL (x,api_init); \ + clib_error_t * _error = 0; \ + if (! 
hash_get (vm->init_functions_called, _f)) \ + { \ + hash_set1 (vm->init_functions_called, _f); \ + _error = _f (vm); \ + } \ + _error; \ + }) + + #endif /* included_api_h */ /* diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 1a2740e2..79921afe 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -667,6 +667,10 @@ vl_msg_api_config (vl_msg_api_msg_config_t * c) foreach_msg_api_vector; #undef _ + if (am->msg_names[c->id]) + clib_warning ("BUG: multiple registrations of 'vl_api_%s_t_handler'", + c->name); + am->msg_names[c->id] = c->name; am->msg_handlers[c->id] = c->handler; am->msg_cleanup_handlers[c->id] = c->cleanup; diff --git a/src/vnet/classify/classify_api.c b/src/vnet/classify/classify_api.c index 77a8b434..24c7a2b9 100644 --- a/src/vnet/classify/classify_api.c +++ b/src/vnet/classify/classify_api.c @@ -53,7 +53,6 @@ _(CLASSIFY_TABLE_IDS,classify_table_ids) \ _(CLASSIFY_TABLE_BY_INTERFACE, classify_table_by_interface) \ _(CLASSIFY_TABLE_INFO,classify_table_info) \ _(CLASSIFY_SESSION_DUMP,classify_session_dump) \ -_(CLASSIFY_SESSION_DETAILS,classify_session_details) \ _(POLICER_CLASSIFY_SET_INTERFACE, policer_classify_set_interface) \ _(POLICER_CLASSIFY_DUMP, policer_classify_dump) \ _(FLOW_CLASSIFY_SET_INTERFACE, flow_classify_set_interface) \ @@ -356,13 +355,6 @@ vl_api_classify_table_info_t_handler (vl_api_classify_table_info_t * mp) vl_msg_api_send_shmem (q, (u8 *) & rmp); } -static void -vl_api_classify_session_details_t_handler (vl_api_classify_session_details_t * - mp) -{ - clib_warning ("BUG"); -} - static void send_classify_session_details (unix_shared_memory_queue_t * q, u32 table_id, diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c index dd517c26..8dbd032b 100644 --- a/src/vnet/devices/virtio/vhost_user_api.c +++ b/src/vnet/devices/virtio/vhost_user_api.c @@ -46,8 +46,7 @@ _(CREATE_VHOST_USER_IF, create_vhost_user_if) \ _(MODIFY_VHOST_USER_IF, 
modify_vhost_user_if) \ _(DELETE_VHOST_USER_IF, delete_vhost_user_if) \ -_(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) \ -_(SW_INTERFACE_VHOST_USER_DETAILS, sw_interface_vhost_user_details) +_(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) /* * WARNING: replicated pending api refactor completion @@ -148,13 +147,6 @@ vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp) } } -static void - vl_api_sw_interface_vhost_user_details_t_handler - (vl_api_sw_interface_vhost_user_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_sw_interface_vhost_user_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c index bdf02cae..ce34f6a4 100644 --- a/src/vnet/dhcp/dhcp_api.c +++ b/src/vnet/dhcp/dhcp_api.c @@ -46,7 +46,6 @@ #define foreach_vpe_api_msg \ _(DHCP_PROXY_CONFIG,dhcp_proxy_config) \ _(DHCP_PROXY_DUMP,dhcp_proxy_dump) \ -_(DHCP_PROXY_DETAILS,dhcp_proxy_details) \ _(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \ _(DHCP_CLIENT_CONFIG, dhcp_client_config) @@ -158,13 +157,6 @@ dhcp_send_details (fib_protocol_t proto, vl_msg_api_send_shmem (q, (u8 *) & mp); } - -static void -vl_api_dhcp_proxy_details_t_handler (vl_api_dhcp_proxy_details_t * mp) -{ - clib_warning ("BUG"); -} - void dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, u8 is_ipv6, u8 * host_address, u8 * router_address, diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 63f7cad4..60cd6d40 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -50,7 +50,6 @@ _(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ _(SW_INTERFACE_SET_MTU, sw_interface_set_mtu) \ _(WANT_INTERFACE_EVENTS, want_interface_events) \ _(SW_INTERFACE_DUMP, sw_interface_dump) \ -_(SW_INTERFACE_DETAILS, sw_interface_details) \ _(SW_INTERFACE_ADD_DEL_ADDRESS, sw_interface_add_del_address) \ _(SW_INTERFACE_SET_TABLE, sw_interface_set_table) \ _(SW_INTERFACE_GET_TABLE, 
sw_interface_get_table) \ @@ -684,12 +683,6 @@ out: REPLY_MACRO (VL_API_SW_INTERFACE_TAG_ADD_DEL_REPLY); } -static void -vl_api_sw_interface_details_t_handler (vl_api_sw_interface_details_t * mp) -{ - clib_warning ("BUG"); -} - /* * vpe_api_hookup * Add vpe's API message handlers to the table. diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index 49d941c2..ab164a5f 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -59,17 +59,12 @@ #define foreach_ip_api_msg \ _(IP_FIB_DUMP, ip_fib_dump) \ -_(IP_FIB_DETAILS, ip_fib_details) \ _(IP6_FIB_DUMP, ip6_fib_dump) \ -_(IP6_FIB_DETAILS, ip6_fib_details) \ _(IP_MFIB_DUMP, ip_mfib_dump) \ -_(IP_MFIB_DETAILS, ip_mfib_details) \ _(IP6_MFIB_DUMP, ip6_mfib_dump) \ -_(IP6_MFIB_DETAILS, ip6_mfib_details) \ _(IP_NEIGHBOR_DUMP, ip_neighbor_dump) \ _(IP_MROUTE_ADD_DEL, ip_mroute_add_del) \ _(MFIB_SIGNAL_DUMP, mfib_signal_dump) \ -_(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(IP_ADDRESS_DUMP, ip_address_dump) \ _(IP_DUMP, ip_dump) \ _(IP_NEIGHBOR_ADD_DEL, ip_neighbor_add_del) \ @@ -105,12 +100,6 @@ send_ip_neighbor_details (u8 is_ipv6, vl_msg_api_send_shmem (q, (u8 *) & mp); } -static void -vl_api_ip_neighbor_details_t_handler (vl_api_ip_neighbor_details_t * mp) -{ - clib_warning ("BUG"); -} - static void vl_api_ip_neighbor_dump_t_handler (vl_api_ip_neighbor_dump_t * mp) { @@ -185,24 +174,6 @@ copy_fib_next_hop (fib_route_path_encode_t * api_rpath, void *fp_arg) sizeof (api_rpath->rpath.frp_addr.ip6)); } -static void -vl_api_ip_fib_details_t_handler (vl_api_ip_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_fib_details_t_endian (vl_api_ip_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_fib_details_t_print (vl_api_ip_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip_fib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, @@ -316,24 +287,6 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) vec_free (lfeis); } -static 
void -vl_api_ip6_fib_details_t_handler (vl_api_ip6_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip6_fib_details_t_endian (vl_api_ip6_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip6_fib_details_t_print (vl_api_ip6_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip6_fib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, @@ -469,24 +422,6 @@ vl_api_ip6_fib_dump_t_handler (vl_api_ip6_fib_dump_t * mp) /* *INDENT-ON* */ } -static void -vl_api_ip_mfib_details_t_handler (vl_api_ip_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_mfib_details_t_endian (vl_api_ip_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_mfib_details_t_print (vl_api_ip_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip_mfib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, @@ -591,24 +526,6 @@ vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp) vec_free (api_rpaths); } -static void -vl_api_ip6_mfib_details_t_handler (vl_api_ip6_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip6_mfib_details_t_endian (vl_api_ip6_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip6_mfib_details_t_print (vl_api_ip6_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip6_mfib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index a3cc49bf..a985852c 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -48,13 +48,10 @@ _(L2_XCONNECT_DUMP, l2_xconnect_dump) \ _(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \ _(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ -_(L2_FIB_TABLE_ENTRY, l2_fib_table_entry) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ _(L2_FLAGS, l2_flags) \ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ -_(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ 
-_(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ _(BRIDGE_FLAGS, bridge_flags) \ _(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \ _(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) @@ -140,12 +137,6 @@ send_l2fib_table_entry (vpe_api_main_t * am, vl_msg_api_send_shmem (q, (u8 *) & mp); } -static void -vl_api_l2_fib_table_entry_t_handler (vl_api_l2_fib_table_entry_t * mp) -{ - clib_warning ("BUG"); -} - static void vl_api_l2_fib_table_dump_t_handler (vl_api_l2_fib_table_dump_t * mp) { @@ -329,19 +320,6 @@ vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) REPLY_MACRO (VL_API_BRIDGE_DOMAIN_ADD_DEL_REPLY); } -static void -vl_api_bridge_domain_details_t_handler (vl_api_bridge_domain_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void - vl_api_bridge_domain_sw_if_details_t_handler - (vl_api_bridge_domain_sw_if_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_bridge_domain_details (unix_shared_memory_queue_t * q, l2_bridge_domain_t * bd_config, diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index ebbeba69..a36a5046 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -50,9 +50,7 @@ _(MPLS_IP_BIND_UNBIND, mpls_ip_bind_unbind) \ _(MPLS_ROUTE_ADD_DEL, mpls_route_add_del) \ _(MPLS_TUNNEL_ADD_DEL, mpls_tunnel_add_del) \ _(MPLS_TUNNEL_DUMP, mpls_tunnel_dump) \ -_(MPLS_TUNNEL_DETAILS, mpls_tunnel_details) \ -_(MPLS_FIB_DUMP, mpls_fib_dump) \ -_(MPLS_FIB_DETAILS, mpls_fib_details) +_(MPLS_FIB_DUMP, mpls_fib_dump) extern void stats_dslock_with_hint (int hint, int tag); extern void stats_dsunlock (void); @@ -280,12 +278,6 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) /* *INDENT-ON* */ } -static void -vl_api_mpls_tunnel_details_t_handler (vl_api_mpls_tunnel_details_t * mp) -{ - clib_warning ("BUG"); -} - typedef struct mpls_tunnel_send_walk_ctx_t_ { unix_shared_memory_queue_t *q; @@ -340,24 +332,6 @@ 
vl_api_mpls_tunnel_dump_t_handler (vl_api_mpls_tunnel_dump_t * mp) mpls_tunnel_walk (send_mpls_tunnel_entry, &ctx); } -static void -vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_mpls_fib_details_t_endian (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_mpls_fib_details_t_print (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_mpls_fib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 60fd0199..a8f471e8 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -128,12 +128,8 @@ _(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \ _(CLASSIFY_SET_INTERFACE_L2_TABLES, classify_set_interface_l2_tables) \ _(GET_NODE_INDEX, get_node_index) \ _(ADD_NODE_NEXT, add_node_next) \ -_(VXLAN_ADD_DEL_TUNNEL, vxlan_add_del_tunnel) \ -_(VXLAN_TUNNEL_DUMP, vxlan_tunnel_dump) \ _(L2_INTERFACE_EFP_FILTER, l2_interface_efp_filter) \ _(SHOW_VERSION, show_version) \ -_(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \ -_(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ _(INTERFACE_NAME_RENUMBER, interface_name_renumber) \ _(WANT_IP4_ARP_EVENTS, want_ip4_arp_events) \ _(WANT_IP6_ND_EVENTS, want_ip6_nd_events) \ @@ -1436,62 +1432,6 @@ out: /* *INDENT-ON* */ } -static void vl_api_vxlan_add_del_tunnel_t_handler - (vl_api_vxlan_add_del_tunnel_t * mp) -{ - vl_api_vxlan_add_del_tunnel_reply_t *rmp; - int rv = 0; - vnet_vxlan_add_del_tunnel_args_t _a, *a = &_a; - u32 encap_fib_index; - uword *p; - ip4_main_t *im = &ip4_main; - vnet_main_t *vnm = vnet_get_main (); - u32 sw_if_index = ~0; - - p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id)); - if (!p) - { - rv = VNET_API_ERROR_NO_SUCH_FIB; - goto out; - } - encap_fib_index = p[0]; - memset (a, 0, sizeof (*a)); - - a->is_add = mp->is_add; - a->is_ip6 = mp->is_ipv6; - - /* ip addresses sent in network 
byte order */ - ip46_from_addr_buf (mp->is_ipv6, mp->dst_address, &a->dst); - ip46_from_addr_buf (mp->is_ipv6, mp->src_address, &a->src); - - /* Check src & dst are different */ - if (ip46_address_cmp (&a->dst, &a->src) == 0) - { - rv = VNET_API_ERROR_SAME_SRC_DST; - goto out; - } - a->mcast_sw_if_index = ntohl (mp->mcast_sw_if_index); - if (ip46_address_is_multicast (&a->dst) && - pool_is_free_index (vnm->interface_main.sw_interfaces, - a->mcast_sw_if_index)) - { - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; - goto out; - } - a->encap_fib_index = encap_fib_index; - a->decap_next_index = ntohl (mp->decap_next_index); - a->vni = ntohl (mp->vni); - rv = vnet_vxlan_add_del_tunnel (a, &sw_if_index); - -out: - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_VXLAN_ADD_DEL_TUNNEL_REPLY, - ({ - rmp->sw_if_index = ntohl (sw_if_index); - })); - /* *INDENT-ON* */ -} - static void send_vxlan_tunnel_details (vxlan_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context) { @@ -1525,43 +1465,6 @@ static void send_vxlan_tunnel_details vl_msg_api_send_shmem (q, (u8 *) & rmp); } -static void vl_api_vxlan_tunnel_dump_t_handler - (vl_api_vxlan_tunnel_dump_t * mp) -{ - unix_shared_memory_queue_t *q; - vxlan_main_t *vxm = &vxlan_main; - vxlan_tunnel_t *t; - u32 sw_if_index; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - { - return; - } - - sw_if_index = ntohl (mp->sw_if_index); - - if (~0 == sw_if_index) - { - /* *INDENT-OFF* */ - pool_foreach (t, vxm->tunnels, - ({ - send_vxlan_tunnel_details(t, q, mp->context); - })); - /* *INDENT-ON* */ - } - else - { - if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) || - (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index])) - { - return; - } - t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]]; - send_vxlan_tunnel_details (t, q, mp->context); - } -} - static void vl_api_l2_patch_add_del_t_handler (vl_api_l2_patch_add_del_t * mp) { @@ -1585,83 +1488,6 @@ vl_api_l2_patch_add_del_t_handler 
(vl_api_l2_patch_add_del_t * mp) REPLY_MACRO (VL_API_L2_PATCH_ADD_DEL_REPLY); } -static void - vl_api_vxlan_gpe_add_del_tunnel_t_handler - (vl_api_vxlan_gpe_add_del_tunnel_t * mp) -{ - vl_api_vxlan_gpe_add_del_tunnel_reply_t *rmp; - int rv = 0; - vnet_vxlan_gpe_add_del_tunnel_args_t _a, *a = &_a; - u32 encap_fib_index, decap_fib_index; - u8 protocol; - uword *p; - ip4_main_t *im = &ip4_main; - u32 sw_if_index = ~0; - - - p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id)); - if (!p) - { - rv = VNET_API_ERROR_NO_SUCH_FIB; - goto out; - } - encap_fib_index = p[0]; - - protocol = mp->protocol; - - /* Interpret decap_vrf_id as an opaque if sending to other-than-ip4-input */ - if (protocol == VXLAN_GPE_INPUT_NEXT_IP4_INPUT) - { - p = hash_get (im->fib_index_by_table_id, ntohl (mp->decap_vrf_id)); - if (!p) - { - rv = VNET_API_ERROR_NO_SUCH_INNER_FIB; - goto out; - } - decap_fib_index = p[0]; - } - else - { - decap_fib_index = ntohl (mp->decap_vrf_id); - } - - /* Check src & dst are different */ - if ((mp->is_ipv6 && memcmp (mp->local, mp->remote, 16) == 0) || - (!mp->is_ipv6 && memcmp (mp->local, mp->remote, 4) == 0)) - { - rv = VNET_API_ERROR_SAME_SRC_DST; - goto out; - } - memset (a, 0, sizeof (*a)); - - a->is_add = mp->is_add; - a->is_ip6 = mp->is_ipv6; - /* ip addresses sent in network byte order */ - if (a->is_ip6) - { - clib_memcpy (&(a->local.ip6), mp->local, 16); - clib_memcpy (&(a->remote.ip6), mp->remote, 16); - } - else - { - clib_memcpy (&(a->local.ip4), mp->local, 4); - clib_memcpy (&(a->remote.ip4), mp->remote, 4); - } - a->encap_fib_index = encap_fib_index; - a->decap_fib_index = decap_fib_index; - a->protocol = protocol; - a->vni = ntohl (mp->vni); - rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index); - -out: - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_REPLY, - ({ - rmp->sw_if_index = ntohl (sw_if_index); - })); - /* *INDENT-ON* */ -} - static void send_vxlan_gpe_tunnel_details (vxlan_gpe_tunnel_t * t, 
unix_shared_memory_queue_t * q, u32 context) { @@ -1696,43 +1522,6 @@ static void send_vxlan_gpe_tunnel_details vl_msg_api_send_shmem (q, (u8 *) & rmp); } -static void vl_api_vxlan_gpe_tunnel_dump_t_handler - (vl_api_vxlan_gpe_tunnel_dump_t * mp) -{ - unix_shared_memory_queue_t *q; - vxlan_gpe_main_t *vgm = &vxlan_gpe_main; - vxlan_gpe_tunnel_t *t; - u32 sw_if_index; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - { - return; - } - - sw_if_index = ntohl (mp->sw_if_index); - - if (~0 == sw_if_index) - { - /* *INDENT-OFF* */ - pool_foreach (t, vgm->tunnels, - ({ - send_vxlan_gpe_tunnel_details(t, q, mp->context); - })); - /* *INDENT-ON* */ - } - else - { - if ((sw_if_index >= vec_len (vgm->tunnel_index_by_sw_if_index)) || - (~0 == vgm->tunnel_index_by_sw_if_index[sw_if_index])) - { - return; - } - t = &vgm->tunnels[vgm->tunnel_index_by_sw_if_index[sw_if_index]]; - send_vxlan_gpe_tunnel_details (t, q, mp->context); - } -} - static void vl_api_interface_name_renumber_t_handler (vl_api_interface_name_renumber_t * mp) diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index 97b501e0..6ae510b1 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -48,7 +48,6 @@ api_main_init (vlib_main_t * vm) vam->vlib_main = vm; vam->my_client_index = (u32) ~ 0; init_error_string_table (vam); - vat_api_hookup (vam); rv = vat_plugin_init (vam); if (rv) clib_warning ("vat_plugin_init returned %d", rv); diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index 5e9b0d69..c46d441a 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -46,7 +46,6 @@ stats_main_t stats_main; #define foreach_stats_msg \ _(WANT_STATS, want_stats) \ -_(WANT_STATS_REPLY, want_stats_reply) \ _(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ _(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ _(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ @@ -1226,12 +1225,6 @@ vl_api_vnet_ip6_nbr_counters_t_handler (vl_api_vnet_ip6_nbr_counters_t * mp) } 
} -static void -vl_api_want_stats_reply_t_handler (vl_api_want_stats_reply_t * mp) -{ - clib_warning ("BUG"); -} - static void vl_api_want_stats_t_handler (vl_api_want_stats_t * mp) { -- cgit 1.2.3-korg From 358425b86cb231efd2330e5aeb077cad38c2efad Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Mon, 20 Feb 2017 09:42:36 -0800 Subject: DHCPv6 - Be consistent with the use of MFIB index as the RX FIB index for DHCPv6. For the same table ID, the unicast-FIB index is not necessarily the same value as the multicast-FIB index, since features (like LISP, SR) can create unicast-tables, and thus affect only the index of the unicast FIBs Change-Id: Ibfa334d7eda822f742c241b7ce69a6271b4753a9 Signed-off-by: Neale Ranns --- src/vnet/dhcp/dhcp6_proxy_node.c | 9 +++-- src/vnet/dhcp/dhcp_proxy.c | 84 +++++++++++++++++++++++++++++++++------- src/vnet/dhcp/dhcp_proxy.h | 1 - src/vnet/interface_api.c | 4 +- 4 files changed, 78 insertions(+), 20 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index ed44977d..58674209 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -226,7 +226,7 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, /* Send to DHCPV6 server via the configured FIB */ rx_sw_if_index = sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - rx_fib_idx = im->fib_index_by_sw_if_index [rx_sw_if_index]; + rx_fib_idx = im->mfib_index_by_sw_if_index [rx_sw_if_index]; server = dhcp_get_server(dpm, rx_fib_idx, FIB_PROTOCOL_IP6); if (PREDICT_FALSE (NULL == server)) @@ -587,7 +587,7 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm, //Advance buffer to start of encapsulated DHCPv6 message vlib_buffer_advance (b0, sizeof(*r0)); - client_fib_idx = im->fib_index_by_sw_if_index[sw_if_index]; + client_fib_idx = im->mfib_index_by_sw_if_index[sw_if_index]; server = dhcp_get_server(dm, client_fib_idx, FIB_PROTOCOL_IP6); if (NULL == server) @@ -894,7 +894,8 @@ static u8 * 
format_dhcp6_proxy_server (u8 * s, va_list * args) { dhcp_server_t * server = va_arg (*args, dhcp_server_t *); - ip6_fib_t * rx_fib, * server_fib; + ip6_fib_t *server_fib; + ip6_mfib_t *rx_fib; if (NULL == server) { @@ -904,7 +905,7 @@ format_dhcp6_proxy_server (u8 * s, va_list * args) } server_fib = ip6_fib_get(server->server_fib_index); - rx_fib = ip6_fib_get(server->rx_fib_index); + rx_fib = ip6_mfib_get(server->rx_fib_index); s = format (s, "%=40U%=40U%=14u%=14u", diff --git a/src/vnet/dhcp/dhcp_proxy.c b/src/vnet/dhcp/dhcp_proxy.c index da2deea6..8e31c3db 100644 --- a/src/vnet/dhcp/dhcp_proxy.c +++ b/src/vnet/dhcp/dhcp_proxy.c @@ -17,12 +17,55 @@ #include #include +#include /** * @brief Shard 4/6 instance of DHCP main */ dhcp_proxy_main_t dhcp_proxy_main; +static void +dhcp_proxy_rx_table_lock (fib_protocol_t proto, + u32 fib_index) +{ + if (FIB_PROTOCOL_IP4 == proto) + fib_table_lock(fib_index, proto); + else + mfib_table_lock(fib_index, proto); +} + +static void +dhcp_proxy_rx_table_unlock (fib_protocol_t proto, + u32 fib_index) +{ + if (FIB_PROTOCOL_IP4 == proto) + fib_table_unlock(fib_index, proto); + else + mfib_table_unlock(fib_index, proto); +} + +static u32 +dhcp_proxy_rx_table_get_table_id (fib_protocol_t proto, + u32 fib_index) +{ + if (FIB_PROTOCOL_IP4 == proto) + { + fib_table_t *fib; + + fib = fib_table_get(fib_index, proto); + + return (fib->ft_table_id); + } + else + { + mfib_table_t *mfib; + + mfib = mfib_table_get(fib_index, proto); + + return (mfib->mft_table_id); + } +} + void dhcp_proxy_walk (fib_protocol_t proto, dhcp_proxy_walk_fn_t fn, @@ -51,11 +94,11 @@ dhcp_vss_walk (fib_protocol_t proto, void *ctx) { dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + mfib_table_t *mfib; dhcp_vss_t * vss; u32 vss_index, i; fib_table_t *fib; - vec_foreach_index (i, dpm->vss_index_by_rx_fib_index[proto]) { vss_index = dpm->vss_index_by_rx_fib_index[proto][i]; @@ -64,10 +107,20 @@ dhcp_vss_walk (fib_protocol_t proto, vss = pool_elt_at_index (dpm->vss[proto], 
vss_index); - fib = fib_table_get(i, proto); + if (FIB_PROTOCOL_IP4 == proto) + { + fib = fib_table_get(i, proto); - if (!fn(vss, fib->ft_table_id, ctx)) - break; + if (!fn(vss, fib->ft_table_id, ctx)) + break; + } + else + { + mfib = mfib_table_get(i, proto); + + if (!fn(vss, mfib->mft_table_id, ctx)) + break; + } } } @@ -164,7 +217,8 @@ dhcp_proxy_dump_walk (dhcp_server_t *server, void *arg) { dhcp_proxy_dump_walk_cxt_t *ctx = arg; - fib_table_t *s_fib, *r_fib; + fib_table_t *s_fib; + u32 rx_table_id; dhcp_vss_t *v; v = dhcp_get_vss_info(&dhcp_proxy_main, @@ -172,7 +226,8 @@ dhcp_proxy_dump_walk (dhcp_server_t *server, ctx->proto); s_fib = fib_table_get(server->server_fib_index, ctx->proto); - r_fib = fib_table_get(server->rx_fib_index, ctx->proto); + rx_table_id = dhcp_proxy_rx_table_get_table_id(server->rx_fib_index, + ctx->proto); dhcp_send_details(ctx->proto, ctx->opaque, @@ -180,7 +235,7 @@ dhcp_proxy_dump_walk (dhcp_server_t *server, &server->dhcp_server, &server->dhcp_src_address, s_fib->ft_table_id, - r_fib->ft_table_id, + rx_table_id, (v ? v->fib_id : 0), (v ? 
v->oui : 0)); @@ -226,7 +281,10 @@ int dhcp_proxy_set_vss (fib_protocol_t proto, u32 rx_fib_index; int rc = 0; - rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id); + if (proto == FIB_PROTOCOL_IP4) + rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id); + else + rx_fib_index = mfib_table_find_or_create_and_lock(proto, tbl_id); v = dhcp_get_vss_info(dm, rx_fib_index, proto); if (NULL != v) @@ -235,7 +293,7 @@ int dhcp_proxy_set_vss (fib_protocol_t proto, { /* release the lock held on the table when the VSS * info was created */ - fib_table_unlock (rx_fib_index, proto); + dhcp_proxy_rx_table_unlock (proto, rx_fib_index); pool_put (dm->vss[proto], v); dm->vss_index_by_rx_fib_index[proto][rx_fib_index] = ~0; @@ -258,18 +316,18 @@ int dhcp_proxy_set_vss (fib_protocol_t proto, rx_fib_index, ~0); /* hold a lock on the table whilst the VSS info exist */ - fib_table_lock (rx_fib_index, proto); - pool_get (dm->vss[proto], v); v->fib_id = fib_id; v->oui = oui; + dm->vss_index_by_rx_fib_index[proto][rx_fib_index] = v - dm->vss[proto]; + dhcp_proxy_rx_table_lock (proto, rx_fib_index); } } /* Release the lock taken during the create_or_lock at the start */ - fib_table_unlock (rx_fib_index, proto); - + dhcp_proxy_rx_table_unlock (proto, rx_fib_index); + return (rc); } diff --git a/src/vnet/dhcp/dhcp_proxy.h b/src/vnet/dhcp/dhcp_proxy.h index 4586d883..708e92f3 100644 --- a/src/vnet/dhcp/dhcp_proxy.h +++ b/src/vnet/dhcp/dhcp_proxy.h @@ -104,7 +104,6 @@ typedef struct { /* hash lookup specific vrf_id -> option 82 vss suboption */ u32 *vss_index_by_rx_fib_index[DHCP_N_PROTOS]; - } dhcp_proxy_main_t; extern dhcp_proxy_main_t dhcp_proxy_main; diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index f94928b6..bfd2af31 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -29,6 +29,7 @@ #include #include #include +#include #define vl_typedefs /* define message structures */ #include @@ -340,9 +341,9 @@ 
vl_api_sw_interface_set_table_t_handler (vl_api_sw_interface_set_table_t * mp) fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); - vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index); @@ -362,7 +363,6 @@ vl_api_sw_interface_set_table_t_handler (vl_api_sw_interface_set_table_t * mp) fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); - vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; -- cgit 1.2.3-korg From 3466c30261950823828d1dad0d2fb170ee2f9aaf Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Thu, 16 Feb 2017 07:45:03 -0800 Subject: DHCP Multiple Servers (VPP-602, VPP-605) Multiple DHCP (4 and/or 6) servers can be added and removed through multiple calls to the 'set dhcp server' API. All 4/6 discover/solicit messages will then be replicated to all servers in the list. The expectation is that the servers/system are configured in such a way that this is viable. If VSS information is provided for the client VRF which also has multiple servers configured, then the same VSS information is sent to each server. Likewise the source address of packets sent from VPP to each server is the same. 
Change-Id: I3287cb084c84b3f612b78bc69cfcb5b9c1f8934d Signed-off-by: Neale Ranns --- src/scripts/vnet/dhcp/proxy | 3 +- src/vat/api_format.c | 56 ++++-- src/vnet/dhcp/client.c | 2 +- src/vnet/dhcp/dhcp.api | 12 +- src/vnet/dhcp/dhcp4_packet.h | 5 + src/vnet/dhcp/dhcp4_proxy_node.c | 151 +++++++++++++---- src/vnet/dhcp/dhcp6_proxy_node.c | 158 ++++++++++++----- src/vnet/dhcp/dhcp_api.c | 66 +++++--- src/vnet/dhcp/dhcp_proxy.c | 129 +++++++------- src/vnet/dhcp/dhcp_proxy.h | 85 +++++++--- src/vnet/ip/ip6_packet.h | 2 + src/vnet/pg/input.c | 6 + test/test_dhcp.py | 356 ++++++++++++++++++++++++++++++++++----- 13 files changed, 796 insertions(+), 235 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/scripts/vnet/dhcp/proxy b/src/scripts/vnet/dhcp/proxy index c709d87d..42dff2a0 100644 --- a/src/scripts/vnet/dhcp/proxy +++ b/src/scripts/vnet/dhcp/proxy @@ -14,7 +14,8 @@ set int ip addr loop0 2001::1/64 set int ip addr loop0 2001:1::1/64 set dhcp proxy server 10.255.0.1 src-address 10.0.0.1 server-fib-id 0 rx-fib-id 0 -set dhcp proxy server 10.255.0.2 src-address 10.0.1.1 server-fib-id 1 rx-fib-id 1 +set dhcp proxy server 10.255.0.2 src-address 10.0.0.1 server-fib-id 0 rx-fib-id 0 +set dhcp proxy server 10.255.1.2 src-address 10.0.1.1 server-fib-id 1 rx-fib-id 1 set dhcpv6 proxy server 3001::1 src-address 2001::1 server-fib-id 0 rx-fib-id 0 set dhcpv6 proxy server 3002::1 src-address 2001:1::1 server-fib-id 1 rx-fib-id 1 diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 0b60b910..b5943f03 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -7573,23 +7573,35 @@ static void vl_api_dhcp_proxy_details_t_handler (vl_api_dhcp_proxy_details_t * mp) { vat_main_t *vam = &vat_main; + u32 i, count = mp->count; + vl_api_dhcp_server_t *s; if (mp->is_ipv6) print (vam->ofp, - "RX Table-ID %d, Server Table-ID %d, Server Address %U, Source Address %U, VSS FIB-ID %d, VSS OUI %d", + "RX Table-ID %d, Source Address %U, VSS FIB-ID %d, VSS OUI %d", ntohl 
(mp->rx_vrf_id), - ntohl (mp->server_vrf_id), - format_ip6_address, mp->dhcp_server, format_ip6_address, mp->dhcp_src_address, ntohl (mp->vss_oui), ntohl (mp->vss_fib_id)); else print (vam->ofp, - "RX Table-ID %d, Server Table-ID %d, Server Address %U, Source Address %U, VSS FIB-ID %d, VSS OUI %d", + "RX Table-ID %d, Source Address %U, VSS FIB-ID %d, VSS OUI %d", ntohl (mp->rx_vrf_id), - ntohl (mp->server_vrf_id), - format_ip4_address, mp->dhcp_server, format_ip4_address, mp->dhcp_src_address, ntohl (mp->vss_oui), ntohl (mp->vss_fib_id)); + + for (i = 0; i < count; i++) + { + s = &mp->servers[i]; + + if (mp->is_ipv6) + print (vam->ofp, + " Server Table-ID %d, Server Address %U", + ntohl (s->server_vrf_id), format_ip6_address, s->dhcp_server); + else + print (vam->ofp, + " Server Table-ID %d, Server Address %U", + ntohl (s->server_vrf_id), format_ip4_address, s->dhcp_server); + } } static void vl_api_dhcp_proxy_details_t_handler_json @@ -7597,8 +7609,10 @@ static void vl_api_dhcp_proxy_details_t_handler_json { vat_main_t *vam = &vat_main; vat_json_node_t *node = NULL; + u32 i, count = mp->count; struct in_addr ip4; struct in6_addr ip6; + vl_api_dhcp_server_t *s; if (VAT_JSON_ARRAY != vam->json_tree.type) { @@ -7609,24 +7623,38 @@ static void vl_api_dhcp_proxy_details_t_handler_json vat_json_init_object (node); vat_json_object_add_uint (node, "rx-table-id", ntohl (mp->rx_vrf_id)); - vat_json_object_add_uint (node, "server-table-id", - ntohl (mp->server_vrf_id)); + vat_json_object_add_uint (node, "vss-fib-id", ntohl (mp->vss_fib_id)); + vat_json_object_add_uint (node, "vss-oui", ntohl (mp->vss_oui)); + if (mp->is_ipv6) { - clib_memcpy (&ip6, &mp->dhcp_server, sizeof (ip6)); - vat_json_object_add_ip6 (node, "server_address", ip6); clib_memcpy (&ip6, &mp->dhcp_src_address, sizeof (ip6)); vat_json_object_add_ip6 (node, "src_address", ip6); } else { - clib_memcpy (&ip4, &mp->dhcp_server, sizeof (ip4)); - vat_json_object_add_ip4 (node, "server_address", ip4); clib_memcpy 
(&ip4, &mp->dhcp_src_address, sizeof (ip4)); vat_json_object_add_ip4 (node, "src_address", ip4); } - vat_json_object_add_uint (node, "vss-fib-id", ntohl (mp->vss_fib_id)); - vat_json_object_add_uint (node, "vss-oui", ntohl (mp->vss_oui)); + + for (i = 0; i < count; i++) + { + s = &mp->servers[i]; + + vat_json_object_add_uint (node, "server-table-id", + ntohl (s->server_vrf_id)); + + if (mp->is_ipv6) + { + clib_memcpy (&ip4, &s->dhcp_server, sizeof (ip4)); + vat_json_object_add_ip4 (node, "src_address", ip4); + } + else + { + clib_memcpy (&ip6, &s->dhcp_server, sizeof (ip6)); + vat_json_object_add_ip6 (node, "server_address", ip6); + } + } } static int diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c index d34c5a64..29749a33 100644 --- a/src/vnet/dhcp/client.c +++ b/src/vnet/dhcp/client.c @@ -366,7 +366,7 @@ send_dhcp_pkt (dhcp_client_main_t * dcm, dhcp_client_t * c, o = (dhcp_option_t * )dhcp->options; /* Send option 53, the DHCP message type */ - o->option = 53; + o->option = DHCP_PACKET_OPTION_MSG_TYPE; o->length = 1; o->data[0] = type; o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api index 8daadd8c..2db85a79 100644 --- a/src/vnet/dhcp/dhcp.api +++ b/src/vnet/dhcp/dhcp.api @@ -137,19 +137,25 @@ define dhcp_proxy_dump u8 is_ip6; }; +typeonly manual_print manual_endian define dhcp_server +{ + u32 server_vrf_id; + u8 dhcp_server[16]; +}; + /** \brief Tell client about a DHCP completion event @param client_index - opaque cookie to identify the sender */ -define dhcp_proxy_details +manual_endian manual_print define dhcp_proxy_details { u32 context; u32 rx_vrf_id; - u32 server_vrf_id; u32 vss_oui; u32 vss_fib_id; u8 is_ipv6; - u8 dhcp_server[16]; u8 dhcp_src_address[16]; + u8 count; + vl_api_dhcp_server_t servers[count]; }; /* diff --git a/src/vnet/dhcp/dhcp4_packet.h b/src/vnet/dhcp/dhcp4_packet.h index 28c4b156..07829f48 100644 --- a/src/vnet/dhcp/dhcp4_packet.h +++ 
b/src/vnet/dhcp/dhcp4_packet.h @@ -55,6 +55,11 @@ typedef enum { DHCP_PACKET_ACK=5, } dhcp_packet_type_t; +typedef enum dhcp_packet_option_t_ +{ + DHCP_PACKET_OPTION_MSG_TYPE = 53, +} dhcp_packet_option_t; + /* charming antique: 99.130.83.99 is the dhcp magic cookie */ #define DHCP_MAGIC (clib_host_to_net_u32(0x63825363)) diff --git a/src/vnet/dhcp/dhcp4_proxy_node.c b/src/vnet/dhcp/dhcp4_proxy_node.c index 88a99249..1c84881a 100644 --- a/src/vnet/dhcp/dhcp4_proxy_node.c +++ b/src/vnet/dhcp/dhcp4_proxy_node.c @@ -135,18 +135,17 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, u32 original_sw_if_index = 0; u8 *end = NULL; u32 fib_index; - dhcp_server_t * server; + dhcp_proxy_t *proxy; + dhcp_server_t *server; u32 rx_sw_if_index; dhcp_option_t *o; u32 len = 0; vlib_buffer_free_list_t *fl; + u8 is_discover = 0; bi0 = from[0]; - to_next[0] = bi0; from += 1; - to_next += 1; n_left_from -= 1; - n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); @@ -172,16 +171,17 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, rx_sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; fib_index = im->fib_index_by_sw_if_index [rx_sw_if_index]; - server = dhcp_get_server(dpm, fib_index, FIB_PROTOCOL_IP4); - - if (PREDICT_FALSE (NULL == server)) + proxy = dhcp_get_proxy(dpm, fib_index, FIB_PROTOCOL_IP4); + + if (PREDICT_FALSE (NULL == proxy)) { error0 = DHCP_PROXY_ERROR_NO_SERVER; next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; pkts_no_server++; goto do_trace; } - + + server = &proxy->dhcp_servers[0]; vlib_buffer_advance (b0, -(sizeof(*ip0))); ip0 = vlib_buffer_get_current (b0); @@ -198,7 +198,7 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, sum0 = ip0->checksum; old0 = ip0->src_address.as_u32; - new0 = server->dhcp_src_address.ip4.as_u32; + new0 = proxy->dhcp_src_address.ip4.as_u32; ip0->src_address.as_u32 = new0; sum0 = ip_csum_update (sum0, old0, new0, ip4_header_t /* structure */, @@ -209,7 +209,7 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, vnet_buffer(b0)->sw_if_index[VLIB_TX] = 
server->server_fib_index; - h0->gateway_ip_address.as_u32 = server->dhcp_src_address.ip4.as_u32; + h0->gateway_ip_address.as_u32 = proxy->dhcp_src_address.ip4.as_u32; pkts_to_server++; o = (dhcp_option_t *) h0->options; @@ -220,7 +220,16 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, end = b0->data + b0->current_data + b0->current_length; /* TLVs are not performance-friendly... */ while (o->option != 0xFF /* end of options */ && (u8 *)o < end) + { + if (DHCP_PACKET_OPTION_MSG_TYPE == o->option) + { + if (DHCP_PACKET_DISCOVER == o->data[0]) + { + is_discover = 1; + } + } o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + } fl = vlib_buffer_get_free_list (vm, b0->free_list_index); // start write at (option*)o, some packets have padding @@ -340,6 +349,65 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; + /* + * If we have multiple servers configured and this is the + * client's discover message, then send copies to each of + * those servers + */ + if (is_discover && vec_len(proxy->dhcp_servers) > 1) + { + u32 ii; + + for (ii = 1; ii < vec_len(proxy->dhcp_servers); ii++) + { + vlib_buffer_t *c0; + u32 ci0; + + c0 = vlib_buffer_copy(vm, b0); + ci0 = vlib_get_buffer_index(vm, c0); + server = &proxy->dhcp_servers[ii]; + + ip0 = vlib_buffer_get_current (c0); + + sum0 = ip0->checksum; + old0 = ip0->dst_address.as_u32; + new0 = server->dhcp_server.ip4.as_u32; + ip0->dst_address.as_u32 = server->dhcp_server.ip4.as_u32; + sum0 = ip_csum_update (sum0, old0, new0, + ip4_header_t /* structure */, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + to_next[0] = ci0; + to_next += 1; + n_left_to_next -= 1; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + ci0, next0); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcp_proxy_trace_t *tr; + + tr = vlib_add_trace (vm, node, c0, sizeof (*tr)); + tr->which = 0; /* to server */ + tr->error = error0; + 
tr->original_sw_if_index = original_sw_if_index; + tr->sw_if_index = sw_if_index; + if (next0 == DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP) + tr->trace_ip4_address.as_u32 = server->dhcp_server.ip4.as_u32; + } + + if (PREDICT_FALSE(0 == n_left_to_next)) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + } + } + } do_trace: if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -350,10 +418,15 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, tr->original_sw_if_index = original_sw_if_index; tr->sw_if_index = sw_if_index; if (next0 == DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP) - tr->trace_ip4_address.as_u32 = server->dhcp_server.ip4.as_u32; + tr->trace_ip4_address.as_u32 = + proxy->dhcp_servers[0].dhcp_server.ip4.as_u32; } do_enqueue: + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); @@ -437,7 +510,8 @@ dhcp_proxy_to_client_input (vlib_main_t * vm, u32 error0 = (u32)~0; vnet_sw_interface_t *swif; u32 fib_index; - dhcp_server_t * server; + dhcp_proxy_t *proxy; + dhcp_server_t *server; u32 original_sw_if_index = (u32) ~0; ip4_address_t relay_addr = { .as_u32 = 0, @@ -547,20 +621,26 @@ dhcp_proxy_to_client_input (vlib_main_t * vm, } fib_index = im->fib_index_by_sw_if_index [sw_if_index]; - server = dhcp_get_server(dpm, fib_index, FIB_PROTOCOL_IP4); + proxy = dhcp_get_proxy(dpm, fib_index, FIB_PROTOCOL_IP4); - if (PREDICT_FALSE (NULL == server)) + if (PREDICT_FALSE (NULL == proxy)) { error0 = DHCP_PROXY_ERROR_NO_SERVER; goto drop_packet; } - if (ip0->src_address.as_u32 != server->dhcp_server.ip4.as_u32) - { - error0 = DHCP_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; - goto drop_packet; + vec_foreach(server, proxy->dhcp_servers) + { + if (ip0->src_address.as_u32 == server->dhcp_server.ip4.as_u32) + { + goto server_found; + } } + error0 = DHCP_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; + goto 
drop_packet; + + server_found: vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index; swif = vnet_get_sw_interface (vnm, sw_if_index); @@ -709,9 +789,8 @@ dhcp4_proxy_set_server (ip46_address_t *addr, if (is_del) { - rc = dhcp_proxy_server_del (FIB_PROTOCOL_IP4, rx_fib_index); - - if (0 == rc) + if (dhcp_proxy_server_del (FIB_PROTOCOL_IP4, rx_fib_index, + addr, server_table_id)) { fib_table_entry_special_remove(rx_fib_index, &all_1s, @@ -809,29 +888,35 @@ VLIB_CLI_COMMAND (dhcp_proxy_set_command, static) = { static u8 * format_dhcp4_proxy_server (u8 * s, va_list * args) { - dhcp_server_t * server = va_arg (*args, dhcp_server_t *); + dhcp_proxy_t *proxy = va_arg (*args, dhcp_proxy_t *); ip4_fib_t * rx_fib, * server_fib; + dhcp_server_t *server; - if (server == 0) + if (proxy == 0) { - s = format (s, "%=16s%=16s%=14s%=14s", "Server", "Src Address", - "Server FIB", "RX FIB"); + s = format (s, "%=14s%=16s%s", "RX FIB", "Src Address", + "Servers FIB,Address"); return s; } - server_fib = ip4_fib_get(server->server_fib_index); - rx_fib = ip4_fib_get(server->rx_fib_index); + rx_fib = ip4_fib_get(proxy->rx_fib_index); + + s = format (s, "%=14u%=16U", + rx_fib->table_id, + format_ip46_address, &proxy->dhcp_src_address, IP46_TYPE_ANY); - s = format (s, "%=16U%=16U%=14u%=14u", - format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY, - format_ip46_address, &server->dhcp_src_address, IP46_TYPE_ANY, - server_fib->table_id, - rx_fib->table_id); + vec_foreach(server, proxy->dhcp_servers) + { + server_fib = ip4_fib_get(server->server_fib_index); + s = format (s, "%u,%U ", + server_fib->table_id, + format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY); + } return s; } static int -dhcp4_proxy_show_walk (dhcp_server_t *server, +dhcp4_proxy_show_walk (dhcp_proxy_t *server, void *ctx) { vlib_main_t * vm = ctx; diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index 58674209..524cb095 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ 
b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -140,7 +140,8 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, ip6_main_t * im = &ip6_main; ip6_address_t * src; int bogus_length; - dhcp_server_t * server; + dhcp_proxy_t *proxy; + dhcp_server_t *server; u32 rx_fib_idx = 0, server_fib_idx = 0; next_index = node->cached_next_index; @@ -176,13 +177,11 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, u8 client_src_mac[6]; vlib_buffer_free_list_t *fl; dhcp_vss_t *vss; + u8 is_solicit = 0; bi0 = from[0]; - to_next[0] = bi0; from += 1; - to_next += 1; n_left_from -= 1; - n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); @@ -227,9 +226,9 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, /* Send to DHCPV6 server via the configured FIB */ rx_sw_if_index = sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; rx_fib_idx = im->mfib_index_by_sw_if_index [rx_sw_if_index]; - server = dhcp_get_server(dpm, rx_fib_idx, FIB_PROTOCOL_IP6); + proxy = dhcp_get_proxy(dpm, rx_fib_idx, FIB_PROTOCOL_IP6); - if (PREDICT_FALSE (NULL == server)) + if (PREDICT_FALSE (NULL == proxy)) { error0 = DHCPV6_PROXY_ERROR_NO_SERVER; next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; @@ -237,6 +236,7 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, goto do_trace; } + server = &proxy->dhcp_servers[0]; server_fib_idx = server->server_fib_index; vnet_buffer(b0)->sw_if_index[VLIB_TX] = server_fib_idx; @@ -371,18 +371,19 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, ip1->payload_length = u1->length; ip1->protocol = PROTO_UDP; ip1->hop_limit = HOP_COUNT_LIMIT; - src = (server->dhcp_server.ip6.as_u64[0] || - server->dhcp_server.ip6.as_u64[1]) ? - &server->dhcp_server.ip6 : &all_dhcpv6_server_address; + src = ((server->dhcp_server.ip6.as_u64[0] || + server->dhcp_server.ip6.as_u64[1]) ? 
+ &server->dhcp_server.ip6 : + &all_dhcpv6_server_address); copy_ip6_address(&ip1->dst_address, src); ia0 = ip6_interface_first_global_or_site_address (&ip6_main, vnet_buffer(b0)->sw_if_index[VLIB_RX]); - src = (server->dhcp_src_address.ip6.as_u64[0] || - server->dhcp_src_address.ip6.as_u64[1]) ? - &server->dhcp_src_address.ip6 : ia0; + src = (proxy->dhcp_src_address.ip6.as_u64[0] || + proxy->dhcp_src_address.ip6.as_u64[1]) ? + &proxy->dhcp_src_address.ip6 : ia0; if (ia0 == 0) { error0 = DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS; @@ -400,6 +401,66 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; + is_solicit = (DHCPV6_MSG_SOLICIT == h0->u.msg_type); + + /* + * If we have multiple servers configured and this is the + * client's discover message, then send copies to each of + * those servers + */ + if (is_solicit && vec_len(proxy->dhcp_servers) > 1) + { + u32 ii; + + for (ii = 1; ii < vec_len(proxy->dhcp_servers); ii++) + { + vlib_buffer_t *c0; + u32 ci0; + + c0 = vlib_buffer_copy(vm, b0); + ci0 = vlib_get_buffer_index(vm, c0); + server = &proxy->dhcp_servers[ii]; + + ip0 = vlib_buffer_get_current (c0); + + src = ((server->dhcp_server.ip6.as_u64[0] || + server->dhcp_server.ip6.as_u64[1]) ? 
+ &server->dhcp_server.ip6 : + &all_dhcpv6_server_address); + copy_ip6_address(&ip1->dst_address, src); + + to_next[0] = ci0; + to_next += 1; + n_left_to_next -= 1; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + ci0, next0); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcpv6_proxy_trace_t *tr; + + tr = vlib_add_trace (vm, node, c0, sizeof (*tr)); + tr->which = 0; /* to server */ + tr->error = error0; + tr->original_sw_if_index = rx_sw_if_index; + tr->sw_if_index = sw_if_index; + if (next0 == DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP) + copy_ip6_address((ip6_address_t *)&tr->packet_data[0], + &server->dhcp_server.ip6); + } + + if (PREDICT_FALSE(0 == n_left_to_next)) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + } + } + } + do_trace: if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -414,6 +475,10 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, } do_enqueue: + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); @@ -475,7 +540,8 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm, u32 n_left_from, * from; ethernet_main_t *em = ethernet_get_main (vm); dhcp_proxy_main_t * dm = &dhcp_proxy_main; - dhcp_server_t * server; + dhcp_proxy_t *proxy; + dhcp_server_t *server; vnet_main_t * vnm = vnet_get_main(); int bogus_length; @@ -588,9 +654,9 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm, vlib_buffer_advance (b0, sizeof(*r0)); client_fib_idx = im->mfib_index_by_sw_if_index[sw_if_index]; - server = dhcp_get_server(dm, client_fib_idx, FIB_PROTOCOL_IP6); + proxy = dhcp_get_proxy(dm, client_fib_idx, FIB_PROTOCOL_IP6); - if (NULL == server) + if (NULL == proxy) { error0 = DHCPV6_PROXY_ERROR_NO_SERVER; goto drop_packet; @@ -599,15 +665,21 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm, server_fib_idx = 
im->fib_index_by_sw_if_index [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; - if (server_fib_idx != server->server_fib_index || - ip0->src_address.as_u64[0] != server->dhcp_server.ip6.as_u64[0] || - ip0->src_address.as_u64[1] != server->dhcp_server.ip6.as_u64[1]) - { - //drop packet if not from server with configured address or FIB - error0 = DHCPV6_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; - goto drop_packet; - } + vec_foreach(server, proxy->dhcp_servers) + { + if (server_fib_idx == server->server_fib_index && + ip0->src_address.as_u64[0] == server->dhcp_server.ip6.as_u64[0] && + ip0->src_address.as_u64[1] == server->dhcp_server.ip6.as_u64[1]) + { + goto server_found; + } + } + + //drop packet if not from server with configured address or FIB + error0 = DHCPV6_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; + goto drop_packet; + server_found: vnet_buffer (b0)->sw_if_index[VLIB_TX] = original_sw_if_index = sw_if_index; @@ -773,9 +845,8 @@ dhcp6_proxy_set_server (ip46_address_t *addr, if (is_del) { - rc = dhcp_proxy_server_del (FIB_PROTOCOL_IP6, rx_fib_index); - - if (0 == rc) + if (dhcp_proxy_server_del (FIB_PROTOCOL_IP6, rx_fib_index, + addr, server_table_id)) { mfib_table_entry_delete(rx_fib_index, &all_dhcp_servers, @@ -893,43 +964,50 @@ VLIB_CLI_COMMAND (dhcpv6_proxy_set_command, static) = { static u8 * format_dhcp6_proxy_server (u8 * s, va_list * args) { - dhcp_server_t * server = va_arg (*args, dhcp_server_t *); + dhcp_proxy_t * proxy = va_arg (*args, dhcp_proxy_t *); ip6_fib_t *server_fib; + dhcp_server_t *server; ip6_mfib_t *rx_fib; - if (NULL == server) + if (proxy == 0) { - s = format (s, "%=40s%=40s%=14s%=14s", "Server Address", "Source Address", - "Server FIB", "RX FIB"); + s = format (s, "%=14s%=16s%s", "RX FIB", "Src Address", + "Servers FIB,Address"); return s; } - server_fib = ip6_fib_get(server->server_fib_index); - rx_fib = ip6_mfib_get(server->rx_fib_index); + rx_fib = ip6_mfib_get(proxy->rx_fib_index); + + s = format (s, "%=14u%=16U", + rx_fib->table_id, + 
format_ip46_address, &proxy->dhcp_src_address, IP46_TYPE_ANY); + vec_foreach(server, proxy->dhcp_servers) + { + server_fib = ip6_fib_get(server->server_fib_index); + s = format (s, "%u,%U ", + server_fib->table_id, + format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY); + } - s = format (s, "%=40U%=40U%=14u%=14u", - format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY, - format_ip46_address, &server->dhcp_src_address, IP46_TYPE_ANY, - server_fib->table_id, rx_fib->table_id); return s; } static int -dhcp6_proxy_show_walk (dhcp_server_t *server, +dhcp6_proxy_show_walk (dhcp_proxy_t *proxy, void *ctx) { vlib_main_t * vm = ctx; - vlib_cli_output (vm, "%U", format_dhcp6_proxy_server, server); + vlib_cli_output (vm, "%U", format_dhcp6_proxy_server, proxy); return (1); } static clib_error_t * dhcpv6_proxy_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, + vlib_cli_command_t * cmd) { vlib_cli_output (vm, "%U", format_dhcp6_proxy_server, NULL /* header line */); diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c index ce34f6a4..e9c757e8 100644 --- a/src/vnet/dhcp/dhcp_api.c +++ b/src/vnet/dhcp/dhcp_api.c @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -113,46 +114,73 @@ vl_api_dhcp_proxy_dump_t_handler (vl_api_dhcp_proxy_dump_t * mp) if (q == 0) return; - dhcp_proxy_dump ((mp->is_ip6 == 0 ? + dhcp_proxy_dump ((mp->is_ip6 == 1 ? 
FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4), q, mp->context); } void dhcp_send_details (fib_protocol_t proto, - void *opaque, - u32 context, - const ip46_address_t * server, - const ip46_address_t * src, - u32 server_fib_id, - u32 rx_fib_id, u32 vss_fib_id, u32 vss_oui) + void *opaque, u32 context, dhcp_proxy_t * proxy) { vl_api_dhcp_proxy_details_t *mp; unix_shared_memory_queue_t *q = opaque; - - mp = vl_msg_api_alloc (sizeof (*mp)); + vl_api_dhcp_server_t *v_server; + dhcp_server_t *server; + fib_table_t *s_fib; + dhcp_vss_t *vss; + u32 count; + size_t n; + + count = vec_len (proxy->dhcp_servers); + n = sizeof (*mp) + (count * sizeof (vl_api_dhcp_server_t)); + mp = vl_msg_api_alloc (n); if (!mp) return; - memset (mp, 0, sizeof (*mp)); + memset (mp, 0, n); mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_DETAILS); mp->context = context; - - mp->rx_vrf_id = htonl (rx_fib_id); - mp->server_vrf_id = htonl (server_fib_id); - mp->vss_oui = htonl (vss_oui); - mp->vss_fib_id = htonl (vss_fib_id); + mp->count = count; mp->is_ipv6 = (proto == FIB_PROTOCOL_IP6); + mp->rx_vrf_id = + htonl (dhcp_proxy_rx_table_get_table_id (proto, proxy->rx_fib_index)); + + vss = dhcp_get_vss_info (&dhcp_proxy_main, proxy->rx_fib_index, proto); + + if (NULL != vss) + { + mp->vss_oui = htonl (vss->oui); + mp->vss_fib_id = htonl (vss->fib_id); + } + + vec_foreach_index (count, proxy->dhcp_servers) + { + server = &proxy->dhcp_servers[count]; + v_server = &mp->servers[count]; + + s_fib = fib_table_get (server->server_fib_index, proto); + + v_server->server_vrf_id = htonl (s_fib->ft_table_id); + + if (mp->is_ipv6) + { + memcpy (v_server->dhcp_server, &server->dhcp_server.ip6, 16); + } + else + { + /* put the address in the first bytes */ + memcpy (v_server->dhcp_server, &server->dhcp_server.ip4, 4); + } + } if (mp->is_ipv6) { - memcpy (mp->dhcp_server, server, 16); - memcpy (mp->dhcp_src_address, src, 16); + memcpy (mp->dhcp_src_address, &proxy->dhcp_src_address.ip6, 16); } else { /* put the address in the 
first bytes */ - memcpy (mp->dhcp_server, &server->ip4, 4); - memcpy (mp->dhcp_src_address, &src->ip4, 4); + memcpy (mp->dhcp_src_address, &proxy->dhcp_src_address.ip4, 4); } vl_msg_api_send_shmem (q, (u8 *) & mp); } diff --git a/src/vnet/dhcp/dhcp_proxy.c b/src/vnet/dhcp/dhcp_proxy.c index 8e31c3db..ba7f354e 100644 --- a/src/vnet/dhcp/dhcp_proxy.c +++ b/src/vnet/dhcp/dhcp_proxy.c @@ -44,7 +44,7 @@ dhcp_proxy_rx_table_unlock (fib_protocol_t proto, mfib_table_unlock(fib_index, proto); } -static u32 + u32 dhcp_proxy_rx_table_get_table_id (fib_protocol_t proto, u32 fib_index) { @@ -72,7 +72,7 @@ dhcp_proxy_walk (fib_protocol_t proto, void *ctx) { dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - dhcp_server_t * server; + dhcp_proxy_t * server; u32 server_index, i; vec_foreach_index (i, dpm->dhcp_server_index_by_rx_fib_index[proto]) @@ -124,31 +124,68 @@ dhcp_vss_walk (fib_protocol_t proto, } } +static u32 +dhcp_proxy_server_find (dhcp_proxy_t *proxy, + fib_protocol_t proto, + ip46_address_t *addr, + u32 server_table_id) +{ + dhcp_server_t *server; + u32 ii, fib_index; + + vec_foreach_index(ii, proxy->dhcp_servers) + { + server = &proxy->dhcp_servers[ii]; + fib_index = fib_table_find(proto, server_table_id); + + if (ip46_address_is_equal(&server->dhcp_server, + addr) && + (server->server_fib_index == fib_index)) + { + return (ii); + } + } + return (~0); +} + int dhcp_proxy_server_del (fib_protocol_t proto, - u32 rx_fib_index) + u32 rx_fib_index, + ip46_address_t *addr, + u32 server_table_id) { dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - dhcp_server_t * server = 0; - int rc = 0; + dhcp_proxy_t *proxy = 0; - server = dhcp_get_server(dpm, rx_fib_index, proto); + proxy = dhcp_get_proxy(dpm, rx_fib_index, proto); - if (NULL == server) - { - rc = VNET_API_ERROR_NO_SUCH_ENTRY; - } - else + if (NULL != proxy) { - /* Use the default server again. 
*/ - dpm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] = ~0; + dhcp_server_t *server; + u32 index; - fib_table_unlock (server->server_fib_index, proto); + index = dhcp_proxy_server_find(proxy, proto, addr, server_table_id); - pool_put (dpm->dhcp_servers[proto], server); + if (~0 != index) + { + server = &proxy->dhcp_servers[index]; + fib_table_unlock (server->server_fib_index, proto); + + vec_del1(proxy->dhcp_servers, index); + + if (0 == vec_len(proxy->dhcp_servers)) + { + /* no servers left, delete the proxy config */ + dpm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] = ~0; + vec_free(proxy->dhcp_servers); + pool_put (dpm->dhcp_servers[proto], proxy); + return (1); + } + } } - return (rc); + /* the proxy still exists */ + return (0); } int @@ -159,48 +196,42 @@ dhcp_proxy_server_add (fib_protocol_t proto, u32 server_table_id) { dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - dhcp_server_t * server = 0; + dhcp_proxy_t * proxy = 0; int new = 0; - server = dhcp_get_server(dpm, rx_fib_index, proto); + proxy = dhcp_get_proxy(dpm, rx_fib_index, proto); - if (NULL == server) + if (NULL == proxy) { vec_validate_init_empty(dpm->dhcp_server_index_by_rx_fib_index[proto], rx_fib_index, ~0); - pool_get (dpm->dhcp_servers[proto], server); - memset (server, 0, sizeof (*server)); + pool_get (dpm->dhcp_servers[proto], proxy); + memset (proxy, 0, sizeof (*proxy)); new = 1; dpm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] = - server - dpm->dhcp_servers[proto]; + proxy - dpm->dhcp_servers[proto]; - server->rx_fib_index = rx_fib_index; - server->server_fib_index = - fib_table_find_or_create_and_lock(proto, server_table_id); + proxy->dhcp_src_address = *src_address; + proxy->rx_fib_index = rx_fib_index; } else { - /* modify, may need to swap server FIBs */ - u32 tmp_index; - - tmp_index = fib_table_find(proto, server_table_id); - - if (tmp_index != server->server_fib_index) + if (~0 != dhcp_proxy_server_find(proxy, proto, addr, server_table_id)) { - 
tmp_index = server->server_fib_index; - - /* certainly swapping if the fib doesn't exist */ - server->server_fib_index = - fib_table_find_or_create_and_lock(proto, server_table_id); - fib_table_unlock (tmp_index, proto); + return (new); } } - server->dhcp_server = *addr; - server->dhcp_src_address = *src_address; + dhcp_server_t server = { + .dhcp_server = *addr, + .server_fib_index = fib_table_find_or_create_and_lock(proto, + server_table_id), + }; + + vec_add1(proxy->dhcp_servers, server); return (new); } @@ -213,31 +244,15 @@ typedef struct dhcp4_proxy_dump_walk_ctx_t_ } dhcp_proxy_dump_walk_cxt_t; static int -dhcp_proxy_dump_walk (dhcp_server_t *server, +dhcp_proxy_dump_walk (dhcp_proxy_t *proxy, void *arg) { dhcp_proxy_dump_walk_cxt_t *ctx = arg; - fib_table_t *s_fib; - u32 rx_table_id; - dhcp_vss_t *v; - - v = dhcp_get_vss_info(&dhcp_proxy_main, - server->rx_fib_index, - ctx->proto); - - s_fib = fib_table_get(server->server_fib_index, ctx->proto); - rx_table_id = dhcp_proxy_rx_table_get_table_id(server->rx_fib_index, - ctx->proto); dhcp_send_details(ctx->proto, ctx->opaque, ctx->context, - &server->dhcp_server, - &server->dhcp_src_address, - s_fib->ft_table_id, - rx_table_id, - (v ? v->fib_id : 0), - (v ? 
v->oui : 0)); + proxy); return (1); } diff --git a/src/vnet/dhcp/dhcp_proxy.h b/src/vnet/dhcp/dhcp_proxy.h index 708e92f3..ef2bc0a1 100644 --- a/src/vnet/dhcp/dhcp_proxy.h +++ b/src/vnet/dhcp/dhcp_proxy.h @@ -58,32 +58,58 @@ typedef struct dhcp_vss_t_ { } dhcp_vss_t; /** - * @brief A DHCP proxy server represenation + * @brief A representation of a single DHCP Server within a given VRF config */ -typedef struct dhcp_server_t_ { +typedef struct dhcp_server_t_ +{ /** * @brief The address of the DHCP server to which to relay the client's * messages */ ip46_address_t dhcp_server; - /** - * @brief The source address to use in relayed messaes - */ - ip46_address_t dhcp_src_address; - /** * @brief The FIB index (not the external Table-ID) in which the server * is reachable. */ u32 server_fib_index; +} dhcp_server_t; + +/** + * @brief A DHCP proxy representation for per-client VRF config + */ +typedef struct dhcp_proxy_t_ { + /** + * @brief The set of DHCP servers to which messages are relayed. + * If multiple servers are configured then discover/solicit messages + * are relayed to each. A cookie is maintained for the relay, and only + * one message is replayed to the client, based on the presence of the + * cookie. + * The expectation is there are only 1 or 2 servers, hence no fancy DB. + */ + dhcp_server_t *dhcp_servers; + + /** + * @brief Hash table of pending requests keyed on the client's MAC address + */ + uword *dhcp_pending; + + /** + * @brief A lock for the pending request DB. + */ + int lock; + + /** + * @brief The source address to use in relayed messages + */ + ip46_address_t dhcp_src_address; /** * @brief The FIB index (not the external Table-ID) in which the client * is resides.
*/ u32 rx_fib_index; -} dhcp_server_t; +} dhcp_proxy_t; #define DHCP_N_PROTOS (FIB_PROTOCOL_IP6 + 1) @@ -92,7 +118,7 @@ typedef struct dhcp_server_t_ { */ typedef struct { /* Pool of DHCP servers */ - dhcp_server_t *dhcp_servers[DHCP_N_PROTOS]; + dhcp_proxy_t *dhcp_servers[DHCP_N_PROTOS]; /* Pool of selected DHCP server. Zero is the default server */ u32 * dhcp_server_index_by_rx_fib_index[DHCP_N_PROTOS]; @@ -114,12 +140,7 @@ extern dhcp_proxy_main_t dhcp_proxy_main; void dhcp_send_details (fib_protocol_t proto, void *opaque, u32 context, - const ip46_address_t *server, - const ip46_address_t *src, - u32 server_fib_id, - u32 rx_fib_id, - u32 vss_fib_id, - u32 vss_oui); + dhcp_proxy_t *proxy); /** * @brief Show (on CLI) a VSS config during a show walk @@ -157,16 +178,22 @@ int dhcp_proxy_server_add(fib_protocol_t proto, /** * @brief Delete a DHCP proxy config - * @return 0 is deleted, otherwise an error code + * @return 1 if the proxy is deleted, 0 otherwise */ int dhcp_proxy_server_del(fib_protocol_t proto, - u32 rx_fib_index); + u32 rx_fib_index, + ip46_address_t *addr, + u32 server_table_id); + +u32 +dhcp_proxy_rx_table_get_table_id (fib_protocol_t proto, + u32 fib_index); /** * @brief Callback function invoked for each DHCP proxy entry * return 0 to break the walk, non-zero otherwise. 
*/ -typedef int (*dhcp_proxy_walk_fn_t)(dhcp_server_t *server, +typedef int (*dhcp_proxy_walk_fn_t)(dhcp_proxy_t *server, void *ctx); /** @@ -191,6 +218,18 @@ void dhcp_vss_walk(fib_protocol_t proto, dhcp_vss_walk_fn_t fn, void *ctx); +/** + * @brief Lock a proxy object to prevent simultaneous access of its + * pending store + */ +void dhcp_proxy_lock (dhcp_proxy_t *server); + +/** + * @brief Unlock a proxy object, ending the exclusive access to its + * pending store + */ +void dhcp_proxy_unlock (dhcp_proxy_t *server); + /** * @brief Get the VSS data for the FIB index */ @@ -215,12 +254,12 @@ dhcp_get_vss_info (dhcp_proxy_main_t *dm, /** * @brief Get the DHCP proxy server data for the FIB index */ -static inline dhcp_server_t * -dhcp_get_server (dhcp_proxy_main_t *dm, - u32 rx_fib_index, - fib_protocol_t proto) +static inline dhcp_proxy_t * +dhcp_get_proxy (dhcp_proxy_main_t *dm, + u32 rx_fib_index, + fib_protocol_t proto) { - dhcp_server_t *s = NULL; + dhcp_proxy_t *s = NULL; if (vec_len(dm->dhcp_server_index_by_rx_fib_index[proto]) > rx_fib_index && dm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] != ~0) diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index 6eabeef1..9bf19edb 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -79,6 +79,8 @@ typedef CLIB_PACKED (union { #define ip46_address_reset(ip46) ((ip46)->as_u64[0] = (ip46)->as_u64[1] = 0) #define ip46_address_cmp(ip46_1, ip46_2) (memcmp(ip46_1, ip46_2, sizeof(*ip46_1))) #define ip46_address_is_zero(ip46) (((ip46)->as_u64[0] == 0) && ((ip46)->as_u64[1] == 0)) +#define ip46_address_is_equal(a1, a2) (((a1)->as_u64[0] == (a2)->as_u64[0]) \ + && ((a1)->as_u64[1] == (a2)->as_u64[1])) always_inline void ip46_from_addr_buf (u32 is_ipv6, u8 * buf, ip46_address_t * ip) diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c index 4a65b024..2649798b 100644 --- a/src/vnet/pg/input.c +++ b/src/vnet/pg/input.c @@ -1373,6 +1373,7 @@ typedef struct u32 stream_index; u32
packet_length; + u32 sw_if_index; /* Use pre data for packet data. */ vlib_buffer_t buffer; @@ -1399,6 +1400,7 @@ format_pg_input_trace (u8 * s, va_list * va) s = format (s, "stream %d", t->stream_index); s = format (s, ", %d bytes", t->packet_length); + s = format (s, ", %d sw_if_index", t->sw_if_index); s = format (s, "\n%U%U", format_white_space, indent, format_vlib_buffer, &t->buffer); @@ -1458,6 +1460,9 @@ pg_input_trace (pg_main_t * pg, t0->packet_length = vlib_buffer_length_in_chain (vm, b0); t1->packet_length = vlib_buffer_length_in_chain (vm, b1); + t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + t1->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); clib_memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b1->pre_data)); @@ -1484,6 +1489,7 @@ pg_input_trace (pg_main_t * pg, t0->stream_index = stream_index; t0->packet_length = vlib_buffer_length_in_chain (vm, b0); + t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); clib_memcpy (t0->buffer.pre_data, b0->data, sizeof (t0->buffer.pre_data)); diff --git a/test/test_dhcp.py b/test/test_dhcp.py index a09c9bdb..89667d3d 100644 --- a/test/test_dhcp.py +++ b/test/test_dhcp.py @@ -2,8 +2,10 @@ import unittest import socket +import struct from framework import VppTestCase, VppTestRunner +from vpp_neighbor import VppNeighbor from scapy.layers.l2 import Ether, getmacbyip from scapy.layers.inet import IP, UDP, ICMP @@ -11,7 +13,7 @@ from scapy.layers.inet6 import IPv6, in6_getnsmac, in6_mactoifaceid from scapy.layers.dhcp import DHCP, BOOTP, DHCPTypes from scapy.layers.dhcp6 import DHCP6, DHCP6_Solicit, DHCP6_RelayForward, \ DHCP6_RelayReply, DHCP6_Advertise, DHCP6OptRelayMsg, DHCP6OptIfaceId, \ - DHCP6OptStatusCode, DHCP6OptVSS, DHCP6OptClientLinkLayerAddr + DHCP6OptStatusCode, DHCP6OptVSS, DHCP6OptClientLinkLayerAddr, DHCP6_Request from socket import 
AF_INET, AF_INET6 from scapy.utils import inet_pton, inet_ntop from scapy.utils6 import in6_ptop @@ -140,7 +142,7 @@ class TestDHCP(VppTestCase): return data - def verify_dhcp_offer(self, pkt, intf): + def verify_dhcp_offer(self, pkt, intf, fib_id=0, oui=0): ether = pkt[Ether] self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff") self.assertEqual(ether.src, intf.local_mac) @@ -162,15 +164,22 @@ class TestDHCP(VppTestCase): is_offer = True self.assertTrue(is_offer) - data = self.validate_relay_options(pkt, intf, intf.local_ip4, 0, 0) + data = self.validate_relay_options(pkt, intf, intf.local_ip4, + fib_id, oui) + + def verify_dhcp_discover(self, pkt, intf, src_intf=None, fib_id=0, oui=0, + dst_mac=None, dst_ip=None): + if not dst_mac: + dst_mac = intf.remote_mac + if not dst_ip: + dst_ip = intf.remote_ip4 - def verify_dhcp_discover(self, pkt, intf, src_intf=None, fib_id=0, oui=0): ether = pkt[Ether] - self.assertEqual(ether.dst, intf.remote_mac) + self.assertEqual(ether.dst, dst_mac) self.assertEqual(ether.src, intf.local_mac) ip = pkt[IP] - self.assertEqual(ip.dst, intf.remote_ip4) + self.assertEqual(ip.dst, dst_ip) self.assertEqual(ip.src, intf.local_ip4) udp = pkt[UDP] @@ -195,13 +204,20 @@ class TestDHCP(VppTestCase): def verify_dhcp6_solicit(self, pkt, intf, peer_ip, peer_mac, fib_id=0, - oui=0): + oui=0, + dst_mac=None, + dst_ip=None): + if not dst_mac: + dst_mac = intf.remote_mac + if not dst_ip: + dst_ip = in6_ptop(intf.remote_ip6) + ether = pkt[Ether] - self.assertEqual(ether.dst, intf.remote_mac) + self.assertEqual(ether.dst, dst_mac) self.assertEqual(ether.src, intf.local_mac) ip = pkt[IPv6] - self.assertEqual(in6_ptop(ip.dst), in6_ptop(intf.remote_ip6)) + self.assertEqual(in6_ptop(ip.dst), dst_ip) self.assertEqual(in6_ptop(ip.src), in6_ptop(intf.local_ip6)) udp = pkt[UDP] @@ -447,6 +463,128 @@ class TestDHCP(VppTestCase): self.pg_enable_capture(self.pg_interfaces) self.pg_start() + rx = self.pg1.get_capture(1) + rx = rx[0] + self.verify_dhcp_discover(rx, 
self.pg1, src_intf=self.pg3, + fib_id=1, oui=4) + + # + # Add a second DHCP server in VRF 1 + # expect client's messages to be relayed to both configured servers + # + self.pg1.generate_remote_hosts(2) + server_addr2 = socket.inet_pton(AF_INET, self.pg1.remote_hosts[1].ip4) + + self.vapi.dhcp_proxy_config(server_addr2, + src_addr, + rx_table_id=1, + server_table_id=1, + is_add=1) + + # + # We'll need an ARP entry for the server to send it packets + # + arp_entry = VppNeighbor(self, + self.pg1.sw_if_index, + self.pg1.remote_hosts[1].mac, + self.pg1.remote_hosts[1].ip4) + arp_entry.add_vpp_config() + + # + # Send a discover from the client. expect two relayed messages + # The first packet is sent to the second server + # We're not enforcing that here, it's just the way it is. + # + self.pg3.add_stream(pkts_disc_vrf1) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(2) + + option_82 = self.verify_dhcp_discover( + rx[0], self.pg1, + src_intf=self.pg3, + dst_mac=self.pg1.remote_hosts[1].mac, + dst_ip=self.pg1.remote_hosts[1].ip4, + fib_id=1, oui=4) + self.verify_dhcp_discover(rx[1], self.pg1, src_intf=self.pg3, + fib_id=1, oui=4) + + # + # Send both packets back. Client gets both.
+ # + p1 = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / + IP(src=self.pg1.remote_ip4, dst=self.pg1.local_ip4) / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_SERVER_PORT) / + BOOTP(op=1) / + DHCP(options=[('message-type', 'offer'), + ('relay_agent_Information', option_82), + ('end')])) + p2 = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / + IP(src=self.pg1.remote_hosts[1].ip4, dst=self.pg1.local_ip4) / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_SERVER_PORT) / + BOOTP(op=1) / + DHCP(options=[('message-type', 'offer'), + ('relay_agent_Information', option_82), + ('end')])) + pkts = [p1, p2] + + self.pg1.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg3.get_capture(2) + + self.verify_dhcp_offer(rx[0], self.pg3, fib_id=1, oui=4) + self.verify_dhcp_offer(rx[1], self.pg3, fib_id=1, oui=4) + + # + # Ensure offers from non-servers are dropeed + # + p2 = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / + IP(src="8.8.8.8", dst=self.pg1.local_ip4) / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_SERVER_PORT) / + BOOTP(op=1) / + DHCP(options=[('message-type', 'offer'), + ('relay_agent_Information', option_82), + ('end')])) + self.send_and_assert_no_replies(self.pg1, p2, + "DHCP offer from non-server") + + # + # Ensure only the discover is sent to multiple servers + # + p_req_vrf1 = (Ether(dst="ff:ff:ff:ff:ff:ff", + src=self.pg3.remote_mac) / + IP(src="0.0.0.0", dst="255.255.255.255") / + UDP(sport=DHCP4_CLIENT_PORT, + dport=DHCP4_SERVER_PORT) / + BOOTP(op=1) / + DHCP(options=[('message-type', 'request'), + ('end')])) + + self.pg3.add_stream(p_req_vrf1) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(1) + + # + # Remove the second DHCP server + # + self.vapi.dhcp_proxy_config(server_addr2, + src_addr, + rx_table_id=1, + server_table_id=1, + is_add=0) + + # + # Test we can still relay with the first + # + self.pg3.add_stream(pkts_disc_vrf1) + 
self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + rx = self.pg1.get_capture(1) rx = rx[0] self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3, @@ -472,7 +610,7 @@ class TestDHCP(VppTestCase): self.vapi.dhcp_proxy_config(server_addr, src_addr, rx_table_id=1, - server_table_id=11, + server_table_id=1, is_add=0) self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0, @@ -500,18 +638,16 @@ class TestDHCP(VppTestCase): UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_CLIENT_PORT) / DHCP6_Solicit()) - pkts_solicit_vrf0 = [p_solicit_vrf0] p_solicit_vrf1 = (Ether(dst=dmac, src=self.pg3.remote_mac) / IPv6(src=dhcp_solicit_src_vrf1, dst=dhcp_solicit_dst) / UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_CLIENT_PORT) / DHCP6_Solicit()) - pkts_solicit_vrf1 = [p_solicit_vrf1] - self.send_and_assert_no_replies(self.pg2, pkts_solicit_vrf0, + self.send_and_assert_no_replies(self.pg2, p_solicit_vrf0, "DHCP with no configuration") - self.send_and_assert_no_replies(self.pg3, pkts_solicit_vrf1, + self.send_and_assert_no_replies(self.pg3, p_solicit_vrf1, "DHCP with no configuration") # @@ -525,9 +661,9 @@ class TestDHCP(VppTestCase): server_table_id=0, is_ipv6=1) - self.send_and_assert_no_replies(self.pg2, pkts_solicit_vrf0, + self.send_and_assert_no_replies(self.pg2, p_solicit_vrf0, "DHCP with no configuration") - self.send_and_assert_no_replies(self.pg3, pkts_solicit_vrf1, + self.send_and_assert_no_replies(self.pg3, p_solicit_vrf1, "DHCP with no configuration") # @@ -538,13 +674,13 @@ class TestDHCP(VppTestCase): # # Now the DHCP requests are relayed to the server # - self.pg2.add_stream(pkts_solicit_vrf0) + self.pg2.add_stream(p_solicit_vrf0) self.pg_enable_capture(self.pg_interfaces) self.pg_start() rx = self.pg0.get_capture(1) - rx = rx[0] - self.verify_dhcp6_solicit(rx, self.pg0, + + self.verify_dhcp6_solicit(rx[0], self.pg0, dhcp_solicit_src_vrf0, self.pg2.remote_mac) @@ -557,8 +693,7 @@ class TestDHCP(VppTestCase): IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) / 
UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / DHCP6_Advertise()) - pkts_adv_vrf0 = [p_adv_vrf0] - self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0, + self.send_and_assert_no_replies(self.pg2, p_adv_vrf0, "DHCP6 not a relay reply") # 2 - no relay message option @@ -567,8 +702,7 @@ class TestDHCP(VppTestCase): UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / DHCP6_RelayReply() / DHCP6_Advertise()) - pkts_adv_vrf0 = [p_adv_vrf0] - self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0, + self.send_and_assert_no_replies(self.pg2, p_adv_vrf0, "DHCP not a relay message") # 3 - no circuit ID @@ -578,8 +712,7 @@ class TestDHCP(VppTestCase): DHCP6_RelayReply() / DHCP6OptRelayMsg(optlen=0) / DHCP6_Advertise()) - pkts_adv_vrf0 = [p_adv_vrf0] - self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0, + self.send_and_assert_no_replies(self.pg2, p_adv_vrf0, "DHCP6 no circuit ID") # 4 - wrong circuit ID p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / @@ -589,8 +722,7 @@ class TestDHCP(VppTestCase): DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x05') / DHCP6OptRelayMsg(optlen=0) / DHCP6_Advertise()) - pkts_adv_vrf0 = [p_adv_vrf0] - self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0, + self.send_and_assert_no_replies(self.pg2, p_adv_vrf0, "DHCP6 wrong circuit ID") # @@ -611,8 +743,8 @@ class TestDHCP(VppTestCase): self.pg_start() rx = self.pg2.get_capture(1) - rx = rx[0] - self.verify_dhcp6_advert(rx, self.pg2, "::") + + self.verify_dhcp6_advert(rx[0], self.pg2, "::") # # Send the relay response (the advertisement) @@ -632,8 +764,8 @@ class TestDHCP(VppTestCase): self.pg_start() rx = self.pg2.get_capture(1) - rx = rx[0] - self.verify_dhcp6_advert(rx, self.pg2, dhcp_solicit_src_vrf0) + + self.verify_dhcp6_advert(rx[0], self.pg2, dhcp_solicit_src_vrf0) # # Add all the config for VRF 1 @@ -648,13 +780,13 @@ class TestDHCP(VppTestCase): # # VRF 1 solicit # - self.pg3.add_stream(pkts_solicit_vrf1) + 
self.pg3.add_stream(p_solicit_vrf1) self.pg_enable_capture(self.pg_interfaces) self.pg_start() rx = self.pg1.get_capture(1) - rx = rx[0] - self.verify_dhcp6_solicit(rx, self.pg1, + + self.verify_dhcp6_solicit(rx[0], self.pg1, dhcp_solicit_src_vrf1, self.pg3.remote_mac) @@ -676,21 +808,21 @@ class TestDHCP(VppTestCase): self.pg_start() rx = self.pg3.get_capture(1) - rx = rx[0] - self.verify_dhcp6_advert(rx, self.pg3, dhcp_solicit_src_vrf1) + + self.verify_dhcp6_advert(rx[0], self.pg3, dhcp_solicit_src_vrf1) # # Add VSS config # table=1, fib=id=1, oui=4 self.vapi.dhcp_proxy_set_vss(1, 1, 4, is_ip6=1) - self.pg3.add_stream(pkts_solicit_vrf1) + self.pg3.add_stream(p_solicit_vrf1) self.pg_enable_capture(self.pg_interfaces) self.pg_start() rx = self.pg1.get_capture(1) - rx = rx[0] - self.verify_dhcp6_solicit(rx, self.pg1, + + self.verify_dhcp6_solicit(rx[0], self.pg1, dhcp_solicit_src_vrf1, self.pg3.remote_mac, fib_id=1, @@ -702,27 +834,163 @@ class TestDHCP(VppTestCase): # self.vapi.dhcp_proxy_set_vss(1, 1, 4, is_ip6=1, is_add=0) - self.pg3.add_stream(pkts_solicit_vrf1) + self.pg3.add_stream(p_solicit_vrf1) self.pg_enable_capture(self.pg_interfaces) self.pg_start() rx = self.pg1.get_capture(1) - rx = rx[0] - self.verify_dhcp6_solicit(rx, self.pg1, + + self.verify_dhcp6_solicit(rx[0], self.pg1, dhcp_solicit_src_vrf1, self.pg3.remote_mac) # - # Cleanup + # Add a second DHCP server in VRF 1 + # expect clients messages to be relay to both configured servers # - self.vapi.dhcp_proxy_config(server_addr_vrf1, + self.pg1.generate_remote_hosts(2) + server_addr2 = socket.inet_pton(AF_INET6, self.pg1.remote_hosts[1].ip6) + + self.vapi.dhcp_proxy_config(server_addr2, + src_addr_vrf1, + rx_table_id=1, + server_table_id=1, + is_ipv6=1) + + # + # We'll need an ND entry for the server to send it packets + # + nd_entry = VppNeighbor(self, + self.pg1.sw_if_index, + self.pg1.remote_hosts[1].mac, + self.pg1.remote_hosts[1].ip6, + af=AF_INET6) + nd_entry.add_vpp_config() + + # + # Send a 
discover from the client. expect two relayed messages + # The frist packet is sent to the second server + # We're not enforcing that here, it's just the way it is. + # + self.pg3.add_stream(p_solicit_vrf1) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(2) + + self.verify_dhcp6_solicit(rx[0], self.pg1, + dhcp_solicit_src_vrf1, + self.pg3.remote_mac) + self.verify_dhcp6_solicit(rx[1], self.pg1, + dhcp_solicit_src_vrf1, + self.pg3.remote_mac, + dst_mac=self.pg1.remote_hosts[1].mac, + dst_ip=self.pg1.remote_hosts[1].ip6) + + # + # Send both packets back. Client gets both. + # + p1 = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / + IPv6(dst=self.pg1.local_ip6, src=self.pg1.remote_ip6) / + UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / + DHCP6_RelayReply(peeraddr=dhcp_solicit_src_vrf1) / + DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x04') / + DHCP6OptRelayMsg(optlen=0) / + DHCP6_Advertise(trid=1) / + DHCP6OptStatusCode(statuscode=0)) + p2 = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_hosts[1].mac) / + IPv6(dst=self.pg1.local_ip6, src=self.pg1._remote_hosts[1].ip6) / + UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / + DHCP6_RelayReply(peeraddr=dhcp_solicit_src_vrf1) / + DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x04') / + DHCP6OptRelayMsg(optlen=0) / + DHCP6_Advertise(trid=1) / + DHCP6OptStatusCode(statuscode=0)) + + pkts = [p1, p2] + + self.pg1.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg3.get_capture(2) + + self.verify_dhcp6_advert(rx[0], self.pg3, dhcp_solicit_src_vrf1) + self.verify_dhcp6_advert(rx[1], self.pg3, dhcp_solicit_src_vrf1) + + # + # Ensure only solicit messages are duplicated + # + p_request_vrf1 = (Ether(dst=dmac, src=self.pg3.remote_mac) / + IPv6(src=dhcp_solicit_src_vrf1, + dst=dhcp_solicit_dst) / + UDP(sport=DHCP6_SERVER_PORT, + dport=DHCP6_CLIENT_PORT) / + DHCP6_Request()) + + self.pg3.add_stream(p_request_vrf1) 
+ self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(1) + + # + # Test we drop DHCP packets from addresses that are not configured as + # DHCP servers + # + p2 = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_hosts[1].mac) / + IPv6(dst=self.pg1.local_ip6, src="3001::1") / + UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) / + DHCP6_RelayReply(peeraddr=dhcp_solicit_src_vrf1) / + DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x04') / + DHCP6OptRelayMsg(optlen=0) / + DHCP6_Advertise(trid=1) / + DHCP6OptStatusCode(statuscode=0)) + self.send_and_assert_no_replies(self.pg1, p2, + "DHCP6 not from server") + + # + # Remove the second DHCP server + # + self.vapi.dhcp_proxy_config(server_addr2, src_addr_vrf1, rx_table_id=1, server_table_id=1, is_ipv6=1, is_add=0) + + # + # Test we can still relay with the first + # + self.pg3.add_stream(p_solicit_vrf1) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(1) + + self.verify_dhcp6_solicit(rx[0], self.pg1, + dhcp_solicit_src_vrf1, + self.pg3.remote_mac) + + # + # Cleanup + # self.vapi.dhcp_proxy_config(server_addr_vrf1, src_addr_vrf1, + rx_table_id=1, + server_table_id=1, + is_ipv6=1, + is_add=0) + self.vapi.dhcp_proxy_config(server_addr_vrf0, + src_addr_vrf0, + rx_table_id=0, + server_table_id=0, + is_ipv6=1, + is_add=0) + + # duplicate delete + self.vapi.dhcp_proxy_config(server_addr_vrf0, + src_addr_vrf0, rx_table_id=0, server_table_id=0, is_ipv6=1, -- cgit 1.2.3-korg From 0f26c5a0138ac86d7ebd197c31a09d8d624c35fe Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 1 Mar 2017 15:12:11 -0800 Subject: MPLS Mcast 1 - interface-DPO Used in the Data-plane to change a packet's input interface 2 - MPLS multicast FIB entry Same as a unicast entry but it links to a replicate not a load-balance DPO 3 - Multicast MPLS tunnel Update MPLS tunnels to use a FIB path-list to describe the endpoint[s]. 
Use the path-list to generate the forwarding chain (DPOs) to link to . 4 - Resolve a path via a local label (of an mLDP LSP) For IP multicast entries to use an LSP in the replication list, we need to decribe the 'resolve-via-label' where the label is that of a multicast LSP. 5 - MPLS disposition path sets RPF-ID For a interface-less LSP (i.e. mLDP not RSVP-TE) at the tail of the LSP we still need to perform an RPF check. An MPLS disposition DPO performs the MPLS pop validation checks and sets the RPF-ID in the packet. 6 - RPF check with per-entry RPF-ID An RPF-ID is used instead of a real interface SW if index in the case the IP traffic arrives from an LSP that does not have an associated interface. Change-Id: Ib92e177be919147bafeb599729abf3d1abc2f4b3 Signed-off-by: Neale Ranns --- src/plugins/dpdk/device/node.c | 2 +- src/vat/api_format.c | 133 ++--- src/vnet.am | 2 + src/vnet/adj/adj.c | 13 +- src/vnet/adj/adj.h | 6 + src/vnet/adj/adj_internal.h | 14 +- src/vnet/adj/adj_mcast.c | 134 ++++- src/vnet/adj/adj_mcast.h | 27 + src/vnet/adj/adj_midchain.c | 62 ++- src/vnet/adj/adj_nbr.c | 2 - src/vnet/buffer.h | 3 + src/vnet/devices/ssvm/node.c | 2 +- src/vnet/dhcp/dhcp6_proxy_node.c | 1 + src/vnet/dpo/dpo.c | 10 + src/vnet/dpo/dpo.h | 8 +- src/vnet/dpo/interface_dpo.c | 416 ++++++++++++++++ src/vnet/dpo/interface_dpo.h | 67 +++ src/vnet/dpo/lookup_dpo.c | 211 +++++++- src/vnet/dpo/lookup_dpo.h | 20 + src/vnet/dpo/mpls_disposition.c | 364 ++++++++++++++ src/vnet/dpo/mpls_disposition.h | 85 ++++ src/vnet/dpo/mpls_label_dpo.c | 6 +- src/vnet/dpo/replicate_dpo.c | 48 +- src/vnet/dpo/replicate_dpo.h | 2 + src/vnet/ethernet/arp.c | 1 + src/vnet/ethernet/interface.c | 2 +- src/vnet/ethernet/node.c | 4 +- src/vnet/ethernet/types.def | 4 +- src/vnet/fib/fib_api.h | 4 + src/vnet/fib/fib_entry.c | 47 +- src/vnet/fib/fib_entry.h | 13 +- src/vnet/fib/fib_entry_src.c | 154 +++--- src/vnet/fib/fib_internal.h | 1 + src/vnet/fib/fib_path.c | 222 +++++++-- src/vnet/fib/fib_path.h | 17 
+- src/vnet/fib/fib_path_ext.c | 4 +- src/vnet/fib/fib_path_ext.h | 3 +- src/vnet/fib/fib_path_list.c | 270 ++++++---- src/vnet/fib/fib_path_list.h | 22 +- src/vnet/fib/fib_table.c | 47 +- src/vnet/fib/fib_test.c | 345 +++++++++++-- src/vnet/fib/fib_test.h | 111 +++++ src/vnet/fib/fib_types.c | 15 +- src/vnet/fib/fib_types.h | 60 ++- src/vnet/fib/mpls_fib.c | 15 +- src/vnet/handoff.h | 10 +- src/vnet/interface.c | 2 +- src/vnet/ip/ip.api | 3 + src/vnet/ip/ip4_forward.c | 20 + src/vnet/ip/ip6_forward.c | 23 + src/vnet/ip/ip6_neighbor.c | 1 + src/vnet/ip/ip_api.c | 98 ++-- src/vnet/ip/lookup.c | 3 +- src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c | 1 + src/vnet/mfib/ip4_mfib.c | 1 + src/vnet/mfib/ip6_mfib.c | 1 + src/vnet/mfib/mfib_entry.c | 395 +++++++++------ src/vnet/mfib/mfib_entry.h | 20 +- src/vnet/mfib/mfib_forward.c | 29 +- src/vnet/mfib/mfib_table.c | 8 +- src/vnet/mfib/mfib_table.h | 1 + src/vnet/mfib/mfib_test.c | 127 ++++- src/vnet/mpls/mpls.api | 87 ++-- src/vnet/mpls/mpls.c | 17 +- src/vnet/mpls/mpls_api.c | 97 ++-- src/vnet/mpls/mpls_input.c | 2 +- src/vnet/mpls/mpls_lookup.c | 236 ++++++--- src/vnet/mpls/mpls_tunnel.c | 883 ++++++++++++++++++++++----------- src/vnet/mpls/mpls_tunnel.h | 57 ++- src/vnet/mpls/mpls_types.h | 20 + src/vnet/srp/interface.c | 2 +- test/test_ip_mcast.py | 1 + test/test_mpls.py | 277 ++++++++++- test/vpp_ip_route.py | 38 +- test/vpp_mpls_tunnel_interface.py | 46 ++ test/vpp_papi_provider.py | 16 +- 76 files changed, 4393 insertions(+), 1128 deletions(-) create mode 100644 src/vnet/dpo/interface_dpo.c create mode 100644 src/vnet/dpo/interface_dpo.h create mode 100644 src/vnet/dpo/mpls_disposition.c create mode 100644 src/vnet/dpo/mpls_disposition.h create mode 100644 src/vnet/fib/fib_test.h create mode 100644 test/vpp_mpls_tunnel_interface.py (limited to 'src/vnet/dhcp') diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index b10e0fad..0549ba5d 100644 --- a/src/plugins/dpdk/device/node.c +++ 
b/src/plugins/dpdk/device/node.c @@ -52,7 +52,7 @@ always_inline int vlib_buffer_is_mpls (vlib_buffer_t * b) { ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b); - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)); } always_inline u32 diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 61b8e1d8..107aa012 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -16369,32 +16369,82 @@ api_netmap_delete (vat_main_t * vam) return ret; } -static void vl_api_mpls_tunnel_details_t_handler - (vl_api_mpls_tunnel_details_t * mp) +static void +vl_api_mpls_fib_path_print (vat_main_t * vam, vl_api_fib_path2_t * fp) +{ + if (fp->afi == IP46_TYPE_IP6) + print (vam->ofp, + " weight %d, sw_if_index %d, is_local %d, is_drop %d, " + "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U", + ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local, + fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi, + format_ip6_address, fp->next_hop); + else if (fp->afi == IP46_TYPE_IP4) + print (vam->ofp, + " weight %d, sw_if_index %d, is_local %d, is_drop %d, " + "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U", + ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local, + fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi, + format_ip4_address, fp->next_hop); +} + +static void +vl_api_mpls_fib_path_json_print (vat_json_node_t * node, + vl_api_fib_path2_t * fp) +{ + struct in_addr ip4; + struct in6_addr ip6; + + vat_json_object_add_uint (node, "weight", ntohl (fp->weight)); + vat_json_object_add_uint (node, "sw_if_index", ntohl (fp->sw_if_index)); + vat_json_object_add_uint (node, "is_local", fp->is_local); + vat_json_object_add_uint (node, "is_drop", fp->is_drop); + vat_json_object_add_uint (node, "is_unreach", fp->is_unreach); + vat_json_object_add_uint (node, "is_prohibit", fp->is_prohibit); + vat_json_object_add_uint (node, "next_hop_afi", fp->afi); + if 
(fp->afi == IP46_TYPE_IP4) + { + clib_memcpy (&ip4, &fp->next_hop, sizeof (ip4)); + vat_json_object_add_ip4 (node, "next_hop", ip4); + } + else if (fp->afi == IP46_TYPE_IP6) + { + clib_memcpy (&ip6, &fp->next_hop, sizeof (ip6)); + vat_json_object_add_ip6 (node, "next_hop", ip6); + } +} + +static void +vl_api_mpls_tunnel_details_t_handler (vl_api_mpls_tunnel_details_t * mp) { vat_main_t *vam = &vat_main; - i32 len = mp->mt_next_hop_n_labels; + int count = ntohl (mp->mt_count); + vl_api_fib_path2_t *fp; i32 i; - print (vam->ofp, "[%d]: via %U %d labels ", - mp->tunnel_index, - format_ip4_address, mp->mt_next_hop, - ntohl (mp->mt_next_hop_sw_if_index)); - for (i = 0; i < len; i++) + print (vam->ofp, "[%d]: sw_if_index %d via:", + ntohl (mp->mt_tunnel_index), ntohl (mp->mt_sw_if_index)); + fp = mp->mt_paths; + for (i = 0; i < count; i++) { - print (vam->ofp, "%u ", ntohl (mp->mt_next_hop_out_labels[i])); + vl_api_mpls_fib_path_print (vam, fp); + fp++; } + print (vam->ofp, ""); } -static void vl_api_mpls_tunnel_details_t_handler_json - (vl_api_mpls_tunnel_details_t * mp) +#define vl_api_mpls_tunnel_details_t_endian vl_noop_handler +#define vl_api_mpls_tunnel_details_t_print vl_noop_handler + +static void +vl_api_mpls_tunnel_details_t_handler_json (vl_api_mpls_tunnel_details_t * mp) { vat_main_t *vam = &vat_main; vat_json_node_t *node = NULL; - struct in_addr ip4; + int count = ntohl (mp->mt_count); + vl_api_fib_path2_t *fp; i32 i; - i32 len = mp->mt_next_hop_n_labels; if (VAT_JSON_ARRAY != vam->json_tree.type) { @@ -16404,17 +16454,17 @@ static void vl_api_mpls_tunnel_details_t_handler_json node = vat_json_array_add (&vam->json_tree); vat_json_init_object (node); - vat_json_object_add_uint (node, "tunnel_index", ntohl (mp->tunnel_index)); - clib_memcpy (&ip4, &(mp->mt_next_hop), sizeof (ip4)); - vat_json_object_add_ip4 (node, "next_hop", ip4); - vat_json_object_add_uint (node, "next_hop_sw_if_index", - ntohl (mp->mt_next_hop_sw_if_index)); - vat_json_object_add_uint 
(node, "l2_only", ntohl (mp->mt_l2_only)); - vat_json_object_add_uint (node, "label_count", len); - for (i = 0; i < len; i++) + vat_json_object_add_uint (node, "tunnel_index", + ntohl (mp->mt_tunnel_index)); + vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->mt_sw_if_index)); + + vat_json_object_add_uint (node, "l2_only", mp->mt_l2_only); + + fp = mp->mt_paths; + for (i = 0; i < count; i++) { - vat_json_object_add_uint (node, "label", - ntohl (mp->mt_next_hop_out_labels[i])); + vl_api_mpls_fib_path_json_print (node, fp); + fp++; } } @@ -16453,6 +16503,7 @@ api_mpls_tunnel_dump (vat_main_t * vam) #define vl_api_mpls_fib_details_t_endian vl_noop_handler #define vl_api_mpls_fib_details_t_print vl_noop_handler + static void vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp) { @@ -16467,20 +16518,7 @@ vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp) fp = mp->path; for (i = 0; i < count; i++) { - if (fp->afi == IP46_TYPE_IP6) - print (vam->ofp, - " weight %d, sw_if_index %d, is_local %d, is_drop %d, " - "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U", - ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local, - fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi, - format_ip6_address, fp->next_hop); - else if (fp->afi == IP46_TYPE_IP4) - print (vam->ofp, - " weight %d, sw_if_index %d, is_local %d, is_drop %d, " - "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U", - ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local, - fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi, - format_ip4_address, fp->next_hop); + vl_api_mpls_fib_path_print (vam, fp); fp++; } } @@ -16491,8 +16529,6 @@ static void vl_api_mpls_fib_details_t_handler_json vat_main_t *vam = &vat_main; int count = ntohl (mp->count); vat_json_node_t *node = NULL; - struct in_addr ip4; - struct in6_addr ip6; vl_api_fib_path2_t *fp; int i; @@ -16511,23 +16547,8 @@ static void vl_api_mpls_fib_details_t_handler_json fp = mp->path; for (i = 0; i < count; 
i++) { - vat_json_object_add_uint (node, "weight", ntohl (fp->weight)); - vat_json_object_add_uint (node, "sw_if_index", ntohl (fp->sw_if_index)); - vat_json_object_add_uint (node, "is_local", fp->is_local); - vat_json_object_add_uint (node, "is_drop", fp->is_drop); - vat_json_object_add_uint (node, "is_unreach", fp->is_unreach); - vat_json_object_add_uint (node, "is_prohibit", fp->is_prohibit); - vat_json_object_add_uint (node, "next_hop_afi", fp->afi); - if (fp->afi == IP46_TYPE_IP4) - { - clib_memcpy (&ip4, &fp->next_hop, sizeof (ip4)); - vat_json_object_add_ip4 (node, "next_hop", ip4); - } - else if (fp->afi == IP46_TYPE_IP6) - { - clib_memcpy (&ip6, &fp->next_hop, sizeof (ip6)); - vat_json_object_add_ip6 (node, "next_hop", ip6); - } + vl_api_mpls_fib_path_json_print (node, fp); + fp++; } } diff --git a/src/vnet.am b/src/vnet.am index 643ae92e..bed4902b 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -990,6 +990,8 @@ libvnet_la_SOURCES += \ vnet/dpo/lookup_dpo.c \ vnet/dpo/classify_dpo.c \ vnet/dpo/replicate_dpo.c \ + vnet/dpo/interface_dpo.c \ + vnet/dpo/mpls_disposition.c \ vnet/dpo/mpls_label_dpo.c nobase_include_HEADERS += \ diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index 90182006..36dfe500 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -67,6 +67,10 @@ adj_alloc (fib_protocol_t proto) adj->lookup_next_index = 0; adj->ia_delegates = NULL; + /* lest it become a midchain in the future */ + memset(&adj->sub_type.midchain.next_dpo, 0, + sizeof(adj->sub_type.midchain.next_dpo)); + ip4_main.lookup_main.adjacency_heap = adj_pool; ip6_main.lookup_main.adjacency_heap = adj_pool; @@ -118,6 +122,9 @@ format_ip_adjacency (u8 * s, va_list * args) case IP_LOOKUP_NEXT_MCAST: s = format (s, "%U", format_adj_mcast, adj_index, 0); break; + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: + s = format (s, "%U", format_adj_mcast_midchain, adj_index, 0); + break; default: break; } @@ -180,6 +187,7 @@ adj_last_lock_gone (ip_adjacency_t *adj) 
adj->rewrite_header.sw_if_index); break; case IP_LOOKUP_NEXT_MCAST: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: adj_mcast_remove(adj->ia_nh_proto, adj->rewrite_header.sw_if_index); break; @@ -338,6 +346,7 @@ adj_walk (u32 sw_if_index, FOR_EACH_FIB_IP_PROTOCOL(proto) { adj_nbr_walk(sw_if_index, proto, cb, ctx); + adj_mcast_walk(sw_if_index, proto, cb, ctx); } } @@ -544,9 +553,9 @@ adj_show (vlib_main_t * vm, * [@0] * [@1] glean: loop0 * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc - * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * [@3] mpls via 1.0.0.2 loop0: MPLS: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc - * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * [@5] mpls via 1.0.0.3 loop0: MPLS: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc * @cliexend ?*/ VLIB_CLI_COMMAND (adj_show_command, static) = { diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h index 32997c91..ed5eb1f1 100644 --- a/src/vnet/adj/adj.h +++ b/src/vnet/adj/adj.h @@ -81,6 +81,10 @@ typedef enum /** Multicast Adjacency. */ IP_LOOKUP_NEXT_MCAST, + /** Multicast Midchain Adjacency. 
An Adjacency for sending macst packets + * on a tunnel/virtual interface */ + IP_LOOKUP_NEXT_MCAST_MIDCHAIN, + IP_LOOKUP_N_NEXT, } __attribute__ ((packed)) ip_lookup_next_t; @@ -107,6 +111,7 @@ typedef enum [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite", \ [IP_LOOKUP_NEXT_MCAST] = "ip4-rewrite-mcast", \ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \ + [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip4-mcast-midchain", \ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \ } @@ -119,6 +124,7 @@ typedef enum [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \ [IP_LOOKUP_NEXT_MCAST] = "ip6-rewrite-mcast", \ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \ + [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip6-mcast-midchain", \ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \ [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \ [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \ diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h index 30668625..2c123c54 100644 --- a/src/vnet/adj/adj_internal.h +++ b/src/vnet/adj/adj_internal.h @@ -17,6 +17,7 @@ #define __ADJ_INTERNAL_H__ #include +#include #include #include #include @@ -87,11 +88,14 @@ adj_get_index (ip_adjacency_t *adj) return (adj - adj_pool); } -extern void adj_nbr_update_rewrite_internal (ip_adjacency_t *adj, - ip_lookup_next_t adj_next_index, - u32 complete_next_index, - u32 next_index, - u8 *rewrite); +extern void adj_nbr_update_rewrite_internal(ip_adjacency_t *adj, + ip_lookup_next_t adj_next_index, + u32 complete_next_index, + u32 next_index, + u8 *rewrite); +extern void adj_midchain_setup(adj_index_t adj_index, + adj_midchain_fixup_t fixup, + adj_flags_t flags); extern ip_adjacency_t * adj_alloc(fib_protocol_t proto); diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c index 4f678e43..755abfd4 100644 --- a/src/vnet/adj/adj_mcast.c +++ b/src/vnet/adj/adj_mcast.c @@ -13,7 +13,7 @@ * limitations under the License. 
*/ -#include +#include #include #include #include @@ -129,6 +129,59 @@ adj_mcast_update_rewrite (adj_index_t adj_index, adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask); } +/** + * adj_mcast_midchain_update_rewrite + * + * Update the adjacency's rewrite string. A NULL string implies the + * rewirte is reset (i.e. when ARP/ND etnry is gone). + * NB: the adj being updated may be handling traffic in the DP. + */ +void +adj_mcast_midchain_update_rewrite (adj_index_t adj_index, + adj_midchain_fixup_t fixup, + adj_flags_t flags, + u8 *rewrite, + u8 offset, + u32 mask) +{ + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + + /* + * one time only update. since we don't support chainging the tunnel + * src,dst, this is all we need. + */ + ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_MCAST); + /* + * tunnels can always provide a rewrite. + */ + ASSERT(NULL != rewrite); + + adj_midchain_setup(adj_index, fixup, flags); + + /* + * update the adj's rewrite string and build the arc + * from the rewrite node to the interface's TX node + */ + adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST_MIDCHAIN, + adj_get_mcast_node(adj->ia_nh_proto), + vnet_tx_node_index_for_sw_interface( + vnet_get_main(), + adj->rewrite_header.sw_if_index), + rewrite); + + /* + * set the fields corresponding to the mcast IP address rewrite + * The mask must be stored in network byte order, since the packet's + * IP address will also be in network order. 
+ */ + adj->rewrite_header.dst_mcast_offset = offset; + adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask); +} + void adj_mcast_remove (fib_protocol_t proto, u32 sw_if_index) @@ -260,6 +313,24 @@ adj_mcast_interface_delete (vnet_main_t * vnm, VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_mcast_interface_delete); +/** + * @brief Walk the multicast Adjacencies on a given interface + */ +void +adj_mcast_walk (u32 sw_if_index, + fib_protocol_t proto, + adj_walk_cb_t cb, + void *ctx) +{ + if (vec_len(adj_mcasts[proto]) > sw_if_index) + { + if (ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index]) + { + cb(adj_mcasts[proto][sw_if_index], ctx); + } + } +} + u8* format_adj_mcast (u8* s, va_list *ap) { @@ -269,6 +340,8 @@ format_adj_mcast (u8* s, va_list *ap) s = format(s, "%U-mcast: ", format_fib_protocol, adj->ia_nh_proto); + if (adj->rewrite_header.flags & VNET_REWRITE_HAS_FEATURES) + s = format(s, "[features] "); s = format (s, "%U", format_vnet_rewrite, &adj->rewrite_header, sizeof (adj->rewrite_data), 0); @@ -276,6 +349,28 @@ format_adj_mcast (u8* s, va_list *ap) return (s); } +u8* +format_adj_mcast_midchain (u8* s, va_list *ap) +{ + index_t index = va_arg(*ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(*ap, u32); + vnet_main_t * vnm = vnet_get_main(); + ip_adjacency_t * adj = adj_get(index); + + s = format(s, "%U-mcast-midchain: ", + format_fib_protocol, adj->ia_nh_proto); + s = format (s, "%U", + format_vnet_rewrite, + vnm->vlib_main, &adj->rewrite_header, + sizeof (adj->rewrite_data), 0); + s = format (s, "\n%Ustacked-on:\n%U%U", + format_white_space, indent, + format_white_space, indent+2, + format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2); + + return (s); +} + static void adj_dpo_lock (dpo_id_t *dpo) @@ -293,6 +388,11 @@ const static dpo_vft_t adj_mcast_dpo_vft = { .dv_unlock = adj_dpo_unlock, .dv_format = format_adj_mcast, }; +const static dpo_vft_t adj_mcast_midchain_dpo_vft = { + .dv_lock = adj_dpo_lock, + .dv_unlock = adj_dpo_unlock, + 
.dv_format = format_adj_mcast_midchain, +}; /** * @brief The per-protocol VLIB graph nodes that are assigned to a mcast @@ -319,6 +419,31 @@ const static char* const * const adj_mcast_nodes[DPO_PROTO_NUM] = [DPO_PROTO_MPLS] = NULL, }; +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a mcast + * object. + * + * this means that these graph nodes are ones from which a mcast is the + * parent object in the DPO-graph. + */ +const static char* const adj_mcast_midchain_ip4_nodes[] = +{ + "ip4-mcast-midchain", + NULL, +}; +const static char* const adj_mcast_midchain_ip6_nodes[] = +{ + "ip6-mcast-midchain", + NULL, +}; + +const static char* const * const adj_mcast_midchain_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = adj_mcast_midchain_ip4_nodes, + [DPO_PROTO_IP6] = adj_mcast_midchain_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + /** * @brief Return the size of the adj DB. * This is only for testing purposes so an efficient implementation is not needed @@ -349,5 +474,10 @@ adj_mcast_db_size (void) void adj_mcast_module_init (void) { - dpo_register(DPO_ADJACENCY_MCAST, &adj_mcast_dpo_vft, adj_mcast_nodes); + dpo_register(DPO_ADJACENCY_MCAST, + &adj_mcast_dpo_vft, + adj_mcast_nodes); + dpo_register(DPO_ADJACENCY_MCAST_MIDCHAIN, + &adj_mcast_midchain_dpo_vft, + adj_mcast_midchain_nodes); } diff --git a/src/vnet/adj/adj_mcast.h b/src/vnet/adj/adj_mcast.h index 40d44313..bfb0d6f6 100644 --- a/src/vnet/adj/adj_mcast.h +++ b/src/vnet/adj/adj_mcast.h @@ -26,6 +26,7 @@ #define __ADJ_MCAST_H__ #include +#include /** * @brief @@ -68,10 +69,36 @@ extern void adj_mcast_update_rewrite(adj_index_t adj_index, u8 offset, u32 mask); +/** + * @brief + * Update the rewrite string for an existing adjacecny and + * Convert the adjacency into a midchain + * + * @param + * The index of the adj to update + * + * @param + * The new rewrite + */ +extern void adj_mcast_midchain_update_rewrite(adj_index_t adj_index, + adj_midchain_fixup_t fixup, + adj_flags_t flags, + u8 *rewrite, 
+ u8 offset, + u32 mask); +/** + * @brief Walk the multicast Adjacencies on a given interface + */ +extern void adj_mcast_walk (u32 sw_if_index, + fib_protocol_t adj_nh_proto, + adj_walk_cb_t cb, + void *ctx); + /** * @brief Format/display a mcast adjacency. */ extern u8* format_adj_mcast(u8* s, va_list *ap); +extern u8* format_adj_mcast_midchain(u8* s, va_list *ap); /** * @brief Get the sze of the mcast adj DB. Test purposes only. diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c index 5756de43..a93a1c3e 100644 --- a/src/vnet/adj/adj_midchain.c +++ b/src/vnet/adj/adj_midchain.c @@ -346,7 +346,7 @@ adj_get_midchain_node (vnet_link_t link) static u8 adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj) { - u8 arc = (u8) ~0; + u8 arc = (u8) ~0; switch (adj->ia_link) { case VNET_LINK_IP4: @@ -393,17 +393,14 @@ adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj) } /** - * adj_nbr_midchain_update_rewrite + * adj_midchain_setup * - * Update the adjacency's rewrite string. A NULL string implies the - * rewrite is reset (i.e. when ARP/ND etnry is gone). - * NB: the adj being updated may be handling traffic in the DP. + * Setup the adj as a mid-chain */ void -adj_nbr_midchain_update_rewrite (adj_index_t adj_index, - adj_midchain_fixup_t fixup, - adj_flags_t flags, - u8 *rewrite) +adj_midchain_setup (adj_index_t adj_index, + adj_midchain_fixup_t fixup, + adj_flags_t flags) { u32 feature_index, tx_node; ip_adjacency_t *adj; @@ -413,16 +410,6 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, adj = adj_get(adj_index); - /* - * one time only update. since we don't support chainging the tunnel - * src,dst, this is all we need. - */ - ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP); - /* - * tunnels can always provide a rewrite. 
- */ - ASSERT(NULL != rewrite); - adj->sub_type.midchain.fixup_func = fixup; adj->ia_flags |= flags; @@ -447,6 +434,38 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, dpo_stack_from_node(tx_node, &adj->sub_type.midchain.next_dpo, drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link))); +} + +/** + * adj_nbr_midchain_update_rewrite + * + * Update the adjacency's rewrite string. A NULL string implies the + * rewrite is reset (i.e. when ARP/ND etnry is gone). + * NB: the adj being updated may be handling traffic in the DP. + */ +void +adj_nbr_midchain_update_rewrite (adj_index_t adj_index, + adj_midchain_fixup_t fixup, + adj_flags_t flags, + u8 *rewrite) +{ + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + + /* + * one time only update. since we don't support chainging the tunnel + * src,dst, this is all we need. + */ + ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP); + /* + * tunnels can always provide a rewrite. + */ + ASSERT(NULL != rewrite); + + adj_midchain_setup(adj_index, fixup, flags); /* * update the rewirte with the workers paused. 
@@ -454,7 +473,7 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MIDCHAIN, adj_get_midchain_node(adj->ia_link), - tx_node, + adj_nbr_midchain_get_tx_node(adj), rewrite); } @@ -496,7 +515,8 @@ adj_nbr_midchain_stack (adj_index_t adj_index, adj = adj_get(adj_index); - ASSERT(IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index); + ASSERT((IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index) || + (IP_LOOKUP_NEXT_MCAST_MIDCHAIN == adj->lookup_next_index)); dpo_stack_from_node(adj_nbr_midchain_get_tx_node(adj), &adj->sub_type.midchain.next_dpo, diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c index ddacb030..3d450d1f 100644 --- a/src/vnet/adj/adj_nbr.c +++ b/src/vnet/adj/adj_nbr.c @@ -195,8 +195,6 @@ adj_nbr_alloc (fib_protocol_t nh_proto, adj->ia_link = link_type; adj->ia_nh_proto = nh_proto; adj->rewrite_header.sw_if_index = sw_if_index; - memset(&adj->sub_type.midchain.next_dpo, 0, - sizeof(adj->sub_type.midchain.next_dpo)); adj_nbr_evaluate_feature (adj_get_index(adj)); return (adj); diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index ea3ce093..ed869d1f 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -130,6 +130,9 @@ typedef struct /* Rewrite length */ u32 save_rewrite_length; + + /* MFIB RPF ID */ + u32 rpf_id; }; /* ICMP */ diff --git a/src/vnet/devices/ssvm/node.c b/src/vnet/devices/ssvm/node.c index 539b4161..b7a8db05 100644 --- a/src/vnet/devices/ssvm/node.c +++ b/src/vnet/devices/ssvm/node.c @@ -210,7 +210,7 @@ ssvm_eth_device_input (ssvm_eth_main_t * em, next0 = SSVM_ETH_INPUT_NEXT_IP4_INPUT; else if (type0 == ETHERNET_TYPE_IP6) next0 = SSVM_ETH_INPUT_NEXT_IP6_INPUT; - else if (type0 == ETHERNET_TYPE_MPLS_UNICAST) + else if (type0 == ETHERNET_TYPE_MPLS) next0 = SSVM_ETH_INPUT_NEXT_MPLS_INPUT; l3_offset0 = ((next0 == SSVM_ETH_INPUT_NEXT_IP4_INPUT || diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index 524cb095..de73154d 100644 --- 
a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -883,6 +883,7 @@ dhcp6_proxy_set_server (ip46_address_t *addr, mfib_table_entry_update(rx_fib_index, &all_dhcp_servers, MFIB_SOURCE_DHCP, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6); } diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c index d8e075a7..dfc2bd92 100644 --- a/src/vnet/dpo/dpo.c +++ b/src/vnet/dpo/dpo.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include /** * Array of char* names for the DPO types and protos @@ -182,6 +184,12 @@ dpo_set (dpo_id_t *dpo, case IP_LOOKUP_NEXT_MIDCHAIN: dpo->dpoi_type = DPO_ADJACENCY_MIDCHAIN; break; + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: + dpo->dpoi_type = DPO_ADJACENCY_MCAST_MIDCHAIN; + break; + case IP_LOOKUP_NEXT_MCAST: + dpo->dpoi_type = DPO_ADJACENCY_MCAST; + break; default: break; } @@ -453,6 +461,8 @@ dpo_module_init (vlib_main_t * vm) lookup_dpo_module_init(); ip_null_dpo_module_init(); replicate_module_init(); + interface_dpo_module_init(); + mpls_disp_dpo_module_init(); return (NULL); } diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h index 48b92d3d..5aa4e2d2 100644 --- a/src/vnet/dpo/dpo.h +++ b/src/vnet/dpo/dpo.h @@ -108,12 +108,15 @@ typedef enum dpo_type_t_ { DPO_ADJACENCY_MIDCHAIN, DPO_ADJACENCY_GLEAN, DPO_ADJACENCY_MCAST, + DPO_ADJACENCY_MCAST_MIDCHAIN, DPO_RECEIVE, DPO_LOOKUP, DPO_LISP_CP, DPO_CLASSIFY, DPO_MPLS_LABEL, + DPO_MPLS_DISPOSITION, DPO_MFIB_ENTRY, + DPO_INTERFACE, DPO_LAST, } __attribute__((packed)) dpo_type_t; @@ -129,6 +132,7 @@ typedef enum dpo_type_t_ { [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midcahin", \ [DPO_ADJACENCY_GLEAN] = "dpo-glean", \ [DPO_ADJACENCY_MCAST] = "dpo-adj-mcast", \ + [DPO_ADJACENCY_MCAST_MIDCHAIN] = "dpo-adj-mcast-midchain", \ [DPO_RECEIVE] = "dpo-receive", \ [DPO_LOOKUP] = "dpo-lookup", \ [DPO_LOAD_BALANCE] = "dpo-load-balance", \ @@ -136,7 +140,9 @@ typedef enum dpo_type_t_ { [DPO_LISP_CP] = "dpo-lisp-cp", \ 
[DPO_CLASSIFY] = "dpo-classify", \ [DPO_MPLS_LABEL] = "dpo-mpls-label", \ - [DPO_MFIB_ENTRY] = "dpo-mfib_entry" \ + [DPO_MPLS_DISPOSITION] = "dpo-mpls-diposition", \ + [DPO_MFIB_ENTRY] = "dpo-mfib_entry", \ + [DPO_INTERFACE] = "dpo-interface" \ } /** diff --git a/src/vnet/dpo/interface_dpo.c b/src/vnet/dpo/interface_dpo.c new file mode 100644 index 00000000..50ca756f --- /dev/null +++ b/src/vnet/dpo/interface_dpo.c @@ -0,0 +1,416 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +/* + * The 'DB' of interface DPOs. 
+ * There is only one per-interface per-protocol, so this is a per-interface + * vector + */ +static index_t *interface_dpo_db[DPO_PROTO_NUM]; + +static interface_dpo_t * +interface_dpo_alloc (void) +{ + interface_dpo_t *ido; + + pool_get(interface_dpo_pool, ido); + + return (ido); +} + +static inline interface_dpo_t * +interface_dpo_get_from_dpo (const dpo_id_t *dpo) +{ + ASSERT(DPO_INTERFACE == dpo->dpoi_type); + + return (interface_dpo_get(dpo->dpoi_index)); +} + +static inline index_t +interface_dpo_get_index (interface_dpo_t *ido) +{ + return (ido - interface_dpo_pool); +} + +static void +interface_dpo_lock (dpo_id_t *dpo) +{ + interface_dpo_t *ido; + + ido = interface_dpo_get_from_dpo(dpo); + ido->ido_locks++; +} + +static void +interface_dpo_unlock (dpo_id_t *dpo) +{ + interface_dpo_t *ido; + + ido = interface_dpo_get_from_dpo(dpo); + ido->ido_locks--; + + if (0 == ido->ido_locks) + { + interface_dpo_db[ido->ido_proto][ido->ido_sw_if_index] = + INDEX_INVALID; + pool_put(interface_dpo_pool, ido); + } +} + +/* + * interface_dpo_add_or_lock + * + * Add/create and lock a new or lock an existing for the interface DPO + * on the interface and protocol given + */ +void +interface_dpo_add_or_lock (dpo_proto_t proto, + u32 sw_if_index, + dpo_id_t *dpo) +{ + interface_dpo_t *ido; + + vec_validate_init_empty(interface_dpo_db[proto], + sw_if_index, + INDEX_INVALID); + + if (INDEX_INVALID == interface_dpo_db[proto][sw_if_index]) + { + ido = interface_dpo_alloc(); + + ido->ido_sw_if_index = sw_if_index; + ido->ido_proto = proto; + + interface_dpo_db[proto][sw_if_index] = + interface_dpo_get_index(ido); + } + else + { + ido = interface_dpo_get(interface_dpo_db[proto][sw_if_index]); + } + + dpo_set(dpo, DPO_INTERFACE, proto, interface_dpo_get_index(ido)); +} + + +static clib_error_t * +interface_dpo_interface_state_change (vnet_main_t * vnm, + u32 sw_if_index, + u32 flags) +{ + /* + */ + return (NULL); +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION( + 
interface_dpo_interface_state_change); + +/** + * @brief Registered callback for HW interface state changes + */ +static clib_error_t * +interface_dpo_hw_interface_state_change (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags) +{ + return (NULL); +} + +VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION( + interface_dpo_hw_interface_state_change); + +static clib_error_t * +interface_dpo_interface_delete (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + return (NULL); +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION( + interface_dpo_interface_delete); + +u8* +format_interface_dpo (u8* s, va_list *ap) +{ + index_t index = va_arg(*ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(*ap, u32); + vnet_main_t * vnm = vnet_get_main(); + interface_dpo_t *ido = interface_dpo_get(index); + + return (format(s, "%U-dpo: %U", + format_vnet_sw_interface_name, + vnm, + vnet_get_sw_interface(vnm, ido->ido_sw_if_index), + format_dpo_proto, ido->ido_proto)); +} + +static void +interface_dpo_mem_show (void) +{ + fib_show_memory_usage("Interface", + pool_elts(interface_dpo_pool), + pool_len(interface_dpo_pool), + sizeof(interface_dpo_t)); +} + + +const static dpo_vft_t interface_dpo_vft = { + .dv_lock = interface_dpo_lock, + .dv_unlock = interface_dpo_unlock, + .dv_format = format_interface_dpo, + .dv_mem_show = interface_dpo_mem_show, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a glean + * object. + * + * this means that these graph nodes are ones from which a glean is the + * parent object in the DPO-graph. 
+ */ +const static char* const interface_dpo_ip4_nodes[] = +{ + "interface-dpo-ip4", + NULL, +}; +const static char* const interface_dpo_ip6_nodes[] = +{ + "interface-dpo-ip4", + NULL, +}; + +const static char* const * const interface_dpo_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = interface_dpo_ip4_nodes, + [DPO_PROTO_IP6] = interface_dpo_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +void +interface_dpo_module_init (void) +{ + dpo_register(DPO_INTERFACE, + &interface_dpo_vft, + interface_dpo_nodes); +} + +/** + * @brief Interface DPO trace data + */ +typedef struct interface_dpo_trace_t_ +{ + u32 sw_if_index; +} interface_dpo_trace_t; + +typedef enum interface_dpo_next_t_ +{ + INTERFACE_DPO_DROP = 0, + INTERFACE_DPO_INPUT = 1, +} interface_dpo_next_t; + +always_inline uword +interface_dpo_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + u32 cpu_index = os_get_cpu_number(); + vnet_interface_main_t *im; + + im = &vnet_get_main ()->interface_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next > 2) + { + const interface_dpo_t *ido0, *ido1; + u32 bi0, idoi0, bi1, idoi1; + vlib_buffer_t *b0, *b1; + + bi0 = from[0]; + to_next[0] = bi0; + bi1 = from[1]; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + idoi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; + ido0 = interface_dpo_get(idoi0); + ido1 = interface_dpo_get(idoi1); + + vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index; + vnet_buffer(b1)->sw_if_index[VLIB_RX] = ido1->ido_sw_if_index; + + 
vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + ido0->ido_sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b0)); + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + ido1->ido_sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b1)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + interface_dpo_trace_t *tr0; + + tr0 = vlib_add_trace (vm, node, b0, sizeof (*tr0)); + tr0->sw_if_index = ido0->ido_sw_if_index; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + interface_dpo_trace_t *tr1; + + tr1 = vlib_add_trace (vm, node, b1, sizeof (*tr1)); + tr1->sw_if_index = ido1->ido_sw_if_index; + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, + INTERFACE_DPO_INPUT, + INTERFACE_DPO_INPUT); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + const interface_dpo_t * ido0; + vlib_buffer_t * b0; + u32 bi0, idoi0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + ido0 = interface_dpo_get(idoi0); + + /* Swap the RX interface of the packet to the one the + * interface DPO represents */ + vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index; + + /* Bump the interface's RX counters (one packet, full chain length in bytes) */ + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + ido0->ido_sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + interface_dpo_trace_t *tr; + + tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->sw_if_index = ido0->ido_sw_if_index; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, + INTERFACE_DPO_INPUT); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); +
} + return from_frame->n_vectors; +} + +static u8 * +format_interface_dpo_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + interface_dpo_trace_t * t = va_arg (*args, interface_dpo_trace_t *); + uword indent = format_get_indent (s); + s = format (s, "%U sw_if_index:%d", + format_white_space, indent, + t->sw_if_index); + return s; +} + +static uword +interface_dpo_ip4 (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (interface_dpo_inline(vm, node, from_frame)); +} + +static uword +interface_dpo_ip6 (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (interface_dpo_inline(vm, node, from_frame)); +} + +VLIB_REGISTER_NODE (interface_dpo_ip4_node) = { + .function = interface_dpo_ip4, + .name = "interface-dpo-ip4", + .vector_size = sizeof (u32), + .format_trace = format_interface_dpo_trace, + + .n_next_nodes = 2, + .next_nodes = { + [INTERFACE_DPO_DROP] = "ip4-drop", + [INTERFACE_DPO_INPUT] = "ip4-input", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_ip4_node, + interface_dpo_ip4) + +VLIB_REGISTER_NODE (interface_dpo_ip6_node) = { + .function = interface_dpo_ip6, + .name = "interface-dpo-ip6", + .vector_size = sizeof (u32), + .format_trace = format_interface_dpo_trace, + + .n_next_nodes = 2, + .next_nodes = { + [INTERFACE_DPO_DROP] = "ip6-drop", + [INTERFACE_DPO_INPUT] = "ip6-input", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_ip6_node, + interface_dpo_ip6) + diff --git a/src/vnet/dpo/interface_dpo.h b/src/vnet/dpo/interface_dpo.h new file mode 100644 index 00000000..1538dfbb --- /dev/null +++ b/src/vnet/dpo/interface_dpo.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * The data-path object that gives packets a new ingress/rx interface + */ + +#ifndef __INTERFACE_DPO_H__ +#define __INTERFACE_DPO_H__ + +#include + +typedef struct interface_dpo_t_ +{ + /** + * The Software interface index that the packets will be given + * as the ingress/rx interface + */ + u32 ido_sw_if_index; + + /** + * next VLIB node. A '-input' node. + */ + u32 ido_next_node; + + /** + * DPO protocol that the packets will have as they 'ingress' + * on this interface + */ + dpo_proto_t ido_proto; + + /** + * number of locks. + */ + u16 ido_locks; +} interface_dpo_t; + +extern void interface_dpo_add_or_lock (dpo_proto_t proto, + u32 sw_if_index, + dpo_id_t *dpo); + +extern void interface_dpo_module_init(void); + +/** + * @brief pool of all interface DPOs. NOTE(review): this is a definition in a header, not an extern declaration - confirm intended + */ +interface_dpo_t *interface_dpo_pool; + +static inline interface_dpo_t * +interface_dpo_get (index_t index) +{ + return (pool_elt_at_index(interface_dpo_pool, index)); +} + +#endif diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index 97ad0a44..e5b00a79 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -21,8 +21,12 @@ #include #include #include +#include +#include +#include static const char *const lookup_input_names[] = LOOKUP_INPUTS; +static const char *const lookup_cast_names[] = LOOKUP_CASTS; /** * @brief Enumeration of the lookup subtypes @@ -31,6 +35,7 @@ typedef enum lookup_sub_type_t_ { LOOKUP_SUB_TYPE_SRC, LOOKUP_SUB_TYPE_DST, + LOOKUP_SUB_TYPE_DST_MCAST, LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE, }
lookup_sub_type_t; #define LOOKUP_SUB_TYPE_NUM (LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE+1) @@ -67,6 +72,7 @@ lookup_dpo_get_index (lookup_dpo_t *lkd) static void lookup_dpo_add_or_lock_i (fib_node_index_t fib_index, dpo_proto_t proto, + lookup_cast_t cast, lookup_input_t input, lookup_table_t table_config, dpo_id_t *dpo) @@ -79,6 +85,7 @@ lookup_dpo_add_or_lock_i (fib_node_index_t fib_index, lkd->lkd_proto = proto; lkd->lkd_input = input; lkd->lkd_table = table_config; + lkd->lkd_cast = cast; /* * use the input type to select the lookup sub-type @@ -100,6 +107,10 @@ lookup_dpo_add_or_lock_i (fib_node_index_t fib_index, type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST]; break; } + if (LOOKUP_MULTICAST == cast) + { + type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_MCAST]; + } } if (0 == type) @@ -115,20 +126,29 @@ lookup_dpo_add_or_lock_i (fib_node_index_t fib_index, void lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index, dpo_proto_t proto, + lookup_cast_t cast, lookup_input_t input, lookup_table_t table_config, dpo_id_t *dpo) { if (LOOKUP_TABLE_FROM_CONFIG == table_config) { - fib_table_lock(fib_index, dpo_proto_to_fib(proto)); + if (LOOKUP_UNICAST == cast) + { + fib_table_lock(fib_index, dpo_proto_to_fib(proto)); + } + else + { + mfib_table_lock(fib_index, dpo_proto_to_fib(proto)); + } } - lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo); + lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo); } void lookup_dpo_add_or_lock_w_table_id (u32 table_id, dpo_proto_t proto, + lookup_cast_t cast, lookup_input_t input, lookup_table_t table_config, dpo_id_t *dpo) @@ -137,13 +157,22 @@ lookup_dpo_add_or_lock_w_table_id (u32 table_id, if (LOOKUP_TABLE_FROM_CONFIG == table_config) { - fib_index = - fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), - table_id); + if (LOOKUP_UNICAST == cast) + { + fib_index = + fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), + table_id); + } + else + { + fib_index = + 
mfib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), + table_id); + } } ASSERT(FIB_NODE_INDEX_INVALID != fib_index); - lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo); + lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo); } u8* @@ -156,16 +185,29 @@ format_lookup_dpo (u8 *s, va_list *args) if (LOOKUP_TABLE_FROM_INPUT_INTERFACE == lkd->lkd_table) { - s = format(s, "%s lookup in interface's %U table", + s = format(s, "%s,%s lookup in interface's %U table", lookup_input_names[lkd->lkd_input], + lookup_cast_names[lkd->lkd_cast], format_dpo_proto, lkd->lkd_proto); } else { - s = format(s, "%s lookup in %U", - lookup_input_names[lkd->lkd_input], - format_fib_table_name, lkd->lkd_fib_index, - dpo_proto_to_fib(lkd->lkd_proto)); + if (LOOKUP_UNICAST == lkd->lkd_cast) + { + s = format(s, "%s,%s lookup in %U", + lookup_input_names[lkd->lkd_input], + lookup_cast_names[lkd->lkd_cast], + format_fib_table_name, lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } + else + { + s = format(s, "%s,%s lookup in %U", + lookup_input_names[lkd->lkd_input], + lookup_cast_names[lkd->lkd_cast], + format_mfib_table_name, lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } } return (s); } @@ -193,8 +235,16 @@ lookup_dpo_unlock (dpo_id_t *dpo) { if (LOOKUP_TABLE_FROM_CONFIG == lkd->lkd_table) { - fib_table_unlock(lkd->lkd_fib_index, - dpo_proto_to_fib(lkd->lkd_proto)); + if (LOOKUP_UNICAST == lkd->lkd_cast) + { + fib_table_unlock(lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } + else + { + mfib_table_unlock(lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } } pool_put(lookup_dpo_pool, lkd); } @@ -1069,6 +1119,123 @@ VLIB_REGISTER_NODE (lookup_mpls_dst_itf_node) = { }; VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_itf_node, lookup_mpls_dst_itf) +typedef enum lookup_ip_dst_mcast_next_t_ { + LOOKUP_IP_DST_MCAST_NEXT_RPF, + LOOKUP_IP_DST_MCAST_N_NEXT, +} mfib_forward_lookup_next_t; + +always_inline uword 
+lookup_dpo_ip_dst_mcast_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + int is_v4) +{ + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = LOOKUP_IP_DST_MCAST_NEXT_RPF; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* } */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, lkdi0, fib_index0, next0; + const lookup_dpo_t * lkd0; + fib_node_index_t mfei0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* dst lookup was done by mpls lookup */ + lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + lkd0 = lookup_dpo_get(lkdi0); + fib_index0 = lkd0->lkd_fib_index; + next0 = LOOKUP_IP_DST_MCAST_NEXT_RPF; + + if (is_v4) + { + ip4_header_t * ip0; + + ip0 = vlib_buffer_get_current (b0); + mfei0 = ip4_mfib_table_lookup(ip4_mfib_get(fib_index0), + &ip0->src_address, + &ip0->dst_address, + 64); + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->fib_index = fib_index0; + tr->lbi = mfei0; + tr->addr.ip4 = ip0->dst_address; + } + } + else + { + ip6_header_t * ip0; + + ip0 = vlib_buffer_get_current (b0); + mfei0 = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index0), + &ip0->src_address, + &ip0->dst_address); + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->fib_index = fib_index0; + tr->lbi = mfei0; + tr->addr.ip6 = ip0->dst_address; + } + } + + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = mfei0; + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + 
vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +always_inline uword +lookup_ip4_dst_mcast (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip_dst_mcast_inline(vm, node, from_frame, 1)); +} + +VLIB_REGISTER_NODE (lookup_ip4_dst_mcast_node) = { + .function = lookup_ip4_dst_mcast, + .name = "lookup-ip4-dst-mcast", + .vector_size = sizeof (u32), + + .format_trace = format_lookup_trace, + .n_next_nodes = LOOKUP_IP_DST_MCAST_N_NEXT, + .next_nodes = { + [LOOKUP_IP_DST_MCAST_NEXT_RPF] = "ip4-mfib-forward-rpf", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_mcast_node, + lookup_ip4_dst_mcast) + static void lookup_dpo_mem_show (void) { @@ -1129,6 +1296,22 @@ const static char* const * const lookup_dst_nodes[DPO_PROTO_NUM] = [DPO_PROTO_MPLS] = lookup_dst_mpls_nodes, }; +const static char* const lookup_dst_mcast_ip4_nodes[] = +{ + "lookup-ip4-dst-mcast", + NULL, +}; +const static char* const lookup_dst_mcast_ip6_nodes[] = +{ + "lookup-ip6-dst-mcast", /* NOTE(review): only the ip4 mcast lookup node is registered in the visible change - confirm a node with this name exists */ + NULL, +}; +const static char* const * const lookup_dst_mcast_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = lookup_dst_mcast_ip4_nodes, + [DPO_PROTO_IP6] = lookup_dst_mcast_ip6_nodes, +}; + const static char* const lookup_dst_from_interface_ip4_nodes[] = { "lookup-ip4-dst-itf", @@ -1168,6 +1351,8 @@ lookup_dpo_module_init (void) dpo_register_new_type(&lkd_vft, lookup_src_nodes); lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST] = dpo_register_new_type(&lkd_vft, lookup_dst_nodes); + lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_MCAST] = + dpo_register_new_type(&lkd_vft, lookup_dst_mcast_nodes); lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE] = dpo_register_new_type(&lkd_vft, lookup_dst_from_interface_nodes); } diff --git a/src/vnet/dpo/lookup_dpo.h b/src/vnet/dpo/lookup_dpo.h index ff283388..7dfd0385 100644 --- a/src/vnet/dpo/lookup_dpo.h +++ b/src/vnet/dpo/lookup_dpo.h @@ -46,6 +46,19 @@ typedef enum
lookup_table_t_ { [LOOKUP_INPUT_DST_ADDR] = "table-configured", \ } +/** + * Switch to select whether the lookup is in a unicast or a multicast FIB + */ +typedef enum lookup_cast_t_ { + LOOKUP_UNICAST, + LOOKUP_MULTICAST, +} __attribute__ ((packed)) lookup_cast_t; + +#define LOOKUP_CASTS { \ + [LOOKUP_UNICAST] = "unicast", \ + [LOOKUP_MULTICAST] = "multicast", \ +} + /** * A representation of an MPLS label for imposition in the data-path */ @@ -73,6 +86,11 @@ typedef struct lookup_dpo_t */ lookup_table_t lkd_table; + /** + * Unicast or multicast FIB lookup + */ + lookup_cast_t lkd_cast; + /** * Number of locks */ @@ -81,11 +99,13 @@ typedef struct lookup_dpo_t extern void lookup_dpo_add_or_lock_w_fib_index(fib_node_index_t fib_index, dpo_proto_t proto, + lookup_cast_t cast, lookup_input_t input, lookup_table_t table, dpo_id_t *dpo); extern void lookup_dpo_add_or_lock_w_table_id(u32 table_id, dpo_proto_t proto, + lookup_cast_t cast, lookup_input_t input, lookup_table_t table, dpo_id_t *dpo); diff --git a/src/vnet/dpo/mpls_disposition.c b/src/vnet/dpo/mpls_disposition.c new file mode 100644 index 00000000..5dc33fcf --- /dev/null +++ b/src/vnet/dpo/mpls_disposition.c @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include +#include +#include + +/* + * pool of all MPLS Label DPOs + */ +mpls_disp_dpo_t *mpls_disp_dpo_pool; + +static mpls_disp_dpo_t * +mpls_disp_dpo_alloc (void) +{ + mpls_disp_dpo_t *mdd; + + pool_get_aligned(mpls_disp_dpo_pool, mdd, CLIB_CACHE_LINE_BYTES); + memset(mdd, 0, sizeof(*mdd)); + + dpo_reset(&mdd->mdd_dpo); + + return (mdd); +} + +static index_t +mpls_disp_dpo_get_index (mpls_disp_dpo_t *mdd) +{ + return (mdd - mpls_disp_dpo_pool); +} + +index_t +mpls_disp_dpo_create (dpo_proto_t payload_proto, + fib_rpf_id_t rpf_id, + const dpo_id_t *dpo) +{ + mpls_disp_dpo_t *mdd; + + mdd = mpls_disp_dpo_alloc(); + + mdd->mdd_payload_proto = payload_proto; + mdd->mdd_rpf_id = rpf_id; + + dpo_stack(DPO_MPLS_DISPOSITION, + mdd->mdd_payload_proto, + &mdd->mdd_dpo, + dpo); + + return (mpls_disp_dpo_get_index(mdd)); +} + +u8* +format_mpls_disp_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + u32 indent = va_arg (*args, u32); + mpls_disp_dpo_t *mdd; + + mdd = mpls_disp_dpo_get(index); + + s = format(s, "mpls-disposition:[%d]:[%U]", + index, + format_dpo_proto, mdd->mdd_payload_proto); + + s = format(s, "\n%U", format_white_space, indent); + s = format(s, "%U", format_dpo_id, &mdd->mdd_dpo, indent+2); + + return (s); +} + +static void +mpls_disp_dpo_lock (dpo_id_t *dpo) +{ + mpls_disp_dpo_t *mdd; + + mdd = mpls_disp_dpo_get(dpo->dpoi_index); + + mdd->mdd_locks++; +} + +static void +mpls_disp_dpo_unlock (dpo_id_t *dpo) +{ + mpls_disp_dpo_t *mdd; + + mdd = mpls_disp_dpo_get(dpo->dpoi_index); + + mdd->mdd_locks--; + + if (0 == mdd->mdd_locks) + { + dpo_reset(&mdd->mdd_dpo); + pool_put(mpls_disp_dpo_pool, mdd); + } +} + +/** + * @brief A struct to hold tracing information for the MPLS label disposition + * node. 
+ */ +typedef struct mpls_label_disposition_trace_t_ +{ + index_t mdd; +} mpls_label_disposition_trace_t; + +always_inline uword +mpls_label_disposition_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + u8 payload_is_ip4, + u8 payload_is_ip6) +{ + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + mpls_disp_dpo_t *mdd0, *mdd1; + u32 bi0, mddi0, bi1, mddi1; + vlib_buffer_t * b0, *b1; + u32 next0, next1; + + bi0 = to_next[0] = from[0]; + bi1 = to_next[1] = from[1]; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (ip6_header_t), STORE); + CLIB_PREFETCH (p3->data, sizeof (ip6_header_t), STORE); + } + + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* dst lookup was done by ip4 lookup */ + mddi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + mddi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; + mdd0 = mpls_disp_dpo_get(mddi0); + mdd1 = mpls_disp_dpo_get(mddi1); + + if (payload_is_ip4) + { + /* + * decrement the TTL on ingress to the LSP + */ + } + else if (payload_is_ip6) + { + /* + * decrement the TTL on ingress to the LSP + */ + } + + next0 = mdd0->mdd_dpo.dpoi_next_node; + next1 = mdd1->mdd_dpo.dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mdd0->mdd_dpo.dpoi_index; + vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mdd1->mdd_dpo.dpoi_index; + vnet_buffer(b0)->ip.rpf_id = mdd0->mdd_rpf_id; + 
vnet_buffer(b1)->ip.rpf_id = mdd1->mdd_rpf_id; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_disposition_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + + tr->mdd = mddi0; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_disposition_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + tr->mdd = mddi1; + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + mpls_disp_dpo_t *mdd0; + vlib_buffer_t * b0; + u32 bi0, mddi0; + u32 next0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* dst lookup was done by ip4 lookup */ + mddi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + mdd0 = mpls_disp_dpo_get(mddi0); + + if (payload_is_ip4) + { + /* + * decrement the TTL on ingress to the LSP + */ + } + else if (payload_is_ip6) + { + /* + * decrement the TTL on ingress to the LSP + */ + } + else + { + } + + next0 = mdd0->mdd_dpo.dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mdd0->mdd_dpo.dpoi_index; + vnet_buffer(b0)->ip.rpf_id = mdd0->mdd_rpf_id; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_disposition_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->mdd = mddi0; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static u8 * +format_mpls_label_disposition_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + CLIB_UNUSED (mpls_label_disposition_trace_t * t); + + t = va_arg (*args, mpls_label_disposition_trace_t *); + + s = format(s, "disp:%d", t->mdd); 
+ return (s); +} + +static uword +ip4_mpls_label_disposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_label_disposition_inline(vm, node, frame, 1, 0)); +} + +VLIB_REGISTER_NODE (ip4_mpls_label_disposition_node) = { + .function = ip4_mpls_label_disposition, + .name = "ip4-mpls-label-disposition", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_label_disposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "ip4-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_disposition_node, + ip4_mpls_label_disposition) + +static uword +ip6_mpls_label_disposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_label_disposition_inline(vm, node, frame, 0, 1)); +} + +VLIB_REGISTER_NODE (ip6_mpls_label_disposition_node) = { + .function = ip6_mpls_label_disposition, + .name = "ip6-mpls-label-disposition", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_label_disposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "ip6-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_disposition_node, + ip6_mpls_label_disposition) + +static void +mpls_disp_dpo_mem_show (void) +{ + fib_show_memory_usage("MPLS label", + pool_elts(mpls_disp_dpo_pool), + pool_len(mpls_disp_dpo_pool), + sizeof(mpls_disp_dpo_t)); +} + +const static dpo_vft_t mdd_vft = { + .dv_lock = mpls_disp_dpo_lock, + .dv_unlock = mpls_disp_dpo_unlock, + .dv_format = format_mpls_disp_dpo, + .dv_mem_show = mpls_disp_dpo_mem_show, +}; + +const static char* const mpls_label_disp_ip4_nodes[] = +{ + "ip4-mpls-label-disposition", + NULL, +}; +const static char* const mpls_label_disp_ip6_nodes[] = +{ + "ip6-mpls-label-disposition", + NULL, +}; +const static char* const * const mpls_label_disp_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = mpls_label_disp_ip4_nodes, + [DPO_PROTO_IP6] = mpls_label_disp_ip6_nodes, +}; + + +void +mpls_disp_dpo_module_init (void) +{ + 
dpo_register(DPO_MPLS_DISPOSITION, &mdd_vft, mpls_label_disp_nodes); +} diff --git a/src/vnet/dpo/mpls_disposition.h b/src/vnet/dpo/mpls_disposition.h new file mode 100644 index 00000000..9c015083 --- /dev/null +++ b/src/vnet/dpo/mpls_disposition.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MPLS_DISP_DPO_H__ +#define __MPLS_DISP_DPO_H__ + +#include +#include +#include +#include + +/** + * A representation of an MPLS label disposition in the data-path + */ +typedef struct mpls_disp_dpo_t +{ + /** + * Next DPO in the graph + */ + dpo_id_t mdd_dpo; + + /** + * The protocol of the payload packets handed to the next DPO + */ + dpo_proto_t mdd_payload_proto; + + /** + * RPF-ID (if this is an mcast disposition) + */ + fib_rpf_id_t mdd_rpf_id; + + /** + * Number of locks/users of this disposition object + */ + u16 mdd_locks; +} mpls_disp_dpo_t; + +/** + * @brief Assert that the MPLS disposition object is no larger than a cache + * line in size; objects are allocated from the pool aligned to + * CLIB_CACHE_LINE_BYTES. + */ +_Static_assert((sizeof(mpls_disp_dpo_t) <= CLIB_CACHE_LINE_BYTES), + "MPLS Disposition DPO is larger than one cache line."); + +/** + * @brief Create an MPLS disposition object + * + * @param payload_proto The protocol of the payload packets; it is stored + * in the object and used when stacking on the parent.
+ * @param dpo The parent of the created MPLS label object + */ +extern index_t mpls_disp_dpo_create(dpo_proto_t payload_proto, + fib_rpf_id_t rpf_id, + const dpo_id_t *dpo); + +extern u8* format_mpls_disp_dpo(u8 *s, va_list *args); + + +/* + * Encapsulation violation for fast data-path access + */ +extern mpls_disp_dpo_t *mpls_disp_dpo_pool; + +static inline mpls_disp_dpo_t * +mpls_disp_dpo_get (index_t index) +{ + return (pool_elt_at_index(mpls_disp_dpo_pool, index)); +} + +extern void mpls_disp_dpo_module_init(void); + +#endif diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index be9b2850..4d84b900 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -562,7 +562,7 @@ VLIB_REGISTER_NODE (mpls_label_imposition_node) = { .format_trace = format_mpls_label_imposition_trace, .n_next_nodes = 1, .next_nodes = { - [0] = "error-drop", + [0] = "mpls-drop", } }; VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node, @@ -584,7 +584,7 @@ VLIB_REGISTER_NODE (ip4_mpls_label_imposition_node) = { .format_trace = format_mpls_label_imposition_trace, .n_next_nodes = 1, .next_nodes = { - [0] = "error-drop", + [0] = "ip4-drop", } }; VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_imposition_node, @@ -606,7 +606,7 @@ VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = { .format_trace = format_mpls_label_imposition_trace, .n_next_nodes = 1, .next_nodes = { - [0] = "error-drop", + [0] = "ip6-drop", } }; VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_imposition_node, diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c index e25ceae9..9fdb9a05 100644 --- a/src/vnet/dpo/replicate_dpo.c +++ b/src/vnet/dpo/replicate_dpo.c @@ -17,6 +17,7 @@ #include #include #include +#include #undef REP_DEBUG @@ -106,6 +107,7 @@ replicate_format (index_t repi, dpo_id_t *buckets; u32 i; + repi &= ~MPLS_IS_REPLICATE; rep = replicate_get(repi); vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to); buckets = 
replicate_get_buckets(rep); @@ -187,6 +189,7 @@ replicate_set_bucket (index_t repi, replicate_t *rep; dpo_id_t *buckets; + repi &= ~MPLS_IS_REPLICATE; rep = replicate_get(repi); buckets = replicate_get_buckets(rep); @@ -199,11 +202,13 @@ int replicate_is_drop (const dpo_id_t *dpo) { replicate_t *rep; + index_t repi; if (DPO_REPLICATE != dpo->dpoi_type) return (0); - rep = replicate_get(dpo->dpoi_index); + repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE; + rep = replicate_get(repi); if (1 == rep->rep_n_buckets) { @@ -218,6 +223,7 @@ replicate_get_bucket (index_t repi, { replicate_t *rep; + repi &= ~MPLS_IS_REPLICATE; rep = replicate_get(repi); return (replicate_get_bucket_i(rep, bucket)); @@ -288,9 +294,11 @@ replicate_multipath_update (const dpo_id_t *dpo, dpo_id_t *tmp_dpo; u32 ii, n_buckets; replicate_t *rep; + index_t repi; ASSERT(DPO_REPLICATE == dpo->dpoi_type); - rep = replicate_get(dpo->dpoi_index); + repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE; + rep = replicate_get(repi); nhs = replicate_multipath_next_hop_fixup(next_hops, rep->rep_proto); n_buckets = vec_len(nhs); @@ -718,7 +726,7 @@ format_replicate_trace (u8 * s, va_list * args) s = format (s, "replicate: %d via %U", t->rep_index, - format_dpo_id, &t->dpo); + format_dpo_id, &t->dpo, 0); return s; } @@ -731,7 +739,7 @@ ip4_replicate (vlib_main_t * vm, } /** - * @brief + * @brief IP4 replication node */ VLIB_REGISTER_NODE (ip4_replicate_node) = { .function = ip4_replicate, @@ -744,7 +752,7 @@ VLIB_REGISTER_NODE (ip4_replicate_node) = { .format_trace = format_replicate_trace, .n_next_nodes = 1, .next_nodes = { - [0] = "error-drop", + [0] = "ip4-drop", }, }; @@ -757,7 +765,7 @@ ip6_replicate (vlib_main_t * vm, } /** - * @brief + * @brief IPv6 replication node */ VLIB_REGISTER_NODE (ip6_replicate_node) = { .function = ip6_replicate, @@ -770,7 +778,33 @@ VLIB_REGISTER_NODE (ip6_replicate_node) = { .format_trace = format_replicate_trace, .n_next_nodes = 1, .next_nodes = { - [0] = "error-drop", + [0] = "ip6-drop", 
+ }, +}; + +static uword +mpls_replicate (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (replicate_inline (vm, node, frame)); +} + +/** + * @brief MPLS replication node + */ +VLIB_REGISTER_NODE (mpls_replicate_node) = { + .function = mpls_replicate, + .name = "mpls-replicate", + .vector_size = sizeof (u32), + + .n_errors = ARRAY_LEN(replicate_dpo_error_strings), + .error_strings = replicate_dpo_error_strings, + + .format_trace = format_replicate_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "mpls-drop", }, }; diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h index 77273015..7383184a 100644 --- a/src/vnet/dpo/replicate_dpo.h +++ b/src/vnet/dpo/replicate_dpo.h @@ -25,6 +25,7 @@ #include #include #include +#include /** * replicate main @@ -119,6 +120,7 @@ extern replicate_t *replicate_pool; static inline replicate_t* replicate_get (index_t repi) { + repi &= ~MPLS_IS_REPLICATE; return (pool_elt_at_index(replicate_pool, repi)); } diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index c74a097e..dd509193 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -507,6 +507,7 @@ arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) case IP_LOOKUP_NEXT_PUNT: case IP_LOOKUP_NEXT_LOCAL: case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: case IP_LOOKUP_NEXT_MIDCHAIN: case IP_LOOKUP_NEXT_ICMP_ERROR: case IP_LOOKUP_N_NEXT: diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index 335e3f9f..9ac30bc6 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -115,7 +115,7 @@ ethernet_build_rewrite (vnet_main_t * vnm, #define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break _(IP4, IP4); _(IP6, IP6); - _(MPLS, MPLS_UNICAST); + _(MPLS, MPLS); _(ARP, ARP); #undef _ default: diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index f7787ed2..5305012f 100755 --- a/src/vnet/ethernet/node.c +++ 
b/src/vnet/ethernet/node.c @@ -249,7 +249,7 @@ determine_next_node (ethernet_main_t * em, { *next0 = em->l3_next.input_next_ip6; } - else if (type0 == ETHERNET_TYPE_MPLS_UNICAST) + else if (type0 == ETHERNET_TYPE_MPLS) { *next0 = em->l3_next.input_next_mpls; @@ -1252,7 +1252,7 @@ next_by_ethertype_register (next_by_ethertype_t * l3_next, { l3_next->input_next_ip6 = next_index; } - else if (ethertype == ETHERNET_TYPE_MPLS_UNICAST) + else if (ethertype == ETHERNET_TYPE_MPLS) { l3_next->input_next_mpls = next_index; } diff --git a/src/vnet/ethernet/types.def b/src/vnet/ethernet/types.def index 643f3152..7dab8ee1 100644 --- a/src/vnet/ethernet/types.def +++ b/src/vnet/ethernet/types.def @@ -85,8 +85,8 @@ ethernet_type (0x876D, SECURE_DATA) ethernet_type (0x8808, MAC_CONTROL) ethernet_type (0x8809, SLOW_PROTOCOLS) ethernet_type (0x880B, PPP) -ethernet_type (0x8847, MPLS_UNICAST) -ethernet_type (0x8848, MPLS_MULTICAST) +ethernet_type (0x8847, MPLS) +ethernet_type (0x8848, MPLS_UPSTREAM_ASSIGNED) ethernet_type (0x8863, PPPOE_DISCOVERY) ethernet_type (0x8864, PPPOE_SESSION) ethernet_type (0x886D, INTEL_ANS) diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h index f8275317..10d0cb58 100644 --- a/src/vnet/fib/fib_api.h +++ b/src/vnet/fib/fib_api.h @@ -24,6 +24,7 @@ add_del_route_check (fib_protocol_t table_proto, fib_protocol_t next_hop_table_proto, u32 next_hop_table_id, u8 create_missing_tables, + u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index); int @@ -33,10 +34,13 @@ add_del_route_t_handler (u8 is_multipath, u8 is_unreach, u8 is_prohibit, u8 is_local, + u8 is_multicast, u8 is_classify, u32 classify_table_index, u8 is_resolve_host, u8 is_resolve_attached, + u8 is_interface_rx, + u8 is_rpf_id, u32 fib_index, const fib_prefix_t * prefix, u8 next_hop_proto_is_ip4, diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index dac1fce9..6f811aa1 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -75,13 +75,7 @@ 
fib_entry_get_default_chain_type (const fib_entry_t *fib_entry) return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); case FIB_PROTOCOL_MPLS: if (MPLS_EOS == fib_entry->fe_prefix.fp_eos) - /* - * If the entry being asked is a eos-MPLS label entry, - * then use the payload-protocol field, that we stashed there - * for just this purpose - */ - return (fib_forw_chain_type_from_dpo_proto( - fib_entry->fe_prefix.fp_payload_proto)); + return (FIB_FORW_CHAIN_TYPE_MPLS_EOS); else return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); } @@ -370,6 +364,35 @@ fib_entry_contribute_urpf (fib_node_index_t entry_index, return (fib_path_list_contribute_urpf(fib_entry->fe_parent, urpf)); } +/* + * If the client is request a chain for multicast forwarding then swap + * the chain type to one that can provide such transport. + */ +static fib_forward_chain_type_t +fib_entry_chain_type_mcast_to_ucast (fib_forward_chain_type_t fct) +{ + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_MCAST_IP4: + case FIB_FORW_CHAIN_TYPE_MCAST_IP6: + /* + * we can only transport IP multicast packets if there is an + * LSP. + */ + fct = FIB_FORW_CHAIN_TYPE_MPLS_EOS; + break; + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + case FIB_FORW_CHAIN_TYPE_ETHERNET: + case FIB_FORW_CHAIN_TYPE_NSH: + break; + } + + return (fct); +} + /* * fib_entry_contribute_forwarding * @@ -385,6 +408,11 @@ fib_entry_contribute_forwarding (fib_node_index_t fib_entry_index, fib_entry = fib_entry_get(fib_entry_index); + /* + * mfib children ask for mcast chains. fix these to the appropriate ucast types. 
+ */ + fct = fib_entry_chain_type_mcast_to_ucast(fct); + if (fct == fib_entry_get_default_chain_type(fib_entry)) { dpo_copy(dpo, &fib_entry->fe_lb); @@ -414,6 +442,11 @@ fib_entry_contribute_forwarding (fib_node_index_t fib_entry_index, dpo_copy(dpo, &fed->fd_dpo); } + /* + * don't allow the special index indicating replicate.vs.load-balance + * to escape to the clients + */ + dpo->dpoi_index &= ~MPLS_IS_REPLICATE; } const dpo_id_t * diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index a3f75e60..b17a0b64 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -192,6 +192,11 @@ typedef enum fib_entry_attribute_t_ { * The prefix/address is local to this device */ FIB_ENTRY_ATTRIBUTE_LOCAL, + /** + * The prefix/address is a multicast prefix. + * this aplies only to MPLS. IP multicast is handled by mfib + */ + FIB_ENTRY_ATTRIBUTE_MULTICAST, /** * The prefix/address exempted from loose uRPF check * To be used with caution @@ -200,7 +205,7 @@ typedef enum fib_entry_attribute_t_ { /** * Marker. add new entries before this one. 
*/ - FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT, + FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_MULTICAST, } fib_entry_attribute_t; /** @@ -215,7 +220,8 @@ typedef enum fib_entry_attribute_t_ { [FIB_ENTRY_ATTRIBUTE_DROP] = "drop", \ [FIB_ENTRY_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ [FIB_ENTRY_ATTRIBUTE_LOCAL] = "local", \ - [FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT] = "uRPF-exempt" \ + [FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT] = "uRPF-exempt", \ + [FIB_ENTRY_ATTRIBUTE_MULTICAST] = "multicast", \ } #define FOR_EACH_FIB_ATTRIBUTE(_item) \ @@ -232,6 +238,7 @@ typedef enum fib_entry_flag_t_ { FIB_ENTRY_FLAG_LOCAL = (1 << FIB_ENTRY_ATTRIBUTE_LOCAL), FIB_ENTRY_FLAG_IMPORT = (1 << FIB_ENTRY_ATTRIBUTE_IMPORT), FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT = (1 << FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT), + FIB_ENTRY_FLAG_MULTICAST = (1 << FIB_ENTRY_ATTRIBUTE_MULTICAST), } __attribute__((packed)) fib_entry_flag_t; /** @@ -396,7 +403,7 @@ typedef struct fib_entry_t_ { * paint the header straight on without the need to check the packet * type to derive the EOS bit value. */ - dpo_id_t fe_lb; // [FIB_FORW_CHAIN_MPLS_NUM]; + dpo_id_t fe_lb; /** * Vector of source infos. * Most entries will only have 1 source. 
So we optimise for memory usage, diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c index aa1d5a24..a700282e 100644 --- a/src/vnet/fib/fib_entry_src.c +++ b/src/vnet/fib/fib_entry_src.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -229,8 +230,6 @@ fib_forward_chain_type_t fib_entry_chain_type_fixup (const fib_entry_t *entry, fib_forward_chain_type_t fct) { - ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS == fct); - /* * The EOS chain is a tricky since one cannot know the adjacency * to link to without knowing what the packets payload protocol @@ -238,6 +237,11 @@ fib_entry_chain_type_fixup (const fib_entry_t *entry, */ fib_forward_chain_type_t dfct; + if (FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct) + { + return (fct); + } + dfct = fib_entry_get_default_chain_type(entry); if (FIB_FORW_CHAIN_TYPE_MPLS_EOS == dfct) @@ -303,7 +307,12 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index, * found a matching extension. stack it to obtain the forwarding * info for this path. 
*/ - ctx->next_hops = fib_path_ext_stack(path_ext, ctx->fib_entry, ctx->fct, ctx->next_hops); + ctx->next_hops = + fib_path_ext_stack(path_ext, + ctx->fct, + fib_entry_chain_type_fixup(ctx->fib_entry, + ctx->fct), + ctx->next_hops); } else { @@ -355,6 +364,9 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index, fib_entry_chain_type_fixup(ctx->fib_entry, ctx->fct), &nh->path_dpo); + fib_path_stack_mpls_disp(path_index, + ctx->fib_entry->fe_prefix.fp_payload_proto, + &nh->path_dpo); break; } @@ -424,50 +436,70 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry, /* * first time create */ - flow_hash_config_t fhc; - - fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, - dpo_proto_to_fib(lb_proto)); - dpo_set(dpo_lb, - DPO_LOAD_BALANCE, - lb_proto, - load_balance_create(0, lb_proto, fhc)); + if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_MULTICAST) + { + dpo_set(dpo_lb, + DPO_REPLICATE, + lb_proto, + MPLS_IS_REPLICATE | replicate_create(0, lb_proto)); + } + else + { + flow_hash_config_t fhc; + + fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, + dpo_proto_to_fib(lb_proto)); + dpo_set(dpo_lb, + DPO_LOAD_BALANCE, + lb_proto, + load_balance_create(0, lb_proto, fhc)); + } } - load_balance_multipath_update(dpo_lb, - ctx.next_hops, - fib_entry_calc_lb_flags(&ctx)); - vec_free(ctx.next_hops); - - /* - * if this entry is sourced by the uRPF-exempt source then we - * append the always present local0 interface (index 0) to the - * uRPF list so it is not empty. that way packets pass the loose check. - */ - index_t ui = fib_path_list_get_urpf(esrc->fes_pl); - - if ((fib_entry_is_sourced(fib_entry_get_index(fib_entry), - FIB_SOURCE_URPF_EXEMPT) || - (esrc->fes_entry_flags & FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT))&& - (0 == fib_urpf_check_size(ui))) + if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_MULTICAST) { - /* - * The uRPF list we get from the path-list is shared by all - * other users of the list, but the uRPF exemption applies - * only to this prefix. 
So we need our own list. - */ - ui = fib_urpf_list_alloc_and_lock(); - fib_urpf_list_append(ui, 0); - fib_urpf_list_bake(ui); - load_balance_set_urpf(dpo_lb->dpoi_index, ui); - fib_urpf_list_unlock(ui); + /* + * MPLS multicast + */ + replicate_multipath_update(dpo_lb, ctx.next_hops); } else { - load_balance_set_urpf(dpo_lb->dpoi_index, ui); + load_balance_multipath_update(dpo_lb, + ctx.next_hops, + fib_entry_calc_lb_flags(&ctx)); + vec_free(ctx.next_hops); + + /* + * if this entry is sourced by the uRPF-exempt source then we + * append the always present local0 interface (index 0) to the + * uRPF list so it is not empty. that way packets pass the loose check. + */ + index_t ui = fib_path_list_get_urpf(esrc->fes_pl); + + if ((fib_entry_is_sourced(fib_entry_get_index(fib_entry), + FIB_SOURCE_URPF_EXEMPT) || + (esrc->fes_entry_flags & FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT))&& + (0 == fib_urpf_check_size(ui))) + { + /* + * The uRPF list we get from the path-list is shared by all + * other users of the list, but the uRPF exemption applies + * only to this prefix. So we need our own list. 
+ */ + ui = fib_urpf_list_alloc_and_lock(); + fib_urpf_list_append(ui, 0); + fib_urpf_list_bake(ui); + load_balance_set_urpf(dpo_lb->dpoi_index, ui); + fib_urpf_list_unlock(ui); + } + else + { + load_balance_set_urpf(dpo_lb->dpoi_index, ui); + } + load_balance_set_fib_entry_flags(dpo_lb->dpoi_index, + fib_entry_get_flags_i(fib_entry)); } - load_balance_set_fib_entry_flags(dpo_lb->dpoi_index, - fib_entry_get_flags_i(fib_entry)); } void @@ -887,21 +919,6 @@ fib_entry_src_action_remove (fib_entry_t *fib_entry, return (sflags); } -static inline int -fib_route_recurses_via_self (const fib_prefix_t *prefix, - const fib_route_path_t *rpath) -{ - /* - * not all zeros next hop && - * is recursive path && - * nexthop is same as the route's address - */ - return ((!ip46_address_is_zero(&rpath->frp_addr)) && - (~0 == rpath->frp_sw_if_index) && - (0 == ip46_address_cmp(&rpath->frp_addr, &prefix->fp_addr))); - -} - /* * fib_route_attached_cross_table * @@ -962,14 +979,14 @@ fib_entry_src_flags_2_path_list_flags (fib_entry_flag_t eflags) { plf |= FIB_PATH_LIST_FLAG_DROP; } - if (eflags & FIB_ENTRY_FLAG_LOCAL) - { - plf |= FIB_PATH_LIST_FLAG_LOCAL; - } if (eflags & FIB_ENTRY_FLAG_EXCLUSIVE) { plf |= FIB_PATH_LIST_FLAG_EXCLUSIVE; } + if (eflags & FIB_ENTRY_FLAG_LOCAL) + { + plf |= FIB_PATH_LIST_FLAG_LOCAL; + } return (plf); } @@ -980,25 +997,6 @@ fib_entry_flags_update (const fib_entry_t *fib_entry, fib_path_list_flags_t *pl_flags, fib_entry_src_t *esrc) { - /* - * don't allow the addition of a recursive looped path for prefix - * via itself. - */ - if (fib_route_recurses_via_self(&fib_entry->fe_prefix, rpath)) - { - /* - * force the install of a drop path-list. - * we want the entry to have some path-list, mainly so - * the dodgy path can be rmeoved when the source stops playing - * silly buggers. 
- */ - *pl_flags |= FIB_PATH_LIST_FLAG_DROP; - } - else - { - *pl_flags &= ~FIB_PATH_LIST_FLAG_DROP; - } - if ((esrc->fes_src == FIB_SOURCE_API) || (esrc->fes_src == FIB_SOURCE_CLI)) { diff --git a/src/vnet/fib/fib_internal.h b/src/vnet/fib/fib_internal.h index 2d980bcc..8abc0e07 100644 --- a/src/vnet/fib/fib_internal.h +++ b/src/vnet/fib/fib_internal.h @@ -25,6 +25,7 @@ #undef FIB_DEBUG extern void fib_prefix_from_mpls_label(mpls_label_t label, + mpls_eos_bit_t eos, fib_prefix_t *prf); extern int fib_route_path_cmp(const fib_route_path_t *rpath1, diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index 6b202a97..f81f4170 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -66,6 +68,10 @@ typedef enum fib_path_type_t_ { * deag. Link to a lookup adj in the next table */ FIB_PATH_TYPE_DEAG, + /** + * interface receive. + */ + FIB_PATH_TYPE_INTF_RX, /** * receive. it's for-us. */ @@ -88,6 +94,7 @@ typedef enum fib_path_type_t_ { [FIB_PATH_TYPE_SPECIAL] = "special", \ [FIB_PATH_TYPE_EXCLUSIVE] = "exclusive", \ [FIB_PATH_TYPE_DEAG] = "deag", \ + [FIB_PATH_TYPE_INTF_RX] = "intf-rx", \ [FIB_PATH_TYPE_RECEIVE] = "receive", \ } @@ -220,10 +227,16 @@ typedef struct fib_path_t_ { * The next-hop */ ip46_address_t fp_ip; - /** - * The local label to resolve through. - */ - mpls_label_t fp_local_label; + struct { + /** + * The local label to resolve through. + */ + mpls_label_t fp_local_label; + /** + * The EOS bit of the resolving label + */ + mpls_eos_bit_t fp_eos; + }; } fp_nh; /** * The FIB table index in which to find the next-hop. 
@@ -254,6 +267,10 @@ typedef struct fib_path_t_ { * The FIB index in which to perfom the next lookup */ fib_node_index_t fp_tbl_id; + /** + * The RPF-ID to tag the packets with + */ + fib_rpf_id_t fp_rpf_id; } deag; struct { } special; @@ -273,6 +290,12 @@ typedef struct fib_path_t_ { */ ip46_address_t fp_addr; } receive; + struct { + /** + * The interface on which the packets will be input. + */ + u32 fp_interface; + } intf_rx; }; STRUCT_MARK(path_hash_end); @@ -444,9 +467,11 @@ format_fib_path (u8 * s, va_list * args) case FIB_PATH_TYPE_RECURSIVE: if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) { - s = format (s, "via %U", + s = format (s, "via %U %U", format_mpls_unicast_label, - path->recursive.fp_nh.fp_local_label); + path->recursive.fp_nh.fp_local_label, + format_mpls_eos_bit, + path->recursive.fp_nh.fp_eos); } else { @@ -465,6 +490,7 @@ format_fib_path (u8 * s, va_list * args) break; case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_INTF_RX: case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_DEAG: case FIB_PATH_TYPE_EXCLUSIVE: @@ -736,6 +762,7 @@ fib_path_unresolve (fib_path_t *path) break; case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_INTF_RX: case FIB_PATH_TYPE_DEAG: /* * these hold only the path's DPO, which is reset below. 
@@ -754,16 +781,24 @@ fib_path_unresolve (fib_path_t *path) } static fib_forward_chain_type_t -fib_path_proto_to_chain_type (fib_protocol_t proto) +fib_path_to_chain_type (const fib_path_t *path) { - switch (proto) + switch (path->fp_nh_proto) { case FIB_PROTOCOL_IP4: return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); case FIB_PROTOCOL_IP6: return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); case FIB_PROTOCOL_MPLS: - return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + if (FIB_PATH_TYPE_RECURSIVE == path->fp_type && + MPLS_EOS == path->recursive.fp_nh.fp_eos) + { + return (FIB_FORW_CHAIN_TYPE_MPLS_EOS); + } + else + { + return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + } } return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); } @@ -793,7 +828,7 @@ fib_path_back_walk_notify (fib_node_t *node, */ fib_path_recursive_adj_update( path, - fib_path_proto_to_chain_type(path->fp_nh_proto), + fib_path_to_chain_type(path), &path->fp_dpo); } if ((FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) || @@ -931,6 +966,8 @@ FIXME comment path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP; } break; + case FIB_PATH_TYPE_INTF_RX: + ASSERT(0); case FIB_PATH_TYPE_DEAG: /* * FIXME When VRF delete is allowed this will need a poke.
@@ -986,6 +1023,14 @@ fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath) cfg_flags |= FIB_PATH_CFG_FLAG_LOCAL; if (rpath->frp_flags & FIB_ROUTE_PATH_ATTACHED) cfg_flags |= FIB_PATH_CFG_FLAG_ATTACHED; + if (rpath->frp_flags & FIB_ROUTE_PATH_INTF_RX) + cfg_flags |= FIB_PATH_CFG_FLAG_INTF_RX; + if (rpath->frp_flags & FIB_ROUTE_PATH_RPF_ID) + cfg_flags |= FIB_PATH_CFG_FLAG_RPF_ID; + if (rpath->frp_flags & FIB_ROUTE_PATH_EXCLUSIVE) + cfg_flags |= FIB_PATH_CFG_FLAG_EXCLUSIVE; + if (rpath->frp_flags & FIB_ROUTE_PATH_DROP) + cfg_flags |= FIB_PATH_CFG_FLAG_DROP; return (cfg_flags); } @@ -998,8 +1043,6 @@ fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath) */ fib_node_index_t fib_path_create (fib_node_index_t pl_index, - fib_protocol_t nh_proto, - fib_path_cfg_flags_t flags, const fib_route_path_t *rpath) { fib_path_t *path; @@ -1012,7 +1055,7 @@ fib_path_create (fib_node_index_t pl_index, dpo_reset(&path->fp_dpo); path->fp_pl_index = pl_index; - path->fp_nh_proto = nh_proto; + path->fp_nh_proto = rpath->frp_proto; path->fp_via_fib = FIB_NODE_INDEX_INVALID; path->fp_weight = rpath->frp_weight; if (0 == path->fp_weight) @@ -1023,8 +1066,7 @@ fib_path_create (fib_node_index_t pl_index, */ path->fp_weight = 1; } - path->fp_cfg_flags = flags; - path->fp_cfg_flags |= fib_path_route_flags_to_cfg_flags(rpath); + path->fp_cfg_flags = fib_path_route_flags_to_cfg_flags(rpath); /* * deduce the path's tpye from the parementers and save what is needed. 
@@ -1035,6 +1077,17 @@ fib_path_create (fib_node_index_t pl_index, path->receive.fp_interface = rpath->frp_sw_if_index; path->receive.fp_addr = rpath->frp_addr; } + else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_INTF_RX) + { + path->fp_type = FIB_PATH_TYPE_INTF_RX; + path->intf_rx.fp_interface = rpath->frp_sw_if_index; + } + else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RPF_ID) + { + path->fp_type = FIB_PATH_TYPE_DEAG; + path->deag.fp_tbl_id = rpath->frp_fib_index; + path->deag.fp_rpf_id = rpath->frp_rpf_id; + } else if (~0 != rpath->frp_sw_if_index) { if (ip46_address_is_zero(&rpath->frp_addr)) @@ -1069,6 +1122,7 @@ fib_path_create (fib_node_index_t pl_index, if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) { path->recursive.fp_nh.fp_local_label = rpath->frp_local_label; + path->recursive.fp_nh.fp_eos = rpath->frp_eos; } else { @@ -1238,17 +1292,13 @@ fib_path_cmp_i (const fib_path_t *path1, res = ip46_address_cmp(&path1->attached_next_hop.fp_nh, &path2->attached_next_hop.fp_nh); if (0 == res) { - res = vnet_sw_interface_compare( - vnet_get_main(), - path1->attached_next_hop.fp_interface, - path2->attached_next_hop.fp_interface); + res = (path1->attached_next_hop.fp_interface - + path2->attached_next_hop.fp_interface); } break; case FIB_PATH_TYPE_ATTACHED: - res = vnet_sw_interface_compare( - vnet_get_main(), - path1->attached.fp_interface, - path2->attached.fp_interface); + res = (path1->attached.fp_interface - + path2->attached.fp_interface); break; case FIB_PATH_TYPE_RECURSIVE: res = ip46_address_cmp(&path1->recursive.fp_nh, @@ -1261,6 +1311,13 @@ fib_path_cmp_i (const fib_path_t *path1, break; case FIB_PATH_TYPE_DEAG: res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id); + if (0 == res) + { + res = (path1->deag.fp_rpf_id - path2->deag.fp_rpf_id); + } + break; + case FIB_PATH_TYPE_INTF_RX: + res = (path1->intf_rx.fp_interface - path2->intf_rx.fp_interface); break; case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_RECEIVE: @@ -1336,22 +1393,22 @@ 
fib_path_cmp_w_route_path (fib_node_index_t path_index, &rpath->frp_addr); if (0 == res) { - res = vnet_sw_interface_compare( - vnet_get_main(), - path->attached_next_hop.fp_interface, - rpath->frp_sw_if_index); + res = (path->attached_next_hop.fp_interface - + rpath->frp_sw_if_index); } break; case FIB_PATH_TYPE_ATTACHED: - res = vnet_sw_interface_compare( - vnet_get_main(), - path->attached.fp_interface, - rpath->frp_sw_if_index); + res = (path->attached.fp_interface - rpath->frp_sw_if_index); break; case FIB_PATH_TYPE_RECURSIVE: if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) { res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label; + + if (res == 0) + { + res = path->recursive.fp_nh.fp_eos - rpath->frp_eos; + } } else { @@ -1364,9 +1421,16 @@ fib_path_cmp_w_route_path (fib_node_index_t path_index, res = (path->recursive.fp_tbl_id - rpath->frp_fib_index); } break; + case FIB_PATH_TYPE_INTF_RX: + res = (path->intf_rx.fp_interface - rpath->frp_sw_if_index); + break; case FIB_PATH_TYPE_DEAG: res = (path->deag.fp_tbl_id - rpath->frp_fib_index); - break; + if (0 == res) + { + res = (path->deag.fp_rpf_id - rpath->frp_rpf_id); + } + break; case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_RECEIVE: case FIB_PATH_TYPE_EXCLUSIVE: @@ -1465,6 +1529,7 @@ fib_path_recursive_loop_detect (fib_node_index_t path_index, case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_DEAG: case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_INTF_RX: case FIB_PATH_TYPE_EXCLUSIVE: /* * these path types cannot be part of a loop, since they are the leaves @@ -1563,7 +1628,9 @@ fib_path_resolve (fib_node_index_t path_index) if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) { - fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, &pfx); + fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, + path->recursive.fp_nh.fp_eos, + &pfx); } else { @@ -1592,7 +1659,7 @@ fib_path_resolve (fib_node_index_t path_index) */ fib_path_recursive_adj_update( path, - 
fib_path_proto_to_chain_type(path->fp_nh_proto), + fib_path_to_chain_type(path), &path->fp_dpo); break; @@ -1605,16 +1672,25 @@ fib_path_resolve (fib_node_index_t path_index) drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); break; case FIB_PATH_TYPE_DEAG: + { /* * Resolve via a lookup DPO. * FIXME. control plane should add routes with a table ID */ - lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id, - fib_proto_to_dpo(path->fp_nh_proto), - LOOKUP_INPUT_DST_ADDR, - LOOKUP_TABLE_FROM_CONFIG, - &path->fp_dpo); + lookup_cast_t cast; + + cast = (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RPF_ID ? + LOOKUP_MULTICAST : + LOOKUP_UNICAST); + + lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id, + fib_proto_to_dpo(path->fp_nh_proto), + cast, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_CONFIG, + &path->fp_dpo); break; + } case FIB_PATH_TYPE_RECEIVE: /* * Resolve via a receive DPO. @@ -1624,6 +1700,15 @@ fib_path_resolve (fib_node_index_t path_index) &path->receive.fp_addr, &path->fp_dpo); break; + case FIB_PATH_TYPE_INTF_RX: { + /* + * Resolve via a receive DPO. 
+ */ + interface_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto), + path->intf_rx.fp_interface, + &path->fp_dpo); + break; + } case FIB_PATH_TYPE_EXCLUSIVE: /* * Resolve via the user provided DPO @@ -1652,6 +1737,7 @@ fib_path_get_resolving_interface (fib_node_index_t path_index) return (path->receive.fp_interface); case FIB_PATH_TYPE_RECURSIVE: return (fib_entry_get_resolving_interface(path->fp_via_fib)); + case FIB_PATH_TYPE_INTF_RX: case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_DEAG: case FIB_PATH_TYPE_EXCLUSIVE: @@ -1743,6 +1829,7 @@ fib_path_contribute_urpf (fib_node_index_t path_index, case FIB_PATH_TYPE_DEAG: case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_INTF_RX: /* * these path types don't link to an adj */ @@ -1750,6 +1837,44 @@ fib_path_contribute_urpf (fib_node_index_t path_index, } } +void +fib_path_stack_mpls_disp (fib_node_index_t path_index, + dpo_proto_t payload_proto, + dpo_id_t *dpo) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(path); + + switch (path->fp_type) + { + case FIB_PATH_TYPE_DEAG: + { + dpo_id_t tmp = DPO_INVALID; + + dpo_copy(&tmp, dpo); + dpo_set(dpo, + DPO_MPLS_DISPOSITION, + payload_proto, + mpls_disp_dpo_create(payload_proto, + path->deag.fp_rpf_id, + &tmp)); + dpo_reset(&tmp); + break; + } + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_ATTACHED: + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + case FIB_PATH_TYPE_RECURSIVE: + case FIB_PATH_TYPE_INTF_RX: + case FIB_PATH_TYPE_EXCLUSIVE: + case FIB_PATH_TYPE_SPECIAL: + break; + } +} + void fib_path_contribute_forwarding (fib_node_index_t path_index, fib_forward_chain_type_t fct, @@ -1769,7 +1894,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, * This then represents the path's 'native' protocol; IP. * For all others will need to go find something else. 
*/ - if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct) + if (fib_path_to_chain_type(path) == fct) { dpo_copy(dpo, &path->fp_dpo); } @@ -1813,10 +1938,10 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: - fib_path_recursive_adj_update(path, fct, dpo); - break; case FIB_FORW_CHAIN_TYPE_MCAST_IP4: case FIB_FORW_CHAIN_TYPE_MCAST_IP6: + fib_path_recursive_adj_update(path, fct, dpo); + break; case FIB_FORW_CHAIN_TYPE_ETHERNET: case FIB_FORW_CHAIN_TYPE_NSH: ASSERT(0); @@ -1829,13 +1954,14 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID, DPO_PROTO_MPLS, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_CONFIG, dpo); break; + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: - case FIB_FORW_CHAIN_TYPE_MPLS_EOS: dpo_copy(dpo, &path->fp_dpo); break; case FIB_FORW_CHAIN_TYPE_MCAST_IP4: @@ -1870,7 +1996,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, ai = adj_mcast_add_or_lock(path->fp_nh_proto, fib_forw_chain_type_to_link_type(fct), path->attached.fp_interface); - dpo_set(dpo, DPO_ADJACENCY_MCAST, + dpo_set(dpo, DPO_ADJACENCY, fib_forw_chain_type_to_dpo_proto(fct), ai); adj_unlock(ai); @@ -1878,6 +2004,14 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, break; } break; + case FIB_PATH_TYPE_INTF_RX: + /* + * Create the adj needed for sending IP multicast traffic + */ + interface_dpo_add_or_lock(fib_forw_chain_type_to_dpo_proto(fct), + path->attached.fp_interface, + dpo); + break; case FIB_PATH_TYPE_RECEIVE: case FIB_PATH_TYPE_SPECIAL: dpo_copy(dpo, &path->fp_dpo); diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h index 14efc1ab..334be6f5 100644 --- a/src/vnet/fib/fib_path.h +++ b/src/vnet/fib/fib_path.h @@ -69,6 
+69,14 @@ typedef enum fib_path_cfg_attribute_t_ { /** * The path is a for-us path */ + FIB_PATH_CFG_ATTRIBUTE_INTF_RX, + /** + * The path is a deag with rpf-id + */ + FIB_PATH_CFG_ATTRIBUTE_RPF_ID, + /** + * The path is an interface recieve + */ FIB_PATH_CFG_ATTRIBUTE_LOCAL, /** * Marker. Add new types before this one, then update it. @@ -88,6 +96,8 @@ typedef enum fib_path_cfg_attribute_t_ { [FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED] = "resolve-attached", \ [FIB_PATH_CFG_ATTRIBUTE_LOCAL] = "local", \ [FIB_PATH_CFG_ATTRIBUTE_ATTACHED] = "attached", \ + [FIB_PATH_CFG_ATTRIBUTE_INTF_RX] = "interface-rx", \ + [FIB_PATH_CFG_ATTRIBUTE_RPF_ID] = "rpf-id", \ } #define FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(_item) \ @@ -106,6 +116,8 @@ typedef enum fib_path_cfg_flags_t_ { FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED), FIB_PATH_CFG_FLAG_LOCAL = (1 << FIB_PATH_CFG_ATTRIBUTE_LOCAL), FIB_PATH_CFG_FLAG_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_ATTACHED), + FIB_PATH_CFG_FLAG_INTF_RX = (1 << FIB_PATH_CFG_ATTRIBUTE_INTF_RX), + FIB_PATH_CFG_FLAG_RPF_ID = (1 << FIB_PATH_CFG_ATTRIBUTE_RPF_ID), } __attribute__ ((packed)) fib_path_cfg_flags_t; @@ -117,8 +129,6 @@ extern u8 *fib_path_adj_format(fib_node_index_t pi, extern u8 * format_fib_path(u8 * s, va_list * args); extern fib_node_index_t fib_path_create(fib_node_index_t pl_index, - fib_protocol_t nh_proto, - fib_path_cfg_flags_t flags, const fib_route_path_t *path); extern fib_node_index_t fib_path_create_special(fib_node_index_t pl_index, fib_protocol_t nh_proto, @@ -145,6 +155,9 @@ extern load_balance_path_t * fib_path_append_nh_for_multipath_hash( fib_node_index_t path_index, fib_forward_chain_type_t fct, load_balance_path_t *hash_key); +extern void fib_path_stack_mpls_disp(fib_node_index_t path_index, + dpo_proto_t payload_proto, + dpo_id_t *dpo); extern void fib_path_contribute_forwarding(fib_node_index_t path_index, fib_forward_chain_type_t type, dpo_id_t *dpo); diff --git 
a/src/vnet/fib/fib_path_ext.c b/src/vnet/fib/fib_path_ext.c index f75b5626..08293bcf 100644 --- a/src/vnet/fib/fib_path_ext.c +++ b/src/vnet/fib/fib_path_ext.c @@ -103,8 +103,8 @@ fib_path_ext_is_imp_null (fib_path_ext_t *path_ext) load_balance_path_t * fib_path_ext_stack (fib_path_ext_t *path_ext, - const fib_entry_t *entry, fib_forward_chain_type_t child_fct, + fib_forward_chain_type_t imp_null_fct, load_balance_path_t *nhs) { fib_forward_chain_type_t parent_fct; @@ -129,7 +129,7 @@ fib_path_ext_stack (fib_path_ext_t *path_ext, */ if (fib_path_ext_is_imp_null(path_ext)) { - parent_fct = fib_entry_chain_type_fixup(entry, child_fct); + parent_fct = imp_null_fct; } else { diff --git a/src/vnet/fib/fib_path_ext.h b/src/vnet/fib/fib_path_ext.h index cf8f8df0..d617700d 100644 --- a/src/vnet/fib/fib_path_ext.h +++ b/src/vnet/fib/fib_path_ext.h @@ -18,6 +18,7 @@ #include #include +#include /** * A path extension is a per-entry addition to the forwarding information @@ -61,8 +62,8 @@ extern void fib_path_ext_resolve(fib_path_ext_t *path_ext, fib_node_index_t path_list_index); extern load_balance_path_t *fib_path_ext_stack(fib_path_ext_t *path_ext, - const struct fib_entry_t_ *entry, fib_forward_chain_type_t fct, + fib_forward_chain_type_t imp_null_fct, load_balance_path_t *nhs); #endif diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c index b9a391b3..ea6565dd 100644 --- a/src/vnet/fib/fib_path_list.c +++ b/src/vnet/fib/fib_path_list.c @@ -40,13 +40,6 @@ typedef struct fib_path_list_t_ { */ fib_path_list_flags_t fpl_flags; - /** - * The next-hop protocol for the paths in this path list. - * Note that fixing the proto here means we don't support a mix of - * v4 and v6 paths. ho hum. - */ - fib_protocol_t fpl_nh_proto; - /** * Vector of paths indicies for all configured paths. * For shareable path-lists this list MUST not change. 
@@ -57,6 +50,11 @@ typedef struct fib_path_list_t_ { * the RPF list calculated for this path list */ fib_node_index_t fpl_urpf; + + /** + * Hash table of paths. valid only with INDEXED flag + */ + uword *fpl_db; } fib_path_list_t; /* @@ -131,7 +129,6 @@ format_fib_path_list (u8 * s, va_list * args) s = format (s, " index:%u", fib_path_list_get_index(path_list)); s = format (s, " locks:%u", path_list->fpl_node.fn_locks); - s = format (s, " proto:%U", format_fib_protocol, path_list->fpl_nh_proto); if (FIB_PATH_LIST_FLAG_NONE != path_list->fpl_flags) { @@ -155,26 +152,6 @@ format_fib_path_list (u8 * s, va_list * args) return (s); } -u8 * -fib_path_list_adjs_format (fib_node_index_t path_list_index, - u32 indent, - u8 * s) -{ - fib_path_list_t *path_list; - u32 i; - - path_list = fib_path_list_get(path_list_index); - - vec_foreach_index (i, path_list->fpl_paths) - { - s = fib_path_adj_format(path_list->fpl_paths[i], - indent, s); - } - - return (s); -} - - u8 * fib_path_list_format (fib_node_index_t path_list_index, u8 * s) @@ -648,27 +625,6 @@ fib_path_list_is_looped (fib_node_index_t path_list_index) return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_LOOPED); } -static fib_path_cfg_flags_t -fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf) -{ - fib_path_cfg_flags_t pf = FIB_PATH_CFG_FLAG_NONE; - - if (plf & FIB_PATH_LIST_FLAG_LOCAL) - { - pf |= FIB_PATH_CFG_FLAG_LOCAL; - } - if (plf & FIB_PATH_LIST_FLAG_DROP) - { - pf |= FIB_PATH_CFG_FLAG_DROP; - } - if (plf & FIB_PATH_LIST_FLAG_EXCLUSIVE) - { - pf |= FIB_PATH_CFG_FLAG_EXCLUSIVE; - } - - return (pf); -} - static fib_path_list_flags_t fib_path_list_flags_fixup (fib_path_list_flags_t flags) { @@ -695,18 +651,15 @@ fib_path_list_create (fib_path_list_flags_t flags, flags = fib_path_list_flags_fixup(flags); path_list = fib_path_list_alloc(&path_list_index); path_list->fpl_flags = flags; - /* - * we'll assume for now all paths are the same next-hop protocol - */ - path_list->fpl_nh_proto = rpaths[0].frp_proto; - 
vec_foreach_index(i, rpaths) + if (NULL != rpaths) { - vec_add1(path_list->fpl_paths, - fib_path_create(path_list_index, - path_list->fpl_nh_proto, - fib_path_list_flags_2_path_flags(flags), - &rpaths[i])); + vec_foreach_index(i, rpaths) + { + vec_add1(path_list->fpl_paths, + fib_path_create(path_list_index, + &rpaths[i])); + } } /* @@ -748,6 +701,27 @@ fib_path_list_create (fib_path_list_flags_t flags, return (path_list_index); } +static fib_path_cfg_flags_t +fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf) +{ + fib_path_cfg_flags_t pf = FIB_PATH_CFG_FLAG_NONE; + + if (plf & FIB_PATH_LIST_FLAG_DROP) + { + pf |= FIB_PATH_CFG_FLAG_DROP; + } + if (plf & FIB_PATH_LIST_FLAG_EXCLUSIVE) + { + pf |= FIB_PATH_CFG_FLAG_EXCLUSIVE; + } + if (plf & FIB_PATH_LIST_FLAG_LOCAL) + { + pf |= FIB_PATH_CFG_FLAG_LOCAL; + } + + return (pf); +} + fib_node_index_t fib_path_list_create_special (fib_protocol_t nh_proto, fib_path_list_flags_t flags, @@ -758,11 +732,10 @@ fib_path_list_create_special (fib_protocol_t nh_proto, path_list = fib_path_list_alloc(&path_list_index); path_list->fpl_flags = flags; - path_list->fpl_nh_proto = nh_proto; path_index = fib_path_create_special(path_list_index, - path_list->fpl_nh_proto, + nh_proto, fib_path_list_flags_2_path_flags(flags), dpo); vec_add1(path_list->fpl_paths, path_index); @@ -775,6 +748,30 @@ fib_path_list_create_special (fib_protocol_t nh_proto, return (path_list_index); } +/* + * return the index info the path-lists's vector of paths, of the matching path. 
+ * ~0 if not found + */ +u32 +fib_path_list_find_rpath (fib_node_index_t path_list_index, + const fib_route_path_t *rpath) +{ + fib_path_list_t *path_list; + u32 ii; + + path_list = fib_path_list_get(path_list_index); + + vec_foreach_index (ii, path_list->fpl_paths) + { + if (!fib_path_cmp_w_route_path(path_list->fpl_paths[ii], rpath)) + { + return (ii); + } + } + return (~0); +} + + /* * fib_path_list_copy_and_path_add * @@ -782,13 +779,62 @@ fib_path_list_create_special (fib_protocol_t nh_proto, * The path-list returned could either have been newly created, or * can be a shared path-list from the data-base. */ +fib_node_index_t +fib_path_list_path_add (fib_node_index_t path_list_index, + const fib_route_path_t *rpaths) +{ + fib_node_index_t new_path_index, *orig_path_index; + fib_path_list_t *path_list; + + /* + * the path is added in place, so the path-list must not be shared + */ + path_list = fib_path_list_get(path_list_index); + + ASSERT(1 == vec_len(rpaths)); + ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED)); + + FIB_PATH_LIST_DBG(path_list, "path-add"); + + new_path_index = fib_path_create(path_list_index, + rpaths); + + vec_foreach (orig_path_index, path_list->fpl_paths) + { + /* + * don't add duplicate paths; free the candidate just created + */ + if (0 == fib_path_cmp(new_path_index, *orig_path_index)) + { + fib_path_destroy(new_path_index); + return (*orig_path_index); + } + } + + /* + * Add the new path - no sort, no sharing, no key.. + */ + vec_add1(path_list->fpl_paths, new_path_index); + + FIB_PATH_LIST_DBG(path_list, "path-added"); + + /* + * no shared path list requested. resolve and use the one + * just created. 
+ */ + fib_path_resolve(new_path_index); + + return (new_path_index); +} + fib_node_index_t fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index, - fib_path_list_flags_t flags, - const fib_route_path_t *rpaths) + fib_path_list_flags_t flags, + const fib_route_path_t *rpaths) { fib_node_index_t path_index, new_path_index, *orig_path_index; fib_path_list_t *path_list, *orig_path_list; + fib_node_index_t exist_path_list_index; fib_node_index_t path_list_index; fib_node_index_t pi; @@ -806,13 +852,11 @@ fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index, flags = fib_path_list_flags_fixup(flags); path_list->fpl_flags = flags; - path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto; + vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths)); pi = 0; new_path_index = fib_path_create(path_list_index, - path_list->fpl_nh_proto, - fib_path_list_flags_2_path_flags(flags), rpaths); vec_foreach (orig_path_index, orig_path_list->fpl_paths) @@ -845,46 +889,79 @@ fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index, FIB_PATH_LIST_DBG(path_list, "path-added"); /* - * If a shared path list is requested, consult the DB for a match + * check for a matching path-list in the DB. + * If we find one then we can return the existing one and destroy the + * new one just created. */ - if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED) + exist_path_list_index = fib_path_list_db_find(path_list); + if (FIB_NODE_INDEX_INVALID != exist_path_list_index) { - fib_node_index_t exist_path_list_index; - /* - * check for a matching path-list in the DB. - * If we find one then we can return the existing one and destroy the - * new one just created. 
- */ - exist_path_list_index = fib_path_list_db_find(path_list); - if (FIB_NODE_INDEX_INVALID != exist_path_list_index) - { - fib_path_list_destroy(path_list); + fib_path_list_destroy(path_list); - path_list_index = exist_path_list_index; - } - else - { - /* - * if there was not a matching path-list, then this - * new one will need inserting into the DB and resolving. - */ - fib_path_list_db_insert(path_list_index); - - path_list = fib_path_list_resolve(path_list); - } + path_list_index = exist_path_list_index; } else { - /* - * no shared path list requested. resolve and use the one - * just created. - */ - path_list = fib_path_list_resolve(path_list); + /* + * if there was not a matching path-list, then this + * new one will need inserting into the DB and resolving. + */ + fib_path_list_db_insert(path_list_index); + + path_list = fib_path_list_resolve(path_list); } return (path_list_index); } +/* + * fib_path_list_path_remove + */ +fib_node_index_t +fib_path_list_path_remove (fib_node_index_t path_list_index, + const fib_route_path_t *rpaths) +{ + fib_node_index_t match_path_index, tmp_path_index; + fib_path_list_t *path_list; + fib_node_index_t pi; + + path_list = fib_path_list_get(path_list_index); + + ASSERT(1 == vec_len(rpaths)); + ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED)); + + FIB_PATH_LIST_DBG(orig_path_list, "path-remove"); + + /* + * create a representation of the path to be removed, so it + * can be used as a comparison object during the copy. 
+ */ + tmp_path_index = fib_path_create(path_list_index, + rpaths); + match_path_index = FIB_NODE_INDEX_INVALID; + + vec_foreach_index (pi, path_list->fpl_paths) + { + if (0 == fib_path_cmp(tmp_path_index, + path_list->fpl_paths[pi])) + { + /* + * match - remove it + */ + match_path_index = path_list->fpl_paths[pi]; + fib_path_destroy(match_path_index); + vec_del1(path_list->fpl_paths, pi); + } + } + + /* + * done with the temporary now + */ + fib_path_destroy(tmp_path_index); + + return (match_path_index); +} + /* * fib_path_list_copy_and_path_remove * @@ -911,7 +988,6 @@ fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index, FIB_PATH_LIST_DBG(orig_path_list, "copy-remove"); path_list->fpl_flags = flags; - path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto; /* * allocate as many paths as we might need in one go, rather than * using vec_add to do a few at a time. @@ -927,8 +1003,6 @@ fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index, * can be used as a comparison object during the copy. */ tmp_path_index = fib_path_create(path_list_index, - path_list->fpl_nh_proto, - fib_path_list_flags_2_path_flags(flags), rpaths); vec_foreach (orig_path_index, orig_path_list->fpl_paths) diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h index b4971add..9d246211 100644 --- a/src/vnet/fib/fib_path_list.h +++ b/src/vnet/fib/fib_path_list.h @@ -38,6 +38,11 @@ typedef enum fib_path_list_attribute_t_ { * be searched for each route update. */ FIB_PATH_LIST_ATTRIBUTE_SHARED = FIB_PATH_LIST_ATTRIBUTE_FIRST, + /** + * Indexed means the path-list keeps a hash table of all paths for + * fast lookup. The lookup result is the fib_node_index of the path. + */ + FIB_PATH_LIST_ATTRIBUTE_INDEXED, /** * explicit drop path-list. Used when the entry source needs to * force a drop, despite the fact the path info is present. 
@@ -73,6 +78,7 @@ typedef enum fib_path_list_attribute_t_ { typedef enum fib_path_list_flags_t_ { FIB_PATH_LIST_FLAG_NONE = 0, FIB_PATH_LIST_FLAG_SHARED = (1 << FIB_PATH_LIST_ATTRIBUTE_SHARED), + FIB_PATH_LIST_FLAG_INDEXED = (1 << FIB_PATH_LIST_ATTRIBUTE_INDEXED), FIB_PATH_LIST_FLAG_DROP = (1 << FIB_PATH_LIST_ATTRIBUTE_DROP), FIB_PATH_LIST_FLAG_LOCAL = (1 << FIB_PATH_LIST_ATTRIBUTE_LOCAL), FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE), @@ -83,10 +89,11 @@ typedef enum fib_path_list_flags_t_ { #define FIB_PATH_LIST_ATTRIBUTES { \ [FIB_PATH_LIST_ATTRIBUTE_SHARED] = "shared", \ + [FIB_PATH_LIST_ATTRIBUTE_INDEXED] = "indexed", \ [FIB_PATH_LIST_ATTRIBUTE_RESOLVED] = "resolved", \ [FIB_PATH_LIST_ATTRIBUTE_DROP] = "drop", \ [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ - [FIB_PATH_LIST_ATTRIBUTE_LOCAL] = "local", \ + [FIB_PATH_LIST_ATTRIBUTE_LOCAL] = "local", \ [FIB_PATH_LIST_ATTRIBUTE_LOOPED] = "looped", \ [FIB_PATH_LIST_ATTRIBUTE_NO_URPF] = "no-uRPF", \ } @@ -110,6 +117,13 @@ extern fib_node_index_t fib_path_list_copy_and_path_remove( fib_node_index_t pl_index, fib_path_list_flags_t flags, const fib_route_path_t *path); +extern fib_node_index_t fib_path_list_path_add ( + fib_node_index_t path_list_index, + const fib_route_path_t *rpaths); +extern fib_node_index_t fib_path_list_path_remove ( + fib_node_index_t path_list_index, + const fib_route_path_t *rpaths); + extern u32 fib_path_list_get_n_paths(fib_node_index_t pl_index); extern void fib_path_list_contribute_forwarding(fib_node_index_t path_list_index, @@ -137,11 +151,11 @@ extern int fib_path_list_is_looped(fib_node_index_t path_list_index); extern fib_protocol_t fib_path_list_get_proto(fib_node_index_t path_list_index); extern u8 * fib_path_list_format(fib_node_index_t pl_index, u8 * s); -extern u8 * fib_path_list_adjs_format(fib_node_index_t pl_index, - u32 indent, - u8 * s); extern index_t fib_path_list_lb_map_add_or_lock(fib_node_index_t pl_index, const fib_node_index_t *pis); 
+extern u32 fib_path_list_find_rpath (fib_node_index_t path_list_index, + const fib_route_path_t *rpath); + /** * A callback function type for walking a path-list's paths */ diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 6c3162e7..b31f35e3 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -475,8 +475,21 @@ fib_table_entry_special_remove (u32 fib_index, */ static void fib_table_route_path_fixup (const fib_prefix_t *prefix, + fib_entry_flag_t eflags, fib_route_path_t *path) { + /* + * not all zeros next hop && + * is recursive path && + * nexthop is same as the route's address + */ + if ((!ip46_address_is_zero(&path->frp_addr)) && + (~0 == path->frp_sw_if_index) && + (0 == ip46_address_cmp(&path->frp_addr, &prefix->fp_addr))) + { + /* Prefix recurses via itse;f */ + path->frp_flags |= FIB_ROUTE_PATH_DROP; + } if (fib_prefix_is_host(prefix) && ip46_address_is_zero(&path->frp_addr) && path->frp_sw_if_index != ~0) @@ -484,7 +497,19 @@ fib_table_route_path_fixup (const fib_prefix_t *prefix, path->frp_addr = prefix->fp_addr; path->frp_flags |= FIB_ROUTE_PATH_ATTACHED; } -} + if (eflags & FIB_ENTRY_FLAG_DROP) + { + path->frp_flags |= FIB_ROUTE_PATH_DROP; + } + if (eflags & FIB_ENTRY_FLAG_LOCAL) + { + path->frp_flags |= FIB_ROUTE_PATH_LOCAL; + } + if (eflags & FIB_ENTRY_FLAG_EXCLUSIVE) + { + path->frp_flags |= FIB_ROUTE_PATH_EXCLUSIVE; + } +} fib_node_index_t fib_table_entry_path_add (u32 fib_index, @@ -536,7 +561,7 @@ fib_table_entry_path_add2 (u32 fib_index, for (ii = 0; ii < vec_len(rpath); ii++) { - fib_table_route_path_fixup(prefix, &rpath[ii]); + fib_table_route_path_fixup(prefix, flags, &rpath[ii]); } if (FIB_NODE_INDEX_INVALID == fib_entry_index) @@ -583,11 +608,6 @@ fib_table_entry_path_remove2 (u32 fib_index, fib_table = fib_table_get(fib_index, prefix->fp_proto); fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); - for (ii = 0; ii < vec_len(rpath); ii++) - { - fib_table_route_path_fixup(prefix, 
&rpath[ii]); - } - if (FIB_NODE_INDEX_INVALID == fib_entry_index) { /* @@ -605,6 +625,15 @@ fib_table_entry_path_remove2 (u32 fib_index, fib_entry_lock(fib_entry_index); was_sourced = fib_entry_is_sourced(fib_entry_index, source); + for (ii = 0; ii < vec_len(rpath); ii++) + { + fib_table_route_path_fixup( + prefix, + fib_entry_get_flags_for_source(fib_entry_index, + source), + &rpath[ii]); + } + src_flag = fib_entry_path_remove(fib_entry_index, source, rpath); if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag)) @@ -661,7 +690,6 @@ fib_table_entry_path_remove (u32 fib_index, }; fib_route_path_t *paths = NULL; - fib_table_route_path_fixup(prefix, &path); vec_add1(paths, path); fib_table_entry_path_remove2(fib_index, prefix, source, paths); @@ -692,7 +720,7 @@ fib_table_entry_update (u32 fib_index, for (ii = 0; ii < vec_len(paths); ii++) { - fib_table_route_path_fixup(prefix, &paths[ii]); + fib_table_route_path_fixup(prefix, flags, &paths[ii]); } /* * sort the paths provided by the control plane. this means @@ -750,7 +778,6 @@ fib_table_entry_update_one_path (u32 fib_index, }; fib_route_path_t *paths = NULL; - fib_table_route_path_fixup(prefix, &path); vec_add1(paths, path); fib_entry_index = diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 3c9b8a38..e4a8a70e 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include @@ -271,6 +273,7 @@ typedef enum fib_test_lb_bucket_type_t_ { FT_LB_O_LB, FT_LB_SPECIAL, FT_LB_ADJ, + FT_LB_INTF, } fib_test_lb_bucket_type_t; typedef struct fib_test_lb_bucket_t_ { @@ -315,6 +318,31 @@ typedef struct fib_test_lb_bucket_t_ { }; } fib_test_lb_bucket_t; +typedef enum fib_test_rep_bucket_type_t_ { + FT_REP_LABEL_O_ADJ, + FT_REP_DISP_MFIB_LOOKUP, + FT_REP_INTF, +} fib_test_rep_bucket_type_t; + +typedef struct fib_test_rep_bucket_t_ { + fib_test_rep_bucket_type_t type; + + union + { + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + 
adj_index_t adj; + } label_o_adj; + struct + { + adj_index_t adj; + } adj; + }; +} fib_test_rep_bucket_t; + #define FIB_TEST_LB(_cond, _comment, _args...) \ { \ if (!FIB_TEST_I(_cond, _comment, ##_args)) { \ @@ -322,7 +350,83 @@ typedef struct fib_test_lb_bucket_t_ { } \ } -static int +int +fib_test_validate_rep_v (const replicate_t *rep, + u16 n_buckets, + va_list ap) +{ + const fib_test_rep_bucket_t *exp; + const dpo_id_t *dpo; + int bucket; + + FIB_TEST_LB((n_buckets == rep->rep_n_buckets), + "n_buckets = %d", rep->rep_n_buckets); + + for (bucket = 0; bucket < n_buckets; bucket++) + { + exp = va_arg(ap, fib_test_rep_bucket_t*); + + dpo = replicate_get_bucket_i(rep, bucket); + + switch (exp->type) + { + case FT_REP_LABEL_O_ADJ: + { + const mpls_label_dpo_t *mld; + mpls_label_t hdr; + FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + + mld = mpls_label_dpo_get(dpo->dpoi_index); + hdr = clib_net_to_host_u32(mld->mld_hdr[0].label_exp_s_ttl); + + FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) == + exp->label_o_adj.label), + "bucket %d stacks on label %d", + bucket, + exp->label_o_adj.label); + + FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) == + exp->label_o_adj.eos), + "bucket %d stacks on label %d %U", + bucket, + exp->label_o_adj.label, + format_mpls_eos_bit, exp->label_o_adj.eos); + + FIB_TEST_LB((DPO_ADJACENCY_INCOMPLETE == mld->mld_dpo.dpoi_type), + "bucket %d label stacks on %U", + bucket, + format_dpo_type, mld->mld_dpo.dpoi_type); + + FIB_TEST_LB((exp->label_o_adj.adj == mld->mld_dpo.dpoi_index), + "bucket %d label stacks on adj %d", + bucket, + exp->label_o_adj.adj); + } + break; + case FT_REP_INTF: + FIB_TEST_LB((DPO_INTERFACE == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + + FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index), + "bucket %d stacks on adj %d", + bucket, + exp->adj.adj); + break; + case FT_REP_DISP_MFIB_LOOKUP: +// ASSERT(0); + break; + } + } 
+ + return (!0); +} + +int fib_test_validate_lb_v (const load_balance_t *lb, u16 n_buckets, va_list ap) @@ -484,6 +588,16 @@ fib_test_validate_lb_v (const load_balance_t *lb, bucket, exp->adj.adj); break; + case FT_LB_INTF: + FIB_TEST_I((DPO_INTERFACE == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index), + "bucket %d stacks on adj %d", + bucket, + exp->adj.adj); + break; case FT_LB_O_LB: FIB_TEST_I((DPO_LOAD_BALANCE == dpo->dpoi_type), "bucket %d stacks on %U", @@ -509,14 +623,13 @@ fib_test_validate_lb_v (const load_balance_t *lb, return (!0); } -static int +int fib_test_validate_entry (fib_node_index_t fei, fib_forward_chain_type_t fct, u16 n_buckets, ...) { dpo_id_t dpo = DPO_INVALID; - const load_balance_t *lb; fib_prefix_t pfx; index_t fw_lbi; u32 fib_index; @@ -529,47 +642,59 @@ fib_test_validate_entry (fib_node_index_t fei, fib_index = fib_entry_get_fib_index(fei); fib_entry_contribute_forwarding(fei, fct, &dpo); - FIB_TEST_LB((DPO_LOAD_BALANCE == dpo.dpoi_type), - "Entry links to %U", - format_dpo_type, dpo.dpoi_type); - lb = load_balance_get(dpo.dpoi_index); - - res = fib_test_validate_lb_v(lb, n_buckets, ap); + if (DPO_REPLICATE == dpo.dpoi_type) + { + const replicate_t *rep; - /* - * ensure that the LB contributed by the entry is the - * same as the LB in the forwarding tables - */ - if (fct == fib_entry_get_default_chain_type(fib_entry_get(fei))) + rep = replicate_get(dpo.dpoi_index); + res = fib_test_validate_rep_v(rep, n_buckets, ap); + } + else { - switch (pfx.fp_proto) - { - case FIB_PROTOCOL_IP4: - fw_lbi = ip4_fib_forwarding_lookup(fib_index, &pfx.fp_addr.ip4); - break; - case FIB_PROTOCOL_IP6: - fw_lbi = ip6_fib_table_fwding_lookup(&ip6_main, fib_index, &pfx.fp_addr.ip6); - break; - case FIB_PROTOCOL_MPLS: - { - mpls_unicast_header_t hdr = { - .label_exp_s_ttl = 0, - }; + const load_balance_t *lb; + + FIB_TEST_LB((DPO_LOAD_BALANCE == dpo.dpoi_type), + "Entry 
links to %U", + format_dpo_type, dpo.dpoi_type); - vnet_mpls_uc_set_label(&hdr.label_exp_s_ttl, pfx.fp_label); - vnet_mpls_uc_set_s(&hdr.label_exp_s_ttl, pfx.fp_eos); - hdr.label_exp_s_ttl = clib_host_to_net_u32(hdr.label_exp_s_ttl); + lb = load_balance_get(dpo.dpoi_index); + res = fib_test_validate_lb_v(lb, n_buckets, ap); - fw_lbi = mpls_fib_table_forwarding_lookup(fib_index, &hdr); + /* + * ensure that the LB contributed by the entry is the + * same as the LB in the forwarding tables + */ + if (fct == fib_entry_get_default_chain_type(fib_entry_get(fei))) + { + switch (pfx.fp_proto) + { + case FIB_PROTOCOL_IP4: + fw_lbi = ip4_fib_forwarding_lookup(fib_index, &pfx.fp_addr.ip4); + break; + case FIB_PROTOCOL_IP6: + fw_lbi = ip6_fib_table_fwding_lookup(&ip6_main, fib_index, &pfx.fp_addr.ip6); break; + case FIB_PROTOCOL_MPLS: + { + mpls_unicast_header_t hdr = { + .label_exp_s_ttl = 0, + }; + + vnet_mpls_uc_set_label(&hdr.label_exp_s_ttl, pfx.fp_label); + vnet_mpls_uc_set_s(&hdr.label_exp_s_ttl, pfx.fp_eos); + hdr.label_exp_s_ttl = clib_host_to_net_u32(hdr.label_exp_s_ttl); + + fw_lbi = mpls_fib_table_forwarding_lookup(fib_index, &hdr); + break; + } + default: + fw_lbi = 0; } - default: - fw_lbi = 0; + FIB_TEST_LB((fw_lbi == dpo.dpoi_index), + "Contributed LB = FW LB: %U\n %U", + format_load_balance, fw_lbi, 0, + format_load_balance, dpo.dpoi_index, 0); } - FIB_TEST_LB((fw_lbi == dpo.dpoi_index), - "Contributed LB = FW LB: %U\n %U", - format_load_balance, fw_lbi, 0, - format_load_balance, dpo.dpoi_index, 0); } dpo_reset(&dpo); @@ -1289,6 +1414,7 @@ fib_test_v4 (void) lookup_dpo_add_or_lock_w_fib_index(fib_index, DPO_PROTO_IP4, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_CONFIG, &ex_dpo); @@ -2605,7 +2731,6 @@ fib_test_v4 (void) NULL, FIB_ROUTE_PATH_FLAG_NONE); - fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); dpo1 = fib_entry_contribute_ip_forwarding(fei); @@ -7493,6 +7618,7 @@ lfib_test (void) fib_route_path_t *rpaths = NULL, rpath = { .frp_proto 
= FIB_PROTOCOL_MPLS, .frp_local_label = 1200, + .frp_eos = MPLS_NON_EOS, .frp_sw_if_index = ~0, // recurive .frp_fib_index = 0, // Default MPLS fib .frp_weight = 1, @@ -7607,6 +7733,146 @@ lfib_test (void) dpo_reset(&ip_1200); + /* + * An rx-interface route. + * like the tail of an mcast LSP + */ + dpo_id_t idpo = DPO_INVALID; + + interface_dpo_add_or_lock(DPO_PROTO_IP4, + tm->hw[0]->sw_if_index, + &idpo); + + fib_prefix_t pfx_2500 = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 2500, + .fp_eos = MPLS_EOS, + .fp_payload_proto = DPO_PROTO_IP4, + }; + fib_test_lb_bucket_t rx_intf_0 = { + .type = FT_LB_INTF, + .adj = { + .adj = idpo.dpoi_index, + }, + }; + + lfe = fib_table_entry_update_one_path(fib_index, + &pfx_2500, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 0, + NULL, + FIB_ROUTE_PATH_INTF_RX); + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 1, + &rx_intf_0), + "2500 rx-interface 0"); + fib_table_entry_delete(fib_index, &pfx_2500, FIB_SOURCE_API); + + /* + * An MPLS mulicast entry + */ + fib_prefix_t pfx_3500 = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 3500, + .fp_eos = MPLS_EOS, + .fp_payload_proto = DPO_PROTO_IP4, + }; + fib_test_rep_bucket_t mc_0 = { + .type = FT_REP_LABEL_O_ADJ, + .label_o_adj = { + .adj = ai_mpls_10_10_10_1, + .label = 3300, + .eos = MPLS_EOS, + }, + }; + fib_test_rep_bucket_t mc_intf_0 = { + .type = FT_REP_INTF, + .adj = { + .adj = idpo.dpoi_index, + }, + }; + mpls_label_t *l3300 = NULL; + vec_add1(l3300, 3300); + + lfe = fib_table_entry_update_one_path(lfib_index, + &pfx_3500, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_MULTICAST, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + l3300, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 1, + &mc_0), + "3500 via replicate over 10.10.10.1"); + + /* + * MPLS 
Bud-node. Add a replication via an interface-receieve path + */ + lfe = fib_table_entry_path_add(lfib_index, + &pfx_3500, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_MULTICAST, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 0, + NULL, + FIB_ROUTE_PATH_INTF_RX); + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 2, + &mc_0, + &mc_intf_0), + "3500 via replicate over 10.10.10.1 and interface-rx"); + + /* + * Add a replication via an interface-free for-us path + */ + fib_test_rep_bucket_t mc_disp = { + .type = FT_REP_DISP_MFIB_LOOKUP, + .adj = { + .adj = idpo.dpoi_index, + }, + }; + lfe = fib_table_entry_path_add(lfib_index, + &pfx_3500, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_MULTICAST, + FIB_PROTOCOL_IP4, + NULL, + 5, // rpf-id + 0, // default table + 0, + NULL, + FIB_ROUTE_PATH_RPF_ID); + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 3, + &mc_0, + &mc_disp, + &mc_intf_0), + "3500 via replicate over 10.10.10.1 and interface-rx"); + + + + fib_table_entry_delete(fib_index, &pfx_3500, FIB_SOURCE_API); + dpo_reset(&idpo); + /* * cleanup */ @@ -7617,6 +7883,9 @@ lfib_test (void) FIB_TEST(lb_count == pool_elts(load_balance_pool), "Load-balance resources freed %d of %d", lb_count, pool_elts(load_balance_pool)); + FIB_TEST(0 == pool_elts(interface_dpo_pool), + "interface_dpo resources freed %d of %d", + 0, pool_elts(interface_dpo_pool)); return (0); } diff --git a/src/vnet/fib/fib_test.h b/src/vnet/fib/fib_test.h new file mode 100644 index 00000000..b98680bf --- /dev/null +++ b/src/vnet/fib/fib_test.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_TEST_H__ +#define __FIB_TEST_H__ + +#include + +typedef enum fib_test_lb_bucket_type_t_ { + FT_LB_LABEL_O_ADJ, + FT_LB_LABEL_STACK_O_ADJ, + FT_LB_LABEL_O_LB, + FT_LB_O_LB, + FT_LB_SPECIAL, + FT_LB_ADJ, + FT_LB_INTF, +} fib_test_lb_bucket_type_t; + +typedef struct fib_test_lb_bucket_t_ { + fib_test_lb_bucket_type_t type; + + union + { + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + adj_index_t adj; + } label_o_adj; + struct + { + mpls_eos_bit_t eos; + mpls_label_t label_stack[8]; + u8 label_stack_size; + u8 ttl; + adj_index_t adj; + } label_stack_o_adj; + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + index_t lb; + } label_o_lb; + struct + { + index_t adj; + } adj; + struct + { + index_t lb; + } lb; + struct + { + index_t adj; + } special; + }; +} fib_test_lb_bucket_t; + +typedef enum fib_test_rep_bucket_type_t_ { + FT_REP_LABEL_O_ADJ, + FT_REP_INTF, +} fib_test_rep_bucket_type_t; + +typedef struct fib_test_rep_bucket_t_ { + fib_test_rep_bucket_type_t type; + + union + { + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + adj_index_t adj; + } label_o_adj; + struct + { + adj_index_t adj; + } adj; + }; +} fib_test_rep_bucket_t; + + +extern int fib_test_validate_rep_v(const replicate_t *rep, + u16 n_buckets, + va_list ap); + +extern int fib_test_validate_lb_v(const load_balance_t *lb, + u16 n_buckets, + va_list ap); + +extern int fib_test_validate_entry(fib_node_index_t fei, + fib_forward_chain_type_t fct, + u16 n_buckets, + ...); + +#endif diff --git 
a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c index 2837a59d..8165f3eb 100644 --- a/src/vnet/fib/fib_types.c +++ b/src/vnet/fib/fib_types.c @@ -66,12 +66,13 @@ fib_prefix_from_ip46_addr (const ip46_address_t *addr, void fib_prefix_from_mpls_label (mpls_label_t label, + mpls_eos_bit_t eos, fib_prefix_t *pfx) { pfx->fp_proto = FIB_PROTOCOL_MPLS; pfx->fp_len = 21; pfx->fp_label = label; - pfx->fp_eos = MPLS_NON_EOS; + pfx->fp_eos = eos; } int @@ -194,17 +195,7 @@ fib_route_path_cmp (const fib_route_path_t *rpath1, if (0 != res) return (res); - if (~0 != rpath1->frp_sw_if_index && - ~0 != rpath2->frp_sw_if_index) - { - res = vnet_sw_interface_compare(vnet_get_main(), - rpath1->frp_sw_if_index, - rpath2->frp_sw_if_index); - } - else - { - res = rpath1->frp_sw_if_index - rpath2->frp_sw_if_index; - } + res = (rpath1->frp_sw_if_index - rpath2->frp_sw_if_index); if (0 != res) return (res); diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h index 1c5299a9..4cb73e8a 100644 --- a/src/vnet/fib/fib_types.h +++ b/src/vnet/fib/fib_types.h @@ -286,8 +286,36 @@ typedef enum fib_route_path_flags_t_ * Attached path */ FIB_ROUTE_PATH_ATTACHED = (1 << 3), + /** + * A Drop path - resolve the path on the drop DPO + */ + FIB_ROUTE_PATH_DROP = (1 << 4), + /** + * Don't resolve the path, use the DPO the client provides + */ + FIB_ROUTE_PATH_EXCLUSIVE = (1 << 5), + /** + * A path that results in received traffic being received/recirculated + * so that it appears to have arrived on the new interface + */ + FIB_ROUTE_PATH_INTF_RX = (1 << 6), + /** + * A local path with a RPF-ID => multicast traffic + */ + FIB_ROUTE_PATH_RPF_ID = (1 << 7), } fib_route_path_flags_t; +/** + * An RPF-ID is a numerical value that is used for RPF validation. An entry + * has an RPF-ID; when a packet egresses from (e.g.) an LSP it gains an + * RPF-ID, and these two are compared for the RPF check. + * This replaces the interface based check (since the LSP has no associated + * interface). 
+ */ +typedef u32 fib_rpf_id_t; + +#define MFIB_RPF_ID_NONE (0) + /** * @brief * A representation of a path as described by a route producer. @@ -321,17 +349,29 @@ typedef struct fib_route_path_t_ { */ ip46_address_t frp_addr; - /** - * The MPLS local Label to reursively resolve through. - * This is valid when the path type is MPLS. - */ - mpls_label_t frp_local_label; + struct { + /** + * The MPLS local Label to reursively resolve through. + * This is valid when the path type is MPLS. + */ + mpls_label_t frp_local_label; + /** + * EOS bit for the resolving label + */ + mpls_eos_bit_t frp_eos; + }; + }; + union { + /** + * The interface. + * Will be invalid for recursive paths. + */ + u32 frp_sw_if_index; + /** + * The RPF-ID + */ + fib_rpf_id_t frp_rpf_id; }; - /** - * The interface. - * Will be invalid for recursive paths. - */ - u32 frp_sw_if_index; /** * The FIB index to lookup the nexthop * Only valid for recursive paths. diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c index 4b2b76ea..19f9f3c1 100644 --- a/src/vnet/fib/mpls_fib.c +++ b/src/vnet/fib/mpls_fib.c @@ -165,6 +165,7 @@ mpls_fib_create_with_table_id (u32 table_id) lookup_dpo_add_or_lock_w_fib_index(0, // unused DPO_PROTO_IP4, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_INPUT_INTERFACE, &dpo); @@ -179,6 +180,7 @@ mpls_fib_create_with_table_id (u32 table_id) lookup_dpo_add_or_lock_w_fib_index(0, //unsued DPO_PROTO_MPLS, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_INPUT_INTERFACE, &dpo); @@ -197,6 +199,7 @@ mpls_fib_create_with_table_id (u32 table_id) lookup_dpo_add_or_lock_w_fib_index(0, //unused DPO_PROTO_IP6, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_INPUT_INTERFACE, &dpo); @@ -210,6 +213,7 @@ mpls_fib_create_with_table_id (u32 table_id) prefix.fp_eos = MPLS_NON_EOS; lookup_dpo_add_or_lock_w_fib_index(0, // unsued DPO_PROTO_MPLS, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_INPUT_INTERFACE, &dpo); @@ -320,8 +324,15 @@ 
mpls_fib_forwarding_table_update (mpls_fib_t *mf, { mpls_label_t key; - ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type); - + ASSERT((DPO_LOAD_BALANCE == dpo->dpoi_type) || + (DPO_REPLICATE == dpo->dpoi_type)); + if (CLIB_DEBUG > 0) + { + if (DPO_REPLICATE == dpo->dpoi_type) + ASSERT(dpo->dpoi_index & MPLS_IS_REPLICATE); + if (DPO_LOAD_BALANCE == dpo->dpoi_type) + ASSERT(!(dpo->dpoi_index & MPLS_IS_REPLICATE)); + } key = mpls_fib_entry_mk_key(label, eos); mf->mf_lbs[key] = dpo->dpoi_index; diff --git a/src/vnet/handoff.h b/src/vnet/handoff.h index 815206a9..04ba8bfb 100644 --- a/src/vnet/handoff.h +++ b/src/vnet/handoff.h @@ -150,7 +150,7 @@ eth_get_sym_key (ethernet_header_t * h0) ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1] ^ ip->protocol); } - else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)) + else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)) { hash_key = mpls_get_key ((mpls_unicast_header_t *) (h0 + 1)); } @@ -179,8 +179,7 @@ eth_get_sym_key (ethernet_header_t * h0) ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1] ^ ip->protocol); } - else if (outer->type == - clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)) + else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)) { hash_key = mpls_get_key ((mpls_unicast_header_t *) (outer + 1)); } @@ -210,7 +209,7 @@ eth_get_key (ethernet_header_t * h0) { hash_key = ipv6_get_key ((ip6_header_t *) (h0 + 1)); } - else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)) + else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)) { hash_key = mpls_get_key ((mpls_unicast_header_t *) (h0 + 1)); } @@ -230,8 +229,7 @@ eth_get_key (ethernet_header_t * h0) { hash_key = ipv6_get_key ((ip6_header_t *) (outer + 1)); } - else if (outer->type == - clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)) + else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)) { hash_key = mpls_get_key ((mpls_unicast_header_t *) (outer + 1)); } diff --git 
a/src/vnet/interface.c b/src/vnet/interface.c index 2a1e70e8..45417b2f 100644 --- a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -1360,7 +1360,7 @@ vnet_link_to_l3_proto (vnet_link_t link) case VNET_LINK_IP6: return (VNET_L3_PACKET_TYPE_IP6); case VNET_LINK_MPLS: - return (VNET_L3_PACKET_TYPE_MPLS_UNICAST); + return (VNET_L3_PACKET_TYPE_MPLS); case VNET_LINK_ARP: return (VNET_L3_PACKET_TYPE_ARP); case VNET_LINK_ETHERNET: diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index 5c2df32c..6af1714f 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -478,6 +478,7 @@ define ip_mroute_add_del u32 table_id; u32 entry_flags; u32 itf_flags; + u32 rpf_id; u16 grp_address_length; u8 create_vrf_if_needed; u8 is_add; @@ -518,6 +519,8 @@ manual_endian manual_print define ip_mfib_details { u32 context; u32 table_id; + u32 entry_flags; + u32 rpf_id; u8 address_length; u8 grp_address[4]; u8 src_address[4]; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index fdfe7f63..9fdf9b3c 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -2752,6 +2752,16 @@ ip4_rewrite_mcast (vlib_main_t * vm, return ip4_rewrite_inline (vm, node, frame, 0, 0, 1); } +static uword +ip4_mcast_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip4_rewrite_inline (vm, node, frame, 1, 1, 1); + else + return ip4_rewrite_inline (vm, node, frame, 0, 1, 1); +} + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_rewrite_node) = { .function = ip4_rewrite, @@ -2778,6 +2788,16 @@ VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = { }; VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast) +VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = { + .function = ip4_mcast_midchain, + .name = "ip4-mcast-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_rewrite_trace, + .sibling_of = "ip4-rewrite", +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, 
ip4_mcast_midchain) + VLIB_REGISTER_NODE (ip4_midchain_node) = { .function = ip4_midchain, .name = "ip4-midchain", diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index c2fc4f87..a369f79f 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -2246,6 +2246,16 @@ ip6_midchain (vlib_main_t * vm, return ip6_rewrite_inline (vm, node, frame, 0, 1, 0); } +static uword +ip6_mcast_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip6_rewrite_inline (vm, node, frame, 1, 1, 1); + else + return ip6_rewrite_inline (vm, node, frame, 0, 1, 1); +} + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_midchain_node) = { @@ -2290,6 +2300,19 @@ VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) = VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast); +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_mcast_midchain_node, static) = +{ + .function = ip6_mcast_midchain, + .name = "ip6-mcast-midchain", + .vector_size = sizeof (u32), + .format_trace = format_ip6_rewrite_trace, + .sibling_of = "ip6-rewrite", +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_mcast_midchain_node, ip6_mcast_midchain); + /* * Hop-by-Hop handling */ diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 2af546df..58b997aa 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -557,6 +557,7 @@ ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) case IP_LOOKUP_NEXT_PUNT: case IP_LOOKUP_NEXT_LOCAL: case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: case IP_LOOKUP_NEXT_MIDCHAIN: case IP_LOOKUP_NEXT_ICMP_ERROR: case IP_LOOKUP_N_NEXT: diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index b9f1782b..9c9cb4a4 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -438,17 +438,20 @@ vl_api_ip6_fib_dump_t_handler (vl_api_ip6_fib_dump_t * mp) } static void -send_ip_mfib_details (vpe_api_main_t * am, -
unix_shared_memory_queue_t * q, - u32 table_id, - mfib_prefix_t * pfx, - fib_route_path_encode_t * api_rpaths, u32 context) +send_ip_mfib_details (unix_shared_memory_queue_t * q, + u32 context, u32 table_id, fib_node_index_t mfei) { + fib_route_path_encode_t *api_rpath, *api_rpaths = NULL; vl_api_ip_mfib_details_t *mp; - fib_route_path_encode_t *api_rpath; + mfib_entry_t *mfib_entry; vl_api_fib_path_t *fp; + mfib_prefix_t pfx; int path_count; + mfib_entry = mfib_entry_get (mfei); + mfib_entry_get_prefix (mfei, &pfx); + mfib_entry_encode (mfei, &api_rpaths); + path_count = vec_len (api_rpaths); mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp)); if (!mp) @@ -457,12 +460,14 @@ send_ip_mfib_details (vpe_api_main_t * am, mp->_vl_msg_id = ntohs (VL_API_IP_FIB_DETAILS); mp->context = context; + mp->rpf_id = mfib_entry->mfe_rpf_id; + mp->entry_flags = mfib_entry->mfe_flags; mp->table_id = htonl (table_id); - mp->address_length = pfx->fp_len; - memcpy (mp->grp_address, &pfx->fp_grp_addr.ip4, - sizeof (pfx->fp_grp_addr.ip4)); - memcpy (mp->src_address, &pfx->fp_src_addr.ip4, - sizeof (pfx->fp_src_addr.ip4)); + mp->address_length = pfx.fp_len; + memcpy (mp->grp_address, &pfx.fp_grp_addr.ip4, + sizeof (pfx.fp_grp_addr.ip4)); + memcpy (mp->src_address, &pfx.fp_src_addr.ip4, + sizeof (pfx.fp_src_addr.ip4)); mp->count = htonl (path_count); fp = mp->path; @@ -475,6 +480,7 @@ send_ip_mfib_details (vpe_api_main_t * am, copy_fib_next_hop (api_rpath, fp); fp++; } + vec_free (api_rpaths); vl_msg_api_send_shmem (q, (u8 *) & mp); } @@ -497,13 +503,10 @@ vl_api_ip_mfib_table_dump_walk (fib_node_index_t fei, void *arg) static void vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp) { - vpe_api_main_t *am = &vpe_api_main; unix_shared_memory_queue_t *q; ip4_main_t *im = &ip4_main; mfib_table_t *mfib_table; fib_node_index_t *mfeip; - mfib_prefix_t pfx; - fib_route_path_encode_t *api_rpaths = NULL; vl_api_ip_mfib_dump_ctc_t ctx = { .entries = NULL, }; @@ -524,21 +527,16 
@@ vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp) vec_foreach (mfeip, ctx.entries) { - mfib_entry_get_prefix (*mfeip, &pfx); - mfib_entry_encode (*mfeip, &api_rpaths); - send_ip_mfib_details (am, q, + send_ip_mfib_details (q, mp->context, mfib_table->mft_table_id, - &pfx, api_rpaths, - mp->context); + *mfeip); } - vec_reset_length (api_rpaths); vec_reset_length (ctx.entries); })); /* *INDENT-ON* */ vec_free (ctx.entries); - vec_free (api_rpaths); } static void @@ -705,10 +703,13 @@ add_del_route_t_handler (u8 is_multipath, u8 is_unreach, u8 is_prohibit, u8 is_local, + u8 is_multicast, u8 is_classify, u32 classify_table_index, u8 is_resolve_host, u8 is_resolve_attached, + u8 is_interface_rx, + u8 is_rpf_id, u32 fib_index, const fib_prefix_t * prefix, u8 next_hop_proto_is_ip4, @@ -731,16 +732,24 @@ add_del_route_t_handler (u8 is_multipath, .frp_label_stack = next_hop_out_label_stack, }; fib_route_path_t *paths = NULL; + fib_entry_flag_t entry_flags = FIB_ENTRY_FLAG_NONE; if (MPLS_LABEL_INVALID != next_hop_via_label) { path.frp_proto = FIB_PROTOCOL_MPLS; path.frp_local_label = next_hop_via_label; + path.frp_eos = MPLS_NON_EOS; } if (is_resolve_host) path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST; if (is_resolve_attached) path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED; + if (is_interface_rx) + path_flags |= FIB_ROUTE_PATH_INTF_RX; + if (is_rpf_id) + path_flags |= FIB_ROUTE_PATH_RPF_ID; + if (is_multicast) + entry_flags |= FIB_ENTRY_FLAG_MULTICAST; path.frp_flags = path_flags; @@ -754,8 +763,7 @@ add_del_route_t_handler (u8 is_multipath, if (is_add) fib_table_entry_path_add2 (fib_index, prefix, - FIB_SOURCE_API, - FIB_ENTRY_FLAG_NONE, paths); + FIB_SOURCE_API, entry_flags, paths); else fib_table_entry_path_remove2 (fib_index, prefix, FIB_SOURCE_API, paths); @@ -826,8 +834,7 @@ add_del_route_t_handler (u8 is_multipath, { vec_add1 (paths, path); fib_table_entry_update (fib_index, - prefix, - FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, paths); + prefix, 
FIB_SOURCE_API, entry_flags, paths); vec_free (paths); } else @@ -847,7 +854,7 @@ add_del_route_check (fib_protocol_t table_proto, fib_protocol_t next_hop_table_proto, u32 next_hop_table_id, u8 create_missing_tables, - u32 * fib_index, u32 * next_hop_fib_index) + u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index) { vnet_main_t *vnm = vnet_get_main (); @@ -866,7 +873,7 @@ add_del_route_check (fib_protocol_t table_proto, } } - if (~0 != ntohl (next_hop_sw_if_index)) + if (!is_rpf_id && ~0 != ntohl (next_hop_sw_if_index)) { if (pool_is_free_index (vnm->interface_main.sw_interfaces, ntohl (next_hop_sw_if_index))) @@ -876,16 +883,27 @@ add_del_route_check (fib_protocol_t table_proto, } else { - *next_hop_fib_index = fib_table_find (next_hop_table_proto, - ntohl (next_hop_table_id)); + if (is_rpf_id) + *next_hop_fib_index = mfib_table_find (next_hop_table_proto, + ntohl (next_hop_table_id)); + else + *next_hop_fib_index = fib_table_find (next_hop_table_proto, + ntohl (next_hop_table_id)); if (~0 == *next_hop_fib_index) { if (create_missing_tables) { - *next_hop_fib_index = - fib_table_find_or_create_and_lock (next_hop_table_proto, - ntohl (next_hop_table_id)); + if (is_rpf_id) + *next_hop_fib_index = + mfib_table_find_or_create_and_lock (next_hop_table_proto, + ntohl + (next_hop_table_id)); + else + *next_hop_fib_index = + fib_table_find_or_create_and_lock (next_hop_table_proto, + ntohl + (next_hop_table_id)); } else { @@ -910,7 +928,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->next_hop_sw_if_index, FIB_PROTOCOL_IP4, mp->next_hop_table_id, - mp->create_vrf_if_needed, + mp->create_vrf_if_needed, 0, &fib_index, &next_hop_fib_index); if (0 != rv) @@ -943,11 +961,11 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->is_drop, mp->is_unreach, mp->is_prohibit, - mp->is_local, + mp->is_local, 0, mp->is_classify, mp->classify_table_index, mp->is_resolve_host, - mp->is_resolve_attached, + mp->is_resolve_attached, 0, 0, fib_index, 
&pfx, 1, &nh, ntohl (mp->next_hop_sw_if_index), @@ -969,7 +987,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->next_hop_sw_if_index, FIB_PROTOCOL_IP6, mp->next_hop_table_id, - mp->create_vrf_if_needed, + mp->create_vrf_if_needed, 0, &fib_index, &next_hop_fib_index); if (0 != rv) @@ -1002,11 +1020,11 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->is_drop, mp->is_unreach, mp->is_prohibit, - mp->is_local, + mp->is_local, 0, mp->is_classify, mp->classify_table_index, mp->is_resolve_host, - mp->is_resolve_attached, + mp->is_resolve_attached, 0, 0, fib_index, &pfx, 0, &nh, ntohl (mp->next_hop_sw_if_index), next_hop_fib_index, @@ -1075,6 +1093,7 @@ mroute_add_del_handler (u8 is_add, u32 fib_index, const mfib_prefix_t * prefix, u32 entry_flags, + fib_rpf_id_t rpf_id, u32 next_hop_sw_if_index, u32 itf_flags) { stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ ); @@ -1091,7 +1110,7 @@ mroute_add_del_handler (u8 is_add, if (!is_local && ~0 == next_hop_sw_if_index) { mfib_table_entry_update (fib_index, prefix, - MFIB_SOURCE_API, entry_flags); + MFIB_SOURCE_API, rpf_id, entry_flags); } else { @@ -1152,6 +1171,7 @@ api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp) mp->is_local, fib_index, &pfx, ntohl (mp->entry_flags), + ntohl (mp->rpf_id), ntohl (mp->next_hop_sw_if_index), ntohl (mp->itf_flags))); } diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index ec9a1f97..597de06b 100755 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -450,6 +450,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, unformat_mpls_unicast_label, &rpath.frp_local_label)) { rpath.frp_weight = 1; + rpath.frp_eos = MPLS_NON_EOS; rpath.frp_proto = FIB_PROTOCOL_MPLS; rpath.frp_sw_if_index = ~0; vec_add1 (rpaths, rpath); @@ -923,7 +924,7 @@ vnet_ip_mroute_cmd (vlib_main_t * vm, else if (eflags) { mfib_table_entry_update (fib_index, &pfx, MFIB_SOURCE_CLI, - eflags); + MFIB_RPF_ID_NONE, eflags); } else { diff --git 
a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c index efa724e0..d2954e96 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c +++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c @@ -88,6 +88,7 @@ ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix) (ip_prefix_version (dst_prefix) == IP6 ? DPO_PROTO_IP6 : DPO_PROTO_IP4), + LOOKUP_UNICAST, LOOKUP_INPUT_SRC_ADDR, LOOKUP_TABLE_FROM_CONFIG, &src_lkup_dpo); diff --git a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c index 164cafa1..3ed7cba7 100644 --- a/src/vnet/mfib/ip4_mfib.c +++ b/src/vnet/mfib/ip4_mfib.c @@ -72,6 +72,7 @@ ip4_create_mfib_with_table_id (u32 table_id) mfib_table_entry_update(mfib_table->mft_index, &prefix, MFIB_SOURCE_DEFAULT_ROUTE, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_DROP); } diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c index 991b91c6..116fee22 100644 --- a/src/vnet/mfib/ip6_mfib.c +++ b/src/vnet/mfib/ip6_mfib.c @@ -195,6 +195,7 @@ ip6_create_mfib_with_table_id (u32 table_id) mfib_table_entry_update(mfib_table->mft_index, &all_zeros, MFIB_SOURCE_DEFAULT_ROUTE, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_DROP); /* diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c index 1aa8e086..847f25e7 100644 --- a/src/vnet/mfib/mfib_entry.c +++ b/src/vnet/mfib/mfib_entry.c @@ -48,6 +48,15 @@ #define MFIB_ENTRY_DBG(_e, _fmt, _args...) 
#endif +/** + * MFIB extensions to each path + */ +typedef struct mfib_path_ext_t_ +{ + mfib_itf_flags_t mfpe_flags; + fib_node_index_t mfpe_path; +} mfib_path_ext_t; + /** * The source of an MFIB entry */ @@ -58,22 +67,39 @@ typedef struct mfib_entry_src_t_ */ mfib_source_t mfes_src; + /** + * Route flags + */ + mfib_entry_flags_t mfes_flags; + /** * The path-list of forwarding interfaces */ fib_node_index_t mfes_pl; /** - * Route flags + * RPF-ID */ - mfib_entry_flags_t mfes_flags; + fib_rpf_id_t mfes_rpf_id; + + /** + * Hash table of path extensions + */ + mfib_path_ext_t *mfes_exts; /** - * The hash table of all interfaces + * The hash table of all interfaces. + * This is forwarding time information derived from the paths + * and their extensions. */ mfib_itf_t *mfes_itfs; } mfib_entry_src_t; +/** + * Pool of path extensions + */ +static mfib_path_ext_t *mfib_path_ext_pool; + /** * String names for each source */ @@ -123,6 +149,24 @@ format_mfib_entry_dpo (u8 * s, va_list * args) MFIB_ENTRY_FORMAT_BRIEF)); } +static inline mfib_path_ext_t * +mfib_entry_path_ext_get (index_t mi) +{ + return (pool_elt_at_index(mfib_path_ext_pool, mi)); +} + +static u8 * +format_mfib_entry_path_ext (u8 * s, va_list * args) +{ + mfib_path_ext_t *path_ext; + index_t mpi = va_arg(*args, index_t); + + path_ext = mfib_entry_path_ext_get(mpi); + return (format(s, "path:%d flags:%U", + path_ext->mfpe_path, + format_mfib_itf_flags, path_ext->mfpe_flags)); +} + u8 * format_mfib_entry (u8 * s, va_list * args) { @@ -141,6 +185,8 @@ format_mfib_entry (u8 * s, va_list * args) if (level >= MFIB_ENTRY_FORMAT_DETAIL) { + fib_node_index_t path_index, mpi; + s = format (s, "\n"); s = format (s, " fib:%d", mfib_entry->mfe_fib_index); s = format (s, " index:%d", mfib_entry_get_index(mfib_entry)); @@ -153,6 +199,14 @@ format_mfib_entry (u8 * s, va_list * args) { s = fib_path_list_format(msrc->mfes_pl, s); } + s = format (s, " Extensions:\n", + mfib_source_names[msrc->mfes_src]); + 
hash_foreach(path_index, mpi, msrc->mfes_exts, + ({ + s = format(s, " %U\n", format_mfib_entry_path_ext, mpi); + })); + s = format (s, " Interface-Forwarding:\n", + mfib_source_names[msrc->mfes_src]); hash_foreach(sw_if_index, mfi, msrc->mfes_itfs, ({ s = format(s, " %U\n", format_mfib_itf, mfi); @@ -165,7 +219,7 @@ format_mfib_entry (u8 * s, va_list * args) ({ s = format(s, "\n %U", format_mfib_itf, mfi); })); - + s = format(s, "\n RPF-ID:%d", mfib_entry->mfe_rpf_id); s = format(s, "\n %U-chain\n %U", format_fib_forw_chain_type, mfib_entry_get_default_chain_type(mfib_entry), @@ -314,13 +368,6 @@ mfib_entry_src_remove (mfib_entry_t *mfib_entry, } } -static int -mfib_entry_src_n_itfs (const mfib_entry_src_t *msrc) -{ - return (hash_elts(msrc->mfes_itfs)); -} - - static void mfib_entry_last_lock_gone (fib_node_t *node) { @@ -338,7 +385,6 @@ mfib_entry_last_lock_gone (fib_node_t *node) mfib_entry_src_flush(msrc); } - fib_path_list_unlock(mfib_entry->mfe_parent); vec_free(mfib_entry->mfe_srcs); fib_node_deinit(&mfib_entry->mfe_node); @@ -417,10 +463,9 @@ mfib_entry_alloc (u32 fib_index, mfib_entry->mfe_flags = 0; mfib_entry->mfe_fib_index = fib_index; mfib_entry->mfe_prefix = *prefix; - mfib_entry->mfe_parent = FIB_NODE_INDEX_INVALID; - mfib_entry->mfe_sibling = FIB_NODE_INDEX_INVALID; mfib_entry->mfe_srcs = NULL; mfib_entry->mfe_itfs = NULL; + mfib_entry->mfe_rpf_id = MFIB_RPF_ID_NONE; dpo_reset(&mfib_entry->mfe_rep); @@ -431,10 +476,57 @@ mfib_entry_alloc (u32 fib_index, return (mfib_entry); } +static inline mfib_path_ext_t * +mfib_entry_path_ext_find (mfib_path_ext_t *exts, + fib_node_index_t path_index) +{ + uword *p; + + p = hash_get(exts, path_index); + + if (NULL != p) + { + return (mfib_entry_path_ext_get(p[0])); + } + + return (NULL); +} + +static mfib_path_ext_t* +mfib_path_ext_add (mfib_entry_src_t *msrc, + fib_node_index_t path_index, + mfib_itf_flags_t mfi_flags) +{ + mfib_path_ext_t *path_ext; + + pool_get(mfib_path_ext_pool, path_ext); + + 
path_ext->mfpe_flags = mfi_flags; + path_ext->mfpe_path = path_index; + + hash_set(msrc->mfes_exts, path_index, + path_ext - mfib_path_ext_pool); + + return (path_ext); +} + +static void +mfib_path_ext_remove (mfib_entry_src_t *msrc, + fib_node_index_t path_index) +{ + mfib_path_ext_t *path_ext; + + path_ext = mfib_entry_path_ext_find(msrc->mfes_exts, path_index); + + hash_unset(msrc->mfes_exts, path_index); + pool_put(mfib_path_ext_pool, path_ext); +} + typedef struct mfib_entry_collect_forwarding_ctx_t_ { load_balance_path_t * next_hops; fib_forward_chain_type_t fct; + mfib_entry_src_t *msrc; } mfib_entry_collect_forwarding_ctx_t; static int @@ -455,6 +547,20 @@ mfib_entry_src_collect_forwarding (fib_node_index_t pl_index, return (!0); } + /* + * If the path is not forwarding to use it + */ + mfib_path_ext_t *path_ext; + + path_ext = mfib_entry_path_ext_find(ctx->msrc->mfes_exts, + path_index); + + if (NULL != path_ext && + !(path_ext->mfpe_flags & MFIB_ITF_FLAG_FORWARD)) + { + return (!0); + } + switch (ctx->fct) { case FIB_FORW_CHAIN_TYPE_MCAST_IP4: @@ -483,46 +589,61 @@ mfib_entry_src_collect_forwarding (fib_node_index_t pl_index, } static void -mfib_entry_stack (mfib_entry_t *mfib_entry) +mfib_entry_stack (mfib_entry_t *mfib_entry, + mfib_entry_src_t *msrc) { dpo_proto_t dp; dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry)); - if (FIB_NODE_INDEX_INVALID != mfib_entry->mfe_parent) + if (NULL != msrc && + FIB_NODE_INDEX_INVALID != msrc->mfes_pl) { mfib_entry_collect_forwarding_ctx_t ctx = { .next_hops = NULL, .fct = mfib_entry_get_default_chain_type(mfib_entry), + .msrc = msrc, }; - fib_path_list_walk(mfib_entry->mfe_parent, + fib_path_list_walk(msrc->mfes_pl, mfib_entry_src_collect_forwarding, &ctx); if (!(MFIB_ENTRY_FLAG_EXCLUSIVE & mfib_entry->mfe_flags)) { - /* - * each path contirbutes a next-hop. form a replicate - * from those choices. 
- */ - if (!dpo_id_is_valid(&mfib_entry->mfe_rep) || - dpo_is_drop(&mfib_entry->mfe_rep)) + if (NULL == ctx.next_hops) { - dpo_id_t tmp_dpo = DPO_INVALID; - - dpo_set(&tmp_dpo, - DPO_REPLICATE, dp, - replicate_create(0, dp)); - + /* + * no next-hops, stack directly on the drop + */ dpo_stack(DPO_MFIB_ENTRY, dp, &mfib_entry->mfe_rep, - &tmp_dpo); - - dpo_reset(&tmp_dpo); + drop_dpo_get(dp)); + } + else + { + /* + * each path contirbutes a next-hop. form a replicate + * from those choices. + */ + if (!dpo_id_is_valid(&mfib_entry->mfe_rep) || + dpo_is_drop(&mfib_entry->mfe_rep)) + { + dpo_id_t tmp_dpo = DPO_INVALID; + + dpo_set(&tmp_dpo, + DPO_REPLICATE, dp, + replicate_create(0, dp)); + + dpo_stack(DPO_MFIB_ENTRY, dp, + &mfib_entry->mfe_rep, + &tmp_dpo); + + dpo_reset(&tmp_dpo); + } + replicate_multipath_update(&mfib_entry->mfe_rep, + ctx.next_hops); } - replicate_multipath_update(&mfib_entry->mfe_rep, - ctx.next_hops); } else { @@ -548,11 +669,11 @@ mfib_entry_stack (mfib_entry_t *mfib_entry) } } -static void -mfib_entry_forwarding_path_add (mfib_entry_src_t *msrc, - const fib_route_path_t *rpath) +static fib_node_index_t +mfib_entry_src_path_add (mfib_entry_src_t *msrc, + const fib_route_path_t *rpath) { - fib_node_index_t old_pl_index; + fib_node_index_t path_index; fib_route_path_t *rpaths; ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags)); @@ -563,32 +684,26 @@ mfib_entry_forwarding_path_add (mfib_entry_src_t *msrc, rpaths = NULL; vec_add1(rpaths, rpath[0]); - old_pl_index = msrc->mfes_pl; - if (FIB_NODE_INDEX_INVALID == msrc->mfes_pl) { - msrc->mfes_pl = - fib_path_list_create(FIB_PATH_LIST_FLAG_NO_URPF, - rpaths); - } - else - { - msrc->mfes_pl = - fib_path_list_copy_and_path_add(msrc->mfes_pl, - FIB_PATH_LIST_FLAG_NO_URPF, - rpaths); + /* A non-shared path-list */ + msrc->mfes_pl = fib_path_list_create(FIB_PATH_LIST_FLAG_NO_URPF, + NULL); + fib_path_list_lock(msrc->mfes_pl); } - fib_path_list_lock(msrc->mfes_pl); - fib_path_list_unlock(old_pl_index); + 
+ path_index = fib_path_list_path_add(msrc->mfes_pl, rpaths); vec_free(rpaths); + + return (path_index); } -static int -mfib_entry_forwarding_path_remove (mfib_entry_src_t *msrc, - const fib_route_path_t *rpath) +static fib_node_index_t +mfib_entry_src_path_remove (mfib_entry_src_t *msrc, + const fib_route_path_t *rpath) { - fib_node_index_t old_pl_index; + fib_node_index_t path_index; fib_route_path_t *rpaths; ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags)); @@ -599,56 +714,31 @@ mfib_entry_forwarding_path_remove (mfib_entry_src_t *msrc, rpaths = NULL; vec_add1(rpaths, rpath[0]); - old_pl_index = msrc->mfes_pl; - - msrc->mfes_pl = - fib_path_list_copy_and_path_remove(msrc->mfes_pl, - FIB_PATH_LIST_FLAG_NONE, - rpaths); - - fib_path_list_lock(msrc->mfes_pl); - fib_path_list_unlock(old_pl_index); + path_index = fib_path_list_path_remove(msrc->mfes_pl, rpaths); vec_free(rpaths); - return (FIB_NODE_INDEX_INVALID != msrc->mfes_pl); + return (path_index); } static void mfib_entry_recalculate_forwarding (mfib_entry_t *mfib_entry) { - fib_node_index_t old_pl_index; mfib_entry_src_t *bsrc; - old_pl_index = mfib_entry->mfe_parent; - /* * copy the forwarding data from the bast source */ bsrc = mfib_entry_get_best_src(mfib_entry); - if (NULL == bsrc) - { - mfib_entry->mfe_parent = FIB_NODE_INDEX_INVALID; - } - else + if (NULL != bsrc) { - mfib_entry->mfe_parent = bsrc->mfes_pl; mfib_entry->mfe_flags = bsrc->mfes_flags; mfib_entry->mfe_itfs = bsrc->mfes_itfs; + mfib_entry->mfe_rpf_id = bsrc->mfes_rpf_id; } - /* - * re-stack the entry on the best forwarding info. 
- */ - if (old_pl_index != mfib_entry->mfe_parent || - FIB_NODE_INDEX_INVALID == old_pl_index) - { - mfib_entry_stack(mfib_entry); - - fib_path_list_lock(mfib_entry->mfe_parent); - fib_path_list_unlock(old_pl_index); - } + mfib_entry_stack(mfib_entry, bsrc); } @@ -656,6 +746,7 @@ fib_node_index_t mfib_entry_create (u32 fib_index, mfib_source_t source, const mfib_prefix_t *prefix, + fib_rpf_id_t rpf_id, mfib_entry_flags_t entry_flags) { fib_node_index_t mfib_entry_index; @@ -666,6 +757,7 @@ mfib_entry_create (u32 fib_index, &mfib_entry_index); msrc = mfib_entry_src_find_or_create(mfib_entry, source); msrc->mfes_flags = entry_flags; + msrc->mfes_rpf_id = rpf_id; mfib_entry_recalculate_forwarding(mfib_entry); @@ -682,13 +774,14 @@ static int mfib_entry_src_ok_for_delete (const mfib_entry_src_t *msrc) { return ((MFIB_ENTRY_FLAG_NONE == msrc->mfes_flags && - 0 == mfib_entry_src_n_itfs(msrc))); + 0 == fib_path_list_get_n_paths(msrc->mfes_pl))); } int mfib_entry_update (fib_node_index_t mfib_entry_index, mfib_source_t source, mfib_entry_flags_t entry_flags, + fib_rpf_id_t rpf_id, index_t repi) { mfib_entry_t *mfib_entry; @@ -697,6 +790,7 @@ mfib_entry_update (fib_node_index_t mfib_entry_index, mfib_entry = mfib_entry_get(mfib_entry_index); msrc = mfib_entry_src_find_or_create(mfib_entry, source); msrc->mfes_flags = entry_flags; + msrc->mfes_rpf_id = rpf_id; if (INDEX_INVALID != repi) { @@ -768,55 +862,79 @@ mfib_entry_path_update (fib_node_index_t mfib_entry_index, const fib_route_path_t *rpath, mfib_itf_flags_t itf_flags) { + fib_node_index_t path_index; + mfib_path_ext_t *path_ext; + mfib_itf_flags_t old, new; mfib_entry_t *mfib_entry; mfib_entry_src_t *msrc; - mfib_itf_t *mfib_itf; mfib_entry = mfib_entry_get(mfib_entry_index); ASSERT(NULL != mfib_entry); msrc = mfib_entry_src_find_or_create(mfib_entry, source); /* - * search for the interface in the current set + * add the path to the path-list. If it's a duplicate we'll get + * back the original path. 
+ */ + path_index = mfib_entry_src_path_add(msrc, rpath); + + /* + * find the path extension for that path */ - mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs, - rpath[0].frp_sw_if_index); + path_ext = mfib_entry_path_ext_find(msrc->mfes_exts, path_index); - if (NULL == mfib_itf) + if (NULL == path_ext) { - /* - * this is a path we do not yet have. If it is forwarding then we - * add it to the replication set - */ - if (itf_flags & MFIB_ITF_FLAG_FORWARD) - { - mfib_entry_forwarding_path_add(msrc, rpath); - } - /* - * construct a new ITF for this entry's list - */ - mfib_entry_itf_add(msrc, - rpath[0].frp_sw_if_index, - mfib_itf_create(rpath[0].frp_sw_if_index, - itf_flags)); + old = MFIB_ITF_FLAG_NONE; + path_ext = mfib_path_ext_add(msrc, path_index, itf_flags); } else { - int was_forwarding = !!(mfib_itf->mfi_flags & MFIB_ITF_FLAG_FORWARD); - int is_forwarding = !!(itf_flags & MFIB_ITF_FLAG_FORWARD); + old = path_ext->mfpe_flags; + path_ext->mfpe_flags = itf_flags; + } - if (!was_forwarding && is_forwarding) - { - mfib_entry_forwarding_path_add(msrc, rpath); - } - else if (was_forwarding && !is_forwarding) + /* + * Has the path changed its contribution to the input interface set. + * Which only paths with interfaces can do... 
+ */ + if (~0 != rpath[0].frp_sw_if_index) + { + mfib_itf_t *mfib_itf; + + new = itf_flags; + + if (old != new) { - mfib_entry_forwarding_path_remove(msrc, rpath); + if (MFIB_ITF_FLAG_NONE == new) + { + /* + * no more interface flags on this path, remove + * from the data-plane set + */ + mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index); + } + else if (MFIB_ITF_FLAG_NONE == old) + { + /* + * This interface is now contributing + */ + mfib_entry_itf_add(msrc, + rpath[0].frp_sw_if_index, + mfib_itf_create(rpath[0].frp_sw_if_index, + itf_flags)); + } + else + { + /* + * change of flag contributions + */ + mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs, + rpath[0].frp_sw_if_index); + /* Seen by packets inflight */ + mfib_itf->mfi_flags = new; + } } - /* - * packets in flight see these updates. - */ - mfib_itf->mfi_flags = itf_flags; } mfib_entry_recalculate_forwarding(mfib_entry); @@ -833,9 +951,9 @@ mfib_entry_path_remove (fib_node_index_t mfib_entry_index, mfib_source_t source, const fib_route_path_t *rpath) { + fib_node_index_t path_index; mfib_entry_t *mfib_entry; mfib_entry_src_t *msrc; - mfib_itf_t *mfib_itf; mfib_entry = mfib_entry_get(mfib_entry_index); ASSERT(NULL != mfib_entry); @@ -850,33 +968,23 @@ mfib_entry_path_remove (fib_node_index_t mfib_entry_index, } /* - * search for the interface in the current set + * remove the path from the path-list. If it's not there we'll get + * back invalid */ - mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs, - rpath[0].frp_sw_if_index); + path_index = mfib_entry_src_path_remove(msrc, rpath); - if (NULL == mfib_itf) + if (FIB_NODE_INDEX_INVALID != path_index) { /* - * removing a path that does not exist + * don't need the extension, nor the interface anymore */ - return (mfib_entry_ok_for_delete(mfib_entry)); - } - - /* - * we have this path. 
If it is forwarding then we - * remove it to the replication set - */ - if (mfib_itf->mfi_flags & MFIB_ITF_FLAG_FORWARD) - { - mfib_entry_forwarding_path_remove(msrc, rpath); + mfib_path_ext_remove(msrc, path_index); + if (~0 != rpath[0].frp_sw_if_index) + { + mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index); + } } - /* - * remove the interface/path from this entry's list - */ - mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index); - if (mfib_entry_src_ok_for_delete(msrc)) { /* @@ -1057,11 +1165,14 @@ mfib_entry_encode (fib_node_index_t mfib_entry_index, fib_route_path_encode_t **api_rpaths) { mfib_entry_t *mfib_entry; + mfib_entry_src_t *bsrc; mfib_entry = mfib_entry_get(mfib_entry_index); - if (FIB_NODE_INDEX_INVALID != mfib_entry->mfe_parent) + bsrc = mfib_entry_get_best_src(mfib_entry); + + if (FIB_NODE_INDEX_INVALID != bsrc->mfes_pl) { - fib_path_list_walk(mfib_entry->mfe_parent, + fib_path_list_walk(bsrc->mfes_pl, fib_path_encode, api_rpaths); } diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h index dc8f49aa..4f62b18e 100644 --- a/src/vnet/mfib/mfib_entry.h +++ b/src/vnet/mfib/mfib_entry.h @@ -42,17 +42,6 @@ typedef struct mfib_entry_t_ { * The index of the FIB table this entry is in */ u32 mfe_fib_index; - /** - * the path-list for which this entry is a child. This is also the path-list - * that is contributing forwarding for this entry. - */ - fib_node_index_t mfe_parent; - /** - * index of this entry in the parent's child list. - * This is set when this entry is added as a child, but can also - * be changed by the parent as it manages its list. - */ - u32 mfe_sibling; /** * A vector of sources contributing forwarding @@ -65,7 +54,7 @@ typedef struct mfib_entry_t_ { CLIB_CACHE_LINE_ALIGN_MARK(cacheline1); /** - * The Replicate DPO used for forwarding. + * The DPO used for forwarding; replicate, drop, etc.. 
*/ dpo_id_t mfe_rep; @@ -74,6 +63,11 @@ typedef struct mfib_entry_t_ { */ mfib_entry_flags_t mfe_flags; + /** + * RPF-ID used when the packets ingress not from an interface + */ + fib_rpf_id_t mfe_rpf_id; + /** * A hash table of interfaces */ @@ -90,11 +84,13 @@ extern u8 *format_mfib_entry(u8 * s, va_list * args); extern fib_node_index_t mfib_entry_create(u32 fib_index, mfib_source_t source, const mfib_prefix_t *prefix, + fib_rpf_id_t rpf_id, mfib_entry_flags_t entry_flags); extern int mfib_entry_update(fib_node_index_t fib_entry_index, mfib_source_t source, mfib_entry_flags_t entry_flags, + fib_rpf_id_t rpf_id, index_t rep_dpo); extern void mfib_entry_path_update(fib_node_index_t fib_entry_index, diff --git a/src/vnet/mfib/mfib_forward.c b/src/vnet/mfib/mfib_forward.c index 5fe0a57c..3d8f4f98 100644 --- a/src/vnet/mfib/mfib_forward.c +++ b/src/vnet/mfib/mfib_forward.c @@ -380,13 +380,27 @@ mfib_forward_rpf (vlib_main_t * vm, * for the case of throughput traffic that is not replicated * to the host stack nor sets local flags */ - if (PREDICT_TRUE(NULL != mfi0)) + + /* + * If the mfib entry has a configured RPF-ID check that + * in preference to an interface based RPF + */ + if (MFIB_RPF_ID_NONE != mfe0->mfe_rpf_id) { - iflags0 = mfi0->mfi_flags; + iflags0 = (mfe0->mfe_rpf_id == vnet_buffer(b0)->ip.rpf_id ? 
+ MFIB_ITF_FLAG_ACCEPT : + MFIB_ITF_FLAG_NONE); } else { - iflags0 = MFIB_ITF_FLAG_NONE; + if (PREDICT_TRUE(NULL != mfi0)) + { + iflags0 = mfi0->mfi_flags; + } + else + { + iflags0 = MFIB_ITF_FLAG_NONE; + } } eflags0 = mfe0->mfe_flags; @@ -436,17 +450,16 @@ mfib_forward_rpf (vlib_main_t * vm, { mfib_forward_rpf_trace_t *t0; - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); t0->entry_index = mfei0; + t0->itf_flags = iflags0; if (NULL == mfi0) { t0->sw_if_index = ~0; - t0->itf_flags = MFIB_ITF_FLAG_NONE; } else { t0->sw_if_index = mfi0->mfi_sw_if_index; - t0->itf_flags = mfi0->mfi_flags; } } vlib_validate_buffer_enqueue_x1 (vm, node, next, @@ -478,7 +491,7 @@ VLIB_REGISTER_NODE (ip4_mfib_forward_rpf_node, static) = { .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT, .next_nodes = { - [MFIB_FORWARD_RPF_NEXT_DROP] = "error-drop", + [MFIB_FORWARD_RPF_NEXT_DROP] = "ip4-drop", }, }; @@ -503,7 +516,7 @@ VLIB_REGISTER_NODE (ip6_mfib_forward_rpf_node, static) = { .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT, .next_nodes = { - [MFIB_FORWARD_RPF_NEXT_DROP] = "error-drop", + [MFIB_FORWARD_RPF_NEXT_DROP] = "ip6-drop", }, }; diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c index 3b4bd985..7ffe8941 100644 --- a/src/vnet/mfib/mfib_table.c +++ b/src/vnet/mfib/mfib_table.c @@ -165,6 +165,7 @@ fib_node_index_t mfib_table_entry_update (u32 fib_index, const mfib_prefix_t *prefix, mfib_source_t source, + fib_rpf_id_t rpf_id, mfib_entry_flags_t entry_flags) { fib_node_index_t mfib_entry_index; @@ -181,7 +182,8 @@ mfib_table_entry_update (u32 fib_index, * update to a non-existing entry with non-zero flags */ mfib_entry_index = mfib_entry_create(fib_index, source, - prefix, entry_flags); + prefix, rpf_id, + entry_flags); mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index); } @@ -198,6 +200,7 @@ mfib_table_entry_update (u32 fib_index, if (mfib_entry_update(mfib_entry_index, source, entry_flags, + rpf_id, INDEX_INVALID)) 
{ /* @@ -230,6 +233,7 @@ mfib_table_entry_path_update (u32 fib_index, mfib_entry_index = mfib_entry_create(fib_index, source, prefix, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_NONE); mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index); @@ -304,6 +308,7 @@ mfib_table_entry_special_add (u32 fib_index, mfib_entry_index = mfib_entry_create(fib_index, source, prefix, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_NONE); mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index); @@ -311,6 +316,7 @@ mfib_table_entry_special_add (u32 fib_index, mfib_entry_update(mfib_entry_index, source, (MFIB_ENTRY_FLAG_EXCLUSIVE | entry_flags), + MFIB_RPF_ID_NONE, rep_dpo); return (mfib_entry_index); diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h index 95239f7c..83aa04ef 100644 --- a/src/vnet/mfib/mfib_table.h +++ b/src/vnet/mfib/mfib_table.h @@ -122,6 +122,7 @@ extern fib_node_index_t mfib_table_lookup_exact_match(u32 fib_index, extern fib_node_index_t mfib_table_entry_update(u32 fib_index, const mfib_prefix_t *prefix, mfib_source_t source, + fib_rpf_id_t rpf_id, mfib_entry_flags_t flags); /** diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c index 36a303e8..7c92ae99 100644 --- a/src/vnet/mfib/mfib_test.c +++ b/src/vnet/mfib/mfib_test.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include @@ -201,8 +203,8 @@ mfib_test_validate_rep_v (const replicate_t *rep, if (DPO_RECEIVE != dt) { MFIB_TEST_REP((ai == dpo->dpoi_index), - "bucket %d stacks on %U", - bucket, + "bucket %d [exp:%d] stacks on %U", + bucket, ai, format_dpo_id, dpo, 0); } } @@ -734,6 +736,7 @@ mfib_test_i (fib_protocol_t PROTO, mfib_table_entry_update(fib_index, pfx_s_g, MFIB_SOURCE_API, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_SIGNAL); MFIB_TEST(mfib_test_entry(mfei, MFIB_ENTRY_FLAG_SIGNAL, @@ -824,6 +827,7 @@ mfib_test_i (fib_protocol_t PROTO, mfib_table_entry_update(fib_index, pfx_s_g, MFIB_SOURCE_API, + MFIB_RPF_ID_NONE, (MFIB_ENTRY_FLAG_SIGNAL | 
MFIB_ENTRY_FLAG_CONNECTED)); MFIB_TEST(mfib_test_entry(mfei, @@ -965,6 +969,7 @@ mfib_test_i (fib_protocol_t PROTO, mfib_table_entry_update(fib_index, pfx_s_g, MFIB_SOURCE_API, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_NONE); mfei = mfib_table_lookup_exact_match(fib_index, pfx_s_g); @@ -1073,6 +1078,117 @@ mfib_test_i (fib_protocol_t PROTO, MFIB_SOURCE_SRv6); dpo_reset(&td); + /* + * A Multicast LSP. This is an mLDP head-end + */ + fib_node_index_t ai_mpls_10_10_10_1, lfei; + ip46_address_t nh_10_10_10_1 = { + .ip4 = { + .as_u32 = clib_host_to_net_u32(0x0a0a0a01), + }, + }; + ai_mpls_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_MPLS, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index); + + fib_prefix_t pfx_3500 = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 3500, + .fp_eos = MPLS_EOS, + .fp_payload_proto = DPO_PROTO_IP4, + }; + fib_test_rep_bucket_t mc_0 = { + .type = FT_REP_LABEL_O_ADJ, + .label_o_adj = { + .adj = ai_mpls_10_10_10_1, + .label = 3300, + .eos = MPLS_EOS, + }, + }; + mpls_label_t *l3300 = NULL; + vec_add1(l3300, 3300); + + /* + * MPLS enable an interface so we get the MPLS table created + */ + mpls_sw_interface_enable_disable(&mpls_main, + tm->hw[0]->sw_if_index, + 1); + + lfei = fib_table_entry_update_one_path(0, // default MPLS Table + &pfx_3500, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_MULTICAST, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + l3300, + FIB_ROUTE_PATH_FLAG_NONE); + MFIB_TEST(fib_test_validate_entry(lfei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 1, + &mc_0), + "3500 via replicate over 10.10.10.1"); + + /* + * An (S,G) that resolves via the mLDP head-end + */ + fib_route_path_t path_via_mldp = { + .frp_proto = FIB_PROTOCOL_MPLS, + .frp_local_label = pfx_3500.fp_label, + .frp_eos = MPLS_EOS, + .frp_sw_if_index = 0xffffffff, + .frp_fib_index = 0, + .frp_weight = 1, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + }; + dpo_id_t mldp_dpo = DPO_INVALID; + + 
fib_entry_contribute_forwarding(lfei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + &mldp_dpo); + + mfei = mfib_table_entry_path_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_mldp, + MFIB_ITF_FLAG_FORWARD); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 1, + DPO_REPLICATE, mldp_dpo.dpoi_index), + "%U over-mLDP replicate OK", + format_mfib_prefix, pfx_s_g); + + /* + * add a for-us path. this tests two types of non-attached paths on one entry + */ + mfei = mfib_table_entry_path_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_for_us, + MFIB_ITF_FLAG_FORWARD); + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 2, + DPO_REPLICATE, mldp_dpo.dpoi_index, + DPO_RECEIVE, 0), + "%U mLDP+for-us replicate OK", + format_mfib_prefix, pfx_s_g); + + mfib_table_entry_delete(fib_index, + pfx_s_g, + MFIB_SOURCE_API); + fib_table_entry_delete(0, + &pfx_3500, + FIB_SOURCE_API); + dpo_reset(&mldp_dpo); + /* * Unlock the table - it's the last lock so should be gone thereafter */ @@ -1086,6 +1202,13 @@ mfib_test_i (fib_protocol_t PROTO, adj_unlock(ai_2); adj_unlock(ai_3); + /* + * MPLS disable the interface + */ + mpls_sw_interface_enable_disable(&mpls_main, + tm->hw[0]->sw_if_index, + 0); + /* * test we've leaked no resources */ diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api index 2e3bfaf5..a1e1270a 100644 --- a/src/vnet/mpls/mpls.api +++ b/src/vnet/mpls/mpls.api @@ -55,6 +55,7 @@ define mpls_ip_bind_unbind_reply @param context - sender context, to match reply w/ request @param mt_is_add - Is this a route add or delete @param mt_sw_if_index - The SW interface index of the tunnel to delete + @param mt_is_multicast - Is the tunnel's underlying LSP multicast @param mt_next_hop_proto_is_ip4 - The next-hop is IPV4 @param mt_next_hop_weight - The weight, for UCMP @param mt_next_hop[16] - the nextop address @@ -70,6 +71,7 @@ define mpls_tunnel_add_del u32 mt_sw_if_index; u8 mt_is_add; u8 mt_l2_only; + u8 mt_is_multicast; u8 
mt_next_hop_proto_is_ip4; u8 mt_next_hop_weight; u8 mt_next_hop[16]; @@ -102,30 +104,43 @@ define mpls_tunnel_dump i32 tunnel_index; }; -/** \brief mpls eth tunnel operational state response - @param tunnel_index - eth tunnel identifier - @param intfc_address - interface ipv4 addr - @param mask_width - interface ipv4 addr mask - @param hw_if_index - interface id - @param l2_only - - @param tunnel_dst_mac - - @param tx_sw_if_index - - @param encap_index - reference to mpls label table - @param nlabels - number of resolved labels - @param labels - resolved labels +/** \brief FIB path + @param sw_if_index - index of the interface + @param weight - The weight, for UCMP + @param is_local - local if non-zero, else remote + @param is_drop - Drop the packet + @param is_unreach - Drop the packet and rate limit send ICMP unreachable + @param is_prohibit - Drop the packet and rate limit send ICMP prohibited + @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2 + @param next_hop[16] - the next hop address + + WARNING: this type is replicated, pending cleanup completion + +*/ +typeonly manual_print manual_endian define fib_path2 +{ + u32 sw_if_index; + u32 weight; + u8 is_local; + u8 is_drop; + u8 is_unreach; + u8 is_prohibit; + u8 afi; + u8 next_hop[16]; + u32 labels[16]; +}; + +/** \brief mpls tunnel details */ -define mpls_tunnel_details +manual_endian manual_print define mpls_tunnel_details { u32 context; - u32 tunnel_index; - u8 mt_l2_only; u8 mt_sw_if_index; - u8 mt_next_hop_proto_is_ip4; - u8 mt_next_hop[16]; - u32 mt_next_hop_sw_if_index; - u32 mt_next_hop_table_id; - u32 mt_next_hop_n_labels; - u32 mt_next_hop_out_labels[mt_next_hop_n_labels]; + u8 mt_tunnel_index; + u8 mt_l2_only; + u8 mt_is_multicast; + u32 mt_count; + vl_api_fib_path2_t mt_paths[mt_count]; }; /** \brief MPLS Route Add / del route @@ -140,10 +155,14 @@ define mpls_tunnel_details create them @param mr_is_add - Is this a route add or delete @param mr_is_classify - Is this route 
result a classify + @param mr_is_multicast - Is this a multicast route @param mr_is_multipath - Is this route update a multipath - i.e. is this a path addition to an existing route @param mr_is_resolve_host - Recurse resolution constraint via a host prefix @param mr_is_resolve_attached - Recurse resolution constraint via attached prefix + @param mr_is_interface_rx - Interface Receive path + @param mr_is_rpf_id - RPF-ID Receive path. The next-hop interface + is used as the RPF-ID @param mr_next_hop_proto_is_ip4 - The next-hop is IPV4 @param mr_next_hop_weight - The weight, for UCMP @param mr_next_hop[16] - the nextop address @@ -164,9 +183,12 @@ define mpls_route_add_del u8 mr_create_table_if_needed; u8 mr_is_add; u8 mr_is_classify; + u8 mr_is_multicast; u8 mr_is_multipath; u8 mr_is_resolve_host; u8 mr_is_resolve_attached; + u8 mr_is_interface_rx; + u8 mr_is_rpf_id; u8 mr_next_hop_proto_is_ip4; u8 mr_next_hop_weight; u8 mr_next_hop[16]; @@ -187,31 +209,6 @@ define mpls_route_add_del_reply i32 retval; }; -/** \brief FIB path - @param sw_if_index - index of the interface - @param weight - The weight, for UCMP - @param is_local - local if non-zero, else remote - @param is_drop - Drop the packet - @param is_unreach - Drop the packet and rate limit send ICMP unreachable - @param is_prohibit - Drop the packet and rate limit send ICMP prohibited - @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2 - @param next_hop[16] - the next hop address - - WARNING: this type is replicated, pending cleanup completion - -*/ -typeonly manual_print manual_endian define fib_path2 -{ - u32 sw_if_index; - u32 weight; - u8 is_local; - u8 is_drop; - u8 is_unreach; - u8 is_prohibit; - u8 afi; - u8 next_hop[16]; -}; - /** \brief Dump MPLS fib table @param client_index - opaque cookie to identify the sender */ diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 482577b1..451b15cf 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -286,7 +286,15 
@@ vnet_mpls_local_label (vlib_main_t * vm, rpath.frp_proto = FIB_PROTOCOL_IP4; vec_add1(rpaths, rpath); } - + else if (unformat (line_input, "rx-ip4 %U", + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U %U", unformat_ip6_address, &rpath.frp_addr.ip6, @@ -512,10 +520,3 @@ mpls_init (vlib_main_t * vm) } VLIB_INIT_FUNCTION (mpls_init); - -mpls_main_t * mpls_get_main (vlib_main_t * vm) -{ - vlib_call_init_function (vm, mpls_init); - return &mpls_main; -} - diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index f1aef6c9..6bfc491d 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -163,6 +164,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, dpo_proto_to_fib (pfx.fp_payload_proto), mp->mr_next_hop_table_id, mp->mr_create_table_if_needed, + mp->mr_is_rpf_id, &fib_index, &next_hop_fib_index); if (0 != rv) @@ -192,10 +194,13 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, 0, // mp->is_unreach, 0, // mp->is_prohibit, 0, // mp->is_local, + mp->mr_is_multicast, mp->mr_is_classify, mp->mr_classify_table_index, mp->mr_is_resolve_host, mp->mr_is_resolve_attached, + mp->mr_is_interface_rx, + mp->mr_is_rpf_id, fib_index, &pfx, mp->mr_next_hop_proto_is_ip4, &nh, ntohl (mp->mr_next_hop_sw_if_index), @@ -229,46 +234,54 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) int rv = 0; u32 tunnel_sw_if_index; int ii; + fib_route_path_t rpath, *rpaths = NULL; + + memset (&rpath, 0, sizeof (rpath)); stats_dslock_with_hint (1 /* release hint */ , 5 /* tag */ ); - if (mp->mt_is_add) + if (mp->mt_next_hop_proto_is_ip4) { - fib_route_path_t rpath, *rpaths = NULL; - mpls_label_t *label_stack = NULL; - - memset (&rpath, 0, sizeof (rpath)); - - if 
(mp->mt_next_hop_proto_is_ip4) - { - rpath.frp_proto = FIB_PROTOCOL_IP4; - clib_memcpy (&rpath.frp_addr.ip4, - mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); - } - else - { - rpath.frp_proto = FIB_PROTOCOL_IP6; - clib_memcpy (&rpath.frp_addr.ip6, - mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); - } - rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index); + rpath.frp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&rpath.frp_addr.ip4, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); + } + else + { + rpath.frp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&rpath.frp_addr.ip6, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); + } + rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index); + rpath.frp_weight = 1; + if (mp->mt_is_add) + { for (ii = 0; ii < mp->mt_next_hop_n_out_labels; ii++) - vec_add1 (label_stack, ntohl (mp->mt_next_hop_out_label_stack[ii])); + vec_add1 (rpath.frp_label_stack, + ntohl (mp->mt_next_hop_out_label_stack[ii])); + } - vec_add1 (rpaths, rpath); + vec_add1 (rpaths, rpath); - vnet_mpls_tunnel_add (rpaths, label_stack, - mp->mt_l2_only, &tunnel_sw_if_index); - vec_free (rpaths); - vec_free (label_stack); + tunnel_sw_if_index = ntohl (mp->mt_sw_if_index); + + if (mp->mt_is_add) + { + if (~0 == tunnel_sw_if_index) + tunnel_sw_if_index = vnet_mpls_tunnel_create (mp->mt_l2_only, + mp->mt_is_multicast); + vnet_mpls_tunnel_path_add (tunnel_sw_if_index, rpaths); } else { tunnel_sw_if_index = ntohl (mp->mt_sw_if_index); - vnet_mpls_tunnel_del (tunnel_sw_if_index); + if (!vnet_mpls_tunnel_path_remove (tunnel_sw_if_index, rpaths)) + vnet_mpls_tunnel_del (tunnel_sw_if_index); } + vec_free (rpaths); + stats_dsunlock (); /* *INDENT-OFF* */ @@ -289,10 +302,12 @@ typedef struct mpls_tunnel_send_walk_ctx_t_ static void send_mpls_tunnel_entry (u32 mti, void *arg) { + fib_route_path_encode_t *api_rpaths, *api_rpath; mpls_tunnel_send_walk_ctx_t *ctx; vl_api_mpls_tunnel_details_t *mp; const mpls_tunnel_t *mt; - u32 nlabels; + vl_api_fib_path2_t *fp; + u32 n; ctx = 
arg; @@ -300,18 +315,34 @@ send_mpls_tunnel_entry (u32 mti, void *arg) return; mt = mpls_tunnel_get (mti); - nlabels = vec_len (mt->mt_label_stack); + n = fib_path_list_get_n_paths (mt->mt_path_list); + + mp = vl_msg_api_alloc (sizeof (*mp) + n * sizeof (vl_api_fib_path2_t)); + memset (mp, 0, sizeof (*mp) + n * sizeof (vl_api_fib_path2_t)); - mp = vl_msg_api_alloc (sizeof (*mp) + nlabels * sizeof (u32)); - memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_MPLS_TUNNEL_DETAILS); mp->context = ctx->context; - mp->tunnel_index = ntohl (mti); - memcpy (mp->mt_next_hop_out_labels, - mt->mt_label_stack, nlabels * sizeof (u32)); + mp->mt_tunnel_index = ntohl (mti); + mp->mt_count = ntohl (n); + + fib_path_list_walk (mt->mt_path_list, fib_path_encode, &api_rpaths); + + fp = mp->mt_paths; + vec_foreach (api_rpath, api_rpaths) + { + memset (fp, 0, sizeof (*fp)); + + fp->weight = htonl (api_rpath->rpath.frp_weight); + fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index); + copy_fib_next_hop (api_rpath, fp); + fp++; + } // FIXME + // memcpy (mp->mt_next_hop_out_labels, + // mt->mt_label_stack, nlabels * sizeof (u32)); + vl_msg_api_send_shmem (ctx->q, (u8 *) & mp); } diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c index 1b9bdd05..86ad8bba 100644 --- a/src/vnet/mpls/mpls_input.c +++ b/src/vnet/mpls/mpls_input.c @@ -291,7 +291,7 @@ mpls_setup_nodes (vlib_main_t * vm) rt->last_outer_fib_index = 0; rt->mpls_main = &mpls_main; - ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST, + ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS, mpls_input_node.index); } diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c index ace6a70f..3c6be7e8 100644 --- a/src/vnet/mpls/mpls_lookup.c +++ b/src/vnet/mpls/mpls_lookup.c @@ -20,8 +20,17 @@ #include #include #include +#include -vlib_node_registration_t mpls_lookup_node; +/** + * Static MPLS VLIB forwarding node + */ +static vlib_node_registration_t mpls_lookup_node; + +/** + * The 
arc/edge from the MPLS lookup node to the MPLS replicate node + */ +static u32 mpls_lookup_to_replicate_edge; typedef struct { u32 next_index; @@ -156,81 +165,123 @@ mpls_lookup (vlib_main_t * vm, lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2); lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3); - lb0 = load_balance_get(lbi0); - lb1 = load_balance_get(lbi1); - lb2 = load_balance_get(lbi2); - lb3 = load_balance_get(lbi3); - hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0; hash_c2 = vnet_buffer(b2)->ip.flow_hash = 0; hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0; - if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + if (MPLS_IS_REPLICATE & lbi0) { - hash_c0 = vnet_buffer (b0)->ip.flow_hash = - mpls_compute_flow_hash(h0, lb0->lb_hash_config); + next0 = mpls_lookup_to_replicate_edge; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + (lbi0 & ~MPLS_IS_REPLICATE); } - if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) + else { - hash_c1 = vnet_buffer (b1)->ip.flow_hash = - mpls_compute_flow_hash(h1, lb1->lb_hash_config); + lb0 = load_balance_get(lbi0); + + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + } + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + next0 = dpo0->dpoi_next_node; + + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); } - if (PREDICT_FALSE(lb2->lb_n_buckets > 1)) + if (MPLS_IS_REPLICATE & lbi1) { - hash_c2 = vnet_buffer (b2)->ip.flow_hash = - mpls_compute_flow_hash(h2, lb2->lb_hash_config); + next1 = mpls_lookup_to_replicate_edge; + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = + (lbi1 & ~MPLS_IS_REPLICATE); } - if (PREDICT_FALSE(lb3->lb_n_buckets > 1)) + else { - hash_c3 = vnet_buffer 
(b3)->ip.flow_hash = - mpls_compute_flow_hash(h3, lb3->lb_hash_config); - } - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - ASSERT (lb1->lb_n_buckets > 0); - ASSERT (is_pow2 (lb1->lb_n_buckets)); - ASSERT (lb2->lb_n_buckets > 0); - ASSERT (is_pow2 (lb2->lb_n_buckets)); - ASSERT (lb3->lb_n_buckets > 0); - ASSERT (is_pow2 (lb3->lb_n_buckets)); - - dpo0 = load_balance_get_bucket_i(lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); - dpo1 = load_balance_get_bucket_i(lb1, - (hash_c1 & - (lb1->lb_n_buckets_minus_1))); - dpo2 = load_balance_get_bucket_i(lb2, - (hash_c2 & - (lb2->lb_n_buckets_minus_1))); - dpo3 = load_balance_get_bucket_i(lb3, - (hash_c3 & - (lb3->lb_n_buckets_minus_1))); + lb1 = load_balance_get(lbi1); - next0 = dpo0->dpoi_next_node; - next1 = dpo1->dpoi_next_node; - next2 = dpo2->dpoi_next_node; - next3 = dpo3->dpoi_next_node; + if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) + { + hash_c1 = vnet_buffer (b1)->ip.flow_hash = + mpls_compute_flow_hash(h1, lb1->lb_hash_config); + } + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + dpo1 = load_balance_get_bucket_i(lb1, + (hash_c1 & + (lb1->lb_n_buckets_minus_1))); + next1 = dpo1->dpoi_next_node; + + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi1, 1, + vlib_buffer_length_in_chain (vm, b1)); + } + if (MPLS_IS_REPLICATE & lbi2) + { + next2 = mpls_lookup_to_replicate_edge; + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = + (lbi2 & ~MPLS_IS_REPLICATE); + } + else + { + lb2 = load_balance_get(lbi2); - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; - vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; - vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; + if (PREDICT_FALSE(lb2->lb_n_buckets > 1)) + { + hash_c2 = vnet_buffer (b2)->ip.flow_hash = + mpls_compute_flow_hash(h2, lb2->lb_hash_config); + } + ASSERT 
(lb2->lb_n_buckets > 0); + ASSERT (is_pow2 (lb2->lb_n_buckets)); + dpo2 = load_balance_get_bucket_i(lb2, + (hash_c2 & + (lb2->lb_n_buckets_minus_1))); + next2 = dpo2->dpoi_next_node; + + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi2, 1, + vlib_buffer_length_in_chain (vm, b2)); + } + if (MPLS_IS_REPLICATE & lbi3) + { + next3 = mpls_lookup_to_replicate_edge; + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = + (lbi3 & ~MPLS_IS_REPLICATE); + } + else + { + lb3 = load_balance_get(lbi3); - vlib_increment_combined_counter - (cm, thread_index, lbi0, 1, - vlib_buffer_length_in_chain (vm, b0)); - vlib_increment_combined_counter - (cm, thread_index, lbi1, 1, - vlib_buffer_length_in_chain (vm, b1)); - vlib_increment_combined_counter - (cm, thread_index, lbi2, 1, - vlib_buffer_length_in_chain (vm, b2)); - vlib_increment_combined_counter - (cm, thread_index, lbi3, 1, - vlib_buffer_length_in_chain (vm, b3)); + if (PREDICT_FALSE(lb3->lb_n_buckets > 1)) + { + hash_c3 = vnet_buffer (b3)->ip.flow_hash = + mpls_compute_flow_hash(h3, lb3->lb_hash_config); + } + ASSERT (lb3->lb_n_buckets > 0); + ASSERT (is_pow2 (lb3->lb_n_buckets)); + dpo3 = load_balance_get_bucket_i(lb3, + (hash_c3 & + (lb3->lb_n_buckets_minus_1))); + next3 = dpo3->dpoi_next_node; + + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi3, 1, + vlib_buffer_length_in_chain (vm, b3)); + } /* * before we pop the label copy th values we need to maintain. 
@@ -331,31 +382,41 @@ mpls_lookup (vlib_main_t * vm, vnet_buffer(b0)->sw_if_index[VLIB_RX]); lbi0 = mpls_fib_table_forwarding_lookup(lfib_index0, h0); - lb0 = load_balance_get(lbi0); - hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; - if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + + if (MPLS_IS_REPLICATE & lbi0) { - hash_c0 = vnet_buffer (b0)->ip.flow_hash = - mpls_compute_flow_hash(h0, lb0->lb_hash_config); + next0 = mpls_lookup_to_replicate_edge; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + (lbi0 & ~MPLS_IS_REPLICATE); } + else + { + lb0 = load_balance_get(lbi0); - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + } - dpo0 = load_balance_get_bucket_i(lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); - next0 = dpo0->dpoi_next_node; - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); - vlib_increment_combined_counter - (cm, thread_index, lbi0, 1, - vlib_buffer_length_in_chain (vm, b0)); + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + } /* - * before we pop the label copy th values we need to maintain. + * before we pop the label copy, values we need to maintain. * The label header is in network byte order. * last byte is the TTL. * bits 2 to 4 inclusive are the EXP bits @@ -398,7 +459,7 @@ static char * mpls_error_strings[] = { #undef mpls_error }; -VLIB_REGISTER_NODE (mpls_lookup_node) = { +VLIB_REGISTER_NODE (mpls_lookup_node, static) = { .function = mpls_lookup, .name = "mpls-lookup", /* Takes a vector of packets. 
*/ @@ -621,3 +682,22 @@ VLIB_REGISTER_NODE (mpls_load_balance_node) = { }; VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance) + + +static clib_error_t * +mpls_lookup_init (vlib_main_t * vm) +{ + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, mpls_init))) + return error; + + mpls_lookup_to_replicate_edge = + vlib_node_add_named_next(vm, + mpls_lookup_node.index, + "mpls-replicate"); + + return (NULL); +} + +VLIB_INIT_FUNCTION (mpls_lookup_init); diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c index ac6fdcdf..1254dd9d 100644 --- a/src/vnet/mpls/mpls_tunnel.c +++ b/src/vnet/mpls/mpls_tunnel.c @@ -18,9 +18,12 @@ #include #include #include +#include #include #include #include +#include +#include /** * @brief pool of tunnel instances @@ -37,6 +40,11 @@ static u32 * mpls_tunnel_free_hw_if_indices; */ static u32 *mpls_tunnel_db; +/** + * @brief MPLS tunnel flags strings + */ +static const char *mpls_tunnel_attribute_names[] = MPLS_TUNNEL_ATTRIBUTES; + /** * @brief Get a tunnel object from a SW interface index */ @@ -44,103 +52,178 @@ static mpls_tunnel_t* mpls_tunnel_get_from_sw_if_index (u32 sw_if_index) { if ((vec_len(mpls_tunnel_db) < sw_if_index) || - (~0 == mpls_tunnel_db[sw_if_index])) - return (NULL); + (~0 == mpls_tunnel_db[sw_if_index])) + return (NULL); return (pool_elt_at_index(mpls_tunnel_pool, - mpls_tunnel_db[sw_if_index])); + mpls_tunnel_db[sw_if_index])); } /** - * @brief Return true if the label stack is imp-null only + * @brief Build a rewrite string for the MPLS tunnel. */ -static fib_forward_chain_type_t -mpls_tunnel_get_fwd_chain_type (const mpls_tunnel_t *mt) +static u8* +mpls_tunnel_build_rewrite_i (void) { - if ((1 == vec_len(mt->mt_label_stack)) && - (mt->mt_label_stack[0] == MPLS_IETF_IMPLICIT_NULL_LABEL)) - { - /* - * the only label in the label stack is implicit null - * we need to build an IP chain. 
- */ - if (FIB_PROTOCOL_IP4 == fib_path_list_get_proto(mt->mt_path_list)) - { - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); - } - else - { - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); - } - } - else - { - return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); - } + /* + * passing the adj code a NULL rewrite means 'i don't have one cos + * t'other end is unresolved'. That's not the case here. For the mpls + * tunnel there are just no bytes of encap to apply in the adj. We'll impose + * the label stack once we choose a path. So return a zero length rewrite. + */ + u8 *rewrite = NULL; + + vec_validate(rewrite, 0); + vec_reset_length(rewrite); + + return (rewrite); } /** * @brief Build a rewrite string for the MPLS tunnel. - * - * We have choices here; - * 1 - have an Adjacency with a zero length string and stack it on - * MPLS label objects - * 2 - put the label header rewrites in the adjacency string. - * - * We choose 2 since it results in fewer graph nodes in the egress path */ static u8* mpls_tunnel_build_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - vnet_link_t link_type, - const void *dst_address) + u32 sw_if_index, + vnet_link_t link_type, + const void *dst_address) { - mpls_unicast_header_t *muh; - mpls_tunnel_t *mt; - u8 *rewrite; - u32 mti, ii; + return (mpls_tunnel_build_rewrite_i()); +} - rewrite = NULL; - mti = mpls_tunnel_db[sw_if_index]; - mt = pool_elt_at_index(mpls_tunnel_pool, mti); +typedef struct mpls_tunnel_collect_forwarding_ctx_t_ +{ + load_balance_path_t * next_hops; + const mpls_tunnel_t *mt; + fib_forward_chain_type_t fct; +} mpls_tunnel_collect_forwarding_ctx_t; + +static int +mpls_tunnel_collect_forwarding (fib_node_index_t pl_index, + fib_node_index_t path_index, + void *arg) +{ + mpls_tunnel_collect_forwarding_ctx_t *ctx; + fib_path_ext_t *path_ext; + int have_path_ext; + + ctx = arg; /* - * The vector must be allocated as u8 so the length is correct + * if the path is not resolved, don't include it. 
*/ - ASSERT(0 < vec_len(mt->mt_label_stack)); - vec_validate(rewrite, (sizeof(*muh) * vec_len(mt->mt_label_stack)) - 1); - ASSERT(rewrite); - muh = (mpls_unicast_header_t *)rewrite; + if (!fib_path_is_resolved(path_index)) + { + return (!0); + } /* - * The last (inner most) label in the stack may be EOS, all the rest Non-EOS + * get the matching path-extension for the path being visited. */ - for (ii = 0; ii < vec_len(mt->mt_label_stack)-1; ii++) + have_path_ext = 0; + vec_foreach(path_ext, ctx->mt->mt_path_exts) { - vnet_mpls_uc_set_label(&muh[ii].label_exp_s_ttl, mt->mt_label_stack[ii]); - vnet_mpls_uc_set_ttl(&muh[ii].label_exp_s_ttl, 255); - vnet_mpls_uc_set_exp(&muh[ii].label_exp_s_ttl, 0); - vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_NON_EOS); - muh[ii].label_exp_s_ttl = clib_host_to_net_u32(muh[ii].label_exp_s_ttl); + if (path_ext->fpe_path_index == path_index) + { + have_path_ext = 1; + break; + } } - vnet_mpls_uc_set_label(&muh[ii].label_exp_s_ttl, mt->mt_label_stack[ii]); - vnet_mpls_uc_set_ttl(&muh[ii].label_exp_s_ttl, 255); - vnet_mpls_uc_set_exp(&muh[ii].label_exp_s_ttl, 0); - - if ((VNET_LINK_MPLS == link_type) && - (mt->mt_label_stack[ii] != MPLS_IETF_IMPLICIT_NULL_LABEL)) + if (have_path_ext) { - vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_NON_EOS); + /* + * found a matching extension. stack it to obtain the forwarding + * info for this path. + */ + ctx->next_hops = fib_path_ext_stack(path_ext, + ctx->fct, + ctx->fct, + ctx->next_hops); } else + ASSERT(0); + /* + * else + * There should be a path-extension associated with each path + */ + + return (!0); +} + +static void +mpls_tunnel_mk_lb (mpls_tunnel_t *mt, + vnet_link_t linkt, + fib_forward_chain_type_t fct, + dpo_id_t *dpo_lb) +{ + dpo_proto_t lb_proto; + + /* + * If the entry has path extensions then we construct a load-balance + * by stacking the extensions on the forwarding chains of the paths. 
+ * Otherwise we use the load-balance of the path-list + */ + mpls_tunnel_collect_forwarding_ctx_t ctx = { + .mt = mt, + .next_hops = NULL, + .fct = fct, + }; + + /* + * As an optimisation we allocate the vector of next-hops to be sized + * equal to the maximum number of paths we will need, which is also the + * most likely number we will need, since in most cases the paths are 'up'. + */ + vec_validate(ctx.next_hops, fib_path_list_get_n_paths(mt->mt_path_list)); + vec_reset_length(ctx.next_hops); + + lb_proto = vnet_link_to_dpo_proto(linkt); + + fib_path_list_walk(mt->mt_path_list, + mpls_tunnel_collect_forwarding, + &ctx); + + if (!dpo_id_is_valid(dpo_lb)) { - vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_EOS); + /* + * first time create + */ + if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST) + { + dpo_set(dpo_lb, + DPO_REPLICATE, + lb_proto, + replicate_create(0, lb_proto)); + } + else + { + flow_hash_config_t fhc; + + fhc = 0; // FIXME + /* fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, */ + /* dpo_proto_to_fib(lb_proto)); */ + dpo_set(dpo_lb, + DPO_LOAD_BALANCE, + lb_proto, + load_balance_create(0, lb_proto, fhc)); + } } - muh[ii].label_exp_s_ttl = clib_host_to_net_u32(muh[ii].label_exp_s_ttl); - - return (rewrite); + if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST) + { + /* + * MPLS multicast + */ + replicate_multipath_update(dpo_lb, ctx.next_hops); + } + else + { + load_balance_multipath_update(dpo_lb, + ctx.next_hops, + LOAD_BALANCE_FLAG_NONE); + vec_free(ctx.next_hops); + } } /** @@ -161,45 +244,47 @@ mpls_tunnel_stack (adj_index_t ai) mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); if (NULL == mt) - return; + return; /* - * find the adjacency that is contributed by the FIB path-list - * that this tunnel resovles via, and use it as the next adj - * in the midchain + * while we're stacking the adj, remove the tunnel from the child list + * of the path list. 
this breaks a circular dependency of walk updates + * where the create of adjacencies in the children can lead to walks + * that get back here. */ - if (vnet_hw_interface_get_flags(vnet_get_main(), - mt->mt_hw_if_index) & - VNET_HW_INTERFACE_FLAG_LINK_UP) - { - dpo_id_t dpo = DPO_INVALID; + fib_path_list_lock(mt->mt_path_list); - fib_path_list_contribute_forwarding(mt->mt_path_list, - mpls_tunnel_get_fwd_chain_type(mt), - &dpo); - - if (DPO_LOAD_BALANCE == dpo.dpoi_type) - { - /* - * we don't support multiple paths, so no need to load-balance. - * pull the first and only choice and stack directly on that. - */ - load_balance_t *lb; - - lb = load_balance_get (dpo.dpoi_index); + fib_path_list_child_remove(mt->mt_path_list, + mt->mt_sibling_index); - ASSERT(1 == lb->lb_n_buckets); + /* + * Construct the DPO (load-balance or replicate) that we can stack + * the tunnel's midchain on + */ + if (vnet_hw_interface_get_flags(vnet_get_main(), + mt->mt_hw_if_index) & + VNET_HW_INTERFACE_FLAG_LINK_UP) + { + dpo_id_t dpo = DPO_INVALID; - dpo_copy(&dpo, load_balance_get_bucket_i (lb, 0)); - } + mpls_tunnel_mk_lb(mt, + adj->ia_link, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + &dpo); - adj_nbr_midchain_stack(ai, &dpo); - dpo_reset(&dpo); + adj_nbr_midchain_stack(ai, &dpo); + dpo_reset(&dpo); } else { - adj_nbr_midchain_unstack(ai); + adj_nbr_midchain_unstack(ai); } + + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mt - mpls_tunnel_pool); + + fib_path_list_lock(mt->mt_path_list); } /** @@ -207,7 +292,7 @@ mpls_tunnel_stack (adj_index_t ai) */ static adj_walk_rc_t mpls_adj_walk_cb (adj_index_t ai, - void *ctx) + void *ctx) { mpls_tunnel_stack(ai); @@ -224,17 +309,17 @@ mpls_tunnel_restack (mpls_tunnel_t *mt) */ FOR_EACH_FIB_PROTOCOL(proto) { - adj_nbr_walk(mt->mt_sw_if_index, - proto, - mpls_adj_walk_cb, - NULL); + adj_nbr_walk(mt->mt_sw_if_index, + proto, + mpls_adj_walk_cb, + NULL); } } static clib_error_t * mpls_tunnel_admin_up_down 
(vnet_main_t * vnm, - u32 hw_if_index, - u32 flags) + u32 hw_if_index, + u32 flags) { vnet_hw_interface_t * hi; mpls_tunnel_t *mt; @@ -244,13 +329,13 @@ mpls_tunnel_admin_up_down (vnet_main_t * vnm, mt = mpls_tunnel_get_from_sw_if_index(hi->sw_if_index); if (NULL == mt) - return (NULL); + return (NULL); if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) - vnet_hw_interface_set_flags (vnm, hw_if_index, - VNET_HW_INTERFACE_FLAG_LINK_UP); + vnet_hw_interface_set_flags (vnm, hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); else - vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */); + vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */); mpls_tunnel_restack(mt); @@ -263,22 +348,58 @@ mpls_tunnel_admin_up_down (vnet_main_t * vnm, */ static void mpls_tunnel_fixup (vlib_main_t *vm, - ip_adjacency_t *adj, - vlib_buffer_t *b0) + ip_adjacency_t *adj, + vlib_buffer_t *b0) { + /* + * A no-op w.r.t. the header. but reset the 'have we pushed any + * MPLS labels onto the packet' flag. That way when we enter the + * tunnel we'll get a TTL set to 255 + */ + vnet_buffer(b0)->mpls.first = 0; } static void mpls_tunnel_update_adj (vnet_main_t * vnm, - u32 sw_if_index, - adj_index_t ai) + u32 sw_if_index, + adj_index_t ai) { - adj_nbr_midchain_update_rewrite( - ai, mpls_tunnel_fixup, - ADJ_FLAG_NONE, - mpls_tunnel_build_rewrite(vnm, sw_if_index, - adj_get_link_type(ai), - NULL)); + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != ai); + + adj = adj_get(ai); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + case IP_LOOKUP_NEXT_GLEAN: + adj_nbr_midchain_update_rewrite(ai, mpls_tunnel_fixup, + ADJ_FLAG_NONE, + mpls_tunnel_build_rewrite_i()); + break; + case IP_LOOKUP_NEXT_MCAST: + /* + * Construct a partial rewrite from the known ethernet mcast dest MAC + * There's no MAC fixup, so the last 2 parameters are 0 + */ + adj_mcast_midchain_update_rewrite(ai, mpls_tunnel_fixup, + ADJ_FLAG_NONE, + mpls_tunnel_build_rewrite_i(), + 0, 0); + break; + + case 
IP_LOOKUP_NEXT_DROP: + case IP_LOOKUP_NEXT_PUNT: + case IP_LOOKUP_NEXT_LOCAL: + case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_MIDCHAIN: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: + case IP_LOOKUP_NEXT_ICMP_ERROR: + case IP_LOOKUP_N_NEXT: + ASSERT (0); + break; + } mpls_tunnel_stack(ai); } @@ -312,7 +433,7 @@ typedef struct mpls_tunnel_trace_t_ static u8 * format_mpls_tunnel_tx_trace (u8 * s, - va_list * args) + va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); @@ -327,8 +448,8 @@ format_mpls_tunnel_tx_trace (u8 * s, */ static uword mpls_tunnel_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) + vlib_node_runtime_t * node, + vlib_frame_t * frame) { u32 next_index; u32 * from, * to_next, n_left_from, n_left_to_next; @@ -355,32 +476,32 @@ mpls_tunnel_tx (vlib_main_t * vm, * FIXME DUAL LOOP */ while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t * b0; - u32 bi0; + { + vlib_buffer_t * b0; + u32 bi0; - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; - b0 = vlib_get_buffer(vm, bi0); + b0 = vlib_get_buffer(vm, bi0); - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj; - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - mpls_tunnel_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->tunnel_id = rd->dev_instance; - } + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_tunnel_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = rd->dev_instance; + } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, mt->mt_l2_tx_arc); - } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, 
mt->mt_l2_tx_arc); + } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } @@ -417,13 +538,13 @@ mpls_tunnel_get (u32 mti) */ void mpls_tunnel_walk (mpls_tunnel_walk_cb_t cb, - void *ctx) + void *ctx) { u32 mti; pool_foreach_index(mti, mpls_tunnel_pool, ({ - cb(mti, ctx); + cb(mti, ctx); })); } @@ -435,25 +556,22 @@ vnet_mpls_tunnel_del (u32 sw_if_index) mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); if (NULL == mt) - return; - - fib_path_list_child_remove(mt->mt_path_list, - mt->mt_sibling_index); - if (ADJ_INDEX_INVALID != mt->mt_l2_adj) - adj_unlock(mt->mt_l2_adj); + return; - vec_free(mt->mt_label_stack); + if (FIB_NODE_INDEX_INVALID != mt->mt_path_list) + fib_path_list_child_remove(mt->mt_path_list, + mt->mt_sibling_index); + if (ADJ_INDEX_INVALID != mt->mt_l2_adj) + adj_unlock(mt->mt_l2_adj); vec_add1 (mpls_tunnel_free_hw_if_indices, mt->mt_hw_if_index); pool_put(mpls_tunnel_pool, mt); mpls_tunnel_db[sw_if_index] = ~0; } -void -vnet_mpls_tunnel_add (fib_route_path_t *rpaths, - mpls_label_t *label_stack, - u8 l2_only, - u32 *sw_if_index) +u32 +vnet_mpls_tunnel_create (u8 l2_only, + u8 is_multicast) { vnet_hw_interface_t * hi; mpls_tunnel_t *mt; @@ -466,28 +584,33 @@ vnet_mpls_tunnel_add (fib_route_path_t *rpaths, mti = mt - mpls_tunnel_pool; fib_node_init(&mt->mt_node, FIB_NODE_TYPE_MPLS_TUNNEL); mt->mt_l2_adj = ADJ_INDEX_INVALID; + mt->mt_path_list = FIB_NODE_INDEX_INVALID; + mt->mt_sibling_index = FIB_NODE_INDEX_INVALID; + + if (is_multicast) + mt->mt_flags |= MPLS_TUNNEL_FLAG_MCAST; /* * Create a new, or re=use and old, tunnel HW interface */ if (vec_len (mpls_tunnel_free_hw_if_indices) > 0) { - mt->mt_hw_if_index = - mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1]; - _vec_len (mpls_tunnel_free_hw_if_indices) -= 1; - hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index); - hi->hw_instance = mti; - hi->dev_instance = mti; + mt->mt_hw_if_index = + 
mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1]; + _vec_len (mpls_tunnel_free_hw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index); + hi->hw_instance = mti; + hi->dev_instance = mti; } - else + else { - mt->mt_hw_if_index = vnet_register_interface( - vnm, - mpls_tunnel_class.index, - mti, - mpls_tunnel_hw_interface_class.index, - mti); - hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index); + mt->mt_hw_if_index = vnet_register_interface( + vnm, + mpls_tunnel_class.index, + mti, + mpls_tunnel_hw_interface_class.index, + mti); + hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index); } /* @@ -497,43 +620,218 @@ vnet_mpls_tunnel_add (fib_route_path_t *rpaths, vec_validate_init_empty(mpls_tunnel_db, mt->mt_sw_if_index, ~0); mpls_tunnel_db[mt->mt_sw_if_index] = mti; + if (l2_only) + { + mt->mt_l2_adj = + adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list), + VNET_LINK_ETHERNET, + &zero_addr, + mt->mt_sw_if_index); + + mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(), + hi->tx_node_index, + "adj-l2-midchain"); + } + + return (mt->mt_sw_if_index); +} + +/* + * mpls_tunnel_path_ext_add + * + * append a path extension to the entry's list + */ +static void +mpls_tunnel_path_ext_append (mpls_tunnel_t *mt, + const fib_route_path_t *rpath) +{ + if (NULL != rpath->frp_label_stack) + { + fib_path_ext_t *path_ext; + + vec_add2(mt->mt_path_exts, path_ext, 1); + + fib_path_ext_init(path_ext, mt->mt_path_list, rpath); + } +} + +/* + * mpls_tunnel_path_ext_insert + * + * insert, sorted, a path extension to the entry's list. + * It's not strictly necessary in sort the path extensions, since each + * extension has the path index to which it resolves. However, by being + * sorted the load-balance produced has a deterministic order, not an order + * based on the sequence of extension additions. this is a considerable benefit. 
+ */ +static void +mpls_tunnel_path_ext_insert (mpls_tunnel_t *mt, + const fib_route_path_t *rpath) +{ + if (0 == vec_len(mt->mt_path_exts)) + return (mpls_tunnel_path_ext_append(mt, rpath)); + + if (NULL != rpath->frp_label_stack) + { + fib_path_ext_t path_ext; + int i = 0; + + fib_path_ext_init(&path_ext, mt->mt_path_list, rpath); + + while (i < vec_len(mt->mt_path_exts) && + (fib_path_ext_cmp(&mt->mt_path_exts[i], rpath) < 0)) + { + i++; + } + + vec_insert_elts(mt->mt_path_exts, &path_ext, 1, i); + } +} + +void +vnet_mpls_tunnel_path_add (u32 sw_if_index, + fib_route_path_t *rpaths) +{ + mpls_tunnel_t *mt; + u32 mti; + + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); + + if (NULL == mt) + return; + + mti = mt - mpls_tunnel_pool; + /* * construct a path-list from the path provided */ - mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths); - mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, - FIB_NODE_TYPE_MPLS_TUNNEL, - mti); + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) + { + mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths); + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + } + else + { + fib_node_index_t old_pl_index; + fib_path_ext_t *path_ext; + + old_pl_index = mt->mt_path_list; + + mt->mt_path_list = + fib_path_list_copy_and_path_add(old_pl_index, + FIB_PATH_LIST_FLAG_SHARED, + rpaths); + + fib_path_list_child_remove(old_pl_index, + mt->mt_sibling_index); + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + /* + * re-resolve all the path-extensions with the new path-list + */ + vec_foreach(path_ext, mt->mt_path_exts) + { + fib_path_ext_resolve(path_ext, mt->mt_path_list); + } + } + mpls_tunnel_path_ext_insert(mt, rpaths); + mpls_tunnel_restack(mt); +} + +int +vnet_mpls_tunnel_path_remove (u32 sw_if_index, + fib_route_path_t *rpaths) +{ + mpls_tunnel_t *mt; + u32 mti; - 
mt->mt_label_stack = vec_dup(label_stack); + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); - if (l2_only) + if (NULL == mt) + return (0); + + mti = mt - mpls_tunnel_pool; + + /* + * construct a path-list from the path provided + */ + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) { - mt->mt_l2_adj = - adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list), - VNET_LINK_ETHERNET, - &zero_addr, - mt->mt_sw_if_index); - - mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(), - hi->tx_node_index, - "adj-l2-midchain"); + /* can't remove a path if we have onoe */ + return (0); } - - *sw_if_index = mt->mt_sw_if_index; + else + { + fib_node_index_t old_pl_index; + fib_path_ext_t *path_ext; + + old_pl_index = mt->mt_path_list; + + mt->mt_path_list = + fib_path_list_copy_and_path_remove(old_pl_index, + FIB_PATH_LIST_FLAG_SHARED, + rpaths); + + fib_path_list_child_remove(old_pl_index, + mt->mt_sibling_index); + + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) + { + /* no paths left */ + return (0); + } + else + { + mt->mt_sibling_index = + fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + } + /* + * find the matching path extension and remove it + */ + vec_foreach(path_ext, mt->mt_path_exts) + { + if (!fib_path_ext_cmp(path_ext, rpaths)) + { + /* + * delete the element moving the remaining elements down 1 position. + * this preserves the sorted order. 
+ */ + vec_free(path_ext->fpe_label_stack); + vec_delete(mt->mt_path_exts, 1, + (path_ext - mt->mt_path_exts)); + break; + } + } + /* + * re-resolve all the path-extensions with the new path-list + */ + vec_foreach(path_ext, mt->mt_path_exts) + { + fib_path_ext_resolve(path_ext, mt->mt_path_list); + } + + mpls_tunnel_restack(mt); + } + + return (fib_path_list_get_n_paths(mt->mt_path_list)); } + static clib_error_t * vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, + vlib_cli_command_t * cmd) { unformat_input_t _line_input, * line_input = &_line_input; vnet_main_t * vnm = vnet_get_main(); - u8 is_del = 0; - u8 l2_only = 0; + u8 is_del = 0, l2_only = 0, is_multicast =0; fib_route_path_t rpath, *rpaths = NULL; - mpls_label_t out_label = MPLS_LABEL_INVALID, *labels = NULL; + mpls_label_t out_label = MPLS_LABEL_INVALID; u32 sw_if_index; clib_error_t *error = NULL; @@ -541,87 +839,89 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, /* Get a line of input. */ if (! 
unformat_user (input, unformat_line_input, line_input)) - return 0; + return 0; while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "del %U", - unformat_vnet_sw_interface, vnm, - &sw_if_index)) - is_del = 1; - else if (unformat (line_input, "add")) - is_del = 0; - else if (unformat (line_input, "out-label %U", - unformat_mpls_unicast_label, &out_label)) - { - vec_add1(labels, out_label); - } - else if (unformat (line_input, "via %U %U", - unformat_ip4_address, - &rpath.frp_addr.ip4, - unformat_vnet_sw_interface, vnm, - &rpath.frp_sw_if_index)) - { - rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; - } - - else if (unformat (line_input, "via %U %U", - unformat_ip6_address, - &rpath.frp_addr.ip6, - unformat_vnet_sw_interface, vnm, - &rpath.frp_sw_if_index)) - { - rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP6; - } - else if (unformat (line_input, "via %U", - unformat_ip6_address, - &rpath.frp_addr.ip6)) - { - rpath.frp_fib_index = 0; - rpath.frp_weight = 1; - rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; - } - else if (unformat (line_input, "via %U", - unformat_ip4_address, - &rpath.frp_addr.ip4)) - { - rpath.frp_fib_index = 0; - rpath.frp_weight = 1; - rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; - } - else if (unformat (line_input, "l2-only")) - l2_only = 1; - else - { - error = clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); - goto done; - } + if (unformat (line_input, "del %U", + unformat_vnet_sw_interface, vnm, + &sw_if_index)) + is_del = 1; + else if (unformat (line_input, "add")) + is_del = 0; + else if (unformat (line_input, "out-label %U", + unformat_mpls_unicast_label, &out_label)) + { + vec_add1(rpath.frp_label_stack, out_label); + } + else if (unformat (line_input, "via %U %U", + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + 
rpath.frp_proto = FIB_PROTOCOL_IP4; + } + + else if (unformat (line_input, "via %U %U", + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP6; + } + else if (unformat (line_input, "via %U", + unformat_ip6_address, + &rpath.frp_addr.ip6)) + { + rpath.frp_fib_index = 0; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP6; + } + else if (unformat (line_input, "via %U", + unformat_ip4_address, + &rpath.frp_addr.ip4)) + { + rpath.frp_fib_index = 0; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP4; + } + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else if (unformat (line_input, "multicast")) + is_multicast = 1; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } if (is_del) { - vnet_mpls_tunnel_del(sw_if_index); + vnet_mpls_tunnel_del(sw_if_index); } else { - if (0 == vec_len(labels)) - { - error = clib_error_return (0, "No Output Labels '%U'", - format_unformat_error, line_input); - goto done; - } - - vec_add1(rpaths, rpath); - vnet_mpls_tunnel_add(rpaths, labels, l2_only, &sw_if_index); + if (0 == vec_len(rpath.frp_label_stack)) + { + error = clib_error_return (0, "No Output Labels '%U'", + format_unformat_error, line_input); + goto done; + } + + vec_add1(rpaths, rpath); + sw_if_index = vnet_mpls_tunnel_create(l2_only, is_multicast); + vnet_mpls_tunnel_path_add(sw_if_index, rpaths); } done: - vec_free(labels); vec_free(rpaths); unformat_free (line_input); @@ -638,7 +938,7 @@ done: ?*/ VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = { .path = "mpls tunnel", - .short_help = + .short_help = "mpls tunnel via [addr] [interface] [out-labels]", .function = vnet_create_mpls_tunnel_command_fn, }; @@ -647,19 +947,28 @@ static u8 * format_mpls_tunnel (u8 * s, va_list * args) { mpls_tunnel_t 
*mt = va_arg (*args, mpls_tunnel_t *); - int ii; + mpls_tunnel_attribute_t attr; + fib_path_ext_t *path_ext; s = format(s, "mpls_tunnel%d: sw_if_index:%d hw_if_index:%d", - mt - mpls_tunnel_pool, - mt->mt_sw_if_index, - mt->mt_hw_if_index); - s = format(s, "\n label-stack:\n "); - for (ii = 0; ii < vec_len(mt->mt_label_stack); ii++) - { - s = format(s, "%d, ", mt->mt_label_stack[ii]); + mt - mpls_tunnel_pool, + mt->mt_sw_if_index, + mt->mt_hw_if_index); + if (MPLS_TUNNEL_FLAG_NONE != mt->mt_flags) { + s = format(s, " \n flags:"); + FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(attr) { + if ((1<mt_flags) { + s = format (s, "%s,", mpls_tunnel_attribute_names[attr]); + } + } } s = format(s, "\n via:\n"); s = fib_path_list_format(mt->mt_path_list, s); + s = format(s, " Extensions:"); + vec_foreach(path_ext, mt->mt_path_exts) + { + s = format(s, "\n %U", format_fib_path_ext, path_ext); + } s = format(s, "\n"); return (s); @@ -667,42 +976,42 @@ format_mpls_tunnel (u8 * s, va_list * args) static clib_error_t * show_mpls_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, + vlib_cli_command_t * cmd) { mpls_tunnel_t * mt; u32 mti = ~0; if (pool_elts (mpls_tunnel_pool) == 0) - vlib_cli_output (vm, "No MPLS tunnels configured..."); + vlib_cli_output (vm, "No MPLS tunnels configured..."); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "%d", &mti)) - ; - else - break; + if (unformat (input, "%d", &mti)) + ; + else + break; } if (~0 == mti) { - pool_foreach (mt, mpls_tunnel_pool, - ({ - vlib_cli_output (vm, "[@%d] %U", - mt - mpls_tunnel_pool, - format_mpls_tunnel, mt); - })); + pool_foreach (mt, mpls_tunnel_pool, + ({ + vlib_cli_output (vm, "[@%d] %U", + mt - mpls_tunnel_pool, + format_mpls_tunnel, mt); + })); } else { - if (pool_is_free_index(mpls_tunnel_pool, mti)) - return clib_error_return (0, "Not atunnel index %d", mti); + if (pool_is_free_index(mpls_tunnel_pool, mti)) + return 
clib_error_return (0, "Not atunnel index %d", mti); - mt = pool_elt_at_index(mpls_tunnel_pool, mti); + mt = pool_elt_at_index(mpls_tunnel_pool, mti); - vlib_cli_output (vm, "[@%d] %U", - mt - mpls_tunnel_pool, - format_mpls_tunnel, mt); + vlib_cli_output (vm, "[@%d] %U", + mt - mpls_tunnel_pool, + format_mpls_tunnel, mt); } return 0; @@ -715,7 +1024,7 @@ show_mpls_tunnel_command_fn (vlib_main_t * vm, * @cliexstart{sh mpls tunnel 2} * [@2] mpls_tunnel2: sw_if_index:5 hw_if_index:5 * label-stack: - * 3, + * 3, * via: * index:26 locks:1 proto:ipv4 uPRF-list:26 len:1 itfs:[2, ] * index:26 pl-index:26 ipv4 weight=1 attached-nexthop: oper-flags:resolved, @@ -743,7 +1052,7 @@ mpls_tunnel_from_fib_node (fib_node_t *node) */ static fib_node_back_walk_rc_t mpls_tunnel_back_walk (fib_node_t *node, - fib_node_back_walk_ctx_t *ctx) + fib_node_back_walk_ctx_t *ctx) { mpls_tunnel_restack(mpls_tunnel_from_fib_node(node)); diff --git a/src/vnet/mpls/mpls_tunnel.h b/src/vnet/mpls/mpls_tunnel.h index ee56c0fc..0b55d0db 100644 --- a/src/vnet/mpls/mpls_tunnel.h +++ b/src/vnet/mpls/mpls_tunnel.h @@ -17,6 +17,31 @@ #define __MPLS_TUNNEL_H__ #include +#include + +typedef enum mpls_tunnel_attribute_t_ +{ + MPLS_TUNNEL_ATTRIBUTE_FIRST = 0, + /** + * @brief The tunnel has an underlying multicast LSP + */ + MPLS_TUNNEL_ATTRIBUTE_MCAST = MPLS_TUNNEL_ATTRIBUTE_FIRST, + MPLS_TUNNEL_ATTRIBUTE_LAST = MPLS_TUNNEL_ATTRIBUTE_MCAST, +} mpls_tunnel_attribute_t; + +#define MPLS_TUNNEL_ATTRIBUTES { \ + [MPLS_TUNNEL_ATTRIBUTE_MCAST] = "multicast", \ +} +#define FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(_item) \ + for (_item = MPLS_TUNNEL_ATTRIBUTE_FIRST; \ + _item < MPLS_TUNNEL_ATTRIBUTE_LAST; \ + _item++) + +typedef enum mpls_tunnel_flag_t_ { + MPLS_TUNNEL_FLAG_NONE = 0, + MPLS_TUNNEL_FLAG_MCAST = (1 << MPLS_TUNNEL_ATTRIBUTE_MCAST), +} __attribute__ ((packed)) mpls_tunnel_flags_t; + /** * @brief A uni-directional MPLS tunnel @@ -28,6 +53,11 @@ typedef struct mpls_tunnel_t_ */ fib_node_t mt_node; + /** + * @brief 
Tunnel flags + */ + mpls_tunnel_flags_t mt_flags; + /** * @brief If the tunnel is an L2 tunnel, this is the link type ETHERNET * adjacency @@ -50,9 +80,9 @@ typedef struct mpls_tunnel_t_ u32 mt_sibling_index; /** - * @brief The Label stack to apply to egress packets + * A vector of path extensions o hold the label stack for each path */ - mpls_label_t *mt_label_stack; + fib_path_ext_t *mt_path_exts; /** * @brief Flag to indicate the tunnel is only for L2 traffic, that is @@ -74,12 +104,27 @@ typedef struct mpls_tunnel_t_ /** * @brief Create a new MPLS tunnel + * @return the SW Interface index of the newly created tuneel */ -extern void vnet_mpls_tunnel_add (fib_route_path_t *rpath, - mpls_label_t *label_stack, - u8 l2_only, - u32 *sw_if_index); +extern u32 vnet_mpls_tunnel_create (u8 l2_only, + u8 is_multicast); +/** + * @brief Add a path to an MPLS tunnel + */ +extern void vnet_mpls_tunnel_path_add (u32 sw_if_index, + fib_route_path_t *rpath); + +/** + * @brief remove a path from a tunnel. + * @return the number of remaining paths. 0 implies the tunnel can be deleted + */ +extern int vnet_mpls_tunnel_path_remove (u32 sw_if_index, + fib_route_path_t *rpath); + +/** + * @brief Delete an MPLS tunnel + */ extern void vnet_mpls_tunnel_del (u32 sw_if_index); extern const mpls_tunnel_t *mpls_tunnel_get(u32 index); diff --git a/src/vnet/mpls/mpls_types.h b/src/vnet/mpls/mpls_types.h index d7c629df..b1075cdd 100644 --- a/src/vnet/mpls/mpls_types.h +++ b/src/vnet/mpls/mpls_types.h @@ -1,3 +1,17 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef __MPLS_TYPES_H__ #define __MPLS_TYPES_H__ @@ -36,4 +50,10 @@ (((_lbl) > MPLS_IETF_MIN_UNRES_LABEL) && \ ((_lbl) <= MPLS_IETF_MAX_UNRES_LABEL)) +/** + * The top bit of the index, which is the result of the MPLS lookup + * is used to determine if the DPO is a load-balance or a replicate + */ +#define MPLS_IS_REPLICATE 0x80000000 + #endif diff --git a/src/vnet/srp/interface.c b/src/vnet/srp/interface.c index d427cc3c..44e2b0d6 100644 --- a/src/vnet/srp/interface.c +++ b/src/vnet/srp/interface.c @@ -58,7 +58,7 @@ srp_build_rewrite (vnet_main_t * vnm, #define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break _ (IP4, IP4); _ (IP6, IP6); - _ (MPLS, MPLS_UNICAST); + _ (MPLS, MPLS); _ (ARP, ARP); #undef _ default: diff --git a/test/test_ip_mcast.py b/test/test_ip_mcast.py index 36d597a7..c1397d70 100644 --- a/test/test_ip_mcast.py +++ b/test/test_ip_mcast.py @@ -622,6 +622,7 @@ class TestIPMcast(VppTestCase): (MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT | MRouteItfFlags.MFIB_ITF_FLAG_NEGATE_SIGNAL)) + self.vapi.cli("clear trace") tx = self._mcast_connected_send_stream("232.1.1.1") signals = self.vapi.mfib_signal_dump() diff --git a/test/test_mpls.py b/test/test_mpls.py index fc832644..700b7091 100644 --- a/test/test_mpls.py +++ b/test/test_mpls.py @@ -5,7 +5,9 @@ import socket from framework import VppTestCase, VppTestRunner from vpp_ip_route import VppIpRoute, VppRoutePath, VppMplsRoute, \ - VppMplsIpBind + VppMplsIpBind, VppIpMRoute, VppMRoutePath, \ + MRouteItfFlags, MRouteEntryFlags +from vpp_mpls_tunnel_interface import 
VppMPLSTunnelInterface from scapy.packet import Raw from scapy.layers.l2 import Ether @@ -21,7 +23,7 @@ class TestMPLS(VppTestCase): super(TestMPLS, self).setUp() # create 2 pg interfaces - self.create_pg_interfaces(range(2)) + self.create_pg_interfaces(range(4)) # setup both interfaces # assign them different tables. @@ -53,10 +55,12 @@ class TestMPLS(VppTestCase): mpls_labels, mpls_ttl=255, ping=0, - ip_itf=None): + ip_itf=None, + dst_ip=None, + n=257): self.reset_packet_infos() pkts = [] - for i in range(0, 257): + for i in range(0, n): info = self.create_packet_info(src_if, src_if) payload = self.info_to_payload(info) p = Ether(dst=src_if.local_mac, src=src_if.remote_mac) @@ -67,9 +71,14 @@ class TestMPLS(VppTestCase): else: p = p / MPLS(label=mpls_labels[ii], ttl=mpls_ttl, s=0) if not ping: - p = (p / IP(src=src_if.local_ip4, dst=src_if.remote_ip4) / - UDP(sport=1234, dport=1234) / - Raw(payload)) + if not dst_ip: + p = (p / IP(src=src_if.local_ip4, dst=src_if.remote_ip4) / + UDP(sport=1234, dport=1234) / + Raw(payload)) + else: + p = (p / IP(src=src_if.local_ip4, dst=dst_ip) / + UDP(sport=1234, dport=1234) / + Raw(payload)) else: p = (p / IP(src=ip_itf.remote_ip4, dst=ip_itf.local_ip4) / @@ -254,6 +263,13 @@ class TestMPLS(VppTestCase): except: raise + def send_and_assert_no_replies(self, intf, pkts, remark): + intf.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + for i in self.pg_interfaces: + i.assert_nothing_captured(remark=remark) + def test_swap(self): """ MPLS label swap tests """ @@ -278,7 +294,7 @@ class TestMPLS(VppTestCase): self.pg_start() rx = self.pg0.get_capture() - self.verify_capture_labelled_ip4(self.pg0, rx, tx, [33]) + self.verify_capture_labelled(self.pg0, rx, tx, [33]) # # A simple MPLS xconnect - non-eos label in label out @@ -358,7 +374,7 @@ class TestMPLS(VppTestCase): self.pg_start() rx = self.pg0.get_capture() - self.verify_capture_labelled_ip4(self.pg0, rx, tx, [33, 44, 45]) + 
self.verify_capture_labelled(self.pg0, rx, tx, [33, 44, 45], num=2) # # A recursive non-EOS x-connect, which resolves through another @@ -576,25 +592,19 @@ class TestMPLS(VppTestCase): # # Create a tunnel with a single out label # - nh_addr = socket.inet_pton(socket.AF_INET, self.pg0.remote_ip4) - - reply = self.vapi.mpls_tunnel_add_del( - 0xffffffff, # don't know the if index yet - 1, # IPv4 next-hop - nh_addr, - self.pg0.sw_if_index, - 0, # next-hop-table-id - 1, # next-hop-weight - 2, # num-out-labels, - [44, 46]) - self.vapi.sw_interface_set_flags(reply.sw_if_index, admin_up_down=1) + mpls_tun = VppMPLSTunnelInterface(self, + [VppRoutePath(self.pg0.remote_ip4, + self.pg0.sw_if_index, + labels=[44, 46])]) + mpls_tun.add_vpp_config() + mpls_tun.admin_up() # # add an unlabelled route through the new tunnel # route_10_0_0_3 = VppIpRoute(self, "10.0.0.3", 32, [VppRoutePath("0.0.0.0", - reply.sw_if_index)]) + mpls_tun._sw_if_index)]) route_10_0_0_3.add_vpp_config() self.vapi.cli("clear trace") @@ -738,6 +748,229 @@ class TestMPLS(VppTestCase): route_35_eos.remove_vpp_config() route_34_eos.remove_vpp_config() + def test_interface_rx(self): + """ MPLS Interface Receive """ + + # + # Add a non-recursive route that will forward the traffic + # post-interface-rx + # + route_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32, + table_id=1, + paths=[VppRoutePath(self.pg1.remote_ip4, + self.pg1.sw_if_index)]) + route_10_0_0_1.add_vpp_config() + + # + # An interface receive label that maps traffic to RX on interface + # pg1 + # by injecting the packet in on pg0, which is in table 0 + # doing an interface-rx on pg1 and matching a route in table 1 + # if the packet egresses, then we must have swapped to pg1 + # so as to have matched the route in table 1 + # + route_34_eos = VppMplsRoute(self, 34, 1, + [VppRoutePath("0.0.0.0", + self.pg1.sw_if_index, + is_interface_rx=1)]) + route_34_eos.add_vpp_config() + + # + # ping an interface in the default table + # PG0 is in the default table + 
# + self.vapi.cli("clear trace") + tx = self.create_stream_labelled_ip4(self.pg0, [34], n=257, + dst_ip="10.0.0.1") + self.pg0.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(257) + self.verify_capture_ip4(self.pg1, rx, tx) + + def test_mcast_mid_point(self): + """ MPLS Multicast Mid Point """ + + # + # Add a non-recursive route that will forward the traffic + # post-interface-rx + # + route_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32, + table_id=1, + paths=[VppRoutePath(self.pg1.remote_ip4, + self.pg1.sw_if_index)]) + route_10_0_0_1.add_vpp_config() + + # + # Add a mcast entry that replicate to pg2 and pg3 + # and replicate to a interface-rx (like a bud node would) + # + route_3400_eos = VppMplsRoute(self, 3400, 1, + [VppRoutePath(self.pg2.remote_ip4, + self.pg2.sw_if_index, + labels=[3401]), + VppRoutePath(self.pg3.remote_ip4, + self.pg3.sw_if_index, + labels=[3402]), + VppRoutePath("0.0.0.0", + self.pg1.sw_if_index, + is_interface_rx=1)], + is_multicast=1) + route_3400_eos.add_vpp_config() + + # + # ping an interface in the default table + # PG0 is in the default table + # + self.vapi.cli("clear trace") + tx = self.create_stream_labelled_ip4(self.pg0, [3400], n=257, + dst_ip="10.0.0.1") + self.pg0.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(257) + self.verify_capture_ip4(self.pg1, rx, tx) + + rx = self.pg2.get_capture(257) + self.verify_capture_labelled(self.pg2, rx, tx, [3401]) + rx = self.pg3.get_capture(257) + self.verify_capture_labelled(self.pg3, rx, tx, [3402]) + + def test_mcast_head(self): + """ MPLS Multicast Head-end """ + + # + # Create a multicast tunnel with two replications + # + mpls_tun = VppMPLSTunnelInterface(self, + [VppRoutePath(self.pg2.remote_ip4, + self.pg2.sw_if_index, + labels=[42]), + VppRoutePath(self.pg3.remote_ip4, + self.pg3.sw_if_index, + labels=[43])], + is_multicast=1) + mpls_tun.add_vpp_config() + 
mpls_tun.admin_up() + + # + # add an unlabelled route through the new tunnel + # + route_10_0_0_3 = VppIpRoute(self, "10.0.0.3", 32, + [VppRoutePath("0.0.0.0", + mpls_tun._sw_if_index)]) + route_10_0_0_3.add_vpp_config() + + self.vapi.cli("clear trace") + tx = self.create_stream_ip4(self.pg0, "10.0.0.3") + self.pg0.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg2.get_capture(257) + self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [42]) + rx = self.pg3.get_capture(257) + self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [43]) + + # + # An an IP multicast route via the tunnel + # A (*,G). + # one accepting interface, pg0, 1 forwarding interface via the tunnel + # + route_232_1_1_1 = VppIpMRoute( + self, + "0.0.0.0", + "232.1.1.1", 32, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + [VppMRoutePath(self.pg0.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT), + VppMRoutePath(mpls_tun._sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)]) + route_232_1_1_1.add_vpp_config() + + self.vapi.cli("clear trace") + tx = self.create_stream_ip4(self.pg0, "232.1.1.1") + self.pg0.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg2.get_capture(257) + self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [42]) + rx = self.pg3.get_capture(257) + self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [43]) + + def test_mcast_tail(self): + """ MPLS Multicast Tail """ + + # + # Add a multicast route that will forward the traffic + # post-disposition + # + route_232_1_1_1 = VppIpMRoute( + self, + "0.0.0.0", + "232.1.1.1", 32, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + table_id=1, + paths=[VppMRoutePath(self.pg1.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)]) + route_232_1_1_1.add_vpp_config() + + # + # An interface receive label that maps traffic to RX on interface + # pg1 + # by injecting the packet in on pg0, which is in table 0 + # doing an rpf-id and matching a route in table 1 + # if the 
packet egresses, then we must have matched the route in + # table 1 + # + route_34_eos = VppMplsRoute(self, 34, 1, + [VppRoutePath("0.0.0.0", + self.pg1.sw_if_index, + nh_table_id=1, + rpf_id=55)], + is_multicast=1) + + route_34_eos.add_vpp_config() + + # + # Drop due to interface lookup miss + # + self.vapi.cli("clear trace") + tx = self.create_stream_labelled_ip4(self.pg0, [34], + dst_ip="232.1.1.1", n=1) + self.send_and_assert_no_replies(self.pg0, tx, "RPF-ID drop none") + + # + # set the RPF-ID of the enrtry to match the input packet's + # + route_232_1_1_1.update_rpf_id(55) + + self.vapi.cli("clear trace") + tx = self.create_stream_labelled_ip4(self.pg0, [34], + dst_ip="232.1.1.1", n=257) + self.pg0.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(257) + self.verify_capture_ip4(self.pg1, rx, tx) + + # + # set the RPF-ID of the enrtry to not match the input packet's + # + route_232_1_1_1.update_rpf_id(56) + tx = self.create_stream_labelled_ip4(self.pg0, [34], + dst_ip="232.1.1.1") + self.send_and_assert_no_replies(self.pg0, tx, "RPF-ID drop 56") + class TestMPLSDisabled(VppTestCase): """ MPLS disabled """ diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py index faf5f801..d6146f28 100644 --- a/test/vpp_ip_route.py +++ b/test/vpp_ip_route.py @@ -55,15 +55,24 @@ class VppRoutePath(object): nh_table_id=0, labels=[], nh_via_label=MPLS_LABEL_INVALID, - is_ip6=0): + is_ip6=0, + rpf_id=0, + is_interface_rx=0): self.nh_itf = nh_sw_if_index self.nh_table_id = nh_table_id self.nh_via_label = nh_via_label self.nh_labels = labels + self.weight = 1 + self.rpf_id = rpf_id if is_ip6: self.nh_addr = inet_pton(AF_INET6, nh_addr) else: self.nh_addr = inet_pton(AF_INET, nh_addr) + self.is_interface_rx = is_interface_rx + self.is_rpf_id = 0 + if rpf_id != 0: + self.is_rpf_id = 1 + self.nh_itf = rpf_id class VppMRoutePath(VppRoutePath): @@ -176,13 +185,15 @@ class VppIpMRoute(VppObject): """ def __init__(self, test, 
src_addr, grp_addr, - grp_addr_len, e_flags, paths, table_id=0, is_ip6=0): + grp_addr_len, e_flags, paths, table_id=0, + rpf_id=0, is_ip6=0): self._test = test self.paths = paths self.grp_addr_len = grp_addr_len self.table_id = table_id self.e_flags = e_flags self.is_ip6 = is_ip6 + self.rpf_id = rpf_id if is_ip6: self.grp_addr = inet_pton(AF_INET6, grp_addr) @@ -199,6 +210,7 @@ class VppIpMRoute(VppObject): self.e_flags, path.nh_itf, path.nh_i_flags, + rpf_id=self.rpf_id, table_id=self.table_id, is_ipv6=self.is_ip6) self._test.registry.register(self, self._test.logger) @@ -226,6 +238,18 @@ class VppIpMRoute(VppObject): table_id=self.table_id, is_ipv6=self.is_ip6) + def update_rpf_id(self, rpf_id): + self.rpf_id = rpf_id + self._test.vapi.ip_mroute_add_del(self.src_addr, + self.grp_addr, + self.grp_addr_len, + self.e_flags, + 0xffffffff, + 0, + rpf_id=self.rpf_id, + table_id=self.table_id, + is_ipv6=self.is_ip6) + def update_path_flags(self, itf, flags): for path in self.paths: if path.nh_itf == itf: @@ -342,14 +366,17 @@ class VppMplsRoute(VppObject): MPLS Route/LSP """ - def __init__(self, test, local_label, eos_bit, paths, table_id=0): + def __init__(self, test, local_label, eos_bit, paths, table_id=0, + is_multicast=0): self._test = test self.paths = paths self.local_label = local_label self.eos_bit = eos_bit self.table_id = table_id + self.is_multicast = is_multicast def add_vpp_config(self): + is_multipath = len(self.paths) > 1 for path in self.paths: self._test.vapi.mpls_route_add_del( self.local_label, @@ -357,7 +384,11 @@ class VppMplsRoute(VppObject): 1, path.nh_addr, path.nh_itf, + is_multicast=self.is_multicast, + is_multipath=is_multipath, table_id=self.table_id, + is_interface_rx=path.is_interface_rx, + is_rpf_id=path.is_rpf_id, next_hop_out_label_stack=path.nh_labels, next_hop_n_out_labels=len( path.nh_labels), @@ -372,6 +403,7 @@ class VppMplsRoute(VppObject): 1, path.nh_addr, path.nh_itf, + is_rpf_id=path.is_rpf_id, table_id=self.table_id, is_add=0) 
diff --git a/test/vpp_mpls_tunnel_interface.py b/test/vpp_mpls_tunnel_interface.py new file mode 100644 index 00000000..f2001574 --- /dev/null +++ b/test/vpp_mpls_tunnel_interface.py @@ -0,0 +1,46 @@ + +from vpp_interface import VppInterface +from vpp_ip_route import VppRoutePath +import socket + + +class VppMPLSTunnelInterface(VppInterface): + """ + VPP MPLS Tunnel interface + """ + + def __init__(self, test, paths, is_multicast=0): + """ Create MPLS Tunnel interface """ + self._sw_if_index = 0 + super(VppMPLSTunnelInterface, self).__init__(test) + self._test = test + self.t_paths = paths + self.is_multicast = is_multicast + + def add_vpp_config(self): + self._sw_if_index = 0xffffffff + for path in self.t_paths: + reply = self.test.vapi.mpls_tunnel_add_del( + self._sw_if_index, + 1, # IPv4 next-hop + path.nh_addr, + path.nh_itf, + path.nh_table_id, + path.weight, + next_hop_out_label_stack=path.nh_labels, + next_hop_n_out_labels=len(path.nh_labels), + is_multicast=self.is_multicast) + self._sw_if_index = reply.sw_if_index + + def remove_vpp_config(self): + for path in self.t_paths: + reply = self.test.vapi.mpls_tunnel_add_del( + self.sw_if_index, + 1, # IPv4 next-hop + path.nh_addr, + path.nh_itf, + path.nh_table_id, + path.weight, + next_hop_out_label_stack=path.nh_labels, + next_hop_n_out_labels=len(path.nh_labels), + is_add=0) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index e8025dff..ceb684b7 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -849,6 +849,9 @@ class VppPapiProvider(object): create_vrf_if_needed=0, is_resolve_host=0, is_resolve_attached=0, + is_interface_rx=0, + is_rpf_id=0, + is_multicast=0, is_add=1, is_drop=0, is_multipath=0, @@ -872,6 +875,7 @@ class VppPapiProvider(object): :param is_local: (Default value = 0) :param is_classify: (Default value = 0) :param is_multipath: (Default value = 0) + :param is_multicast: (Default value = 0) :param is_resolve_host: (Default value = 0) :param 
is_resolve_attached: (Default value = 0) :param not_last: (Default value = 0) @@ -889,8 +893,11 @@ class VppPapiProvider(object): 'mr_is_add': is_add, 'mr_is_classify': is_classify, 'mr_is_multipath': is_multipath, + 'mr_is_multicast': is_multicast, 'mr_is_resolve_host': is_resolve_host, 'mr_is_resolve_attached': is_resolve_attached, + 'mr_is_interface_rx': is_interface_rx, + 'mr_is_rpf_id': is_rpf_id, 'mr_next_hop_proto_is_ip4': next_hop_proto_is_ip4, 'mr_next_hop_weight': next_hop_weight, 'mr_next_hop': next_hop_address, @@ -936,7 +943,8 @@ class VppPapiProvider(object): next_hop_via_label=MPLS_LABEL_INVALID, create_vrf_if_needed=0, is_add=1, - l2_only=0): + l2_only=0, + is_multicast=0): """ :param dst_address_length: @@ -956,8 +964,8 @@ class VppPapiProvider(object): :param is_multipath: (Default value = 0) :param is_resolve_host: (Default value = 0) :param is_resolve_attached: (Default value = 0) - :param not_last: (Default value = 0) :param next_hop_weight: (Default value = 1) + :param is_multicast: (Default value = 0) """ return self.api( @@ -965,6 +973,7 @@ class VppPapiProvider(object): {'mt_sw_if_index': tun_sw_if_index, 'mt_is_add': is_add, 'mt_l2_only': l2_only, + 'mt_is_multicast': is_multicast, 'mt_next_hop_proto_is_ip4': next_hop_proto_is_ip4, 'mt_next_hop_weight': next_hop_weight, 'mt_next_hop': next_hop_address, @@ -1469,6 +1478,7 @@ class VppPapiProvider(object): e_flags, next_hop_sw_if_index, i_flags, + rpf_id=0, table_id=0, create_vrf_if_needed=0, is_add=1, @@ -1481,6 +1491,8 @@ class VppPapiProvider(object): {'next_hop_sw_if_index': next_hop_sw_if_index, 'entry_flags': e_flags, 'itf_flags': i_flags, + 'table_id': table_id, + 'rpf_id': rpf_id, 'create_vrf_if_needed': create_vrf_if_needed, 'is_add': is_add, 'is_ipv6': is_ipv6, -- cgit 1.2.3-korg From a0558307187ef2317f31e3e876a1a5e1faa2541c Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Thu, 13 Apr 2017 00:44:52 -0700 Subject: Remove unsed parameter from fib_table_entry_special_add() (only used 
in FIB tests). The DPO was incorrectly initialised with FIB_PROTO_MAX Change-Id: I962df9e162e4dfb6837a5ce79ea795d5ff2d7315 Signed-off-by: Neale Ranns --- src/plugins/ila/ila.c | 3 +-- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c | 4 +--- src/plugins/lb/lb.c | 3 +-- src/vnet/dhcp/client.c | 3 +-- src/vnet/dhcp/dhcp4_proxy_node.c | 3 +-- src/vnet/fib/fib_bfd.c | 3 +-- src/vnet/fib/fib_path.c | 3 +-- src/vnet/fib/fib_table.c | 15 ++------------- src/vnet/fib/fib_table.h | 14 ++++++-------- src/vnet/fib/fib_test.c | 16 ++++++++-------- src/vnet/fib/ip4_fib.c | 3 +-- src/vnet/fib/ip6_fib.c | 6 ++---- src/vnet/gre/interface.c | 3 +-- src/vnet/ip/ip4_forward.c | 6 ++---- src/vnet/ip/ip4_source_check.c | 2 +- src/vnet/lisp-gpe/lisp_gpe_tunnel.c | 3 +-- src/vnet/map/map.c | 4 +--- src/vnet/vxlan/vxlan.c | 2 +- 18 files changed, 33 insertions(+), 63 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/plugins/ila/ila.c b/src/plugins/ila/ila.c index edbf3017..fd56043e 100644 --- a/src/plugins/ila/ila.c +++ b/src/plugins/ila/ila.c @@ -736,8 +736,7 @@ ila_add_del_entry (ila_add_del_entry_args_t * args) fib_table_entry_special_add(0, &next_hop, FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_NONE); e->next_hop_child_index = fib_entry_child_add(e->next_hop_fib_entry_index, ila_fib_node_type, diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c index 88d7d205..cfc550cd 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c @@ -336,9 +336,7 @@ vxlan_gpe_enable_disable_ioam_for_dest (vlib_main_t * vm, t1->fib_entry_index = fib_table_entry_special_add (outer_fib_index, &tun_dst_pfx, - FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, - ADJ_INDEX_INVALID); + FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE); t1->sibling_index = fib_entry_child_add (t1->fib_entry_index, hm->fib_entry_type, t1 - hm->dst_tunnels); diff --git a/src/plugins/lb/lb.c 
b/src/plugins/lb/lb.c index addc2a42..cc3f8532 100644 --- a/src/plugins/lb/lb.c +++ b/src/plugins/lb/lb.c @@ -510,8 +510,7 @@ next: fib_table_entry_special_add(0, &nh, FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_NONE); as->next_hop_child_index = fib_entry_child_add(as->next_hop_fib_entry_index, lbm->fib_node_type, diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c index 29749a33..7c3f7f6a 100644 --- a/src/vnet/dhcp/client.c +++ b/src/vnet/dhcp/client.c @@ -781,8 +781,7 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a) c->sw_if_index), &all_1s, FIB_SOURCE_DHCP, - FIB_ENTRY_FLAG_LOCAL, - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_LOCAL); /* * enable the interface to RX IPv4 packets diff --git a/src/vnet/dhcp/dhcp4_proxy_node.c b/src/vnet/dhcp/dhcp4_proxy_node.c index 1c84881a..26e1e65c 100644 --- a/src/vnet/dhcp/dhcp4_proxy_node.c +++ b/src/vnet/dhcp/dhcp4_proxy_node.c @@ -807,8 +807,7 @@ dhcp4_proxy_set_server (ip46_address_t *addr, fib_table_entry_special_add(rx_fib_index, &all_1s, FIB_SOURCE_DHCP, - FIB_ENTRY_FLAG_LOCAL, - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_LOCAL); fib_table_lock (rx_fib_index, FIB_PROTOCOL_IP4); } } diff --git a/src/vnet/fib/fib_bfd.c b/src/vnet/fib/fib_bfd.c index e5affb8d..734ee8cc 100644 --- a/src/vnet/fib/fib_bfd.c +++ b/src/vnet/fib/fib_bfd.c @@ -109,8 +109,7 @@ fib_bfd_notify (bfd_listen_event_e event, fei = fib_table_entry_special_add(key->fib_index, &pfx, FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_NONE); fib_entry_lock(fei); fed = fib_entry_delegate_find_or_add(fib_entry_get(fei), diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index cd7d9278..70c87905 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -1621,8 +1621,7 @@ fib_path_resolve (fib_node_index_t path_index) fei = fib_table_entry_special_add(path->recursive.fp_tbl_id, &pfx, FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_NONE); path = 
fib_path_get(path_index); path->fp_via_fib = fei; diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index b31f35e3..0938ce9b 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -371,23 +371,12 @@ fib_node_index_t fib_table_entry_special_add (u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, - fib_entry_flag_t flags, - adj_index_t adj_index) + fib_entry_flag_t flags) { fib_node_index_t fib_entry_index; dpo_id_t tmp_dpo = DPO_INVALID; - if (ADJ_INDEX_INVALID != adj_index) - { - dpo_set(&tmp_dpo, - DPO_ADJACENCY, - FIB_PROTOCOL_MAX, - adj_index); - } - else - { - dpo_copy(&tmp_dpo, drop_dpo_get(fib_proto_to_dpo(prefix->fp_proto))); - } + dpo_copy(&tmp_dpo, drop_dpo_get(fib_proto_to_dpo(prefix->fp_proto))); fib_entry_index = fib_table_entry_special_dpo_add(fib_index, prefix, source, flags, &tmp_dpo); diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index b310aea6..f24d28b7 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -126,14 +126,16 @@ extern fib_node_index_t fib_table_get_less_specific(u32 fib_index, /** * @brief - * Add a 'special' entry to the FIB that links to the adj passed + * Add a 'special' entry to the FIB. * A special entry is an entry that the FIB is not expect to resolve * via the usual mechanisms (i.e. recurisve or neighbour adj DB lookup). - * Instead the client/source provides the adj to link to. + * Instead the entry will link to a DPO valid for the source and/or the flags. * This add is reference counting per-source. So n 'removes' are required * for n 'adds', if the entry is no longer required. + * If the source needs to provide non-default forwarding use: + * fib_table_entry_special_dpo_add() * - * @param fib_index + * @param fib_index * The index of the FIB * * @param prefix @@ -145,17 +147,13 @@ extern fib_node_index_t fib_table_get_less_specific(u32 fib_index, * @param flags * Flags for the entry. * - * @param adj_index - * The adjacency to link to. 
- * - * @return * the index of the fib_entry_t that is created (or exists already). */ extern fib_node_index_t fib_table_entry_special_add(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, - fib_entry_flag_t flags, - adj_index_t adj_index); + fib_entry_flag_t flags); /** * @brief diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index e4a8a70e..c58dc5a1 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -1378,8 +1378,8 @@ fib_test_v4 (void) fib_entry_pool_size()); /* - * An EXCLUSIVE route; one where the user (me) provides the exclusive - * adjacency through which the route will resovle + * A special route; one where the user (me) provides the + * adjacency through which the route will resolve by setting the flags */ fib_prefix_t ex_pfx = { .fp_len = 32, @@ -1393,11 +1393,12 @@ fib_test_v4 (void) fib_table_entry_special_add(fib_index, &ex_pfx, FIB_SOURCE_SPECIAL, - FIB_ENTRY_FLAG_EXCLUSIVE, - locked_ai); + FIB_ENTRY_FLAG_LOCAL); fei = fib_table_lookup_exact_match(fib_index, &ex_pfx); - FIB_TEST((ai == fib_entry_get_adj(fei)), - "Exclusive route links to user adj"); + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST((DPO_RECEIVE == dpo->dpoi_type), + "local interface adj is local"); fib_table_entry_special_remove(fib_index, &ex_pfx, @@ -3675,8 +3676,7 @@ fib_test_v4 (void) fei = fib_table_entry_special_add(fib_index, &pfx_4_1_1_1_s_32, FIB_SOURCE_URPF_EXEMPT, - FIB_ENTRY_FLAG_DROP, - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_DROP); dpo = fib_entry_contribute_ip_forwarding(fei); FIB_TEST(load_balance_is_drop(dpo), "uRPF exempt 4.1.1.1/32 DROP"); diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index b03186e8..8e92d851 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -149,8 +149,7 @@ ip4_create_fib_with_table_id (u32 table_id) fib_table_entry_special_add(fib_table->ft_index, &prefix, ip4_specials[ii].ift_source, - 
ip4_specials[ii].ift_flag, - ADJ_INDEX_INVALID); + ip4_specials[ii].ift_flag); } return (fib_table->ft_index); diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 00297140..d00f4c55 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -35,8 +35,7 @@ vnet_ip6_fib_init (u32 fib_index) fib_table_entry_special_add(fib_index, &pfx, FIB_SOURCE_DEFAULT_ROUTE, - FIB_ENTRY_FLAG_DROP, - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_DROP); /* * all link local for us @@ -47,8 +46,7 @@ vnet_ip6_fib_init (u32 fib_index) fib_table_entry_special_add(fib_index, &pfx, FIB_SOURCE_SPECIAL, - FIB_ENTRY_FLAG_LOCAL, - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_LOCAL); } static u32 diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c index 91a3899f..d574e596 100644 --- a/src/vnet/gre/interface.c +++ b/src/vnet/gre/interface.c @@ -427,8 +427,7 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, fib_table_entry_special_add(outer_fib_index, &t->tunnel_dst, FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_NONE); t->sibling_index = fib_entry_child_add(t->fib_entry_index, FIB_NODE_TYPE_GRE_TUNNEL, diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index e42b3637..0f562037 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -732,16 +732,14 @@ ip4_add_interface_routes (u32 sw_if_index, &net_pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_DROP | - FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT), - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len]; if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32) fib_table_entry_special_add(fib_index, &net_pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_DROP | - FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT), - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); } else if (pfx.fp_len == 31) { diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c index 63b7594d..17a1cb1b 100644 --- 
a/src/vnet/ip/ip4_source_check.c +++ b/src/vnet/ip/ip4_source_check.c @@ -509,7 +509,7 @@ ip_source_check_accept (vlib_main_t * vm, fib_table_entry_special_add (fib_index, &pfx, FIB_SOURCE_URPF_EXEMPT, - FIB_ENTRY_FLAG_DROP, ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_DROP); } else { diff --git a/src/vnet/lisp-gpe/lisp_gpe_tunnel.c b/src/vnet/lisp-gpe/lisp_gpe_tunnel.c index 444bfe14..dd6c6fdd 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_tunnel.c +++ b/src/vnet/lisp-gpe/lisp_gpe_tunnel.c @@ -179,8 +179,7 @@ lisp_gpe_tunnel_find_or_create_and_lock (const locator_pair_t * pair, lgt->fib_entry_index = fib_table_entry_special_add (rloc_fib_index, &pfx, FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, - ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_NONE); hash_set_mem (lisp_gpe_tunnel_db, &lgt->key, (lgt - lisp_gpe_tunnel_pool)); diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c index 811a0abc..6a707df1 100644 --- a/src/vnet/map/map.c +++ b/src/vnet/map/map.c @@ -518,9 +518,7 @@ map_fib_resolve (map_main_pre_resolved_t * pr, pr->fei = fib_table_entry_special_add (0, // default fib &pfx, - FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, - ADJ_INDEX_INVALID); + FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE); pr->sibling = fib_entry_child_add (pr->fei, FIB_NODE_TYPE_MAP_E, proto); map_stack (pr); } diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c index 61cb13c9..1b3df2a8 100644 --- a/src/vnet/vxlan/vxlan.c +++ b/src/vnet/vxlan/vxlan.c @@ -486,7 +486,7 @@ int vnet_vxlan_add_del_tunnel vtep_addr_ref(&t->src); t->fib_entry_index = fib_table_entry_special_add (t->encap_fib_index, &tun_dst_pfx, FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID); + FIB_ENTRY_FLAG_NONE); t->sibling_index = fib_entry_child_add (t->fib_entry_index, FIB_NODE_TYPE_VXLAN_TUNNEL, t - vxm->tunnels); vxlan_tunnel_restack_dpo(t); -- cgit 1.2.3-korg From 11b8dbf78af49d270a0e72abe7dea73eec30d85f Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 24 Apr 2017 10:46:54 -0400 Subject: "autoreply" flag: autogenerate standard xxx_reply_t 
messages Change-Id: I72298aaae7d172082ece3a8edea4217c11b28d79 Signed-off-by: Dave Barach --- src/examples/sample-plugin/sample/sample.api | 10 +- src/plugins/acl/acl.api | 60 +--- src/plugins/dpdk/api/dpdk.api | 35 +- src/plugins/flowperpkt/flowperpkt.api | 23 +- .../export-vxlan-gpe/vxlan_gpe_ioam_export.api | 10 +- src/plugins/ioam/export/ioam_export.api | 10 +- src/plugins/ioam/ip6/ioam_cache.api | 10 +- src/plugins/ioam/lib-pot/pot.api | 34 +- src/plugins/ioam/lib-trace/trace.api | 26 +- src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api | 82 +---- src/plugins/lb/lb.api | 21 +- src/plugins/memif/memif.api | 12 +- src/plugins/snat/snat.api | 88 +---- src/tools/vppapigen/gram.y | 3 +- src/tools/vppapigen/lex.c | 57 +++- src/tools/vppapigen/lex.h | 1 + src/tools/vppapigen/node.c | 5 + src/tools/vppapigen/node.h | 2 + src/vlibmemory/memclnt.api | 7 +- src/vlibmemory/memory_vlib.c | 8 +- src/vnet/bfd/bfd.api | 132 +------- src/vnet/classify/classify.api | 37 +-- src/vnet/cop/cop.api | 28 +- src/vnet/devices/af_packet/af_packet.api | 12 +- src/vnet/devices/netmap/netmap.api | 24 +- src/vnet/devices/virtio/vhost_user.api | 24 +- src/vnet/dhcp/dhcp.api | 38 +-- src/vnet/flow/flow.api | 32 +- src/vnet/interface.api | 108 +----- src/vnet/ip/ip.api | 108 +----- src/vnet/ipsec/ipsec.api | 224 ++----------- src/vnet/l2/l2.api | 96 +----- src/vnet/l2tp/l2tp.api | 28 +- src/vnet/lisp-cp/lisp.api | 164 +-------- src/vnet/lisp-cp/one.api | 185 +---------- src/vnet/lisp-gpe/lisp_gpe.api | 48 +-- src/vnet/map/map.api | 22 +- src/vnet/mpls/mpls.api | 26 +- src/vnet/session/session.api | 68 +--- src/vnet/span/span.api | 10 +- src/vnet/sr/sr.api | 60 +--- src/vnet/unix/tap.api | 12 +- src/vnet/vxlan/vxlan.api | 12 +- src/vpp/api/vpe.api | 367 ++------------------- 44 files changed, 271 insertions(+), 2098 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/examples/sample-plugin/sample/sample.api b/src/examples/sample-plugin/sample/sample.api index f99cdb38..d565c0b1 100644 
--- a/src/examples/sample-plugin/sample/sample.api +++ b/src/examples/sample-plugin/sample/sample.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define sample_macswap_enable_disable { +autoreply define sample_macswap_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -29,11 +29,3 @@ define sample_macswap_enable_disable { /* Interface handle */ u32 sw_if_index; }; - -define sample_macswap_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api index d981338d..3b334113 100644 --- a/src/plugins/acl/acl.api +++ b/src/plugins/acl/acl.api @@ -161,24 +161,13 @@ define acl_add_replace_reply @param acl_index - ACL index to delete */ -manual_print define acl_del +autoreply manual_print define acl_del { u32 client_index; u32 context; u32 acl_index; }; -/** \brief Reply to delete the ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define acl_del_reply -{ - u32 context; - i32 retval; -}; - /* acl_interface_add_del(_reply) to be deprecated in lieu of acl_interface_set_acl_list */ /** \brief Use acl_interface_set_acl_list instead Append/remove an ACL index to/from the list of ACLs checked for an interface @@ -190,7 +179,7 @@ define acl_del_reply @param acl_index - index of ACL for the operation */ -manual_print define acl_interface_add_del +autoreply manual_print define acl_interface_add_del { u32 client_index; u32 context; @@ -204,17 +193,6 @@ manual_print define acl_interface_add_del u32 acl_index; }; -/** \brief Reply to alter the ACL list - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define acl_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set the vector of input/output ACLs checked for an interface @param client_index - 
opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -224,7 +202,7 @@ define acl_interface_add_del_reply @param acls - vector of ACL indices */ -manual_print define acl_interface_set_acl_list +autoreply manual_print define acl_interface_set_acl_list { u32 client_index; u32 context; @@ -239,12 +217,6 @@ manual_print define acl_interface_set_acl_list @param retval 0 - no error */ -define acl_interface_set_acl_list_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump the specific ACL contents or all of the ACLs' contents @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -341,24 +313,13 @@ define macip_acl_add_reply @param acl_index - MACIP ACL index to delete */ -manual_print define macip_acl_del +autoreply manual_print define macip_acl_del { u32 client_index; u32 context; u32 acl_index; }; -/** \brief Reply to delete the MACIP ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define macip_acl_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add or delete a MACIP ACL to/from interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -367,7 +328,7 @@ define macip_acl_del_reply @param acl_index - MACIP ACL index */ -manual_print define macip_acl_interface_add_del +autoreply manual_print define macip_acl_interface_add_del { u32 client_index; u32 context; @@ -377,17 +338,6 @@ manual_print define macip_acl_interface_add_del u32 acl_index; }; -/** \brief Reply to apply/unapply the MACIP ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define macip_acl_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump one or all defined MACIP ACLs @param client_index - opaque cookie to identify the sender @param context - sender context, to 
match reply w/ request diff --git a/src/plugins/dpdk/api/dpdk.api b/src/plugins/dpdk/api/dpdk.api index 21215d45..d43f8a36 100644 --- a/src/plugins/dpdk/api/dpdk.api +++ b/src/plugins/dpdk/api/dpdk.api @@ -21,7 +21,7 @@ @param pipe - pipe ID within its subport @param profile - pipe profile ID */ -define sw_interface_set_dpdk_hqos_pipe { +autoreply define sw_interface_set_dpdk_hqos_pipe { u32 client_index; u32 context; u32 sw_if_index; @@ -30,15 +30,6 @@ define sw_interface_set_dpdk_hqos_pipe { u32 profile; }; -/** \brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; -}; - /** \brief DPDK interface HQoS subport parameters set request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -49,7 +40,7 @@ define sw_interface_set_dpdk_hqos_pipe_reply { @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) @param tc_period - enforcement period for rates (measured in milliseconds) */ -define sw_interface_set_dpdk_hqos_subport { +autoreply define sw_interface_set_dpdk_hqos_subport { u32 client_index; u32 context; u32 sw_if_index; @@ -60,15 +51,6 @@ define sw_interface_set_dpdk_hqos_subport { u32 tc_period; }; -/** \brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; -}; - /** \brief DPDK interface HQoS tctbl entry set request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -77,7 +59,7 @@ define sw_interface_set_dpdk_hqos_subport_reply { @param tc - traffic class (0 .. 3) @param queue - traffic class queue (0 .. 
3) */ -define sw_interface_set_dpdk_hqos_tctbl { +autoreply define sw_interface_set_dpdk_hqos_tctbl { u32 client_index; u32 context; u32 sw_if_index; @@ -86,18 +68,9 @@ define sw_interface_set_dpdk_hqos_tctbl { u32 queue; }; -/** \brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/plugins/flowperpkt/flowperpkt.api b/src/plugins/flowperpkt/flowperpkt.api index 1cf62c54..3ff92dca 100644 --- a/src/plugins/flowperpkt/flowperpkt.api +++ b/src/plugins/flowperpkt/flowperpkt.api @@ -12,7 +12,7 @@ @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param sw_if_index - index of the interface */ -manual_print define flowperpkt_tx_interface_add_del +autoreply manual_print define flowperpkt_tx_interface_add_del { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -28,20 +28,7 @@ manual_print define flowperpkt_tx_interface_add_del u32 sw_if_index; }; -/** \brief Reply to enable/disable per-packet IPFIX recording messages - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define flowperpkt_tx_interface_add_del_reply -{ - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; - -define flowperpkt_params +autoreply define flowperpkt_params { u32 client_index; u32 context; @@ -51,9 +38,3 @@ define flowperpkt_params u32 active_timer; /* ~0 is off, 0 is default */ u32 passive_timer; /* ~0 is off, 0 is default */ }; - -define flowperpkt_params_reply -{ - u32 context; - i32 retval; -}; diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api index 7b17c3f7..caa97e6e 100644 --- 
a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define vxlan_gpe_ioam_export_enable_disable { +autoreply define vxlan_gpe_ioam_export_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -32,11 +32,3 @@ define vxlan_gpe_ioam_export_enable_disable { /* Src ip address */ }; - -define vxlan_gpe_ioam_export_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; \ No newline at end of file diff --git a/src/plugins/ioam/export/ioam_export.api b/src/plugins/ioam/export/ioam_export.api index f22d9fc8..bb830561 100644 --- a/src/plugins/ioam/export/ioam_export.api +++ b/src/plugins/ioam/export/ioam_export.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define ioam_export_ip6_enable_disable { +autoreply define ioam_export_ip6_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -32,11 +32,3 @@ define ioam_export_ip6_enable_disable { /* Src ip address */ }; - -define ioam_export_ip6_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/ioam/ip6/ioam_cache.api b/src/plugins/ioam/ip6/ioam_cache.api index de50d57d..dd9c0186 100644 --- a/src/plugins/ioam/ip6/ioam_cache.api +++ b/src/plugins/ioam/ip6/ioam_cache.api @@ -16,7 +16,7 @@ /* API to control ioam caching */ -define ioam_cache_ip6_enable_disable { +autoreply define ioam_cache_ip6_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -27,11 +27,3 @@ define ioam_cache_ip6_enable_disable { u8 is_disable; }; - -define ioam_cache_ip6_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff 
--git a/src/plugins/ioam/lib-pot/pot.api b/src/plugins/ioam/lib-pot/pot.api index fa2fc126..c377cde0 100644 --- a/src/plugins/ioam/lib-pot/pot.api +++ b/src/plugins/ioam/lib-pot/pot.api @@ -27,7 +27,7 @@ @param list_name_len - length of the name of this profile list @param list_name - name of this profile list */ -define pot_profile_add { +autoreply define pot_profile_add { u32 client_index; u32 context; u8 id; @@ -42,22 +42,12 @@ define pot_profile_add { u8 list_name[0]; }; -/** \brief Proof of Transit profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_add_reply { - u32 context; - i32 retval; -}; - - /** \brief Proof of Transit(POT): Activate POT profile in the list @param id - id of the profile @param list_name_len - length of the name of this profile list @param list_name - name of this profile list */ -define pot_profile_activate { +autoreply define pot_profile_activate { u32 client_index; u32 context; u8 id; @@ -65,37 +55,19 @@ define pot_profile_activate { u8 list_name[0]; }; -/** \brief Proof of Transit profile activate response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_activate_reply { - u32 context; - i32 retval; -}; - /** \brief Delete POT Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param list_name_len - length of the name of the profile list @param list_name - name of profile list to delete */ -define pot_profile_del { +autoreply define pot_profile_del { u32 client_index; u32 context; u8 list_name_len; u8 list_name[0]; }; -/** \brief Proof of Transit profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_del_reply { - u32 context; - i32 retval; -}; - /** \brief Show POT Profiles @param 
client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/ioam/lib-trace/trace.api b/src/plugins/ioam/lib-trace/trace.api index cb958325..2f45c6e2 100644 --- a/src/plugins/ioam/lib-trace/trace.api +++ b/src/plugins/ioam/lib-trace/trace.api @@ -22,7 +22,7 @@ @param trace_tsp- Timestamp resolution @param app_data - Application specific opaque */ -define trace_profile_add { +autoreply define trace_profile_add { u32 client_index; u32 context; u8 trace_type; @@ -32,37 +32,15 @@ define trace_profile_add { u32 app_data; }; -/** \brief Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define trace_profile_add_reply { - u32 context; - i32 retval; -}; - - - /** \brief Delete trace Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define trace_profile_del { +autoreply define trace_profile_del { u32 client_index; u32 context; }; -/** \brief Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define trace_profile_del_reply { - u32 context; - i32 retval; -}; - - - /** \brief Show trace Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api index 056529a4..a6761f07 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api @@ -24,7 +24,7 @@ @param trace_enable - iOAM Trace enabled or not flag */ -define vxlan_gpe_ioam_enable { +autoreply define vxlan_gpe_ioam_enable { u32 client_index; u32 context; u16 id; @@ -33,38 +33,18 @@ define vxlan_gpe_ioam_enable { u8 trace_enable; }; -/** \brief iOAM Over 
VxLAN-GPE - Set iOAM transport for VXLAN-GPE reply - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define vxlan_gpe_ioam_enable_reply { - u32 context; - i32 retval; -}; - - /** \brief iOAM for VxLAN-GPE disable @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param id - profile id */ -define vxlan_gpe_ioam_disable +autoreply define vxlan_gpe_ioam_disable { u32 client_index; u32 context; u16 id; }; -/** \brief vxlan_gpe_ioam disable response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define vxlan_gpe_ioam_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Enable iOAM for a VNI (VXLAN-GPE) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -73,7 +53,7 @@ define vxlan_gpe_ioam_disable_reply @param remote - IPv4/6 Address of the remote VTEP */ -define vxlan_gpe_ioam_vni_enable { +autoreply define vxlan_gpe_ioam_vni_enable { u32 client_index; u32 context; u32 vni; @@ -82,18 +62,6 @@ define vxlan_gpe_ioam_vni_enable { u8 is_ipv6; }; -/** \brief Reply to enable iOAM for a VNI (VXLAN-GPE) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_vni_enable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - /** \brief Disable iOAM for a VNI (VXLAN-GPE) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -102,7 +70,7 @@ define vxlan_gpe_ioam_vni_enable_reply { @param remote - IPv4/6 Address of the remote VTEP */ -define vxlan_gpe_ioam_vni_disable { +autoreply define vxlan_gpe_ioam_vni_disable { u32 client_index; u32 context; u32 vni; @@ -111,19 +79,6 @@ define vxlan_gpe_ioam_vni_disable { 
u8 is_ipv6; }; -/** \brief Reply to disable iOAM for a VNI (VXLAN-GPE) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_vni_disable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - - /** \brief Enable iOAM for a VXLAN-GPE transit @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -131,7 +86,7 @@ define vxlan_gpe_ioam_vni_disable_reply { @param outer_fib_index- FIB index */ -define vxlan_gpe_ioam_transit_enable { +autoreply define vxlan_gpe_ioam_transit_enable { u32 client_index; u32 context; u32 outer_fib_index; @@ -139,18 +94,6 @@ define vxlan_gpe_ioam_transit_enable { u8 is_ipv6; }; -/** \brief Reply to enable iOAM for VXLAN-GPE transit - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_transit_enable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - /** \brief Disable iOAM for VXLAN-GPE transit @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -158,7 +101,7 @@ define vxlan_gpe_ioam_transit_enable_reply { @param outer_fib_index- FIB index */ -define vxlan_gpe_ioam_transit_disable { +autoreply define vxlan_gpe_ioam_transit_disable { u32 client_index; u32 context; u32 outer_fib_index; @@ -166,16 +109,3 @@ define vxlan_gpe_ioam_transit_disable { u8 is_ipv6; }; -/** \brief Reply to disable iOAM for VXLAN-GPE transit - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_transit_disable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - - diff --git a/src/plugins/lb/lb.api 
b/src/plugins/lb/lb.api index 39ee3c8f..32cc669b 100644 --- a/src/plugins/lb/lb.api +++ b/src/plugins/lb/lb.api @@ -8,7 +8,7 @@ @param flow_timeout - Time in seconds after which, if no packet is received for a given flow, the flow is removed from the established flow table. */ -define lb_conf +autoreply define lb_conf { u32 client_index; u32 context; @@ -18,11 +18,6 @@ define lb_conf u32 flow_timeout; }; -define lb_conf_reply { - u32 context; - i32 retval; -}; - /** \brief Add a virtual address (or prefix) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -33,7 +28,7 @@ define lb_conf_reply { for this VIP (must be power of 2). @param is_del - The VIP should be removed. */ -define lb_add_del_vip { +autoreply define lb_add_del_vip { u32 client_index; u32 context; u8 ip_prefix[16]; @@ -43,11 +38,6 @@ define lb_add_del_vip { u8 is_del; }; -define lb_add_del_vip_reply { - u32 context; - i32 retval; -}; - /** \brief Add an application server for a given VIP @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -56,7 +46,7 @@ define lb_add_del_vip_reply { @param as_address - The application server address (IPv4 in lower order 32 bits). @param is_del - The AS should be removed. 
*/ -define lb_add_del_as { +autoreply define lb_add_del_as { u32 client_index; u32 context; u8 vip_ip_prefix[16]; @@ -64,8 +54,3 @@ define lb_add_del_as { u8 as_address[16]; u8 is_del; }; - -define lb_add_del_as_reply { - u32 context; - i32 retval; -}; diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api index 6f946421..95e016c3 100644 --- a/src/plugins/memif/memif.api +++ b/src/plugins/memif/memif.api @@ -57,7 +57,7 @@ define memif_create_reply @param context - sender context, to match reply w/ request @param sw_if_index - software index of the interface to delete */ -define memif_delete +autoreply define memif_delete { u32 client_index; u32 context; @@ -65,16 +65,6 @@ define memif_delete u32 sw_if_index; }; -/** \brief Delete host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define memif_delete_reply -{ - u32 context; - i32 retval; -}; - /** \brief Memory interface details structure @param context - sender context, to match reply w/ request (memif_dump) @param sw_if_index - index of the interface diff --git a/src/plugins/snat/snat.api b/src/plugins/snat/snat.api index 9689f5f9..573b6753 100644 --- a/src/plugins/snat/snat.api +++ b/src/plugins/snat/snat.api @@ -29,7 +29,7 @@ @param vrf_id - VRF id of tenant, ~0 means independent of VRF @param is_add - 1 if add, 0 if delete */ -define snat_add_address_range { +autoreply define snat_add_address_range { u32 client_index; u32 context; u8 is_ip4; @@ -39,15 +39,6 @@ define snat_add_address_range { u8 is_add; }; -/** \brief Add S-NAT address range reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_address_range_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT addresses @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -77,7 +68,7 @@ define snat_address_details { @param 
is_inside - 1 if inside, 0 if outside @param sw_if_index - software index of the interface */ -define snat_interface_add_del_feature { +autoreply define snat_interface_add_del_feature { u32 client_index; u32 context; u8 is_add; @@ -85,15 +76,6 @@ define snat_interface_add_del_feature { u32 sw_if_index; }; -/** \brief Enable/disable S-NAT feature on the interface reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_interface_add_del_feature_reply { - u32 context; - i32 retval; -}; - /** \brief Dump interfaces with S-NAT feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -130,7 +112,7 @@ define snat_interface_details { used) @param vfr_id - VRF ID */ -define snat_add_static_mapping { +autoreply define snat_add_static_mapping { u32 client_index; u32 context; u8 is_add; @@ -145,15 +127,6 @@ define snat_add_static_mapping { u32 vrf_id; }; -/** \brief Add/delete S-NAT static mapping reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_static_mapping_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT static mappings @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -257,21 +230,12 @@ define snat_show_config_reply @param context - sender context, to match reply w/ request @param worker_mask - S-NAT workers mask */ -define snat_set_workers { +autoreply define snat_set_workers { u32 client_index; u32 context; u64 worker_mask; }; -/** \brief Set S-NAT workers reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_set_workers_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT workers @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -300,7 +264,7 @@ 
define snat_worker_details { @param is_add - 1 if add, 0 if delete @param sw_if_index - software index of the interface */ -define snat_add_del_interface_addr { +autoreply define snat_add_del_interface_addr { u32 client_index; u32 context; u8 is_add; @@ -308,15 +272,6 @@ define snat_add_del_interface_addr { u32 sw_if_index; }; -/** \brief Add/delete S-NAT pool address from specific interfce reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_del_interface_addr_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT pool addresses interfaces @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -342,7 +297,7 @@ define snat_interface_addr_details { @param src_port - source port number @param enable - 1 if enable, 0 if disable */ -define snat_ipfix_enable_disable { +autoreply define snat_ipfix_enable_disable { u32 client_index; u32 context; u32 domain_id; @@ -350,15 +305,6 @@ define snat_ipfix_enable_disable { u8 enable; }; -/** \brief Enable/disable S-NAT IPFIX logging reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_ipfix_enable_disable_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT users @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -437,7 +383,7 @@ define snat_user_session_details { @param out_addr - outside IP address @param out_addr - outside IP address prefix length */ -define snat_add_det_map { +autoreply define snat_add_det_map { u32 client_index; u32 context; u8 is_add; @@ -449,15 +395,6 @@ define snat_add_det_map { u8 out_plen; }; -/** \brief Add/delete S-NAT deterministic mapping reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_det_map_reply { - u32 context; - i32 retval; -}; - /** \brief Get 
outside address and port range from inside address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -556,7 +493,7 @@ define snat_det_map_details { @param tcp_transitory - TCP transitory timeout (default 240sec) @param icmp - ICMP timeout (default 60sec) */ -define snat_det_set_timeouts { +autoreply define snat_det_set_timeouts { u32 client_index; u32 context; u32 udp; @@ -565,15 +502,6 @@ define snat_det_set_timeouts { u32 icmp; }; -/** \brief Set values of timeouts for deterministic NAT reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_det_set_timeouts_reply { - u32 context; - i32 retval; -}; - /** \brief Get values of timeouts for deterministic NAT (seconds) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/tools/vppapigen/gram.y b/src/tools/vppapigen/gram.y index de26af8d..9cea6023 100644 --- a/src/tools/vppapigen/gram.y +++ b/src/tools/vppapigen/gram.y @@ -38,7 +38,7 @@ void generate (YYSTYPE); %token NAME RPAR LPAR SEMI LBRACK RBRACK NUMBER PRIMTYPE BARF %token TPACKED DEFINE LCURLY RCURLY STRING UNION %token HELPER_STRING COMMA -%token NOVERSION MANUAL_PRINT MANUAL_ENDIAN TYPEONLY DONT_TRACE +%token NOVERSION MANUAL_PRINT MANUAL_ENDIAN TYPEONLY DONT_TRACE AUTOREPLY %% @@ -64,6 +64,7 @@ flag: | MANUAL_ENDIAN {$$ = $1;} | DONT_TRACE {$$ = $1;} | TYPEONLY {$$ = $1;} + | AUTOREPLY {$$ = $1;} ; defn: DEFINE NAME LCURLY defbody RCURLY SEMI diff --git a/src/tools/vppapigen/lex.c b/src/tools/vppapigen/lex.c index 733942ad..e6358143 100644 --- a/src/tools/vppapigen/lex.c +++ b/src/tools/vppapigen/lex.c @@ -27,6 +27,9 @@ #include "lex.h" #include "node.h" #include "tools/vppapigen/gram.h" +#include +#include +#include FILE *ifp, *ofp, *pythonfp, *jsonfp; char *vlib_app_name = "vpp"; @@ -38,6 +41,9 @@ int current_filename_allocated; unsigned long 
input_crc; unsigned long message_crc; int yydebug; +char *push_input_fifo; +char saved_ungetc_char; +char have_ungetc_char; /* * lexer variable definitions @@ -469,9 +475,50 @@ static char namebuf [MAXNAME]; static inline char getc_char (FILE *ifp) { + char rv; + + if (have_ungetc_char) { + have_ungetc_char = 0; + return saved_ungetc_char; + } + + if (clib_fifo_elts (push_input_fifo)) { + clib_fifo_sub1(push_input_fifo, rv); + return (rv & 0x7f); + } return ((char)(getc(ifp) & 0x7f)); } +u32 fe (char *fifo) +{ + return clib_fifo_elts (fifo); +} + +static inline void +ungetc_char (char c, FILE *ifp) +{ + saved_ungetc_char = c; + have_ungetc_char = 1; +} + +void autoreply (void *np_arg) +{ + static u8 *s; + node_t *np = (node_t *)np_arg; + int i; + + vec_reset_length (s); + + s = format (0, " define %s_reply\n", (char *)(np->data[0])); + s = format (s, "{\n"); + s = format (s, " u32 context;\n"); + s = format (s, " i32 retval;\n"); + s = format (s, "};\n"); + + for (i = 0; i < vec_len (s); i++) + clib_fifo_add1 (push_input_fifo, s[i]); +} + /* * yylex (well, yylex_1: The real yylex below does crc-hackery) */ @@ -595,7 +642,7 @@ static int yylex_1 (void) return (EOF); if (!isalnum (c) && c != '_') { - ungetc (c, ifp); + ungetc_char (c, ifp); namebuf [nameidx] = 0; the_lexer_state = START_STATE; return (name_check (namebuf, &yylval)); @@ -616,7 +663,7 @@ static int yylex_1 (void) return (EOF); if (!isdigit (c)) { - ungetc (c, ifp); + ungetc_char (c, ifp); namebuf [nameidx] = 0; the_lexer_state = START_STATE; yylval = (void *) atol(namebuf); @@ -889,6 +936,7 @@ int yylex (void) case MANUAL_ENDIAN: code = 276; break; case TYPEONLY: code = 278; break; case DONT_TRACE: code = 279; break; + case AUTOREPLY: code = 280; break; case EOF: code = ~0; break; /* hysterical compatibility */ @@ -929,6 +977,7 @@ static struct keytab { } keytab [] = /* Keep the table sorted, binary search used below! 
*/ { + {"autoreply", NODE_AUTOREPLY}, {"define", NODE_DEFINE}, {"dont_trace", NODE_DONT_TRACE}, {"f64", NODE_F64}, @@ -1005,6 +1054,10 @@ static int name_check (const char *s, YYSTYPE *token_value) *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE; return(DONT_TRACE); + case NODE_AUTOREPLY: + *token_value = (YYSTYPE) NODE_FLAG_AUTOREPLY; + return(AUTOREPLY); + case NODE_NOVERSION: return(NOVERSION); diff --git a/src/tools/vppapigen/lex.h b/src/tools/vppapigen/lex.h index a0fdc735..275cf685 100644 --- a/src/tools/vppapigen/lex.h +++ b/src/tools/vppapigen/lex.h @@ -24,6 +24,7 @@ extern int yylex (void); extern void yyerror (char *); extern int yyparse (void); +extern void autoreply (void *); #ifndef YYSTYPE #define YYSTYPE void * diff --git a/src/tools/vppapigen/node.c b/src/tools/vppapigen/node.c index 359ac9c9..9f234037 100644 --- a/src/tools/vppapigen/node.c +++ b/src/tools/vppapigen/node.c @@ -1050,6 +1050,11 @@ YYSTYPE set_flags(YYSTYPE a1, YYSTYPE a2) flags = (int)(uword) a1; np->flags |= flags; + + /* Generate a foo_reply_t right here */ + if (flags & NODE_FLAG_AUTOREPLY) + autoreply(np); + return (a2); } /* diff --git a/src/tools/vppapigen/node.h b/src/tools/vppapigen/node.h index 297d6036..65bd5d10 100644 --- a/src/tools/vppapigen/node.h +++ b/src/tools/vppapigen/node.h @@ -53,6 +53,7 @@ enum node_subclass { /* WARNING: indices must match the vft... 
*/ NODE_MANUAL_PRINT, NODE_MANUAL_ENDIAN, NODE_DONT_TRACE, + NODE_AUTOREPLY, }; enum passid { @@ -84,6 +85,7 @@ typedef struct node_ { #define NODE_FLAG_MANUAL_ENDIAN (1<<1) #define NODE_FLAG_TYPEONLY (1<<3) #define NODE_FLAG_DONT_TRACE (1<<4) +#define NODE_FLAG_AUTOREPLY (1<<5) typedef struct node_vft_ { void (*print)(struct node_ *); diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api index c38b483c..32e51407 100644 --- a/src/vlibmemory/memclnt.api +++ b/src/vlibmemory/memclnt.api @@ -72,7 +72,7 @@ define memclnt_read_timeout { /* * RPC */ -define rpc_call { +autoreply define rpc_call { u32 client_index; u32 context; u64 function; @@ -82,11 +82,6 @@ define rpc_call { u8 data[0]; }; -define rpc_reply { - i32 retval; - u32 context; -}; - /* * Lookup message-ID base by name */ diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 7a536ee8..43574dea 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -1275,7 +1275,7 @@ VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) = { static void vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) { - vl_api_rpc_reply_t *rmp; + vl_api_rpc_call_reply_t *rmp; int (*fp) (void *); i32 rv = 0; vlib_main_t *vm = vlib_get_main (); @@ -1305,7 +1305,7 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) if (q) { rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_RPC_REPLY); + rmp->_vl_msg_id = ntohs (VL_API_RPC_CALL_REPLY); rmp->context = mp->context; rmp->retval = rv; vl_msg_api_send_shmem (q, (u8 *) & rmp); @@ -1318,7 +1318,7 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) } static void -vl_api_rpc_reply_t_handler (vl_api_rpc_reply_t * mp) +vl_api_rpc_call_reply_t_handler (vl_api_rpc_call_reply_t * mp) { clib_warning ("unimplemented"); } @@ -1415,7 +1415,7 @@ vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t * mp) #define foreach_rpc_api_msg \ _(RPC_CALL,rpc_call) \ -_(RPC_REPLY,rpc_reply) 
+_(RPC_CALL_REPLY,rpc_call_reply) #define foreach_plugin_trace_msg \ _(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids) diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index 2cdcfad3..7bcaa4c3 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -18,43 +18,23 @@ @param context - sender context, to match reply w/ request @param sw_if_index - interface to use as echo source */ -define bfd_udp_set_echo_source +autoreply define bfd_udp_set_echo_source { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Set BFD feature response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_set_echo_source_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete BFD echo source @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define bfd_udp_del_echo_source +autoreply define bfd_udp_del_echo_source { u32 client_index; u32 context; }; -/** \brief Delete BFD echo source response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_del_echo_source_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -69,7 +49,7 @@ define bfd_udp_del_echo_source_reply @param bfd_key_id - key id sent out in BFD packets (if is_authenticated) @param conf_key_id - id of already configured key (if is_authenticated) */ -define bfd_udp_add +autoreply define bfd_udp_add { u32 client_index; u32 context; @@ -85,16 +65,6 @@ define bfd_udp_add u32 conf_key_id; }; -/** \brief Add UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_add_reply -{ - u32 context; - i32 retval; -}; - /** \brief Modify UDP BFD session on 
interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -106,7 +76,7 @@ define bfd_udp_add_reply @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param detect_mult - detect multiplier (# of packets missed before connection goes down) */ -define bfd_udp_mod +autoreply define bfd_udp_mod { u32 client_index; u32 context; @@ -119,16 +89,6 @@ define bfd_udp_mod u8 detect_mult; }; -/** \brief Modify UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_mod_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -137,7 +97,7 @@ define bfd_udp_mod_reply @param peer_addr - peer address @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 */ -define bfd_udp_del +autoreply define bfd_udp_del { u32 client_index; u32 context; @@ -147,16 +107,6 @@ define bfd_udp_del u8 is_ipv6; }; -/** \brief Delete UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get all BFD sessions @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -206,7 +156,7 @@ define bfd_udp_session_details @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param admin_up_down - set the admin state, 1 = up, 0 = down */ -define bfd_udp_session_set_flags +autoreply define bfd_udp_session_set_flags { u32 client_index; u32 context; @@ -217,23 +167,13 @@ define bfd_udp_session_set_flags u8 admin_up_down; }; -/** \brief Reply to bfd_udp_session_set_flags - @param context - sender context which was 
passed in the request - @param retval - return code of the set flags request -*/ -define bfd_udp_session_set_flags_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for BFD events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid */ -define want_bfd_events +autoreply define want_bfd_events { u32 client_index; u32 context; @@ -241,16 +181,6 @@ define want_bfd_events u32 pid; }; -/** \brief Reply for BFD events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_bfd_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - add/replace key to configuration @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -259,7 +189,7 @@ define want_bfd_events_reply @param auth_type - authentication type (RFC 5880/4.1/Auth Type) @param key - key data */ -define bfd_auth_set_key +autoreply define bfd_auth_set_key { u32 client_index; u32 context; @@ -269,16 +199,6 @@ define bfd_auth_set_key u8 key[20]; }; -/** \brief BFD UDP - add/replace key reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_auth_set_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - delete key from configuration @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -286,23 +206,13 @@ define bfd_auth_set_key_reply @param key_len - length of key (must be non-zero) @param key - key data */ -define bfd_auth_del_key +autoreply define bfd_auth_del_key { u32 client_index; u32 context; u32 conf_key_id; }; -/** \brief BFD UDP - delete key reply - @param context - returned sender context, to match reply w/ request - @param retval - 
return code -*/ -define bfd_auth_del_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get a list of configured authentication keys @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -338,7 +248,7 @@ define bfd_auth_keys_details @param bfd_key_id - key id sent out in BFD packets @param conf_key_id - id of already configured key */ -define bfd_udp_auth_activate +autoreply define bfd_udp_auth_activate { u32 client_index; u32 context; @@ -351,16 +261,6 @@ define bfd_udp_auth_activate u32 conf_key_id; }; -/** \brief BFD UDP - activate/change authentication reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_udp_auth_activate_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - deactivate authentication @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -370,7 +270,7 @@ define bfd_udp_auth_activate_reply @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param is_delayed - change is applied once peer applies the change (on first received non-authenticated packet) */ -define bfd_udp_auth_deactivate +autoreply define bfd_udp_auth_deactivate { u32 client_index; u32 context; @@ -381,16 +281,6 @@ define bfd_udp_auth_deactivate u8 is_delayed; }; -/** \brief BFD UDP - deactivate authentication reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_udp_auth_deactivate_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/classify/classify.api b/src/vnet/classify/classify.api index 51ebd6c8..cacb9bed 100644 --- a/src/vnet/classify/classify.api +++ b/src/vnet/classify/classify.api @@ -92,7 +92,7 @@ define classify_add_del_table_reply VRF id if action is 1 or 2. 
@param match[] - for add, match value for session, required */ -define classify_add_del_session +autoreply define classify_add_del_session { u32 client_index; u32 context; @@ -106,16 +106,6 @@ define classify_add_del_session u8 match[0]; }; -/** \brief Classify add / del session response - @param context - sender context, to match reply w/ request - @param retval - return code for the add/del session request -*/ -define classify_add_del_session_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset policer classify interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -127,7 +117,7 @@ define classify_add_del_session_reply Note: User is recommeneded to use just one valid table_index per call. (ip4_table_index, ip6_table_index, or l2_table_index) */ -define policer_classify_set_interface +autoreply define policer_classify_set_interface { u32 client_index; u32 context; @@ -138,16 +128,6 @@ define policer_classify_set_interface u8 is_add; }; -/** \brief Set/unset policer classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define policer_classify_set_interface_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get list of policer classify interfaces and tables @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -308,7 +288,7 @@ define classify_session_details Note: User is recommeneded to use just one valid table_index per call. 
(ip4_table_index, ip6_table_index, or l2_table_index) */ -define flow_classify_set_interface { +autoreply define flow_classify_set_interface { u32 client_index; u32 context; u32 sw_if_index; @@ -317,15 +297,6 @@ define flow_classify_set_interface { u8 is_add; }; -/** \brief Set/unset flow classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define flow_classify_set_interface_reply { - u32 context; - i32 retval; -}; - /** \brief Get list of flow classify interfaces and tables @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -353,4 +324,4 @@ define flow_classify_details { * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/cop/cop.api b/src/vnet/cop/cop.api index b34dae80..69316001 100644 --- a/src/vnet/cop/cop.api +++ b/src/vnet/cop/cop.api @@ -20,7 +20,7 @@ @param enable_disable - 1 => enable, 0 => disable */ -define cop_interface_enable_disable +autoreply define cop_interface_enable_disable { u32 client_index; u32 context; @@ -28,17 +28,6 @@ define cop_interface_enable_disable u8 enable_disable; }; -/** \brief cop: interface enable/disable junk filtration reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define cop_interface_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief cop: enable/disable whitelist filtration features on an interface Note: the supplied fib_id must match in order to remove the feature! 
@@ -51,7 +40,7 @@ define cop_interface_enable_disable_reply @param default_cop - 1 => enable non-ip4, non-ip6 filtration 0=> disable it */ -define cop_whitelist_enable_disable +autoreply define cop_whitelist_enable_disable { u32 client_index; u32 context; @@ -62,17 +51,6 @@ define cop_whitelist_enable_disable u8 default_cop; }; -/** \brief cop: interface enable/disable junk filtration reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define cop_whitelist_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief get_node_graph - get a copy of the vpp node graph including the current set of graph arcs. @@ -85,4 +63,4 @@ define cop_whitelist_enable_disable_reply * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api index 9fb2a207..8d40ad60 100644 --- a/src/vnet/devices/af_packet/af_packet.api +++ b/src/vnet/devices/af_packet/af_packet.api @@ -46,7 +46,7 @@ define af_packet_create_reply @param context - sender context, to match reply w/ request @param host_if_name - interface name */ -define af_packet_delete +autoreply define af_packet_delete { u32 client_index; u32 context; @@ -54,16 +54,6 @@ define af_packet_delete u8 host_if_name[64]; }; -/** \brief Delete host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define af_packet_delete_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/devices/netmap/netmap.api b/src/vnet/devices/netmap/netmap.api index 377ccffd..8dc698b9 100644 --- a/src/vnet/devices/netmap/netmap.api +++ b/src/vnet/devices/netmap/netmap.api @@ -22,7 +22,7 @@ @param is_pipe - is pipe @param is_master - 0=slave, 1=master */ -define netmap_create +autoreply define netmap_create { u32 client_index; u32 context; @@ -34,22 +34,12 
@@ define netmap_create u8 is_master; }; -/** \brief Create netmap response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define netmap_create_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete netmap @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param netmap_if_name - interface name */ -define netmap_delete +autoreply define netmap_delete { u32 client_index; u32 context; @@ -57,16 +47,6 @@ define netmap_delete u8 netmap_if_name[64]; }; -/** \brief Delete netmap response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define netmap_delete_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api index 4f604e45..df7ce7ab 100644 --- a/src/vnet/devices/virtio/vhost_user.api +++ b/src/vnet/devices/virtio/vhost_user.api @@ -53,7 +53,7 @@ define create_vhost_user_if_reply @param sock_filename - unix socket filename, used to speak with frontend @param operation_mode - polling=0, interrupt=1, or adaptive=2 */ -define modify_vhost_user_if +autoreply define modify_vhost_user_if { u32 client_index; u32 context; @@ -65,36 +65,16 @@ define modify_vhost_user_if u8 operation_mode; }; -/** \brief vhost-user interface modify response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define modify_vhost_user_if_reply -{ - u32 context; - i32 retval; -}; - /** \brief vhost-user interface delete request @param client_index - opaque cookie to identify the sender */ -define delete_vhost_user_if +autoreply define delete_vhost_user_if { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief vhost-user interface delete response - @param context - sender context, to match reply w/ request - 
@param retval - return code for the request -*/ -define delete_vhost_user_if_reply -{ - u32 context; - i32 retval; -}; - /** \brief Vhost-user interface details structure (fix this) @param sw_if_index - index of the interface @param interface_name - name of interface diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api index 2db85a79..eb0b070d 100644 --- a/src/vnet/dhcp/dhcp.api +++ b/src/vnet/dhcp/dhcp.api @@ -24,7 +24,7 @@ @param dhcp_server[] - server address @param dhcp_src_address[] - */ -define dhcp_proxy_config +autoreply define dhcp_proxy_config { u32 client_index; u32 context; @@ -36,16 +36,6 @@ define dhcp_proxy_config u8 dhcp_src_address[16]; }; -/** \brief DHCP Proxy config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief DHCP Proxy set / unset vss request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -55,7 +45,7 @@ define dhcp_proxy_config_reply @param is_ipv6 - ip6 if non-zero, else ip4 @param is_add - set vss if non-zero, else delete */ -define dhcp_proxy_set_vss +autoreply define dhcp_proxy_set_vss { u32 client_index; u32 context; @@ -66,16 +56,6 @@ define dhcp_proxy_set_vss u8 is_add; }; -/** \brief DHCP proxy set / unset vss response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_set_vss_reply -{ - u32 context; - i32 retval; -}; - /** \brief DHCP Client config add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -86,7 +66,7 @@ define dhcp_proxy_set_vss_reply via dhcp_compl_event API message if non-zero @param pid - sender's pid */ -define dhcp_client_config +autoreply define dhcp_client_config { u32 client_index; u32 context; @@ -97,16 +77,6 @@ define 
dhcp_client_config u32 pid; }; -/** \brief DHCP Client config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_client_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about a DHCP completion event @param client_index - opaque cookie to identify the sender @param pid - client pid registered to receive notification @@ -162,4 +132,4 @@ manual_endian manual_print define dhcp_proxy_details * Local Variables: * eval: (c-set-style "gnu") * End: - */ \ No newline at end of file + */ diff --git a/src/vnet/flow/flow.api b/src/vnet/flow/flow.api index 0e0f99bf..1c5e8c5c 100644 --- a/src/vnet/flow/flow.api +++ b/src/vnet/flow/flow.api @@ -24,7 +24,7 @@ @param template_interval - number of seconds after which to resend template @param udp_checksum - UDP checksum calculation enable flag */ -define set_ipfix_exporter +autoreply define set_ipfix_exporter { u32 client_index; u32 context; @@ -37,15 +37,6 @@ define set_ipfix_exporter u8 udp_checksum; }; -/** \brief Reply to IPFIX exporter configure request - @param context - sender context which was passed in the request -*/ -define set_ipfix_exporter_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPFIX exporter dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -84,22 +75,13 @@ define ipfix_exporter_details @param domain_id - domain ID reported in IPFIX messages for classify stream @param src_port - source port of UDP session for classify stream */ -define set_ipfix_classify_stream { +autoreply define set_ipfix_classify_stream { u32 client_index; u32 context; u32 domain_id; u16 src_port; }; -/** \brief IPFIX classify stream configure response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define set_ipfix_classify_stream_reply { - u32 context; - i32 retval; -}; - /** \brief 
IPFIX classify stream dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -127,7 +109,7 @@ define ipfix_classify_stream_details { @param ip_version - version of IP used in the classifier table @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified */ -define ipfix_classify_table_add_del { +autoreply define ipfix_classify_table_add_del { u32 client_index; u32 context; u32 table_id; @@ -136,14 +118,6 @@ define ipfix_classify_table_add_del { u8 is_add; }; -/** \brief IPFIX add classifier table response - @param context - sender context which was passed in the request -*/ -define ipfix_classify_table_add_del_reply { - u32 context; - i32 retval; -}; - /** \brief IPFIX classify tables dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/interface.api b/src/vnet/interface.api index 85fd73fb..9df63f18 100644 --- a/src/vnet/interface.api +++ b/src/vnet/interface.api @@ -6,7 +6,7 @@ @param link_up_down - Oper state sent on change event, not used in config. 
@param deleted - interface was deleted */ -define sw_interface_set_flags +autoreply define sw_interface_set_flags { u32 client_index; u32 context; @@ -17,23 +17,13 @@ define sw_interface_set_flags u8 deleted; }; -/** \brief Reply to sw_interface_set_flags - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_set_flags_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set interface MTU @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - index of the interface to set MTU on @param mtu - MTU */ -define sw_interface_set_mtu +autoreply define sw_interface_set_mtu { u32 client_index; u32 context; @@ -41,23 +31,13 @@ define sw_interface_set_mtu u16 mtu; }; -/** \brief Reply to sw_interface_set_mtu - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_set_mtu_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for interface events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid */ -define want_interface_events +autoreply define want_interface_events { u32 client_index; u32 context; @@ -65,16 +45,6 @@ define want_interface_events u32 pid; }; -/** \brief Reply for interface events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_interface_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface details structure (fix this) @param sw_if_index - index of the interface @param sup_sw_if_index - index of parent interface if any, else same as sw_if_index @@ -184,7 +154,7 @@ define sw_interface_dump @param address_length - address length 
in bytes, 4 for ip4, 16 for ip6 @param address - array of address bytes */ -define sw_interface_add_del_address +autoreply define sw_interface_add_del_address { u32 client_index; u32 context; @@ -196,16 +166,6 @@ define sw_interface_add_del_address u8 address[16]; }; -/** \brief Reply to sw_interface_add_del_address - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_add_del_address_reply -{ - u32 context; - i32 retval; -}; - /** \brief Associate the specified interface with a fib table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -213,7 +173,7 @@ define sw_interface_add_del_address_reply @param is_ipv6 - if non-zero ipv6, else ipv4 @param vrf_id - fib table/vrd id to associate the interface with */ -define sw_interface_set_table +autoreply define sw_interface_set_table { u32 client_index; u32 context; @@ -222,16 +182,6 @@ define sw_interface_set_table u32 vrf_id; }; -/** \brief Reply to sw_interface_set_table - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_set_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get VRF id assigned to interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -280,7 +230,7 @@ define vnet_interface_counters @param unnumbered_sw_if_index - interface which will use the address @param is_add - if non-zero set the association, else unset it */ -define sw_interface_set_unnumbered +autoreply define sw_interface_set_unnumbered { u32 client_index; u32 context; @@ -289,38 +239,18 @@ define sw_interface_set_unnumbered u8 is_add; }; -/** \brief Set unnumbered interface add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_unnumbered_reply -{ - 
u32 context; - i32 retval; -}; - /** \brief Clear interface statistics @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - index of the interface to clear statistics */ -define sw_interface_clear_stats +autoreply define sw_interface_clear_stats { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Reply to sw_interface_clear_stats - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_clear_stats_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set / clear software interface tag @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -328,7 +258,7 @@ define sw_interface_clear_stats_reply @param add_del - 1 = add, 0 = delete @param tag - an ascii tag */ -define sw_interface_tag_add_del +autoreply define sw_interface_tag_add_del { u32 client_index; u32 context; @@ -337,23 +267,13 @@ define sw_interface_tag_add_del u8 tag[64]; }; -/** \brief Reply to set / clear software interface tag - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define sw_interface_tag_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set an interface's MAC address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - the interface whose MAC will be set @param mac_addr - the new MAC address */ -define sw_interface_set_mac_address +autoreply define sw_interface_set_mac_address { u32 client_index; u32 context; @@ -361,16 +281,6 @@ define sw_interface_set_mac_address u8 mac_address[6]; }; -/** \brief Reply to setting an interface MAC address request - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define 
sw_interface_set_mac_address_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index 6af1714f..7097a130 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -136,7 +136,7 @@ define ip_neighbor_details { @param mac_address - l2 address of the neighbor @param dst_address - ip4 or ip6 address of the neighbor */ -define ip_neighbor_add_del +autoreply define ip_neighbor_add_del { u32 client_index; u32 context; @@ -150,16 +150,6 @@ define ip_neighbor_add_del u8 dst_address[16]; }; -/** \brief Reply for IP Neighbor add / delete request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_neighbor_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set the ip flow hash config for a fib request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -172,7 +162,7 @@ define ip_neighbor_add_del_reply @param proto -if non-zero include proto in flow hash @param reverse - if non-zero include reverse in flow hash */ -define set_ip_flow_hash +autoreply define set_ip_flow_hash { u32 client_index; u32 context; @@ -186,16 +176,6 @@ define set_ip_flow_hash u8 reverse; }; -/** \brief Set the ip flow hash config for a fib response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define set_ip_flow_hash_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 router advertisement config request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -213,7 +193,7 @@ define set_ip_flow_hash_reply @param initial_count - @param initial_interval - */ -define sw_interface_ip6nd_ra_config +autoreply define sw_interface_ip6nd_ra_config { u32 client_index; u32 context; @@ -233,16 +213,6 @@ define sw_interface_ip6nd_ra_config u32 
initial_interval; }; -/** \brief IPv6 router advertisement config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6nd_ra_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 router advertisement prefix config request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -272,7 +242,7 @@ define sw_interface_ip6nd_ra_config_reply preferred [ADDRCONF]. A value of all one bits (0xffffffff) represents infinity. */ -define sw_interface_ip6nd_ra_prefix +autoreply define sw_interface_ip6nd_ra_prefix { u32 client_index; u32 context; @@ -289,16 +259,6 @@ define sw_interface_ip6nd_ra_prefix u32 pref_lifetime; }; -/** \brief IPv6 router advertisement prefix config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6nd_ra_prefix_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 ND proxy config @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -306,7 +266,7 @@ define sw_interface_ip6nd_ra_prefix_reply @param address - The address of the host for which to proxy for @param is_add - Adding or deleting */ -define ip6nd_proxy_add_del +autoreply define ip6nd_proxy_add_del { u32 client_index; u32 context; @@ -315,16 +275,6 @@ define ip6nd_proxy_add_del u8 address[16]; }; -/** \brief IPv6 ND proxy response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define ip6nd_proxy_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 ND proxy details returned after request @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -355,7 +305,7 @@ define ip6nd_proxy_dump @param sw_if_index - interface used to reach neighbor @param 
enable - if non-zero enable ip6 on interface, else disable */ -define sw_interface_ip6_enable_disable +autoreply define sw_interface_ip6_enable_disable { u32 client_index; u32 context; @@ -363,23 +313,13 @@ define sw_interface_ip6_enable_disable u8 enable; /* set to true if enable */ }; -/** \brief IPv6 interface enable / disable response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 set link local address on interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface to set link local on @param address[] - the new link local address */ -define sw_interface_ip6_set_link_local_address +autoreply define sw_interface_ip6_set_link_local_address { u32 client_index; u32 context; @@ -387,16 +327,6 @@ define sw_interface_ip6_set_link_local_address u8 address[16]; }; -/** \brief IPv6 set link local address on interface response - @param context - sender context, to match reply w/ request - @param retval - error code for the request -*/ -define sw_interface_ip6_set_link_local_address_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add / del route request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -422,7 +352,7 @@ define sw_interface_ip6_set_link_local_address_reply @param next_hop_out_label_stack - the next-hop output label stack, outer most first @param next_hop_via_label - The next-hop is a resolved via a local label */ -define ip_add_del_route +autoreply define ip_add_del_route { u32 client_index; u32 context; @@ -452,16 +382,6 @@ define ip_add_del_route u32 next_hop_out_label_stack[next_hop_n_out_labels]; }; -/** \brief Reply for add / del route request - @param context - returned sender context, to match reply w/ 
request - @param retval - return code -*/ -define ip_add_del_route_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add / del route request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +390,7 @@ define ip_add_del_route_reply FIXME */ -define ip_mroute_add_del +autoreply define ip_mroute_add_del { u32 client_index; u32 context; @@ -488,16 +408,6 @@ define ip_mroute_add_del u8 src_address[16]; }; -/** \brief Reply for add / del mroute request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_mroute_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump IP multicast fib table @param client_index - opaque cookie to identify the sender */ diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api index ef090f84..203c5272 100644 --- a/src/vnet/ipsec/ipsec.api +++ b/src/vnet/ipsec/ipsec.api @@ -20,7 +20,7 @@ @param spd_id - SPD instance id (control plane allocated) */ -define ipsec_spd_add_del +autoreply define ipsec_spd_add_del { u32 client_index; u32 context; @@ -28,17 +28,6 @@ define ipsec_spd_add_del u32 spd_id; }; -/** \brief Reply for IPsec: Add/delete Security Policy Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_spd_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete SPD from interface @param client_index - opaque cookie to identify the sender @@ -49,7 +38,7 @@ define ipsec_spd_add_del_reply */ -define ipsec_interface_add_del_spd +autoreply define ipsec_interface_add_del_spd { u32 client_index; u32 context; @@ -59,17 +48,6 @@ define ipsec_interface_add_del_spd u32 spd_id; }; -/** \brief Reply for IPsec: Add/delete SPD from interface - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_interface_add_del_spd_reply 
-{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete Security Policy Database entry See RFC 4301, 4.4.1.1 on how to match packet to selectors @@ -95,7 +73,7 @@ define ipsec_interface_add_del_spd_reply */ -define ipsec_spd_add_del_entry +autoreply define ipsec_spd_add_del_entry { u32 client_index; u32 context; @@ -125,17 +103,6 @@ define ipsec_spd_add_del_entry u32 sa_id; }; -/** \brief Reply for IPsec: Add/delete Security Policy Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_spd_add_del_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete Security Association Database entry @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -167,7 +134,7 @@ define ipsec_spd_add_del_entry_reply IPsec tunnel address copy mode (to support GDOI) */ -define ipsec_sad_add_del_entry +autoreply define ipsec_sad_add_del_entry { u32 client_index; u32 context; @@ -195,17 +162,6 @@ define ipsec_sad_add_del_entry u8 tunnel_dst_address[16]; }; -/** \brief Reply for IPsec: Add/delete Security Association Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_sad_add_del_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Update Security Association keys @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -219,7 +175,7 @@ define ipsec_sad_add_del_entry_reply @param integrity_key - integrity keying material */ -define ipsec_sa_set_key +autoreply define ipsec_sa_set_key { u32 client_index; u32 context; @@ -233,17 +189,6 @@ define ipsec_sa_set_key u8 integrity_key[128]; }; -/** \brief Reply for IPsec: Update Security Association keys - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - 
-define ipsec_sa_set_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Add/delete profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -251,7 +196,7 @@ define ipsec_sa_set_key_reply @param name - IKEv2 profile name @param is_add - Add IKEv2 profile if non-zero, else delete */ -define ikev2_profile_add_del +autoreply define ikev2_profile_add_del { u32 client_index; u32 context; @@ -260,16 +205,6 @@ define ikev2_profile_add_del u8 is_add; }; -/** \brief Reply for IKEv2: Add/delete profile - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile authentication method @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -280,7 +215,7 @@ define ikev2_profile_add_del_reply @param data_len - Authentication data length @param data - Authentication data (for rsa-sig cert file path) */ -define ikev2_profile_set_auth +autoreply define ikev2_profile_set_auth { u32 client_index; u32 context; @@ -292,16 +227,6 @@ define ikev2_profile_set_auth u8 data[0]; }; -/** \brief Reply for IKEv2: Set IKEv2 profile authentication method - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_auth_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile local/remote identification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -312,7 +237,7 @@ define ikev2_profile_set_auth_reply @param data_len - Identification data length @param data - Identification data */ -define ikev2_profile_set_id +autoreply define ikev2_profile_set_id { u32 client_index; u32 context; @@ -324,16 +249,6 @@ define ikev2_profile_set_id u8 
data[0]; }; -/** \brief Reply for IKEv2: - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_id_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile traffic selector parameters @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -346,7 +261,7 @@ define ikev2_profile_set_id_reply @param start_addr - The smallest address included in traffic selector @param end_addr - The largest address included in traffic selector */ -define ikev2_profile_set_ts +autoreply define ikev2_profile_set_ts { u32 client_index; u32 context; @@ -360,23 +275,13 @@ define ikev2_profile_set_ts u32 end_addr; }; -/** \brief Reply for IKEv2: Set IKEv2 profile traffic selector parameters - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_ts_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 local RSA private key @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param key_file - Key file absolute path */ -define ikev2_set_local_key +autoreply define ikev2_set_local_key { u32 client_index; u32 context; @@ -384,16 +289,6 @@ define ikev2_set_local_key u8 key_file[256]; }; -/** \brief Reply for IKEv2: Set IKEv2 local key - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_local_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 responder interface and IP address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -402,7 +297,7 @@ define ikev2_set_local_key_reply @param sw_if_index - interface index @param address - interface address */ -define ikev2_set_responder +autoreply define ikev2_set_responder { u32 
client_index; u32 context; @@ -412,17 +307,6 @@ define ikev2_set_responder u8 address[4]; }; -/** \brief Reply for IKEv2: Set IKEv2 responder interface and IP address - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_responder_reply -{ - u32 context; - i32 retval; -}; - - /** \brief IKEv2: Set IKEv2 IKE transforms in SA_INIT proposal (RFC 7296) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -434,7 +318,7 @@ define ikev2_set_responder_reply @param dh_group - Diffie-Hellman group */ -define ikev2_set_ike_transforms +autoreply define ikev2_set_ike_transforms { u32 client_index; u32 context; @@ -446,16 +330,6 @@ define ikev2_set_ike_transforms u32 dh_group; }; -/** \brief Reply for IKEv2: Set IKEv2 IKE transforms - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_ike_transforms_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 ESP transforms in SA_INIT proposal (RFC 7296) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -467,7 +341,7 @@ define ikev2_set_ike_transforms_reply @param dh_group - Diffie-Hellman group */ -define ikev2_set_esp_transforms +autoreply define ikev2_set_esp_transforms { u32 client_index; u32 context; @@ -479,16 +353,6 @@ define ikev2_set_esp_transforms u32 dh_group; }; -/** \brief Reply for IKEv2: Set IKEv2 ESP transforms - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_esp_transforms_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set Child SA lifetime, limited by time and/or data @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -500,7 +364,7 @@ define ikev2_set_esp_transforms_reply 
@param lifetime_maxdata - SA maximum life time in bytes (0 to disable) */ -define ikev2_set_sa_lifetime +autoreply define ikev2_set_sa_lifetime { u32 client_index; u32 context; @@ -512,16 +376,6 @@ define ikev2_set_sa_lifetime u64 lifetime_maxdata; }; -/** \brief Reply for IKEv2: Set Child SA lifetime - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_sa_lifetime_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the SA_INIT exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -529,7 +383,7 @@ define ikev2_set_sa_lifetime_reply @param name - IKEv2 profile name */ -define ikev2_initiate_sa_init +autoreply define ikev2_initiate_sa_init { u32 client_index; u32 context; @@ -537,16 +391,6 @@ define ikev2_initiate_sa_init u8 name[64]; }; -/** \brief Reply for IKEv2: Initiate the SA_INIT exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_sa_init_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the delete IKE SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -554,7 +398,7 @@ define ikev2_initiate_sa_init_reply @param ispi - IKE SA initiator SPI */ -define ikev2_initiate_del_ike_sa +autoreply define ikev2_initiate_del_ike_sa { u32 client_index; u32 context; @@ -562,16 +406,6 @@ define ikev2_initiate_del_ike_sa u64 ispi; }; -/** \brief Reply for IKEv2: Initiate the delete IKE SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_del_ike_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the delete Child SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match 
reply w/ request @@ -579,7 +413,7 @@ define ikev2_initiate_del_ike_sa_reply @param ispi - Child SA initiator SPI */ -define ikev2_initiate_del_child_sa +autoreply define ikev2_initiate_del_child_sa { u32 client_index; u32 context; @@ -587,16 +421,6 @@ define ikev2_initiate_del_child_sa u32 ispi; }; -/** \brief Reply for IKEv2: Initiate the delete Child SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_del_child_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the rekey Child SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -604,7 +428,7 @@ define ikev2_initiate_del_child_sa_reply @param ispi - Child SA initiator SPI */ -define ikev2_initiate_rekey_child_sa +autoreply define ikev2_initiate_rekey_child_sa { u32 client_index; u32 context; @@ -612,16 +436,6 @@ define ikev2_initiate_rekey_child_sa u32 ispi; }; -/** \brief Reply for IKEv2: Initiate the rekey Child SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_rekey_child_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump ipsec policy database data @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -682,4 +496,4 @@ define ipsec_spd_details { * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index c23eebec..db42d635 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -70,66 +70,36 @@ define l2_fib_table_dump @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define l2_fib_clear_table +autoreply define l2_fib_clear_table { u32 client_index; u32 context; }; -/** \brief L2 fib clear table response - @param 
context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_fib_clear_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB flush bridge domain entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the entry's bridge domain id */ -define l2fib_flush_bd +autoreply define l2fib_flush_bd { u32 client_index; u32 context; u32 bd_id; }; -/** \brief L2 FIB flush bridge domain entries response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2fib_flush_bd_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB flush interface entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the entry's bridge domain id */ -define l2fib_flush_int +autoreply define l2fib_flush_int { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief L2 FIB flush interface entries response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2fib_flush_int_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB add entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -140,7 +110,7 @@ define l2fib_flush_int_reply @param static_mac - @param filter_mac - */ -define l2fib_add_del +autoreply define l2fib_add_del { u32 client_index; u32 context; @@ -153,16 +123,6 @@ define l2fib_add_del u8 bvi_mac; }; -/** \brief L2 FIB add entry response - @param context - sender context, to match reply w/ request - @param retval - return code for the add l2fib entry request -*/ -define l2fib_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set L2 flags request !!! 
TODO - need more info, feature bits in l2_input.h @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -196,7 +156,7 @@ define l2_flags_reply @param bd_id - the bridge domain to create @param mac_age - mac aging time in min, 0 for disabled */ -define bridge_domain_set_mac_age +autoreply define bridge_domain_set_mac_age { u32 client_index; u32 context; @@ -204,16 +164,6 @@ define bridge_domain_set_mac_age u8 mac_age; }; -/** \brief Set bridge domain response - @param context - sender context, to match reply w/ request - @param retval - return code for the set l2 bits request -*/ -define bridge_domain_set_mac_age_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 bridge domain add or delete request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -226,7 +176,7 @@ define bridge_domain_set_mac_age_reply @param mac_age - mac aging time in min, 0 for disabled @param is_add - add or delete flag */ -define bridge_domain_add_del +autoreply define bridge_domain_add_del { u32 client_index; u32 context; @@ -240,16 +190,6 @@ define bridge_domain_add_del u8 is_add; }; -/** \brief L2 bridge domain add or delete response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request -*/ -define bridge_domain_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 bridge domain request operational state details @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -337,7 +277,7 @@ define bridge_flags_reply @param tag1 - Needed for any push or translate vtr op @param tag2 - Needed for any push 2 or translate x-2 vtr ops */ -define l2_interface_vlan_tag_rewrite +autoreply define l2_interface_vlan_tag_rewrite { u32 client_index; u32 context; @@ -348,16 +288,6 @@ define l2_interface_vlan_tag_rewrite u32 
tag2; // second pushed tag }; -/** \brief L2 interface vlan tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_vlan_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface pbb tag rewrite configure request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -370,7 +300,7 @@ define l2_interface_vlan_tag_rewrite_reply @param b_vlanid - B-tag vlanid, needed for any push or translate qinq vtr op @param i_sid - I-tag service id, needed for any push or translate qinq vtr op */ -define l2_interface_pbb_tag_rewrite +autoreply define l2_interface_pbb_tag_rewrite { u32 client_index; u32 context; @@ -383,16 +313,6 @@ define l2_interface_pbb_tag_rewrite u32 i_sid; }; -/** \brief L2 interface pbb tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_pbb_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/l2tp/l2tp.api b/src/vnet/l2tp/l2tp.api index 5a5a5a48..4587a807 100644 --- a/src/vnet/l2tp/l2tp.api +++ b/src/vnet/l2tp/l2tp.api @@ -52,7 +52,7 @@ define l2tpv3_create_tunnel_reply u32 sw_if_index; }; -define l2tpv3_set_tunnel_cookies +autoreply define l2tpv3_set_tunnel_cookies { u32 client_index; u32 context; @@ -61,16 +61,6 @@ define l2tpv3_set_tunnel_cookies u64 new_remote_cookie; }; -/** \brief L2TP tunnel set cookies response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2tpv3_set_tunnel_cookies_reply -{ - u32 context; - i32 retval; -}; - define sw_if_l2tpv3_tunnel_details { u32 context; @@ -91,7 +81,7 @@ define sw_if_l2tpv3_tunnel_dump u32 context; }; -define l2tpv3_interface_enable_disable +autoreply define 
l2tpv3_interface_enable_disable { u32 client_index; u32 context; @@ -99,13 +89,7 @@ define l2tpv3_interface_enable_disable u32 sw_if_index; }; -define l2tpv3_interface_enable_disable_reply -{ - u32 context; - i32 retval; -}; - -define l2tpv3_set_lookup_key +autoreply define l2tpv3_set_lookup_key { u32 client_index; u32 context; @@ -113,12 +97,6 @@ define l2tpv3_set_lookup_key u8 key; }; -define l2tpv3_set_lookup_key_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/lisp-cp/lisp.api b/src/vnet/lisp-cp/lisp.api index a50a5ccb..8bed71b3 100644 --- a/src/vnet/lisp-cp/lisp.api +++ b/src/vnet/lisp-cp/lisp.api @@ -59,7 +59,7 @@ define lisp_add_del_locator_set_reply @param priority - priority of the lisp locator @param weight - weight of the lisp locator */ -define lisp_add_del_locator +autoreply define lisp_add_del_locator { u32 client_index; u32 context; @@ -70,16 +70,6 @@ define lisp_add_del_locator u8 weight; }; -/** \brief Reply for locator add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_locator_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete lisp eid-table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -98,7 +88,7 @@ define lisp_add_del_locator_reply HMAC_SHA_256_128 2 @param key - secret key */ -define lisp_add_del_local_eid +autoreply define lisp_add_del_local_eid { u32 client_index; u32 context; @@ -112,16 +102,6 @@ define lisp_add_del_local_eid u8 key[64]; }; -/** \brief Reply for local_eid add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_local_eid_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add/delete map server @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request 
@@ -129,7 +109,7 @@ define lisp_add_del_local_eid_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - map server IP address */ -define lisp_add_del_map_server +autoreply define lisp_add_del_map_server { u32 client_index; u32 context; @@ -138,16 +118,6 @@ define lisp_add_del_map_server u8 ip_address[16]; }; -/** \brief Reply for lisp_add_del_map_server - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_map_server_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map-resolver @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -155,7 +125,7 @@ define lisp_add_del_map_server_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - array of address bytes */ -define lisp_add_del_map_resolver +autoreply define lisp_add_del_map_resolver { u32 client_index; u32 context; @@ -164,45 +134,25 @@ define lisp_add_del_map_resolver u8 ip_address[16]; }; -/** \brief Reply for map_resolver add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_map_resolver_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable LISP feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define lisp_enable_disable +autoreply define lisp_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable LISP PITR node @param client_index - opaque cookie to identify the sender @param context - sender context, to match 
reply w/ request @param ls_name - locator set name @param is_add - add locator set if non-zero, else disable pitr */ -define lisp_pitr_set_locator_set +autoreply define lisp_pitr_set_locator_set { u32 client_index; u32 context; @@ -210,16 +160,6 @@ define lisp_pitr_set_locator_set u8 ls_name[64]; }; -/** \brief Reply for lisp_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_pitr_set_locator_set_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable use of PETR @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -227,7 +167,7 @@ define lisp_pitr_set_locator_set_reply @param address - PETR IP address @param is_add - add locator set if non-zero, else disable pitr */ -define lisp_use_petr +autoreply define lisp_use_petr { u32 client_index; u32 context; @@ -236,16 +176,6 @@ define lisp_use_petr u8 is_add; }; -/** \brief Reply for lisp_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_use_petr_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for LISP PETR status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -298,45 +228,25 @@ define show_lisp_rloc_probe_state_reply @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define lisp_rloc_probe_enable_disable +autoreply define lisp_rloc_probe_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for lisp_rloc_probe_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_rloc_probe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable/disable LISP map-register @param client_index - 
opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define lisp_map_register_enable_disable +autoreply define lisp_map_register_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for lisp_map_register_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_map_register_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get state of LISP map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -366,23 +276,13 @@ define show_lisp_map_register_state_reply 0 - destination only 1 - source/destaination */ -define lisp_map_request_mode +autoreply define lisp_map_request_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for lisp_map_request_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_map_request_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for LISP map-request mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -430,7 +330,7 @@ typeonly manual_endian manual_print define remote_locator @param rloc_num - number of remote locators @param rlocs - remote locator records */ -manual_print manual_endian define lisp_add_del_remote_mapping +autoreply manual_print manual_endian define lisp_add_del_remote_mapping { u32 client_index; u32 context; @@ -448,16 +348,6 @@ manual_print manual_endian define lisp_add_del_remote_mapping vl_api_remote_locator_t rlocs[rloc_num]; }; -/** \brief Reply for lisp_add_del_remote_mapping - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_remote_mapping_reply -{ - u32 context; - i32 
retval; -}; - /** \brief add or delete LISP adjacency adjacency @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +360,7 @@ define lisp_add_del_remote_mapping_reply @param reid - remote EID @param leid - local EID */ -define lisp_add_del_adjacency +autoreply define lisp_add_del_adjacency { u32 client_index; u32 context; @@ -483,23 +373,13 @@ define lisp_add_del_adjacency u8 leid_len; }; -/** \brief Reply for lisp_add_del_adjacency - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_adjacency_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map request itr rlocs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete @param locator_set_name - locator set name */ -define lisp_add_del_map_request_itr_rlocs +autoreply define lisp_add_del_map_request_itr_rlocs { u32 client_index; u32 context; @@ -512,12 +392,6 @@ define lisp_add_del_map_request_itr_rlocs @param retval - return code */ -define lisp_add_del_map_request_itr_rlocs_reply -{ - u32 context; - i32 retval; -}; - /** \brief map/unmap vni/bd_index to vrf @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -525,7 +399,7 @@ define lisp_add_del_map_request_itr_rlocs_reply @param dp_table - virtual network id/bridge domain index @param vrf - vrf */ -define lisp_eid_table_add_del_map +autoreply define lisp_eid_table_add_del_map { u32 client_index; u32 context; @@ -535,16 +409,6 @@ define lisp_eid_table_add_del_map u8 is_l2; }; -/** \brief Reply for lisp_eid_table_add_del_map - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_eid_table_add_del_map_reply -{ - u32 context; - i32 retval; -}; - /** \brief 
Request for map lisp locator status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/lisp-cp/one.api b/src/vnet/lisp-cp/one.api index ca82f694..2fa1edf6 100644 --- a/src/vnet/lisp-cp/one.api +++ b/src/vnet/lisp-cp/one.api @@ -59,7 +59,7 @@ define one_add_del_locator_set_reply @param priority - priority of the locator @param weight - weight of the locator */ -define one_add_del_locator +autoreply define one_add_del_locator { u32 client_index; u32 context; @@ -70,16 +70,6 @@ define one_add_del_locator u8 weight; }; -/** \brief Reply for locator add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_locator_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete ONE eid-table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -98,7 +88,7 @@ define one_add_del_locator_reply HMAC_SHA_256_128 2 @param key - secret key */ -define one_add_del_local_eid +autoreply define one_add_del_local_eid { u32 client_index; u32 context; @@ -112,16 +102,6 @@ define one_add_del_local_eid u8 key[64]; }; -/** \brief Reply for local_eid add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_local_eid_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add/delete map server @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -129,7 +109,7 @@ define one_add_del_local_eid_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - map server IP address */ -define one_add_del_map_server +autoreply define one_add_del_map_server { u32 client_index; u32 context; @@ -138,16 +118,6 @@ define one_add_del_map_server u8 ip_address[16]; }; -/** \brief Reply for one_add_del_map_server - 
@param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_map_server_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map-resolver @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -155,7 +125,7 @@ define one_add_del_map_server_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - array of address bytes */ -define one_add_del_map_resolver +autoreply define one_add_del_map_resolver { u32 client_index; u32 context; @@ -164,45 +134,25 @@ define one_add_del_map_resolver u8 ip_address[16]; }; -/** \brief Reply for map_resolver add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_map_resolver_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable ONE feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define one_enable_disable +autoreply define one_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable ONE PITR node @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param ls_name - locator set name @param is_add - add locator set if non-zero, else disable pitr */ -define one_pitr_set_locator_set +autoreply define one_pitr_set_locator_set { u32 client_index; u32 context; @@ -210,16 +160,6 @@ define one_pitr_set_locator_set u8 ls_name[64]; }; -/** \brief Reply for one_pitr_set_locator_set - @param context - returned sender context, to match reply 
w/ request - @param retval - return code -*/ -define one_pitr_set_locator_set_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable use of PETR @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -227,7 +167,7 @@ define one_pitr_set_locator_set_reply @param address - PETR IP address @param is_add - add locator set if non-zero, else disable PETR */ -define one_use_petr +autoreply define one_use_petr { u32 client_index; u32 context; @@ -236,16 +176,6 @@ define one_use_petr u8 is_add; }; -/** \brief Reply for one_use_petr - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_use_petr_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for ONE PETR status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -298,45 +228,25 @@ define show_one_rloc_probe_state_reply @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define one_rloc_probe_enable_disable +autoreply define one_rloc_probe_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for one_rloc_probe_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_rloc_probe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable/disable ONE map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define one_map_register_enable_disable +autoreply define one_map_register_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for one_map_register_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval 
- return code -*/ -define one_map_register_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get state of ONE map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -366,23 +276,13 @@ define show_one_map_register_state_reply 0 - destination only 1 - source/destaination */ -define one_map_request_mode +autoreply define one_map_request_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for one_map_request_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_map_request_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for ONE map-request mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -430,7 +330,7 @@ typeonly manual_endian manual_print define one_remote_locator @param rloc_num - number of remote locators @param rlocs - remote locator records */ -manual_print manual_endian define one_add_del_remote_mapping +autoreply manual_print manual_endian define one_add_del_remote_mapping { u32 client_index; u32 context; @@ -448,16 +348,6 @@ manual_print manual_endian define one_add_del_remote_mapping vl_api_one_remote_locator_t rlocs[rloc_num]; }; -/** \brief Reply for one_add_del_remote_mapping - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_remote_mapping_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete ONE adjacency adjacency @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +360,7 @@ define one_add_del_remote_mapping_reply @param reid - remote EID @param leid - local EID */ -define one_add_del_adjacency +autoreply define one_add_del_adjacency { u32 client_index; u32 context; @@ -483,23 +373,13 @@ define one_add_del_adjacency u8 
leid_len; }; -/** \brief Reply for one_add_del_adjacency - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_adjacency_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map request itr rlocs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete @param locator_set_name - locator set name */ -define one_add_del_map_request_itr_rlocs +autoreply define one_add_del_map_request_itr_rlocs { u32 client_index; u32 context; @@ -507,17 +387,6 @@ define one_add_del_map_request_itr_rlocs u8 locator_set_name[64]; }; -/** \brief Reply for one_add_del_map_request_itr_rlocs - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define one_add_del_map_request_itr_rlocs_reply -{ - u32 context; - i32 retval; -}; - /** \brief map/unmap vni/bd_index to vrf @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -525,7 +394,7 @@ define one_add_del_map_request_itr_rlocs_reply @param dp_table - virtual network id/bridge domain index @param vrf - vrf */ -define one_eid_table_add_del_map +autoreply define one_eid_table_add_del_map { u32 client_index; u32 context; @@ -535,16 +404,6 @@ define one_eid_table_add_del_map u8 is_l2; }; -/** \brief Reply for one_eid_table_add_del_map - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_eid_table_add_del_map_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for map one locator status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -901,31 +760,19 @@ define one_stats_details u32 bytes; }; -define one_stats_flush +autoreply define one_stats_flush { u32 client_index; u32 context; }; 
-define one_stats_flush_reply -{ - u32 context; - i32 retval; -}; - -define one_stats_enable_disable +autoreply define one_stats_enable_disable { u32 client_index; u32 context; u8 is_en; }; -define one_stats_enable_disable_reply -{ - u32 context; - i32 retval; -}; - define show_one_stats_enable_disable { u32 client_index; diff --git a/src/vnet/lisp-gpe/lisp_gpe.api b/src/vnet/lisp-gpe/lisp_gpe.api index 43a6a6cd..f79d18c1 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.api +++ b/src/vnet/lisp-gpe/lisp_gpe.api @@ -43,7 +43,7 @@ typeonly manual_print manual_endian define gpe_locator @param loc_num - number of locators @param locs - array of remote locators */ -manual_print manual_endian define gpe_add_del_fwd_entry +autoreply manual_print manual_endian define gpe_add_del_fwd_entry { u32 client_index; u32 context; @@ -60,44 +60,24 @@ manual_print manual_endian define gpe_add_del_fwd_entry vl_api_gpe_locator_t locs[loc_num]; }; -/** \brief Reply for gpe_fwd_entry add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_add_del_fwd_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable gpe protocol @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define gpe_enable_disable +autoreply define gpe_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete gpe_iface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete */ -define gpe_add_del_iface +autoreply define gpe_add_del_iface { u32 client_index; u32 
context; @@ -107,16 +87,6 @@ define gpe_add_del_iface u32 vni; }; -/** \brief Reply for gpe_iface add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_add_del_iface_reply -{ - u32 context; - i32 retval; -}; - define gpe_fwd_entries_get { u32 client_index; @@ -163,23 +133,13 @@ manual_endian manual_print define gpe_fwd_entry_path_details @param context - sender context, to match reply w/ request @param mode - LISP (value 0) or VXLAN (value 1) */ -define gpe_set_encap_mode +autoreply define gpe_set_encap_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for set_encap_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_set_encap_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief get GPE encapsulation mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/map/map.api b/src/vnet/map/map.api index 4e4be85e..d68f13f0 100644 --- a/src/vnet/map/map.api +++ b/src/vnet/map/map.api @@ -62,22 +62,13 @@ define map_add_domain_reply @param context - sender context, to match reply w/ request @param index - MAP Domain index */ -define map_del_domain +autoreply define map_del_domain { u32 client_index; u32 context; u32 index; }; -/** \brief Reply for MAP domain del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define map_del_domain_reply -{ - u32 context; - i32 retval; -}; /** \brief Add or Delete MAP rule from a domain (Only used for shared IPv4 per subscriber) @param client_index - opaque cookie to identify the sender @@ -87,7 +78,7 @@ define map_del_domain_reply @param ip6_dst - MAP CE IPv6 address @param psid - Rule PSID */ -define map_add_del_rule +autoreply define map_add_del_rule { u32 client_index; u32 context; @@ -97,15 +88,6 @@ define map_add_del_rule u16 psid; }; 
-/** \brief Reply for MAP rule add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define map_add_del_rule_reply -{ - u32 context; - i32 retval; -}; /** \brief Get list of map domains @param client_index - opaque cookie to identify the sender diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api index a1e1270a..c8a3ffb7 100644 --- a/src/vnet/mpls/mpls.api +++ b/src/vnet/mpls/mpls.api @@ -26,7 +26,7 @@ @param mb_address_length - Length of IP prefix @param mb_address[16] - IP prefix/ */ -define mpls_ip_bind_unbind +autoreply define mpls_ip_bind_unbind { u32 client_index; u32 context; @@ -40,16 +40,6 @@ define mpls_ip_bind_unbind u8 mb_address[16]; }; -/** \brief Reply for MPLS IP bind/unbind request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_ip_bind_unbind_reply -{ - u32 context; - i32 retval; -}; - /** \brief MPLS tunnel Add / del route @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -172,7 +162,7 @@ manual_endian manual_print define mpls_tunnel_details @param mr_next_hop_out_label_stack - the next-hop output label stack, outer most first @param next_hop_via_label - The next-hop is a resolved via a local label */ -define mpls_route_add_del +autoreply define mpls_route_add_del { u32 client_index; u32 context; @@ -199,16 +189,6 @@ define mpls_route_add_del u32 mr_next_hop_out_label_stack[mr_next_hop_n_out_labels]; }; -/** \brief Reply for MPLS route add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_route_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump MPLS fib table @param client_index - opaque cookie to identify the sender */ @@ -240,4 +220,4 @@ manual_endian manual_print define mpls_fib_details * eval: (c-set-style "gnu") * End: */ - \ No newline at 
end of file + diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api index e207e46f..4aef09da 100644 --- a/src/vnet/session/session.api +++ b/src/vnet/session/session.api @@ -49,26 +49,17 @@ define application_attach_reply { @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ - define application_detach { +autoreply define application_detach { u32 client_index; u32 context; }; - /** \brief detach reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define application_detach_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, please map an additional shared memory segment @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param segment_name - */ -define map_another_segment { +autoreply define map_another_segment { u32 client_index; u32 context; u32 segment_size; @@ -83,7 +74,7 @@ define map_another_segment { "tcp://::/0/80" [ipv6] etc. @param options - socket options, fifo sizes, etc. */ -define bind_uri { +autoreply define bind_uri { u32 client_index; u32 context; u32 accept_cookie; @@ -97,7 +88,7 @@ define bind_uri { "tcp://::/0/80" [ipv6], etc. @param options - socket options, fifo sizes, etc. 
*/ -define unbind_uri { +autoreply define unbind_uri { u32 client_index; u32 context; u8 uri[128]; @@ -122,24 +113,6 @@ define connect_uri { u64 options[16]; }; -/** \brief Bind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bind_uri_reply { - u32 context; - i32 retval; -}; - -/** \brief unbind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define unbind_uri_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, connect reply @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -165,15 +138,6 @@ define connect_uri_reply { u8 segment_name[128]; }; -/** \brief client->vpp - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define map_another_segment_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, accept this session @param context - sender context, to match reply w/ request @param listener_handle - tells client which listener this pertains to @@ -290,7 +254,7 @@ define bind_sock { @param context - sender context, to match reply w/ request @param handle - bind handle obtained from bind reply */ -define unbind_sock { +autoreply define unbind_sock { u32 client_index; u32 context; u64 handle; @@ -339,15 +303,6 @@ define bind_sock_reply { u8 segment_name[128]; }; -/** \brief unbind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define unbind_sock_reply { - u32 context; - i32 retval; -}; - /** \brief vpp/server->client, connect reply @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -378,23 +333,14 @@ define connect_sock_reply { @param context - sender context, to match reply w/ request @param is_enable - disable session layer if 0, enable otherwise */ -define 
session_enable_disable { +autoreply define session_enable_disable { u32 client_index; u32 context; u8 is_enable; }; -/** \brief Reply for session enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define session_enable_disable_reply { - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") * End: - */ \ No newline at end of file + */ diff --git a/src/vnet/span/span.api b/src/vnet/span/span.api index 4babdd83..914fd8d0 100644 --- a/src/vnet/span/span.api +++ b/src/vnet/span/span.api @@ -21,7 +21,7 @@ @param sw_if_index_to - interface where the traffic is mirrored @param state - 0 = disabled, 1 = rx enabled, 2 = tx enabled, 3 tx & rx enabled */ -define sw_interface_span_enable_disable { +autoreply define sw_interface_span_enable_disable { u32 client_index; u32 context; u32 sw_if_index_from; @@ -29,14 +29,6 @@ define sw_interface_span_enable_disable { u8 state; }; -/** \brief Reply to SPAN enable/disable request - @param context - sender context which was passed in the request -*/ -define sw_interface_span_enable_disable_reply { - u32 context; - i32 retval; -}; - /** \brief SPAN dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/sr/sr.api b/src/vnet/sr/sr.api index 5feadcb0..9e900741 100644 --- a/src/vnet/sr/sr.api +++ b/src/vnet/sr/sr.api @@ -25,7 +25,7 @@ @param fib_table FIB table in which we should install the localsid entry @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. 
*/ -define sr_localsid_add_del +autoreply define sr_localsid_add_del { u32 client_index; u32 context; @@ -39,16 +39,6 @@ define sr_localsid_add_del u8 nh_addr[16]; }; -/** \brief IPv6 SR LocalSID add/del request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_localsid_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy add @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -59,7 +49,7 @@ define sr_localsid_add_del_reply @param fib_table is the VRF where to install the FIB entry for the BSID @param segments is a vector of IPv6 address composing the segment list */ -define sr_policy_add +autoreply define sr_policy_add { u32 client_index; u32 context; @@ -72,16 +62,6 @@ define sr_policy_add u8 segments[0]; }; -/** \brief IPv6 SR Policy add request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_add_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy modification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -94,7 +74,7 @@ define sr_policy_add_reply @param weight is the weight of the sid list. optional. @param is_encap Mode. Encapsulation or SRH insertion. 
*/ -define sr_policy_mod +autoreply define sr_policy_mod { u32 client_index; u32 context; @@ -108,23 +88,13 @@ define sr_policy_mod u8 segments[0]; }; -/** \brief IPv6 SR Policy modification request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_mod_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy deletion @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bsid is the bindingSID of the SR Policy @param index is the index of the SR policy */ -define sr_policy_del +autoreply define sr_policy_del { u32 client_index; u32 context; @@ -132,16 +102,6 @@ define sr_policy_del u32 sr_policy_index; }; -/** \brief IPv6 SR Policy deletion request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR steering add/del @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -154,7 +114,7 @@ define sr_policy_del_reply @param sw_if_index is the incoming interface for L2 traffic @param traffic_type describes the type of traffic */ -define sr_steering_add_del +autoreply define sr_steering_add_del { u32 client_index; u32 context; @@ -168,16 +128,6 @@ define sr_steering_add_del u8 traffic_type; }; -/** \brief IPv6 SR steering add/del request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_steering_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump the list of SR LocalSIDs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/unix/tap.api b/src/vnet/unix/tap.api index 1fd0bb09..d9fba371 100644 --- 
a/src/vnet/unix/tap.api +++ b/src/vnet/unix/tap.api @@ -93,23 +93,13 @@ define tap_modify_reply @param context - sender context, to match reply w/ request @param sw_if_index - interface index of existing tap interface */ -define tap_delete +autoreply define tap_delete { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Reply for tap delete request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define tap_delete_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump tap interfaces request */ define sw_interface_tap_dump { diff --git a/src/vnet/vxlan/vxlan.api b/src/vnet/vxlan/vxlan.api index 048220fb..6c331a58 100644 --- a/src/vnet/vxlan/vxlan.api +++ b/src/vnet/vxlan/vxlan.api @@ -61,7 +61,7 @@ define vxlan_tunnel_details @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass @param enable - if non-zero enable, else disable */ -define sw_interface_set_vxlan_bypass +autoreply define sw_interface_set_vxlan_bypass { u32 client_index; u32 context; @@ -69,13 +69,3 @@ define sw_interface_set_vxlan_bypass u8 is_ipv6; u8 enable; }; - -/** \brief Interface set vxlan-bypass response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_vxlan_bypass_reply -{ - u32 context; - i32 retval; -}; \ No newline at end of file diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index a4ba180d..7c07c822 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -80,7 +80,7 @@ define create_vlan_subif_reply @param sw_if_index - index of the interface @param enable - if non-zero enable, else disable */ -define sw_interface_set_mpls_enable +autoreply define sw_interface_set_mpls_enable { u32 client_index; u32 context; @@ -88,16 +88,6 @@ define sw_interface_set_mpls_enable u8 enable; }; -/** \brief Reply for MPLS state on an interface - @param context - returned sender context, to match reply w/ request - 
@param retval - return code -*/ -define sw_interface_set_mpls_enable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Proxy ARP add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -106,7 +96,7 @@ define sw_interface_set_mpls_enable_reply @param low_address[4] - Low address of the Proxy ARP range @param hi_address[4] - High address of the Proxy ARP range */ -define proxy_arp_add_del +autoreply define proxy_arp_add_del { u32 client_index; u32 context; @@ -116,23 +106,13 @@ define proxy_arp_add_del u8 hi_address[4]; }; -/** \brief Reply for proxy arp add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define proxy_arp_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Proxy ARP add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - Which interface to enable / disable Proxy Arp on @param enable_disable - 1 to enable Proxy ARP on interface, 0 to disable */ -define proxy_arp_intfc_enable_disable +autoreply define proxy_arp_intfc_enable_disable { u32 client_index; u32 context; @@ -141,23 +121,13 @@ define proxy_arp_intfc_enable_disable u8 enable_disable; }; -/** \brief Reply for Proxy ARP interface enable / disable request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define proxy_arp_intfc_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Reset VRF (remove all routes etc) request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4 @param vrf_id - ID of th FIB table / VRF to reset */ -define reset_vrf +autoreply define reset_vrf { u32 client_index; u32 context; @@ -165,16 +135,6 @@ define reset_vrf u32 
vrf_id; }; -/** \brief Reply for Reset VRF request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define reset_vrf_reply -{ - u32 context; - i32 retval; -}; - /** \brief Is Address Reachable request - DISABLED @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -200,7 +160,7 @@ define is_address_reachable @param enable_disable - 1 = enable stats, 0 = disable @param pid - pid of process requesting stats updates */ -define want_stats +autoreply define want_stats { u32 client_index; u32 context; @@ -208,16 +168,6 @@ define want_stats u32 pid; }; -/** \brief Reply for Want Stats request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_stats_reply -{ - u32 context; - i32 retval; -}; - typeonly manual_print manual_endian define ip4_fib_counter { u32 address; @@ -331,7 +281,7 @@ define oam_event @param enable_disable- enable if non-zero, else disable @param pid - pid of the requesting process */ -define want_oam_events +autoreply define want_oam_events { u32 client_index; u32 context; @@ -339,16 +289,6 @@ define want_oam_events u32 pid; }; -/** \brief Want OAM events response - @param context - sender context, to match reply w/ request - @param retval - return code for the want oam stats request -*/ -define want_oam_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief OAM add / del target request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -357,7 +297,7 @@ define want_oam_events_reply @param dst_address[] - destination address of the target @param is_add - add target if non-zero, else delete */ -define oam_add_del +autoreply define oam_add_del { u32 client_index; u32 context; @@ -367,23 +307,13 @@ define oam_add_del u8 is_add; }; -/** \brief OAM add / del target response - @param context - sender 
context, to match reply w/ request - @param retval - return code of the request -*/ -define oam_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Reset fib table request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param vrf_id - vrf/table id of the fib table to reset @param is_ipv6 - an ipv6 fib to reset if non-zero, else ipv4 */ -define reset_fib +autoreply define reset_fib { u32 client_index; u32 context; @@ -391,16 +321,6 @@ define reset_fib u8 is_ipv6; }; -/** \brief Reset fib response - @param context - sender context, to match reply w/ request - @param retval - return code for the reset bfib request -*/ -define reset_fib_reply -{ - u32 context; - i32 retval; -}; - /** \brief Create loopback interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -458,23 +378,13 @@ define create_loopback_instance_reply @param context - sender context, to match reply w/ request @param sw_if_index - sw index of the interface that was created */ -define delete_loopback +autoreply define delete_loopback { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Delete loopback interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_loopback_reply -{ - u32 context; - i32 retval; -}; - /** \brief Control ping from client to api server request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -543,7 +453,7 @@ define cli_inband_reply @param is_ipv6 - neighbor limit if non-zero, else ARP limit @param arp_neighbor_limit - the new limit, defaults are ~ 50k */ -define set_arp_neighbor_limit +autoreply define set_arp_neighbor_limit { u32 client_index; u32 context; @@ -551,16 +461,6 @@ define set_arp_neighbor_limit u32 arp_neighbor_limit; }; -/** \brief Set 
max allowed ARP or ip6 neighbor entries response - @param context - sender context, to match reply w/ request - @param retval - return code for request -*/ -define set_arp_neighbor_limit_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface patch add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -568,7 +468,7 @@ define set_arp_neighbor_limit_reply @param tx_sw_if_index - transmit side interface @param is_add - if non-zero set up the interface patch, else remove it */ -define l2_patch_add_del +autoreply define l2_patch_add_del { u32 client_index; u32 context; @@ -577,23 +477,13 @@ define l2_patch_add_del u8 is_add; }; -/** \brief L2 interface patch add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_patch_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface set vpath request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface used to reach neighbor @param enable - if non-zero enable, else disable */ -define sw_interface_set_vpath +autoreply define sw_interface_set_vpath { u32 client_index; u32 context; @@ -601,16 +491,6 @@ define sw_interface_set_vpath u8 enable; }; -/** \brief Interface set vpath response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_vpath_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set L2 XConnect between two interfaces request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -618,7 +498,7 @@ define sw_interface_set_vpath_reply @param tx_sw_if_index - Transmit interface index @param enable - enable xconnect if not 0, else set to L3 mode */ -define 
sw_interface_set_l2_xconnect +autoreply define sw_interface_set_l2_xconnect { u32 client_index; u32 context; @@ -627,16 +507,6 @@ define sw_interface_set_l2_xconnect u8 enable; }; -/** \brief Set L2 XConnect response - @param context - sender context, to match reply w/ request - @param retval - L2 XConnect request return code -*/ -define sw_interface_set_l2_xconnect_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface bridge mode request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -646,7 +516,7 @@ define sw_interface_set_l2_xconnect_reply @param shg - Shared horizon group, for bridge mode only @param enable - Enable beige mode if not 0, else set to L3 mode */ -define sw_interface_set_l2_bridge +autoreply define sw_interface_set_l2_bridge { u32 client_index; u32 context; @@ -657,16 +527,6 @@ define sw_interface_set_l2_bridge u8 enable; }; -/** \brief Interface bridge mode response - @param context - sender context, to match reply w/ request - @param retval - Bridge mode request return code -*/ -define sw_interface_set_l2_bridge_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set bridge domain ip to mac entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -676,7 +536,7 @@ define sw_interface_set_l2_bridge_reply @param mac_address - MAC address @param */ -define bd_ip_mac_add_del +autoreply define bd_ip_mac_add_del { u32 client_index; u32 context; @@ -687,16 +547,6 @@ define bd_ip_mac_add_del u8 mac_address[6]; }; -/** \brief Set bridge domain ip to mac entry response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request -*/ -define bd_ip_mac_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset the classification table for an interface request @param client_index - opaque cookie to identify the sender 
@param context - sender context, to match reply w/ request @@ -704,7 +554,7 @@ define bd_ip_mac_add_del_reply @param sw_if_index - interface to associate with the table @param table_index - index of the table, if ~0 unset the table */ -define classify_set_interface_ip_table +autoreply define classify_set_interface_ip_table { u32 client_index; u32 context; @@ -713,16 +563,6 @@ define classify_set_interface_ip_table u32 table_index; /* ~0 => off */ }; -/** \brief Set/unset interface classification table response - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define classify_set_interface_ip_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset l2 classification tables for an interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -731,7 +571,7 @@ define classify_set_interface_ip_table_reply @param ip6_table_index - ip6 index @param other_table_index - other index */ -define classify_set_interface_l2_tables +autoreply define classify_set_interface_l2_tables { u32 client_index; u32 context; @@ -743,16 +583,6 @@ define classify_set_interface_l2_tables u8 is_input; }; -/** \brief Set/unset l2 classification tables for an interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define classify_set_interface_l2_tables_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get node index using name request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -809,7 +639,7 @@ define add_node_next_reply @param sw_if_index - interface to enable/disable filtering on @param enable_disable - if non-zero enable filtering, else disable */ -define l2_interface_efp_filter +autoreply define l2_interface_efp_filter { u32 client_index; u32 context; @@ -817,16 +647,6 @@ define l2_interface_efp_filter 
u32 enable_disable; }; -/** \brief L2 interface ethernet flow point filtering response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_efp_filter_reply -{ - u32 context; - i32 retval; -}; - define create_subif { u32 client_index; @@ -882,7 +702,7 @@ define show_version_reply }; /* Gross kludge, DGMS */ -define interface_name_renumber +autoreply define interface_name_renumber { u32 client_index; u32 context; @@ -890,12 +710,6 @@ define interface_name_renumber u32 new_show_dev_instance; }; -define interface_name_renumber_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for ip4 arp resolution events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -903,7 +717,7 @@ define interface_name_renumber_reply @param pid - sender's pid @param address - the exact ip4 address of interest */ -define want_ip4_arp_events +autoreply define want_ip4_arp_events { u32 client_index; u32 context; @@ -912,16 +726,6 @@ define want_ip4_arp_events u32 address; }; -/** \brief Reply for interface events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_ip4_arp_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about an ip4 arp resolution event @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -949,7 +753,7 @@ define ip4_arp_event @param pid - sender's pid @param address - the exact ip6 address of interest */ -define want_ip6_nd_events +autoreply define want_ip6_nd_events { u32 client_index; u32 context; @@ -958,16 +762,6 @@ define want_ip6_nd_events u8 address[16]; }; -/** \brief Reply for ip6 nd resolution events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define 
want_ip6_nd_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about an ip6 nd resolution or mac/ip event @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -999,7 +793,7 @@ define ip6_nd_event Note: User is recommeneded to use just one valid table_index per call. (ip4_table_index, ip6_table_index, or l2_table_index) */ -define input_acl_set_interface +autoreply define input_acl_set_interface { u32 client_index; u32 context; @@ -1010,16 +804,6 @@ define input_acl_set_interface u8 is_add; }; -/** \brief Set/unset input ACL interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define input_acl_set_interface_reply -{ - u32 context; - i32 retval; -}; - define get_node_graph { u32 client_index; @@ -1048,7 +832,7 @@ define get_node_graph_reply @param pow_enable - Proof of Work enabled or not flag @param trace_enable - iOAM Trace enabled or not flag */ -define ioam_enable +autoreply define ioam_enable { u32 client_index; u32 context; @@ -1060,38 +844,18 @@ define ioam_enable u32 node_id; }; -/** \brief iOAM Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ioam_enable_reply -{ - u32 context; - i32 retval; -}; - /** \brief iOAM disable @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param index - MAP Domain index */ -define ioam_disable +autoreply define ioam_disable { u32 client_index; u32 context; u16 id; }; -/** \brief iOAM disable response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ioam_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Query relative index via node names @param client_index - opaque cookie to identify the sender @param context - 
sender context, to match reply w/ request @@ -1149,7 +913,7 @@ define pg_create_interface_reply @param count - number of packets to be captured @param pcap_file - pacp file name to store captured packets */ -define pg_capture +autoreply define pg_capture { u32 client_index; u32 context; @@ -1160,23 +924,13 @@ define pg_capture u8 pcap_file_name[pcap_name_length]; }; -/** \brief PacketGenerator capture packets response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pg_capture_reply -{ - u32 context; - i32 retval; -}; - /** \brief Enable / disable packet generator request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enabled - 1 if enabling streams, 0 if disabling @param stream - stream name to be enable/disabled, if not specified handle all streams */ -define pg_enable_disable +autoreply define pg_enable_disable { u32 client_index; u32 context; @@ -1185,16 +939,6 @@ define pg_enable_disable u8 stream_name[stream_name_length]; }; -/** \brief Reply for enable / disable packet generator - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define pg_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Configure IP source and L4 port-range check @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1208,7 +952,7 @@ define pg_enable_disable_reply @param vrf_id - fib table/vrf id to associate the source and port-range check with @note To specify a single port set low_port and high_port entry the same */ -define ip_source_and_port_range_check_add_del +autoreply define ip_source_and_port_range_check_add_del { u32 client_index; u32 context; @@ -1222,16 +966,6 @@ define ip_source_and_port_range_check_add_del u32 vrf_id; }; -/** \brief Configure IP source and L4 port-range check reply - @param 
context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_source_and_port_range_check_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set interface source and L4 port-range request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1239,7 +973,7 @@ define ip_source_and_port_range_check_add_del_reply @param tcp_vrf_id - VRF associated with source and TCP port-range check @param udp_vrf_id - VRF associated with source and TCP port-range check */ -define ip_source_and_port_range_check_interface_add_del +autoreply define ip_source_and_port_range_check_interface_add_del { u32 client_index; u32 context; @@ -1251,36 +985,17 @@ define ip_source_and_port_range_check_interface_add_del u32 udp_out_vrf_id; }; -/** \brief Set interface source and L4 port-range response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ip_source_and_port_range_check_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete sub interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - sw index of the interface that was created by create_subif */ -define delete_subif { +autoreply define delete_subif { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Delete sub interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_subif_reply { - u32 context; - i32 retval; -}; - /** \brief Punt traffic to the host @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1289,7 +1004,7 @@ define delete_subif_reply { @param l4_protocol - L4 protocol to be punted, only UDP (0x11) is supported @param l4_port - TCP/UDP port to be punted 
*/ -define punt { +autoreply define punt { u32 client_index; u32 context; u8 is_add; @@ -1298,23 +1013,13 @@ define punt { u16 l4_port; }; -/** \brief Reply to the punt request - @param context - sender context which was passed in the request - @param retval - return code of punt request -*/ -define punt_reply -{ - u32 context; - i32 retval; -}; - /** \brief Feature path enable/disable request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - the interface @param enable - 1 = on, 0 = off */ -define feature_enable_disable { +autoreply define feature_enable_disable { u32 client_index; u32 context; u32 sw_if_index; @@ -1323,16 +1028,6 @@ define feature_enable_disable { u8 feature_name[64]; }; -/** \brief Reply to the eature path enable/disable request - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define feature_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") -- cgit 1.2.3-korg From 69186d930ff43b127269abc568bdc656b1e635ad Mon Sep 17 00:00:00 2001 From: Jon Loeliger Date: Thu, 27 Apr 2017 21:20:51 -0500 Subject: Fix hostname fencepost error in dhcp_compl_event_callback. Hostnames are limited to 63 characters and a NUL terminator. 
Change-Id: Ie1724d83675cca5e8cdfcd99d8e56e530a044d5d Signed-off-by: Jon Loeliger --- src/vnet/dhcp/dhcp_api.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c index e9c757e8..2c0dd77d 100644 --- a/src/vnet/dhcp/dhcp_api.c +++ b/src/vnet/dhcp/dhcp_api.c @@ -192,6 +192,7 @@ dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, { unix_shared_memory_queue_t *q; vl_api_dhcp_compl_event_t *mp; + u32 len; q = vl_api_client_index_to_input_queue (client_index); if (!q) @@ -201,8 +202,9 @@ dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, mp->client_index = client_index; mp->pid = pid; mp->is_ipv6 = is_ipv6; - clib_memcpy (&mp->hostname, hostname, vec_len (hostname)); - mp->hostname[vec_len (hostname) + 1] = '\n'; + len = (vec_len (hostname) < 63) ? vec_len (hostname) : 63; + clib_memcpy (&mp->hostname, hostname, len); + mp->hostname[len] = 0; clib_memcpy (&mp->host_address[0], host_address, 16); clib_memcpy (&mp->router_address[0], router_address, 16); -- cgit 1.2.3-korg From 630198f04916deb35c5b7774823ae1a5dd168a6c Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Mon, 22 May 2017 09:20:20 -0400 Subject: IPv6 Performance bugs - inline the FIB lookup function; this requires access to the bihash, so for files that use more than one type this casues problems. those files that include ip6_fib.h unnecessarily have been updated - better use of the feature arcs. ip6-lookup and interface-output are now sentinels (end-node-index in the cm speak) rather than enabled features. 
Change-Id: I9d1375fee63f7dbb2d327da6124d8e60b63367ec Signed-off-by: Neale Ranns --- src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c | 1 - src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c | 2 +- src/vnet/dhcp/dhcp6_proxy_node.c | 9 +++--- src/vnet/fib/fib.h | 2 -- src/vnet/fib/ip6_fib.c | 39 ----------------------- src/vnet/fib/ip6_fib.h | 42 +++++++++++++++++++++++-- src/vnet/ip/ip6_forward.c | 32 +++++++++---------- src/vnet/vxlan-gpe/vxlan_gpe.c | 8 ++--- src/vpp/api/api.c | 2 -- 9 files changed, 63 insertions(+), 74 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c index f334c983..d90cd5e4 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c index cfc550cd..8558c505 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index de73154d..885313a5 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -19,9 +19,9 @@ #include #include #include -#include #include #include +#include static char * dhcpv6_proxy_error_strings[] = { #define dhcpv6_proxy_error(n,s) s, @@ -966,7 +966,7 @@ static u8 * format_dhcp6_proxy_server (u8 * s, va_list * args) { dhcp_proxy_t * proxy = va_arg (*args, dhcp_proxy_t *); - ip6_fib_t *server_fib; + fib_table_t *server_fib; dhcp_server_t *server; ip6_mfib_t *rx_fib; @@ -985,9 +985,10 @@ format_dhcp6_proxy_server (u8 * s, va_list * args) vec_foreach(server, proxy->dhcp_servers) { - server_fib = ip6_fib_get(server->server_fib_index); + server_fib = 
fib_table_get(server->server_fib_index, + FIB_PROTOCOL_IP6); s = format (s, "%u,%U ", - server_fib->table_id, + server_fib->ft_table_id, format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY); } diff --git a/src/vnet/fib/fib.h b/src/vnet/fib/fib.h index 7cf1d136..ec97c565 100644 --- a/src/vnet/fib/fib.h +++ b/src/vnet/fib/fib.h @@ -646,7 +646,5 @@ #include #include -#include -#include #endif diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 4a24c212..527f9114 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -341,45 +341,6 @@ ip6_fib_table_entry_insert (u32 fib_index, compute_prefix_lengths_in_search_order (table); } -u32 -ip6_fib_table_fwding_lookup (ip6_main_t * im, - u32 fib_index, - const ip6_address_t * dst) -{ - const ip6_fib_table_instance_t *table; - int i, len; - int rv; - BVT(clib_bihash_kv) kv, value; - u64 fib; - - table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING]; - len = vec_len (table->prefix_lengths_in_search_order); - - kv.key[0] = dst->as_u64[0]; - kv.key[1] = dst->as_u64[1]; - fib = ((u64)((fib_index))<<32); - - for (i = 0; i < len; i++) - { - int dst_address_length = table->prefix_lengths_in_search_order[i]; - ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length]; - - ASSERT(dst_address_length >= 0 && dst_address_length <= 128); - //As lengths are decreasing, masks are increasingly specific. 
- kv.key[0] &= mask->as_u64[0]; - kv.key[1] &= mask->as_u64[1]; - kv.key[2] = fib | dst_address_length; - - rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value); - if (rv == 0) - return value.value; - } - - /* default route is always present */ - ASSERT(0); - return 0; -} - u32 ip6_fib_table_fwding_lookup_with_if_index (ip6_main_t * im, u32 sw_if_index, const ip6_address_t * dst) diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h index 2bf8ef78..9789da4f 100644 --- a/src/vnet/fib/ip6_fib.h +++ b/src/vnet/fib/ip6_fib.h @@ -53,9 +53,6 @@ extern void ip6_fib_table_fwding_dpo_remove(u32 fib_index, u32 ip6_fib_table_fwding_lookup_with_if_index(ip6_main_t * im, u32 sw_if_index, const ip6_address_t * dst); -u32 ip6_fib_table_fwding_lookup(ip6_main_t * im, - u32 fib_index, - const ip6_address_t * dst); /** * @brief Walk all entries in a FIB table @@ -66,6 +63,45 @@ extern void ip6_fib_table_walk(u32 fib_index, fib_table_walk_fn_t fn, void *ctx); +always_inline u32 +ip6_fib_table_fwding_lookup (ip6_main_t * im, + u32 fib_index, + const ip6_address_t * dst) +{ + const ip6_fib_table_instance_t *table; + int i, len; + int rv; + BVT(clib_bihash_kv) kv, value; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING]; + len = vec_len (table->prefix_lengths_in_search_order); + + kv.key[0] = dst->as_u64[0]; + kv.key[1] = dst->as_u64[1]; + fib = ((u64)((fib_index))<<32); + + for (i = 0; i < len; i++) + { + int dst_address_length = table->prefix_lengths_in_search_order[i]; + ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length]; + + ASSERT(dst_address_length >= 0 && dst_address_length <= 128); + //As lengths are decreasing, masks are increasingly specific. 
+ kv.key[0] &= mask->as_u64[0]; + kv.key[1] &= mask->as_u64[1]; + kv.key[2] = fib | dst_address_length; + + rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value); + if (rv == 0) + return value.value; + } + + /* default route is always present */ + ASSERT(0); + return 0; +} + /** * @brief return the DPO that the LB stacks on. */ diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 25714e48..28c84d1c 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -444,12 +444,11 @@ ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) return; } - vnet_feature_enable_disable ("ip6-unicast", "ip6-lookup", sw_if_index, - is_enable, 0, 0); - - vnet_feature_enable_disable ("ip6-multicast", "ip6-mfib-forward-lookup", - sw_if_index, is_enable, 0, 0); + vnet_feature_enable_disable ("ip6-unicast", "ip6-drop", sw_if_index, + !is_enable, 0, 0); + vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index, + !is_enable, 0, 0); } /* get first interface address */ @@ -624,17 +623,17 @@ VNET_FEATURE_INIT (ip6_vxlan_bypass, static) = .runs_before = VNET_FEATURES ("ip6-lookup"), }; -VNET_FEATURE_INIT (ip6_lookup, static) = +VNET_FEATURE_INIT (ip6_drop, static) = { .arc_name = "ip6-unicast", - .node_name = "ip6-lookup", - .runs_before = VNET_FEATURES ("ip6-drop"), + .node_name = "ip6-drop", + .runs_before = VNET_FEATURES ("ip6-lookup"), }; -VNET_FEATURE_INIT (ip6_drop, static) = +VNET_FEATURE_INIT (ip6_lookup, static) = { .arc_name = "ip6-unicast", - .node_name = "ip6-drop", + .node_name = "ip6-lookup", .runs_before = 0, /*last feature*/ }; @@ -652,15 +651,15 @@ VNET_FEATURE_INIT (ip6_vpath_mc, static) = { .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"), }; -VNET_FEATURE_INIT (ip6_mc_lookup, static) = { +VNET_FEATURE_INIT (ip6_drop_mc, static) = { .arc_name = "ip6-multicast", - .node_name = "ip6-mfib-forward-lookup", - .runs_before = VNET_FEATURES ("ip6-drop"), + .node_name = "ip6-drop", + .runs_before = 
VNET_FEATURES ("ip6-mfib-forward-lookup"), }; -VNET_FEATURE_INIT (ip6_drop_mc, static) = { +VNET_FEATURE_INIT (ip6_mc_lookup, static) = { .arc_name = "ip6-multicast", - .node_name = "ip6-drop", + .node_name = "ip6-mfib-forward-lookup", .runs_before = 0, /* last feature */ }; @@ -699,9 +698,6 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index, is_add, 0, 0); - vnet_feature_enable_disable ("ip6-output", "interface-output", sw_if_index, - is_add, 0, 0); - return /* no error */ 0; } diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c index 2cba596f..1e674085 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.c +++ b/src/vnet/vxlan-gpe/vxlan_gpe.c @@ -490,9 +490,9 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "encap-vrf-id %d", &tmp)) { if (ipv6_set) - encap_fib_index = ip6_fib_index_from_table_id (tmp); + encap_fib_index = fib_table_find (FIB_PROTOCOL_IP6, tmp); else - encap_fib_index = ip4_fib_index_from_table_id (tmp); + encap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, tmp); if (encap_fib_index == ~0) { @@ -503,9 +503,9 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "decap-vrf-id %d", &tmp)) { if (ipv6_set) - decap_fib_index = ip6_fib_index_from_table_id (tmp); + decap_fib_index = fib_table_find (FIB_PROTOCOL_IP6, tmp); else - decap_fib_index = ip4_fib_index_from_table_id (tmp); + decap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, tmp); if (decap_fib_index == ~0) { diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 60eb5331..c1dcfb03 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -81,8 +81,6 @@ #include #include #include -#include -#include #include #include #include -- cgit 1.2.3-korg From 4729b1ec83855268adcea3e00a3462c06a631075 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Thu, 6 Jul 2017 01:39:05 -0700 Subject: DHCP complete event sends mask 
length Change-Id: I4a529dfab5d0ce6b0bbc0ccbbd89c6b109dbf917 Signed-off-by: Neale Ranns --- src/vnet/dhcp/client.c | 3 ++- src/vnet/dhcp/dhcp.api | 2 ++ src/vnet/dhcp/dhcp_api.c | 5 +++-- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c index 7c3f7f6a..014f17a1 100644 --- a/src/vnet/dhcp/client.c +++ b/src/vnet/dhcp/client.c @@ -194,7 +194,7 @@ int dhcp_client_for_us (u32 bi, vlib_buffer_t * b, /* OK, we own the address (etc), add to the routing table(s) */ if (c->state == DHCP_REQUEST) { - void (*fp)(u32, u32, u8 *, u8, u8 *, u8 *, u8 *) = c->event_callback; + void (*fp)(u32, u32, u8 *, u8, u8, u8 *, u8 *, u8 *) = c->event_callback; dhcp_client_acquire_address (dcm, c); @@ -236,6 +236,7 @@ int dhcp_client_for_us (u32 bi, vlib_buffer_t * b, (*fp) (c->client_index, /* clinet index */ c->pid, c->hostname, + c->subnet_mask_width, 0, /* is_ipv6 */ (u8 *)&c->leased_address, /* host IP address */ (u8 *)&c->router_address, /* router IP address */ diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api index eb0b070d..a2803728 100644 --- a/src/vnet/dhcp/dhcp.api +++ b/src/vnet/dhcp/dhcp.api @@ -81,6 +81,7 @@ autoreply define dhcp_client_config @param client_index - opaque cookie to identify the sender @param pid - client pid registered to receive notification @param is_ipv6 - if non-zero the address is ipv6, else ipv4 + @param mask_width - The length of the subnet mask assigned @param host_address - Host IP address @param router_address - Router IP address @param host_mac - Host MAC address @@ -91,6 +92,7 @@ define dhcp_compl_event u32 pid; u8 hostname[64]; u8 is_ipv6; + u8 mask_width; u8 host_address[16]; u8 router_address[16]; u8 host_mac[6]; diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c index 2c0dd77d..5ea93660 100644 --- a/src/vnet/dhcp/dhcp_api.c +++ b/src/vnet/dhcp/dhcp_api.c @@ -187,8 +187,8 @@ dhcp_send_details (fib_protocol_t proto, void 
dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, - u8 is_ipv6, u8 * host_address, u8 * router_address, - u8 * host_mac) + u8 mask_width, u8 is_ipv6, u8 * host_address, + u8 * router_address, u8 * host_mac) { unix_shared_memory_queue_t *q; vl_api_dhcp_compl_event_t *mp; @@ -205,6 +205,7 @@ dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, len = (vec_len (hostname) < 63) ? vec_len (hostname) : 63; clib_memcpy (&mp->hostname, hostname, len); mp->hostname[len] = 0; + mp->mask_width = mask_width; clib_memcpy (&mp->host_address[0], host_address, 16); clib_memcpy (&mp->router_address[0], router_address, 16); -- cgit 1.2.3-korg From 072401e8096c648b91f958bd911f64ce24fecff9 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 13 Jul 2017 18:53:27 +0200 Subject: Introduce l{2,3,4}_hdr_offset fields in the buffer metadata To save space in the first cacheline following is changed: - total_length_not_including_first_buffer moved to the 2nd cacheline. This field is used only when VLIB_BUFFER_TOTAL_LENGTH_VALID and VLIB_BUFFER_NEXT_PRESENT are both set. - free_list_index is now stored in 4bits inside flags, which allows up to 16 free lists. 
In case we need more we can store index in the 2nd cachelin Change-Id: Ic8521350819391af470d31d3fa1013e67ecb7681 Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/node.c | 8 ++++++- src/vlib/buffer.c | 16 ++++++++----- src/vlib/buffer.h | 40 +++++++++++++++++--------------- src/vlib/buffer_funcs.h | 50 +++++++++++++++++++++++++++++----------- src/vnet/bfd/bfd_udp.c | 4 ++-- src/vnet/buffer.h | 14 +++-------- src/vnet/dhcp/dhcp4_proxy_node.c | 2 +- src/vnet/dhcp/dhcp6_proxy_node.c | 2 +- src/vnet/ethernet/ethernet.h | 3 +-- src/vnet/ethernet/node.c | 23 ++++++++---------- src/vnet/ip/ip4_forward.c | 6 ++--- src/vnet/ip/ip6_forward.c | 6 ++--- src/vnet/ip/ip6_neighbor.c | 19 +++++++-------- src/vnet/l2/l2_bvi.h | 2 +- src/vnet/lisp-cp/control.c | 2 +- src/vnet/replication.c | 6 ++--- 16 files changed, 111 insertions(+), 92 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index 69acc529..74fb8da1 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -208,7 +208,13 @@ dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, mb_seg = mb->next; b_chain = b; - while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) + if (mb->nb_segs < 2) + return; + + b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + b->total_length_not_including_first_buffer = 0; + + while (nb_seg < mb->nb_segs) { ASSERT (mb_seg != 0); diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index b2a095cf..53b60c16 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -72,8 +72,8 @@ format_vlib_buffer (u8 * s, va_list * args) uword indent = format_get_indent (s); s = format (s, "current data %d, length %d, free-list %d, clone-count %u", - b->current_data, b->current_length, b->free_list_index, - b->n_add_refs); + b->current_data, b->current_length, + vlib_buffer_get_free_list_index (b), b->n_add_refs); if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID) s = format (s, ", totlen-nifb %d", @@ -163,10 +163,14 
@@ vlib_validate_buffer_helper (vlib_main_t * vm, vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *fl; - if (pool_is_free_index (bm->buffer_free_list_pool, b->free_list_index)) - return format (0, "unknown free list 0x%x", b->free_list_index); + if (pool_is_free_index + (bm->buffer_free_list_pool, vlib_buffer_get_free_list_index (b))) + return format (0, "unknown free list 0x%x", + vlib_buffer_get_free_list_index (b)); - fl = pool_elt_at_index (bm->buffer_free_list_pool, b->free_list_index); + fl = + pool_elt_at_index (bm->buffer_free_list_pool, + vlib_buffer_get_free_list_index (b)); if ((signed) b->current_data < (signed) -VLIB_BUFFER_PRE_DATA_SIZE) return format (0, "current data %d before pre-data", b->current_data); @@ -388,7 +392,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, f->name = clib_mem_is_vec (name) ? name : format (0, "%s", name); /* Setup free buffer template. */ - f->buffer_init_template.free_list_index = f->index; + vlib_buffer_set_free_list_index (&f->buffer_init_template, f->index); f->buffer_init_template.n_add_refs = 0; if (is_public) diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index b20538b7..c810db4e 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -72,6 +72,7 @@ typedef struct the end of this buffer. */ u32 flags; /**< buffer flags: +
VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index,
VLIB_BUFFER_IS_TRACED: trace this buffer.
VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer.
VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says @@ -82,28 +83,26 @@ typedef struct set to avoid adding it to a flow report
VLIB_BUFFER_FLAG_USER(n): user-defined bit N */ -#define VLIB_BUFFER_IS_TRACED (1 << 0) -#define VLIB_BUFFER_LOG2_NEXT_PRESENT (1) + +/* any change to the following line requres update of + * vlib_buffer_get_free_list_index(...) and + * vlib_buffer_set_free_list_index(...) functions */ +#define VLIB_BUFFER_FREE_LIST_INDEX_MASK ((1 << 4) - 1) + +#define VLIB_BUFFER_IS_TRACED (1 << 4) +#define VLIB_BUFFER_LOG2_NEXT_PRESENT (5) #define VLIB_BUFFER_NEXT_PRESENT (1 << VLIB_BUFFER_LOG2_NEXT_PRESENT) -#define VLIB_BUFFER_IS_RECYCLED (1 << 2) -#define VLIB_BUFFER_TOTAL_LENGTH_VALID (1 << 3) -#define VLIB_BUFFER_REPL_FAIL (1 << 4) -#define VLIB_BUFFER_RECYCLE (1 << 5) -#define VLIB_BUFFER_FLOW_REPORT (1 << 6) -#define VLIB_BUFFER_EXT_HDR_VALID (1 << 7) +#define VLIB_BUFFER_IS_RECYCLED (1 << 6) +#define VLIB_BUFFER_TOTAL_LENGTH_VALID (1 << 7) +#define VLIB_BUFFER_REPL_FAIL (1 << 8) +#define VLIB_BUFFER_RECYCLE (1 << 9) +#define VLIB_BUFFER_FLOW_REPORT (1 << 10) +#define VLIB_BUFFER_EXT_HDR_VALID (1 << 11) /* User defined buffer flags. */ #define LOG2_VLIB_BUFFER_FLAG_USER(n) (32 - (n)) #define VLIB_BUFFER_FLAG_USER(n) (1 << LOG2_VLIB_BUFFER_FLAG_USER(n)) - u32 free_list_index; /**< Buffer free list that this buffer was - allocated from and will be freed to. - */ - - u32 total_length_not_including_first_buffer; - /**< Only valid for first buffer in chain. Current length plus - total length given here give total number of bytes in buffer chain. - */ STRUCT_MARK (template_end); u32 next_buffer; /**< Next buffer for this linked-list of buffers. @@ -128,7 +127,7 @@ typedef struct Before allocating any of it, discussion required! */ - u32 opaque[8]; /**< Opaque data used by sub-graphs for their own purposes. + u32 opaque[10]; /**< Opaque data used by sub-graphs for their own purposes. See .../vnet/vnet/buffer.h */ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); @@ -137,7 +136,12 @@ typedef struct if VLIB_PACKET_IS_TRACED flag is set. 
*/ u32 recycle_count; /**< Used by L2 path recycle code */ - u32 opaque2[14]; /**< More opaque data, currently unused */ + + u32 total_length_not_including_first_buffer; + /**< Only valid for first buffer in chain. Current length plus + total length given here give total number of bytes in buffer chain. + */ + u32 opaque2[13]; /**< More opaque data, currently unused */ /***** end of second cache line */ CLIB_CACHE_LINE_ALIGN_MARK (cacheline2); diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 97442e12..1aaac0b2 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -106,12 +106,15 @@ uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, always_inline uword vlib_buffer_length_in_chain (vlib_main_t * vm, vlib_buffer_t * b) { - uword l = b->current_length + b->total_length_not_including_first_buffer; - if (PREDICT_FALSE ((b->flags & (VLIB_BUFFER_NEXT_PRESENT - | VLIB_BUFFER_TOTAL_LENGTH_VALID)) - == VLIB_BUFFER_NEXT_PRESENT)) - return vlib_buffer_length_in_chain_slow_path (vm, b); - return l; + uword len = b->current_length; + + if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)) + return len; + + if (PREDICT_TRUE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)) + return len + b->total_length_not_including_first_buffer; + + return vlib_buffer_length_in_chain_slow_path (vm, b); } /** \brief Get length in bytes of the buffer index buffer chain @@ -261,6 +264,24 @@ vlib_buffer_round_size (u32 size) return round_pow2 (size, sizeof (vlib_buffer_t)); } +always_inline u32 +vlib_buffer_get_free_list_index (vlib_buffer_t * b) +{ + return b->flags & VLIB_BUFFER_FREE_LIST_INDEX_MASK; +} + +always_inline void +vlib_buffer_set_free_list_index (vlib_buffer_t * b, u32 index) +{ + /* if there is an need for more free lists we should consider + storig data in the 2nd cacheline */ + ASSERT (VLIB_BUFFER_FREE_LIST_INDEX_MASK & 1); + ASSERT (index <= VLIB_BUFFER_FREE_LIST_INDEX_MASK); + + b->flags &= ~VLIB_BUFFER_FREE_LIST_INDEX_MASK; + 
b->flags |= index & VLIB_BUFFER_FREE_LIST_INDEX_MASK; +} + /** \brief Allocate buffers from specific freelist into supplied array @param vm - (vlib_main_t *) vlib main data structure pointer @@ -381,7 +402,7 @@ vlib_buffer_get_buffer_free_list (vlib_main_t * vm, vlib_buffer_t * b, vlib_buffer_main_t *bm = vm->buffer_main; u32 i; - *index = i = b->free_list_index; + *index = i = vlib_buffer_get_free_list_index (b); return pool_elt_at_index (bm->buffer_free_list_pool, i); } @@ -569,7 +590,8 @@ vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, } n_buffers = vlib_buffer_alloc_from_free_list (vm, buffers, n_buffers, - s->free_list_index); + vlib_buffer_get_free_list_index + (s)); if (PREDICT_FALSE (n_buffers == 0)) { buffers[0] = src_buffer; @@ -581,7 +603,8 @@ vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, vlib_buffer_t *d = vlib_get_buffer (vm, buffers[i]); d->current_data = s->current_data; d->current_length = head_end_offset; - d->free_list_index = s->free_list_index; + vlib_buffer_set_free_list_index (d, + vlib_buffer_get_free_list_index (s)); d->total_length_not_including_first_buffer = s->total_length_not_including_first_buffer + s->current_length - head_end_offset; @@ -615,7 +638,8 @@ vlib_buffer_attach_clone (vlib_main_t * vm, vlib_buffer_t * head, vlib_buffer_t * tail) { ASSERT ((head->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - ASSERT (head->free_list_index == tail->free_list_index); + ASSERT (vlib_buffer_get_free_list_index (head) == + vlib_buffer_get_free_list_index (tail)); head->flags |= VLIB_BUFFER_NEXT_PRESENT; head->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; @@ -791,7 +815,7 @@ vlib_buffer_init_for_free_list (vlib_buffer_t * dst, CLIB_CACHE_LINE_BYTES * 2); /* Make sure buffer template is sane. 
*/ - ASSERT (fl->index == fl->buffer_init_template.free_list_index); + ASSERT (fl->index == vlib_buffer_get_free_list_index (src)); clib_memcpy (STRUCT_MARK_PTR (dst, template_start), STRUCT_MARK_PTR (src, template_start), @@ -806,7 +830,6 @@ vlib_buffer_init_for_free_list (vlib_buffer_t * dst, _(current_data); _(current_length); _(flags); - _(free_list_index); #undef _ ASSERT (dst->total_length_not_including_first_buffer == 0); ASSERT (dst->n_add_refs == 0); @@ -832,7 +855,7 @@ vlib_buffer_init_two_for_free_list (vlib_buffer_t * dst0, vlib_buffer_t *src = &fl->buffer_init_template; /* Make sure buffer template is sane. */ - ASSERT (fl->index == fl->buffer_init_template.free_list_index); + ASSERT (fl->index == vlib_buffer_get_free_list_index (src)); clib_memcpy (STRUCT_MARK_PTR (dst0, template_start), STRUCT_MARK_PTR (src, template_start), @@ -853,7 +876,6 @@ vlib_buffer_init_two_for_free_list (vlib_buffer_t * dst0, _(current_data); _(current_length); _(flags); - _(free_list_index); #undef _ ASSERT (dst0->total_length_not_including_first_buffer == 0); diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 346c5495..06b843c6 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -843,7 +843,7 @@ bfd_udp4_find_headers (vlib_buffer_t * b, ip4_header_t ** ip4, udp_header_t ** udp) { /* sanity check first */ - const i32 start = vnet_buffer (b)->ip.start_of_ip_header; + const i32 start = vnet_buffer (b)->l3_hdr_offset; if (start < 0 && start < sizeof (b->pre_data)) { BFD_ERR ("Start of ip header is before pre_data, ignoring"); @@ -1000,7 +1000,7 @@ bfd_udp6_find_headers (vlib_buffer_t * b, ip6_header_t ** ip6, udp_header_t ** udp) { /* sanity check first */ - const i32 start = vnet_buffer (b)->ip.start_of_ip_header; + const i32 start = vnet_buffer (b)->l3_hdr_offset; if (start < 0 && start < sizeof (b->pre_data)) { BFD_ERR ("Start of ip header is before pre_data, ignoring"); diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 
9aba34da..8647db00 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -71,7 +71,6 @@ #define VNET_BUFFER_SPAN_CLONE (1 << LOG2_VNET_BUFFER_SPAN_CLONE) #define foreach_buffer_opaque_union_subtype \ -_(ethernet) \ _(ip) \ _(swt) \ _(l2) \ @@ -100,16 +99,12 @@ _(tcp) typedef struct { u32 sw_if_index[VLIB_N_RX_TX]; + i16 l2_hdr_offset; + i16 l3_hdr_offset; + i16 l4_hdr_offset; union { - /* Ethernet. */ - struct - { - /* Saved value of current header by ethernet-input. */ - i32 start_of_ethernet_header; - } ethernet; - /* IP4/6 buffer opaque. */ struct { @@ -143,9 +138,6 @@ typedef struct u8 code; u32 data; } icmp; - - /* IP header offset from vlib_buffer.data - saved by ip*_local nodes */ - i32 start_of_ip_header; }; } ip; diff --git a/src/vnet/dhcp/dhcp4_proxy_node.c b/src/vnet/dhcp/dhcp4_proxy_node.c index 26e1e65c..1b59cdea 100644 --- a/src/vnet/dhcp/dhcp4_proxy_node.c +++ b/src/vnet/dhcp/dhcp4_proxy_node.c @@ -231,7 +231,7 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); } - fl = vlib_buffer_get_free_list (vm, b0->free_list_index); + fl = vlib_buffer_get_free_list (vm, vlib_buffer_get_free_list_index (b0)); // start write at (option*)o, some packets have padding if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes) { diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index 885313a5..e109cc4c 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -306,7 +306,7 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, copy_ip6_address(&r1->link_addr, ia0); link_address_set: - fl = vlib_buffer_get_free_list (vm, b0->free_list_index); + fl = vlib_buffer_get_free_list (vm, vlib_buffer_get_free_list_index (b0)); if ((b0->current_length+sizeof(*id1)+sizeof(*vss1)+sizeof(*cmac)) > fl->n_data_bytes) diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h index dcc656a7..2fc5b804 100644 --- a/src/vnet/ethernet/ethernet.h 
+++ b/src/vnet/ethernet/ethernet.h @@ -344,8 +344,7 @@ ethernet_setup_node (vlib_main_t * vm, u32 node_index) always_inline ethernet_header_t * ethernet_buffer_get_header (vlib_buffer_t * b) { - return (void *) - (b->data + vnet_buffer (b)->ethernet.start_of_ethernet_header); + return (void *) (b->data + vnet_buffer (b)->l2_hdr_offset); } /** Returns the number of VLAN headers in the current Ethernet frame in the diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index d9fdff48..421d501a 100755 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -101,7 +101,7 @@ parse_header (ethernet_input_variant_t variant, e0 = (void *) (b0->data + b0->current_data); - vnet_buffer (b0)->ethernet.start_of_ethernet_header = b0->current_data; + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; vlib_buffer_advance (b0, sizeof (e0[0])); @@ -205,9 +205,7 @@ identify_subint (vnet_hw_interface_t * hi, if (!(*is_l2)) { ethernet_header_t *e0; - e0 = - (void *) (b0->data + - vnet_buffer (b0)->ethernet.start_of_ethernet_header); + e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset); if (!(ethernet_address_cast (e0->dst_address))) { @@ -238,7 +236,7 @@ determine_next_node (ethernet_main_t * em, { *next0 = em->l2_next; // record the L2 len and reset the buffer so the L2 header is preserved - u32 eth_start = vnet_buffer (b0)->ethernet.start_of_ethernet_header; + u32 eth_start = vnet_buffer (b0)->l2_hdr_offset; vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start; ASSERT (vnet_buffer (b0)->l2.l2_len == ethernet_buffer_header_size (b0)); @@ -424,10 +422,8 @@ ethernet_input_inline (vlib_main_t * vm, cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2; } - vnet_buffer (b0)->ethernet.start_of_ethernet_header = - b0->current_data; - vnet_buffer (b1)->ethernet.start_of_ethernet_header = - b1->current_data; + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; + vnet_buffer (b1)->l2_hdr_offset = b1->current_data; if (PREDICT_TRUE (is_l20 != 0)) { @@ 
-519,9 +515,9 @@ ethernet_input_inline (vlib_main_t * vm, { len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data - - vnet_buffer (b0)->ethernet.start_of_ethernet_header; + - vnet_buffer (b0)->l2_hdr_offset; len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data - - vnet_buffer (b1)->ethernet.start_of_ethernet_header; + - vnet_buffer (b1)->l2_hdr_offset; stats_n_packets += 2; stats_n_bytes += len0 + len1; @@ -646,8 +642,7 @@ ethernet_input_inline (vlib_main_t * vm, cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2; } - vnet_buffer (b0)->ethernet.start_of_ethernet_header = - b0->current_data; + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; if (PREDICT_TRUE (is_l20 != 0)) { @@ -710,7 +705,7 @@ ethernet_input_inline (vlib_main_t * vm, { len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data - - vnet_buffer (b0)->ethernet.start_of_ethernet_header; + - vnet_buffer (b0)->l2_hdr_offset; stats_n_packets += 1; stats_n_bytes += len0; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 8263e01c..b8dfa847 100755 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1585,8 +1585,8 @@ ip4_local_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); - vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; - vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data; + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; + vnet_buffer (p1)->l3_hdr_offset = p1->current_data; sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; @@ -1788,7 +1788,7 @@ ip4_local_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); - vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 4b574b9a..2b8c2bd2 100644 --- 
a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -1362,8 +1362,8 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); - vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; - vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data; + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; + vnet_buffer (p1)->l3_hdr_offset = p1->current_data; type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol]; type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol]; @@ -1493,7 +1493,7 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip0 = vlib_buffer_get_current (p0); - vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol]; next0 = lm->local_next_by_ip_protocol[ip0->protocol]; diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index b8f6f9b1..68a8cbbc 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -1479,9 +1479,8 @@ icmp6_router_solicitation (vlib_main_t * vm, sizeof (icmp6_router_advertisement_header_t); vlib_buffer_add_data (vm, - p0->free_list_index, - bi0, - (void *) &rh, + vlib_buffer_get_free_list_index + (p0), bi0, (void *) &rh, sizeof (icmp6_router_advertisement_header_t)); @@ -1499,9 +1498,8 @@ icmp6_router_solicitation (vlib_main_t * vm, eth_if0->address, 6); vlib_buffer_add_data (vm, - p0->free_list_index, - bi0, - (void *) &h, + vlib_buffer_get_free_list_index + (p0), bi0, (void *) &h, sizeof (icmp6_neighbor_discovery_ethernet_link_layer_address_option_t)); @@ -1525,9 +1523,8 @@ icmp6_router_solicitation (vlib_main_t * vm, sizeof (icmp6_neighbor_discovery_mtu_option_t); vlib_buffer_add_data (vm, - p0->free_list_index, - bi0, - (void *) &h, + vlib_buffer_get_free_list_index + (p0), bi0, (void *) &h, sizeof (icmp6_neighbor_discovery_mtu_option_t)); 
} @@ -1579,7 +1576,7 @@ icmp6_router_solicitation (vlib_main_t * vm, payload_length += sizeof( icmp6_neighbor_discovery_prefix_information_option_t); vlib_buffer_add_data (vm, - p0->free_list_index, + vlib_buffer_get_free_list_index (p0), bi0, (void *)&h, sizeof(icmp6_neighbor_discovery_prefix_information_option_t)); @@ -2326,7 +2323,7 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index) num_addr_records++; vlib_buffer_add_data - (vm, b0->free_list_index, bo0, + (vm, vlib_buffer_get_free_list_index (b0), bo0, (void *)&rr, sizeof(icmp6_multicast_address_record_t)); payload_length += sizeof( icmp6_multicast_address_record_t); diff --git a/src/vnet/l2/l2_bvi.h b/src/vnet/l2/l2_bvi.h index e21a1616..662ec402 100644 --- a/src/vnet/l2/l2_bvi.h +++ b/src/vnet/l2/l2_bvi.h @@ -57,7 +57,7 @@ l2_to_bvi (vlib_main_t * vlib_main, } /* Save L2 header position which may be changed due to packet replication */ - vnet_buffer (b0)->ethernet.start_of_ethernet_header = b0->current_data; + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; /* Strip L2 header */ l2_len = vnet_buffer (b0)->l2.l2_len; diff --git a/src/vnet/lisp-cp/control.c b/src/vnet/lisp-cp/control.c index 22b5c82c..d8a1372d 100644 --- a/src/vnet/lisp-cp/control.c +++ b/src/vnet/lisp-cp/control.c @@ -3706,7 +3706,7 @@ send_map_reply (lisp_cp_main_t * lcm, u32 mi, ip_address_t * dst, static void find_ip_header (vlib_buffer_t * b, u8 ** ip_hdr) { - const i32 start = vnet_buffer (b)->ip.start_of_ip_header; + const i32 start = vnet_buffer (b)->l3_hdr_offset; if (start < 0 && start < -sizeof (b->pre_data)) { *ip_hdr = 0; diff --git a/src/vnet/replication.c b/src/vnet/replication.c index 1c6f28d2..0fdca0bf 100644 --- a/src/vnet/replication.c +++ b/src/vnet/replication.c @@ -43,12 +43,12 @@ replication_prep (vlib_main_t * vm, ctx_id = ctx - rm->contexts[thread_index]; /* Save state from vlib buffer */ - ctx->saved_free_list_index = b0->free_list_index; + ctx->saved_free_list_index = vlib_buffer_get_free_list_index (b0); 
ctx->current_data = b0->current_data; /* Set up vlib buffer hooks */ b0->recycle_count = ctx_id; - b0->free_list_index = rm->recycle_list_index; + vlib_buffer_set_free_list_index (b0, rm->recycle_list_index); b0->flags |= VLIB_BUFFER_RECYCLE; /* Save feature state */ @@ -129,7 +129,7 @@ replication_recycle (vlib_main_t * vm, vlib_buffer_t * b0, u32 is_last) * This is the last replication in the list. * Restore original buffer free functionality. */ - b0->free_list_index = ctx->saved_free_list_index; + vlib_buffer_set_free_list_index (b0, ctx->saved_free_list_index); b0->flags &= ~VLIB_BUFFER_RECYCLE; /* Free context back to its pool */ -- cgit 1.2.3-korg From a2fbf6ba0e2553687f56d9a9fb63e2972bdb26c6 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 18 Jul 2017 08:23:32 -0700 Subject: DHCP client - remove interface address when DHCP de-configured Change-Id: I63c59e3c13859b51999d283774f7783ef0a6a5ed Signed-off-by: Neale Ranns --- src/vnet/dhcp/client.c | 1 + test/test_dhcp.py | 158 ++++++++++++++++++++++++++++++++++++++++------ test/vpp_papi_provider.py | 15 +++++ 3 files changed, 153 insertions(+), 21 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c index 014f17a1..8f033d25 100644 --- a/src/vnet/dhcp/client.c +++ b/src/vnet/dhcp/client.c @@ -821,6 +821,7 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a) 1, FIB_ROUTE_PATH_FLAG_NONE); } + dhcp_client_release_address (dcm, c); ip4_sw_interface_enable_disable (c->sw_if_index, 0); vec_free (c->option_55_data); diff --git a/test/test_dhcp.py b/test/test_dhcp.py index 03c749d3..1700f6ba 100644 --- a/test/test_dhcp.py +++ b/test/test_dhcp.py @@ -6,6 +6,7 @@ import struct from framework import VppTestCase, VppTestRunner from vpp_neighbor import VppNeighbor +from vpp_ip_route import find_route from util import mk_ll_addr from scapy.layers.l2 import Ether, getmacbyip @@ -68,6 +69,18 @@ class TestDHCP(VppTestCase): for i in self.pg_interfaces: 
i.assert_nothing_captured(remark=remark) + def verify_dhcp_has_option(self, pkt, option, value): + dhcp = pkt[DHCP] + found = False + + for i in dhcp.options: + if type(i) is tuple: + if i[0] == option: + self.assertEqual(i[1], value) + found = True + + self.assertTrue(found) + def validate_relay_options(self, pkt, intf, ip_addr, fib_id, oui): dhcp = pkt[DHCP] found = 0 @@ -136,6 +149,16 @@ class TestDHCP(VppTestCase): return data + def verify_dhcp_msg_type(self, pkt, name): + dhcp = pkt[DHCP] + found = False + for o in dhcp.options: + if type(o) is tuple: + if o[0] == "message-type" \ + and DHCPTypes[o[1]] == name: + found = True + self.assertTrue(found) + def verify_dhcp_offer(self, pkt, intf, fib_id=0, oui=0): ether = pkt[Ether] self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff") @@ -149,20 +172,39 @@ class TestDHCP(VppTestCase): self.assertEqual(udp.dport, DHCP4_CLIENT_PORT) self.assertEqual(udp.sport, DHCP4_SERVER_PORT) - dhcp = pkt[DHCP] - is_offer = False - for o in dhcp.options: - if type(o) is tuple: - if o[0] == "message-type" \ - and DHCPTypes[o[1]] == "offer": - is_offer = True - self.assertTrue(is_offer) - + self.verify_dhcp_msg_type(pkt, "offer") data = self.validate_relay_options(pkt, intf, intf.local_ip4, fib_id, oui) - def verify_dhcp_discover(self, pkt, intf, src_intf=None, fib_id=0, oui=0, - dst_mac=None, dst_ip=None): + def verify_orig_dhcp_pkt(self, pkt, intf): + ether = pkt[Ether] + self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff") + self.assertEqual(ether.src, intf.local_mac) + + ip = pkt[IP] + self.assertEqual(ip.dst, "255.255.255.255") + self.assertEqual(ip.src, "0.0.0.0") + + udp = pkt[UDP] + self.assertEqual(udp.dport, DHCP4_SERVER_PORT) + self.assertEqual(udp.sport, DHCP4_CLIENT_PORT) + + def verify_orig_dhcp_discover(self, pkt, intf, hostname): + self.verify_orig_dhcp_pkt(pkt, intf) + + self.verify_dhcp_msg_type(pkt, "discover") + self.verify_dhcp_has_option(pkt, "hostname", hostname) + + def verify_orig_dhcp_request(self, pkt, intf, 
hostname, ip): + self.verify_orig_dhcp_pkt(pkt, intf) + + self.verify_dhcp_msg_type(pkt, "request") + self.verify_dhcp_has_option(pkt, "hostname", hostname) + self.verify_dhcp_has_option(pkt, "requested_addr", ip) + + def verify_relayed_dhcp_discover(self, pkt, intf, src_intf=None, + fib_id=0, oui=0, + dst_mac=None, dst_ip=None): if not dst_mac: dst_mac = intf.remote_mac if not dst_ip: @@ -341,7 +383,8 @@ class TestDHCP(VppTestCase): rx = self.pg0.get_capture(1) rx = rx[0] - option_82 = self.verify_dhcp_discover(rx, self.pg0, src_intf=self.pg2) + option_82 = self.verify_relayed_dhcp_discover(rx, self.pg0, + src_intf=self.pg2) # # Create an DHCP offer reply from the server with a correctly formatted @@ -446,7 +489,7 @@ class TestDHCP(VppTestCase): rx = self.pg1.get_capture(1) rx = rx[0] - self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3) + self.verify_relayed_dhcp_discover(rx, self.pg1, src_intf=self.pg3) # # Add VSS config @@ -459,8 +502,9 @@ class TestDHCP(VppTestCase): rx = self.pg1.get_capture(1) rx = rx[0] - self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3, - fib_id=1, oui=4) + self.verify_relayed_dhcp_discover(rx, self.pg1, + src_intf=self.pg3, + fib_id=1, oui=4) # # Add a second DHCP server in VRF 1 @@ -495,14 +539,15 @@ class TestDHCP(VppTestCase): rx = self.pg1.get_capture(2) - option_82 = self.verify_dhcp_discover( + option_82 = self.verify_relayed_dhcp_discover( rx[0], self.pg1, src_intf=self.pg3, dst_mac=self.pg1.remote_hosts[1].mac, dst_ip=self.pg1.remote_hosts[1].ip4, fib_id=1, oui=4) - self.verify_dhcp_discover(rx[1], self.pg1, src_intf=self.pg3, - fib_id=1, oui=4) + self.verify_relayed_dhcp_discover(rx[1], self.pg1, + src_intf=self.pg3, + fib_id=1, oui=4) # # Send both packets back. Client gets both. 
@@ -581,8 +626,9 @@ class TestDHCP(VppTestCase): rx = self.pg1.get_capture(1) rx = rx[0] - self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3, - fib_id=1, oui=4) + self.verify_relayed_dhcp_discover(rx, self.pg1, + src_intf=self.pg3, + fib_id=1, oui=4) # # Remove the VSS config @@ -596,7 +642,7 @@ class TestDHCP(VppTestCase): rx = self.pg1.get_capture(1) rx = rx[0] - self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3) + self.verify_relayed_dhcp_discover(rx, self.pg1, src_intf=self.pg3) # # remove DHCP config to cleanup @@ -990,5 +1036,75 @@ class TestDHCP(VppTestCase): is_ipv6=1, is_add=0) + def test_dhcp_client(self): + """ DHCP Client""" + + hostname = 'universal-dp' + + self.pg_enable_capture(self.pg_interfaces) + + # + # Configure DHCP client on PG2 and capture the discover sent + # + self.vapi.dhcp_client(self.pg2.sw_if_index, hostname) + + rx = self.pg2.get_capture(1) + + self.verify_orig_dhcp_discover(rx[0], self.pg2, hostname) + + # + # Sned back on offer, expect the request + # + p = (Ether(dst=self.pg2.local_mac, src=self.pg2.remote_mac) / + IP(src=self.pg2.remote_ip4, dst="255.255.255.255") / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_CLIENT_PORT) / + BOOTP(op=1, + yiaddr=self.pg2.local_ip4) / + DHCP(options=[('message-type', 'offer'), ('end')])) + + self.pg2.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg2.get_capture(1) + self.verify_orig_dhcp_request(rx[0], self.pg2, hostname, + self.pg2.local_ip4) + + # + # Send an acknowloedgement + # + p = (Ether(dst=self.pg2.local_mac, src=self.pg2.remote_mac) / + IP(src=self.pg2.remote_ip4, dst="255.255.255.255") / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_CLIENT_PORT) / + BOOTP(op=1, + yiaddr=self.pg2.local_ip4) / + DHCP(options=[('message-type', 'ack'), + ('subnet_mask', "255.255.255.0"), + ('router', self.pg2.remote_ip4), + ('server_id', self.pg2.remote_ip4), + ('lease_time', 43200), + ('end')])) + + self.pg2.add_stream(p) + 
self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # + # At the end of this procedure there should be a connected route + # in the FIB + # + self.assertTrue(find_route(self, self.pg2.local_ip4, 32)) + + # + # remove the DHCP config + # + self.vapi.dhcp_client(self.pg2.sw_if_index, hostname, is_add=0) + + # + # and now the route should be gone + # + self.assertFalse(find_route(self, self.pg2.local_ip4, 32)) + + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 51c359e8..31eadad8 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -1755,6 +1755,21 @@ class VppPapiProvider(object): 'oui': oui, }) + def dhcp_client(self, + sw_if_index, + hostname, + is_add=1, + want_dhcp_events=0): + return self.api( + self.papi.dhcp_client_config, + { + 'sw_if_index': sw_if_index, + 'hostname': hostname, + 'is_add': is_add, + 'want_dhcp_event': want_dhcp_events, + 'pid': os.getpid(), + }) + def ip_mroute_add_del(self, src_address, grp_address, -- cgit 1.2.3-korg From 51822bf07a3f0fe72834ea94659faf6e262475ba Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 18 Jul 2017 09:26:53 -0700 Subject: DHCP client option 61 "client_id" the existing setting of client_id to a VPP version number was unused and so overridden Change-Id: If9ebea936336f1fcca8d07e67186c95f8f8f0ccd Signed-off-by: Neale Ranns --- src/vnet/dhcp/client.c | 15 ++++++++++++++- src/vnet/dhcp/client.h | 1 + src/vnet/dhcp/dhcp.api | 2 ++ src/vnet/dhcp/dhcp_api.c | 3 ++- test/test_dhcp.py | 34 ++++++++++++++++++++++++++++++++-- test/vpp_papi_provider.py | 2 ++ 6 files changed, 53 insertions(+), 4 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c index 8f033d25..25ab3176 100644 --- a/src/vnet/dhcp/client.c +++ b/src/vnet/dhcp/client.c @@ -414,6 +414,16 @@ send_dhcp_pkt (dhcp_client_main_t * dcm, dhcp_client_t * c, o = (dhcp_option_t *) (((uword) o) +
(o->length + 2)); } + /* send option 61, client_id */ + if (vec_len (c->client_identifier)) + { + o->option = 61; + o->length = vec_len (c->client_identifier); + clib_memcpy (o->data, c->client_identifier, + vec_len (c->client_identifier)); + o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); + } + /* $$ maybe send the client s/w version if anyone cares */ /* @@ -838,6 +848,7 @@ int dhcp_client_config (vlib_main_t * vm, u32 sw_if_index, u8 * hostname, + u8 * client_id, u32 is_add, u32 client_index, void * event_callback, @@ -854,7 +865,9 @@ dhcp_client_config (vlib_main_t * vm, a->event_callback = event_callback; vec_validate(a->hostname, strlen((char *)hostname) - 1); strncpy((char *)a->hostname, (char *)hostname, vec_len(a->hostname)); - a->client_identifier = format (0, "vpe 1.0%c", 0); + vec_validate(a->client_identifier, strlen((char *)client_id) - 1); + strncpy((char *)a->client_identifier, (char *)client_id, vec_len(a->client_identifier)); + /* * Option 55 request list. These data precisely match * the Ubuntu dhcp client. YMMV. 
diff --git a/src/vnet/dhcp/client.h b/src/vnet/dhcp/client.h index 1f85d7ce..509d5d4c 100644 --- a/src/vnet/dhcp/client.h +++ b/src/vnet/dhcp/client.h @@ -113,6 +113,7 @@ int dhcp_client_for_us (u32 bi0, int dhcp_client_config (vlib_main_t * vm, u32 sw_if_index, u8 * hostname, + u8 * client_id, u32 is_add, u32 client_index, void *event_callback, diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api index a2803728..c632c087 100644 --- a/src/vnet/dhcp/dhcp.api +++ b/src/vnet/dhcp/dhcp.api @@ -61,6 +61,7 @@ autoreply define dhcp_proxy_set_vss @param context - sender context, to match reply w/ request @param sw_if_index - index of the interface for DHCP client @param hostname - hostname + @param client_id - Client ID - option 61 @param is_add - add the config if non-zero, else delete @param want_dhcp_event - DHCP event sent to the sender via dhcp_compl_event API message if non-zero @@ -72,6 +73,7 @@ autoreply define dhcp_client_config u32 context; u32 sw_if_index; u8 hostname[64]; + u8 client_id[64]; u8 is_add; u8 want_dhcp_event; u32 pid; diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c index 5ea93660..d6984f2d 100644 --- a/src/vnet/dhcp/dhcp_api.c +++ b/src/vnet/dhcp/dhcp_api.c @@ -227,7 +227,8 @@ static void vl_api_dhcp_client_config_t_handler VALIDATE_SW_IF_INDEX (mp); rv = dhcp_client_config (vm, ntohl (mp->sw_if_index), - mp->hostname, mp->is_add, mp->client_index, + mp->hostname, mp->client_id, + mp->is_add, mp->client_index, mp->want_dhcp_event ? 
dhcp_compl_event_callback : NULL, mp->pid); diff --git a/test/test_dhcp.py b/test/test_dhcp.py index 1700f6ba..4e8ed4ce 100644 --- a/test/test_dhcp.py +++ b/test/test_dhcp.py @@ -9,7 +9,7 @@ from vpp_neighbor import VppNeighbor from vpp_ip_route import find_route from util import mk_ll_addr -from scapy.layers.l2 import Ether, getmacbyip +from scapy.layers.l2 import Ether, getmacbyip, ARP from scapy.layers.inet import IP, UDP, ICMP from scapy.layers.inet6 import IPv6, in6_getnsmac, in6_mactoifaceid from scapy.layers.dhcp import DHCP, BOOTP, DHCPTypes @@ -189,11 +189,13 @@ class TestDHCP(VppTestCase): self.assertEqual(udp.dport, DHCP4_SERVER_PORT) self.assertEqual(udp.sport, DHCP4_CLIENT_PORT) - def verify_orig_dhcp_discover(self, pkt, intf, hostname): + def verify_orig_dhcp_discover(self, pkt, intf, hostname, client_id=None): self.verify_orig_dhcp_pkt(pkt, intf) self.verify_dhcp_msg_type(pkt, "discover") self.verify_dhcp_has_option(pkt, "hostname", hostname) + if client_id: + self.verify_dhcp_has_option(pkt, "client_id", client_id) def verify_orig_dhcp_request(self, pkt, intf, hostname, ip): self.verify_orig_dhcp_pkt(pkt, intf) @@ -1089,12 +1091,25 @@ class TestDHCP(VppTestCase): self.pg_enable_capture(self.pg_interfaces) self.pg_start() + # + # We'll get an ARP request for the router address + # + rx = self.pg2.get_capture(1) + + self.assertEqual(rx[0][ARP].pdst, self.pg2.remote_ip4) + self.pg_enable_capture(self.pg_interfaces) + # # At the end of this procedure there should be a connected route # in the FIB # self.assertTrue(find_route(self, self.pg2.local_ip4, 32)) + # remove the left over ARP entry + self.vapi.ip_neighbor_add_del(self.pg2.sw_if_index, + self.pg2.remote_mac, + self.pg2.remote_ip4, + is_add=0) # # remove the DHCP config # @@ -1105,6 +1120,21 @@ class TestDHCP(VppTestCase): # self.assertFalse(find_route(self, self.pg2.local_ip4, 32)) + # + # Start the procedure again. 
this time have VPP send the clientiid + # + self.vapi.dhcp_client(self.pg2.sw_if_index, hostname, + client_id=self.pg2.local_mac) + + rx = self.pg2.get_capture(1) + + self.verify_orig_dhcp_discover(rx[0], self.pg2, hostname, + self.pg2.local_mac) + + # + # remove the DHCP config + # + self.vapi.dhcp_client(self.pg2.sw_if_index, hostname, is_add=0) if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 31eadad8..2814ef97 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -1758,6 +1758,7 @@ class VppPapiProvider(object): def dhcp_client(self, sw_if_index, hostname, + client_id='', is_add=1, want_dhcp_events=0): return self.api( @@ -1765,6 +1766,7 @@ class VppPapiProvider(object): { 'sw_if_index': sw_if_index, 'hostname': hostname, + 'client_id': client_id, 'is_add': is_add, 'want_dhcp_event': want_dhcp_events, 'pid': os.getpid(), -- cgit 1.2.3-korg From 808c5b21c2759564689933d004223052b7895a42 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 2 Aug 2017 05:15:07 -0700 Subject: DHCP Client: receive unicast ACKs despite VPP DHCP client setting neither ciaddr nor giaddr and setting the broadcast bit (see RFC 2131 section 4.1) some DHCP servers will still send a unicast DHCPACK. So as not to drop this VPP must have both 1) a receive FIB entry for the OFFERED IP address and 2) a 'don't drop me because of uRPF' FIB entry for the DHCP server's address.
Change-Id: I167d858deb45629318cbdccf5bf67d971730a42f Signed-off-by: Neale Ranns --- src/vnet/dhcp/client.c | 79 ++++++++++++++++++++++++++++++++++++++++- src/vnet/fib/fib_entry.h | 9 ++--- test/test_dhcp.py | 92 +++++++++++++++++++++++++++++++++++++----------- 3 files changed, 151 insertions(+), 29 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c index 25ab3176..cfe62a6f 100644 --- a/src/vnet/dhcp/client.c +++ b/src/vnet/dhcp/client.c @@ -22,6 +22,70 @@ static u8 * format_dhcp_client_state (u8 * s, va_list * va); static vlib_node_registration_t dhcp_client_process_node; static void +dhcp_client_add_rx_address (dhcp_client_main_t * dcm, dhcp_client_t * c) +{ + /* Install a local entry for the offered address */ + fib_prefix_t rx = + { + .fp_len = 32, + .fp_addr.ip4 = c->leased_address, + .fp_proto = FIB_PROTOCOL_IP4, + }; + + fib_table_entry_special_add(fib_table_get_index_for_sw_if_index( + FIB_PROTOCOL_IP4, + c->sw_if_index), + &rx, + FIB_SOURCE_DHCP, + (FIB_ENTRY_FLAG_LOCAL)); + + /* And add the server's address as uRPF exempt so we can accept + * local packets from it */ + fib_prefix_t server = + { + .fp_len = 32, + .fp_addr.ip4 = c->dhcp_server, + .fp_proto = FIB_PROTOCOL_IP4, + }; + + fib_table_entry_special_add(fib_table_get_index_for_sw_if_index( + FIB_PROTOCOL_IP4, + c->sw_if_index), + &server, + FIB_SOURCE_URPF_EXEMPT, + (FIB_ENTRY_FLAG_DROP)); +} + +static void +dhcp_client_remove_rx_address (dhcp_client_main_t * dcm, dhcp_client_t * c) +{ + fib_prefix_t rx = + { + .fp_len = 32, + .fp_addr.ip4 = c->leased_address, + .fp_proto = FIB_PROTOCOL_IP4, + }; + + fib_table_entry_special_remove(fib_table_get_index_for_sw_if_index( + FIB_PROTOCOL_IP4, + c->sw_if_index), + &rx, + FIB_SOURCE_DHCP); + fib_prefix_t server = + { + .fp_len = 32, + .fp_addr.ip4 = c->dhcp_server, + .fp_proto = FIB_PROTOCOL_IP4, + }; + + fib_table_entry_special_remove(fib_table_get_index_for_sw_if_index( + FIB_PROTOCOL_IP4, + 
c->sw_if_index), + &server, + FIB_SOURCE_URPF_EXEMPT); +} + +static void dhcp_client_acquire_address (dhcp_client_main_t * dcm, dhcp_client_t * c) { /* @@ -95,7 +159,9 @@ int dhcp_client_for_us (u32 bi, vlib_buffer_t * b, /* parse through the packet, learn what we can */ if (dhcp->your_ip_address.as_u32) c->leased_address.as_u32 = dhcp->your_ip_address.as_u32; - + + c->dhcp_server.as_u32 = dhcp->server_ip_address.as_u32; + o = (dhcp_option_t *) dhcp->options; while (o->option != 0xFF /* end of options */ && @@ -172,6 +238,14 @@ int dhcp_client_for_us (u32 bi, vlib_buffer_t * b, c->next_transmit = now + 5.0; break; } + /* + * in order to accept unicasted ACKs we need to configure the offered + * address on the interface. However, at this point we may not know the + * subnet-mask (an OFFER may not contain it). So add a temporary receice + * and uRPF excempt entry + */ + dhcp_client_add_rx_address (dcm, c); + /* Received an offer, go send a request */ c->state = DHCP_REQUEST; c->retry_count = 0; @@ -196,6 +270,8 @@ int dhcp_client_for_us (u32 bi, vlib_buffer_t * b, { void (*fp)(u32, u32, u8 *, u8, u8, u8 *, u8 *, u8 *) = c->event_callback; + /* replace the temporary RX address with the correct subnet */ + dhcp_client_remove_rx_address (dcm, c); dhcp_client_acquire_address (dcm, c); /* @@ -831,6 +907,7 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a) 1, FIB_ROUTE_PATH_FLAG_NONE); } + dhcp_client_remove_rx_address (dcm, c); dhcp_client_release_address (dcm, c); ip4_sw_interface_enable_disable (c->sw_if_index, 0); diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index 5f6ff312..93b8016d 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -205,14 +205,9 @@ typedef enum fib_entry_attribute_t_ { /** * Marker. add new entries before this one. 
*/ - FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_MULTICAST, + FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT, } fib_entry_attribute_t; -/** - * The maximum number of sources - */ -#define FIB_ENTRY_ATTRIBUTE_MAX (FIB_ENTRY_ATTRIBUTE_LAST+1) - #define FIB_ENTRY_ATTRIBUTES { \ [FIB_ENTRY_ATTRIBUTE_CONNECTED] = "connected", \ [FIB_ENTRY_ATTRIBUTE_ATTACHED] = "attached", \ @@ -226,7 +221,7 @@ typedef enum fib_entry_attribute_t_ { #define FOR_EACH_FIB_ATTRIBUTE(_item) \ for (_item = FIB_ENTRY_ATTRIBUTE_FIRST; \ - _item < FIB_ENTRY_ATTRIBUTE_MAX; \ + _item <= FIB_ENTRY_ATTRIBUTE_LAST; \ _item++) typedef enum fib_entry_flag_t_ { diff --git a/test/test_dhcp.py b/test/test_dhcp.py index 4e8ed4ce..6fc29182 100644 --- a/test/test_dhcp.py +++ b/test/test_dhcp.py @@ -196,6 +196,10 @@ class TestDHCP(VppTestCase): self.verify_dhcp_has_option(pkt, "hostname", hostname) if client_id: self.verify_dhcp_has_option(pkt, "client_id", client_id) + bootp = pkt[BOOTP] + self.assertEqual(bootp.ciaddr, "0.0.0.0") + self.assertEqual(bootp.giaddr, "0.0.0.0") + self.assertEqual(bootp.flags, 0x8000) def verify_orig_dhcp_request(self, pkt, intf, hostname, ip): self.verify_orig_dhcp_pkt(pkt, intf) @@ -203,6 +207,10 @@ class TestDHCP(VppTestCase): self.verify_dhcp_msg_type(pkt, "request") self.verify_dhcp_has_option(pkt, "hostname", hostname) self.verify_dhcp_has_option(pkt, "requested_addr", ip) + bootp = pkt[BOOTP] + self.assertEqual(bootp.ciaddr, "0.0.0.0") + self.assertEqual(bootp.giaddr, "0.0.0.0") + self.assertEqual(bootp.flags, 0x8000) def verify_relayed_dhcp_discover(self, pkt, intf, src_intf=None, fib_id=0, oui=0, @@ -1057,14 +1065,15 @@ class TestDHCP(VppTestCase): # # Sned back on offer, expect the request # - p = (Ether(dst=self.pg2.local_mac, src=self.pg2.remote_mac) / - IP(src=self.pg2.remote_ip4, dst="255.255.255.255") / - UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_CLIENT_PORT) / - BOOTP(op=1, - yiaddr=self.pg2.local_ip4) / - DHCP(options=[('message-type', 'offer'), 
('end')])) + p_offer = (Ether(dst=self.pg2.local_mac, src=self.pg2.remote_mac) / + IP(src=self.pg2.remote_ip4, dst="255.255.255.255") / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_CLIENT_PORT) / + BOOTP(op=1, yiaddr=self.pg2.local_ip4) / + DHCP(options=[('message-type', 'offer'), + ('server_id', self.pg2.remote_ip4), + ('end')])) - self.pg2.add_stream(p) + self.pg2.add_stream(p_offer) self.pg_enable_capture(self.pg_interfaces) self.pg_start() @@ -1075,19 +1084,18 @@ class TestDHCP(VppTestCase): # # Send an acknowloedgement # - p = (Ether(dst=self.pg2.local_mac, src=self.pg2.remote_mac) / - IP(src=self.pg2.remote_ip4, dst="255.255.255.255") / - UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_CLIENT_PORT) / - BOOTP(op=1, - yiaddr=self.pg2.local_ip4) / - DHCP(options=[('message-type', 'ack'), - ('subnet_mask', "255.255.255.0"), - ('router', self.pg2.remote_ip4), - ('server_id', self.pg2.remote_ip4), - ('lease_time', 43200), - ('end')])) - - self.pg2.add_stream(p) + p_ack = (Ether(dst=self.pg2.local_mac, src=self.pg2.remote_mac) / + IP(src=self.pg2.remote_ip4, dst="255.255.255.255") / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_CLIENT_PORT) / + BOOTP(op=1, yiaddr=self.pg2.local_ip4) / + DHCP(options=[('message-type', 'ack'), + ('subnet_mask', "255.255.255.0"), + ('router', self.pg2.remote_ip4), + ('server_id', self.pg2.remote_ip4), + ('lease_time', 43200), + ('end')])) + + self.pg2.add_stream(p_ack) self.pg_enable_capture(self.pg_interfaces) self.pg_start() @@ -1103,6 +1111,7 @@ class TestDHCP(VppTestCase): # At the end of this procedure there should be a connected route # in the FIB # + self.assertTrue(find_route(self, self.pg2.local_ip4, 24)) self.assertTrue(find_route(self, self.pg2.local_ip4, 32)) # remove the left over ARP entry @@ -1119,10 +1128,14 @@ class TestDHCP(VppTestCase): # and now the route should be gone # self.assertFalse(find_route(self, self.pg2.local_ip4, 32)) + self.assertFalse(find_route(self, self.pg2.local_ip4, 24)) # - # Start the procedure again. 
this time have VPP send the clientiid + # Start the procedure again. this time have VPP send the client-ID # + self.pg2.admin_down() + self.sleep(1) + self.pg2.admin_up() self.vapi.dhcp_client(self.pg2.sw_if_index, hostname, client_id=self.pg2.local_mac) @@ -1131,10 +1144,47 @@ class TestDHCP(VppTestCase): self.verify_orig_dhcp_discover(rx[0], self.pg2, hostname, self.pg2.local_mac) + self.pg2.add_stream(p_offer) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg2.get_capture(1) + self.verify_orig_dhcp_request(rx[0], self.pg2, hostname, + self.pg2.local_ip4) + + # + # unicast the ack to the offered address + # + p_ack = (Ether(dst=self.pg2.local_mac, src=self.pg2.remote_mac) / + IP(src=self.pg2.remote_ip4, dst=self.pg2.local_ip4) / + UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_CLIENT_PORT) / + BOOTP(op=1, yiaddr=self.pg2.local_ip4) / + DHCP(options=[('message-type', 'ack'), + ('subnet_mask', "255.255.255.0"), + ('router', self.pg2.remote_ip4), + ('server_id', self.pg2.remote_ip4), + ('lease_time', 43200), + ('end')])) + + self.pg2.add_stream(p_ack) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # + # At the end of this procedure there should be a connected route + # in the FIB + # + self.assertTrue(find_route(self, self.pg2.local_ip4, 32)) + self.assertTrue(find_route(self, self.pg2.local_ip4, 24)) + # # remove the DHCP config # self.vapi.dhcp_client(self.pg2.sw_if_index, hostname, is_add=0) + self.assertFalse(find_route(self, self.pg2.local_ip4, 32)) + self.assertFalse(find_route(self, self.pg2.local_ip4, 24)) + + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) -- cgit 1.2.3-korg From da78f957e46c686434149d332a477d7ea055d76a Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 24 May 2017 09:15:43 -0700 Subject: L2 over MPLS [support for VPWS/VPLS] - switch to using dpo_proto_t rather than fib_protocol_t in fib_paths so that we can describe L2 paths - VLIB nodes to handle pop/push of MPLS labels to 
L2 Change-Id: Id050d06a11fd2c9c1c81ce5a0654e6c5ae6afa6e Signed-off-by: Neale Ranns --- src/plugins/gtpu/gtpu.c | 2 +- src/plugins/snat/snat.c | 2 +- src/vat/api_format.c | 17 +- src/vnet/dhcp/client.c | 6 +- src/vnet/dhcp/dhcp6_proxy_node.c | 2 +- src/vnet/dpo/dpo.c | 19 ++ src/vnet/dpo/dpo.h | 11 +- src/vnet/dpo/interface_dpo.c | 30 +++ src/vnet/dpo/mpls_label_dpo.c | 45 +++- src/vnet/ethernet/arp.c | 4 +- src/vnet/fib/fib_api.h | 4 +- src/vnet/fib/fib_entry.c | 8 +- src/vnet/fib/fib_entry_src.c | 16 +- src/vnet/fib/fib_entry_src.h | 4 +- src/vnet/fib/fib_entry_src_api.c | 2 +- src/vnet/fib/fib_entry_src_default_route.c | 2 +- src/vnet/fib/fib_entry_src_interface.c | 2 +- src/vnet/fib/fib_entry_src_lisp.c | 8 +- src/vnet/fib/fib_entry_src_mpls.c | 4 +- src/vnet/fib/fib_entry_src_rr.c | 15 +- src/vnet/fib/fib_entry_src_special.c | 2 +- src/vnet/fib/fib_path.c | 79 +++---- src/vnet/fib/fib_path.h | 11 +- src/vnet/fib/fib_path_ext.c | 3 + src/vnet/fib/fib_path_list.c | 4 +- src/vnet/fib/fib_path_list.h | 4 +- src/vnet/fib/fib_table.c | 6 +- src/vnet/fib/fib_table.h | 6 +- src/vnet/fib/fib_test.c | 338 ++++++++++++++--------------- src/vnet/fib/fib_types.h | 8 +- src/vnet/interface_format.c | 12 +- src/vnet/ip/ip4_forward.c | 6 +- src/vnet/ip/ip6_forward.c | 4 +- src/vnet/ip/ip6_neighbor.c | 10 +- src/vnet/ip/ip_api.c | 38 ++-- src/vnet/ip/lookup.c | 18 +- src/vnet/lisp-gpe/lisp_gpe.c | 13 +- src/vnet/lisp-gpe/lisp_gpe_api.c | 10 +- src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c | 5 +- src/vnet/mfib/ip6_mfib.c | 6 +- src/vnet/mfib/mfib_entry.c | 10 +- src/vnet/mfib/mfib_test.c | 14 +- src/vnet/mpls/mpls.api | 4 +- src/vnet/mpls/mpls.c | 36 +-- src/vnet/mpls/mpls_api.c | 21 +- src/vnet/mpls/mpls_tunnel.c | 75 ++++--- src/vnet/mpls/mpls_tunnel.h | 38 ++-- src/vnet/srmpls/sr_mpls_policy.c | 6 +- src/vnet/srmpls/sr_mpls_steering.c | 2 +- src/vnet/srv6/sr_steering.c | 4 +- src/vnet/vxlan-gpe/vxlan_gpe.c | 2 +- src/vnet/vxlan/vxlan.c | 2 +- src/vpp/app/vpe_cli.c | 2 +- 
test/test_bfd.py | 6 +- test/test_gre.py | 24 +- test/test_ip6.py | 26 +-- test/test_map.py | 19 +- test/test_mpls.py | 318 ++++++++++++++++++++++----- test/test_p2p_ethernet.py | 16 +- test/vpp_ip_route.py | 24 +- test/vpp_mpls_tunnel_interface.py | 6 +- test/vpp_papi_provider.py | 4 +- 62 files changed, 889 insertions(+), 556 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/plugins/gtpu/gtpu.c b/src/plugins/gtpu/gtpu.c index 84745bd8..3dfb4210 100755 --- a/src/plugins/gtpu/gtpu.c +++ b/src/plugins/gtpu/gtpu.c @@ -534,7 +534,7 @@ int vnet_gtpu_add_del_tunnel fib_node_index_t mfei; adj_index_t ai; fib_route_path_t path = { - .frp_proto = fp, + .frp_proto = fib_proto_to_dpo (fp), .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, diff --git a/src/plugins/snat/snat.c b/src/plugins/snat/snat.c index 9fbc1e54..f196b5c2 100644 --- a/src/plugins/snat/snat.c +++ b/src/plugins/snat/snat.c @@ -135,7 +135,7 @@ snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_EXCLUSIVE), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, sw_if_index, ~0, diff --git a/src/vat/api_format.c b/src/vat/api_format.c index f97cdeef..009cf173 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -7498,7 +7498,7 @@ api_mpls_route_add_del (vat_main_t * vam) mpls_label_t *next_hop_out_label_stack = NULL; mpls_label_t local_label = MPLS_LABEL_INVALID; u8 is_eos = 0; - u8 next_hop_proto_is_ip4 = 1; + dpo_proto_t next_hop_proto = DPO_PROTO_IP4; /* Parse args required to build the message */ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) @@ -7517,13 +7517,13 @@ api_mpls_route_add_del (vat_main_t * vam) &v4_next_hop_address)) { next_hop_set = 1; - next_hop_proto_is_ip4 = 1; + next_hop_proto = DPO_PROTO_IP4; } else if (unformat (i, "via %U", unformat_ip6_address, &v6_next_hop_address)) { next_hop_set = 1; - next_hop_proto_is_ip4 = 0; + next_hop_proto = DPO_PROTO_IP6; } 
else if (unformat (i, "weight %d", &next_hop_weight)) ; @@ -7548,12 +7548,12 @@ api_mpls_route_add_del (vat_main_t * vam) else if (unformat (i, "lookup-in-ip4-table %d", &next_hop_table_id)) { next_hop_set = 1; - next_hop_proto_is_ip4 = 1; + next_hop_proto = DPO_PROTO_IP4; } else if (unformat (i, "lookup-in-ip6-table %d", &next_hop_table_id)) { next_hop_set = 1; - next_hop_proto_is_ip4 = 0; + next_hop_proto = DPO_PROTO_IP6; } else if (unformat (i, "next-hop-table %d", &next_hop_table_id)) ; @@ -7599,7 +7599,7 @@ api_mpls_route_add_del (vat_main_t * vam) mp->mr_create_table_if_needed = create_table_if_needed; mp->mr_is_add = is_add; - mp->mr_next_hop_proto_is_ip4 = next_hop_proto_is_ip4; + mp->mr_next_hop_proto = next_hop_proto; mp->mr_is_classify = is_classify; mp->mr_is_multipath = is_multipath; mp->mr_is_resolve_host = resolve_host; @@ -7622,13 +7622,14 @@ api_mpls_route_add_del (vat_main_t * vam) if (next_hop_set) { - if (next_hop_proto_is_ip4) + if (DPO_PROTO_IP4 == next_hop_proto) { clib_memcpy (mp->mr_next_hop, &v4_next_hop_address, sizeof (v4_next_hop_address)); } - else + else if (DPO_PROTO_IP6 == next_hop_proto) + { clib_memcpy (mp->mr_next_hop, &v6_next_hop_address, diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c index cfe62a6f..dd5e99f2 100644 --- a/src/vnet/dhcp/client.c +++ b/src/vnet/dhcp/client.c @@ -296,7 +296,7 @@ int dhcp_client_for_us (u32 bi, vlib_buffer_t * b, &all_0s, FIB_SOURCE_DHCP, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh, c->sw_if_index, ~0, @@ -605,7 +605,7 @@ dhcp_bound_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now) c->sw_if_index), &all_0s, FIB_SOURCE_DHCP, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh, c->sw_if_index, ~0, @@ -900,7 +900,7 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a) c->sw_if_index), &all_0s, FIB_SOURCE_DHCP, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh, c->sw_if_index, ~0, diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index 
e109cc4c..9c2f5220 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -857,7 +857,7 @@ dhcp6_proxy_set_server (ip46_address_t *addr, else { const fib_route_path_t path_for_us = { - .frp_proto = FIB_PROTOCOL_IP6, + .frp_proto = DPO_PROTO_IP6, .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c index 389f995b..aa770838 100644 --- a/src/vnet/dpo/dpo.c +++ b/src/vnet/dpo/dpo.c @@ -109,6 +109,25 @@ vnet_link_to_dpo_proto (vnet_link_t linkt) return (0); } +vnet_link_t +dpo_proto_to_link (dpo_proto_t dp) +{ + switch (dp) + { + case DPO_PROTO_IP6: + return (VNET_LINK_IP6); + case DPO_PROTO_IP4: + return (VNET_LINK_IP4); + case DPO_PROTO_MPLS: + return (VNET_LINK_MPLS); + case DPO_PROTO_ETHERNET: + return (VNET_LINK_ETHERNET); + case DPO_PROTO_NSH: + return (VNET_LINK_NSH); + } + return (~0); +} + u8 * format_dpo_type (u8 * s, va_list * args) { diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h index 5aa4e2d2..42fc51d4 100644 --- a/src/vnet/dpo/dpo.h +++ b/src/vnet/dpo/dpo.h @@ -59,14 +59,10 @@ typedef u32 index_t; */ typedef enum dpo_proto_t_ { -#if CLIB_DEBUG > 0 - DPO_PROTO_IP4 = 1, -#else DPO_PROTO_IP4 = 0, -#endif DPO_PROTO_IP6, - DPO_PROTO_ETHERNET, DPO_PROTO_MPLS, + DPO_PROTO_ETHERNET, DPO_PROTO_NSH, } __attribute__((packed)) dpo_proto_t; @@ -272,6 +268,11 @@ extern u8 *format_dpo_type(u8 * s, va_list * args); */ extern u8 *format_dpo_proto(u8 * s, va_list * args); +/** + * @brief Convert a DPO protocol to the corresponding VNET link type + */ +extern vnet_link_t dpo_proto_to_link(dpo_proto_t dp); + /** * @brief * Set and stack a DPO. 
diff --git a/src/vnet/dpo/interface_dpo.c b/src/vnet/dpo/interface_dpo.c index 8d700c23..780bfa2a 100644 --- a/src/vnet/dpo/interface_dpo.c +++ b/src/vnet/dpo/interface_dpo.c @@ -195,11 +195,17 @@ const static char* const interface_dpo_ip6_nodes[] = "interface-dpo-ip4", NULL, }; +const static char* const interface_dpo_l2_nodes[] = +{ + "interface-dpo-l2", + NULL, +}; const static char* const * const interface_dpo_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP4] = interface_dpo_ip4_nodes, [DPO_PROTO_IP6] = interface_dpo_ip6_nodes, + [DPO_PROTO_ETHERNET] = interface_dpo_l2_nodes, [DPO_PROTO_MPLS] = NULL, }; @@ -382,6 +388,14 @@ interface_dpo_ip6 (vlib_main_t * vm, return (interface_dpo_inline(vm, node, from_frame)); } +static uword +interface_dpo_l2 (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (interface_dpo_inline(vm, node, from_frame)); +} + VLIB_REGISTER_NODE (interface_dpo_ip4_node) = { .function = interface_dpo_ip4, .name = "interface-dpo-ip4", @@ -414,3 +428,19 @@ VLIB_REGISTER_NODE (interface_dpo_ip6_node) = { VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_ip6_node, interface_dpo_ip6) +VLIB_REGISTER_NODE (interface_dpo_l2_node) = { + .function = interface_dpo_l2, + .name = "interface-dpo-l2", + .vector_size = sizeof (u32), + .format_trace = format_interface_dpo_trace, + + .n_next_nodes = 2, + .next_nodes = { + [INTERFACE_DPO_DROP] = "error-drop", + [INTERFACE_DPO_INPUT] = "l2-input", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_l2_node, + interface_dpo_l2) + diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index 1c451a51..b178a902 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -192,7 +192,8 @@ mpls_label_imposition_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, u8 payload_is_ip4, - u8 payload_is_ip6) + u8 payload_is_ip6, + u8 payload_is_ethernet) { u32 n_left_from, next_index, * from, * to_next; @@ -320,6 +321,13 @@ 
mpls_label_imposition_inline (vlib_main_t * vm, ttl2 = ip2->hop_limit; ttl3 = ip3->hop_limit; } + else if (payload_is_ethernet) + { + /* + * nothing to change in the ethernet header + */ + ttl0 = ttl1 = ttl2 = ttl3 = 255; + } else { /* @@ -551,7 +559,7 @@ mpls_label_imposition (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return (mpls_label_imposition_inline(vm, node, frame, 0, 0)); + return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 0)); } VLIB_REGISTER_NODE (mpls_label_imposition_node) = { @@ -573,7 +581,7 @@ ip4_mpls_label_imposition (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return (mpls_label_imposition_inline(vm, node, frame, 1, 0)); + return (mpls_label_imposition_inline(vm, node, frame, 1, 0, 0)); } VLIB_REGISTER_NODE (ip4_mpls_label_imposition_node) = { @@ -595,7 +603,7 @@ ip6_mpls_label_imposition (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return (mpls_label_imposition_inline(vm, node, frame, 0, 1)); + return (mpls_label_imposition_inline(vm, node, frame, 0, 1, 0)); } VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = { @@ -612,6 +620,28 @@ VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = { VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_imposition_node, ip6_mpls_label_imposition) +static uword +ethernet_mpls_label_imposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 1)); +} + +VLIB_REGISTER_NODE (ethernet_mpls_label_imposition_node) = { + .function = ethernet_mpls_label_imposition, + .name = "ethernet-mpls-label-imposition", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_label_imposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (ethernet_mpls_label_imposition_node, + ethernet_mpls_label_imposition) + static void mpls_label_dpo_mem_show (void) { @@ -643,11 +673,18 @@ const static 
char* const mpls_label_imp_mpls_nodes[] = "mpls-label-imposition", NULL, }; +const static char* const mpls_label_imp_ethernet_nodes[] = +{ + "ethernet-mpls-label-imposition", + NULL, +}; + const static char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP4] = mpls_label_imp_ip4_nodes, [DPO_PROTO_IP6] = mpls_label_imp_ip6_nodes, [DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes, + [DPO_PROTO_ETHERNET] = mpls_label_imp_ethernet_nodes, }; diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index 4d9edaf5..8a394006 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -588,7 +588,7 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, e->fib_entry_index = fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, &pfx.fp_addr, + DPO_PROTO_IP4, &pfx.fp_addr, e->sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); } @@ -1621,7 +1621,7 @@ arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e) fib_table_entry_path_remove (fib_index, &pfx, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx.fp_addr, e->sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h index 73d76a42..d07d6cae 100644 --- a/src/vnet/fib/fib_api.h +++ b/src/vnet/fib/fib_api.h @@ -21,7 +21,7 @@ int add_del_route_check (fib_protocol_t table_proto, u32 table_id, u32 next_hop_sw_if_index, - fib_protocol_t next_hop_table_proto, + dpo_proto_t next_hop_table_proto, u32 next_hop_table_id, u8 create_missing_tables, u8 is_rpf_id, @@ -43,7 +43,7 @@ add_del_route_t_handler (u8 is_multipath, u8 is_rpf_id, u32 fib_index, const fib_prefix_t * prefix, - u8 next_hop_proto_is_ip4, + dpo_proto_t next_hop_proto, const ip46_address_t * next_hop, u32 next_hop_sw_if_index, u8 next_hop_fib_index, diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index d7ff1c8c..2027f2be 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c 
@@ -58,12 +58,18 @@ fib_entry_get_index (const fib_entry_t * fib_entry) return (fib_entry - fib_entry_pool); } -static fib_protocol_t +fib_protocol_t fib_entry_get_proto (const fib_entry_t * fib_entry) { return (fib_entry->fe_prefix.fp_proto); } +dpo_proto_t +fib_entry_get_dpo_proto (const fib_entry_t * fib_entry) +{ + return (fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto)); +} + fib_forward_chain_type_t fib_entry_get_default_chain_type (const fib_entry_t *fib_entry) { diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c index ff73cbf9..173df74f 100644 --- a/src/vnet/fib/fib_entry_src.c +++ b/src/vnet/fib/fib_entry_src.c @@ -29,12 +29,6 @@ */ static fib_entry_src_vft_t fib_entry_src_vft[FIB_SOURCE_MAX]; -static fib_protocol_t -fib_entry_get_proto (const fib_entry_t * fib_entry) -{ - return (fib_entry->fe_prefix.fp_proto); -} - void fib_entry_src_register (fib_source_t source, const fib_entry_src_vft_t *vft) @@ -861,7 +855,7 @@ fib_entry_src_action_add (fib_entry_t *fib_entry, fib_entry_src_vft[source].fesv_add(esrc, fib_entry, flags, - fib_entry_get_proto(fib_entry), + fib_entry_get_dpo_proto(fib_entry), dpo); } @@ -914,7 +908,7 @@ fib_entry_src_action_update (fib_entry_t *fib_entry, fib_entry_src_vft[source].fesv_add(esrc, fib_entry, flags, - fib_entry_get_proto(fib_entry), + fib_entry_get_dpo_proto(fib_entry), dpo); } @@ -1106,8 +1100,7 @@ fib_entry_src_action_path_add (fib_entry_t *fib_entry, source, flags, drop_dpo_get( - fib_proto_to_dpo( - fib_entry_get_proto(fib_entry)))); + fib_entry_get_dpo_proto(fib_entry))); esrc = fib_entry_src_find(fib_entry, source, NULL); } @@ -1166,8 +1159,7 @@ fib_entry_src_action_path_swap (fib_entry_t *fib_entry, source, flags, drop_dpo_get( - fib_proto_to_dpo( - fib_entry_get_proto(fib_entry)))); + fib_entry_get_dpo_proto(fib_entry))); esrc = fib_entry_src_find(fib_entry, source, NULL); } diff --git a/src/vnet/fib/fib_entry_src.h b/src/vnet/fib/fib_entry_src.h index 640c174d..35c43936 100644 --- 
a/src/vnet/fib/fib_entry_src.h +++ b/src/vnet/fib/fib_entry_src.h @@ -73,7 +73,7 @@ typedef void (*fib_entry_src_deactivate_t)(fib_entry_src_t *src, typedef void (*fib_entry_src_add_t)(fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t *dpo); /** @@ -277,6 +277,8 @@ extern void fib_entry_src_mk_lb (fib_entry_t *fib_entry, fib_forward_chain_type_t fct, dpo_id_t *dpo_lb); +extern fib_protocol_t fib_entry_get_proto(const fib_entry_t * fib_entry); +extern dpo_proto_t fib_entry_get_dpo_proto(const fib_entry_t * fib_entry); /* * Per-source registration. declared here so we save a separate .h file for each diff --git a/src/vnet/fib/fib_entry_src_api.c b/src/vnet/fib/fib_entry_src_api.c index f895886b..1cdcfbde 100644 --- a/src/vnet/fib/fib_entry_src_api.c +++ b/src/vnet/fib/fib_entry_src_api.c @@ -131,7 +131,7 @@ static void fib_entry_src_api_add (fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t *dpo) { if (FIB_ENTRY_FLAG_NONE != flags) diff --git a/src/vnet/fib/fib_entry_src_default_route.c b/src/vnet/fib/fib_entry_src_default_route.c index 9f4e7c36..431abb66 100644 --- a/src/vnet/fib/fib_entry_src_default_route.c +++ b/src/vnet/fib/fib_entry_src_default_route.c @@ -35,7 +35,7 @@ static void fib_entry_src_default_route_add (fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t *dpo) { src->fes_pl = fib_path_list_create_special(proto, diff --git a/src/vnet/fib/fib_entry_src_interface.c b/src/vnet/fib/fib_entry_src_interface.c index bb87818f..6c087f34 100644 --- a/src/vnet/fib/fib_entry_src_interface.c +++ b/src/vnet/fib/fib_entry_src_interface.c @@ -35,7 +35,7 @@ static void fib_entry_src_interface_add (fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t 
*dpo) { src->fes_pl = fib_path_list_create_special( diff --git a/src/vnet/fib/fib_entry_src_lisp.c b/src/vnet/fib/fib_entry_src_lisp.c index 7f8b91bb..e72dce63 100644 --- a/src/vnet/fib/fib_entry_src_lisp.c +++ b/src/vnet/fib/fib_entry_src_lisp.c @@ -79,10 +79,10 @@ fib_entry_src_lisp_path_remove (fib_entry_src_t *src, static void fib_entry_src_lisp_add (fib_entry_src_t *src, - const fib_entry_t *entry, - fib_entry_flag_t flags, - fib_protocol_t proto, - const dpo_id_t *dpo) + const fib_entry_t *entry, + fib_entry_flag_t flags, + dpo_proto_t proto, + const dpo_id_t *dpo) { if (FIB_ENTRY_FLAG_NONE != flags) { diff --git a/src/vnet/fib/fib_entry_src_mpls.c b/src/vnet/fib/fib_entry_src_mpls.c index 14c7310f..a616458f 100644 --- a/src/vnet/fib/fib_entry_src_mpls.c +++ b/src/vnet/fib/fib_entry_src_mpls.c @@ -57,13 +57,13 @@ static void fib_entry_src_mpls_add (fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t *dpo) { src->fes_pl = fib_path_list_create_special(proto, FIB_PATH_LIST_FLAG_DROP, - drop_dpo_get(fib_proto_to_dpo(proto))); + drop_dpo_get(proto)); } static void diff --git a/src/vnet/fib/fib_entry_src_rr.c b/src/vnet/fib/fib_entry_src_rr.c index d66ef7b1..1153f3f1 100644 --- a/src/vnet/fib/fib_entry_src_rr.c +++ b/src/vnet/fib/fib_entry_src_rr.c @@ -35,7 +35,7 @@ fib_entry_src_rr_resolve_via_connected (fib_entry_src_t *src, const fib_entry_t *cover) { const fib_route_path_t path = { - .frp_proto = fib_entry->fe_prefix.fp_proto, + .frp_proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto), .frp_addr = fib_entry->fe_prefix.fp_addr, .frp_sw_if_index = fib_entry_get_resolving_interface( fib_entry_get_index(cover)), @@ -90,18 +90,17 @@ fib_entry_src_rr_use_covers_pl (fib_entry_src_t *src, const fib_entry_t *cover) { fib_node_index_t *entries = NULL; - fib_protocol_t proto; + dpo_proto_t proto; - proto = fib_entry->fe_prefix.fp_proto; + proto = 
fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto); vec_add1(entries, fib_entry_get_index(fib_entry)); if (fib_path_list_recursive_loop_detect(cover->fe_parent, &entries)) { - src->fes_pl = fib_path_list_create_special( - proto, - FIB_PATH_LIST_FLAG_DROP, - drop_dpo_get(fib_proto_to_dpo(proto))); + src->fes_pl = fib_path_list_create_special(proto, + FIB_PATH_LIST_FLAG_DROP, + drop_dpo_get(proto)); } else { @@ -126,7 +125,7 @@ fib_entry_src_rr_activate (fib_entry_src_t *src, */ if (FIB_PROTOCOL_MPLS == fib_entry->fe_prefix.fp_proto) { - src->fes_pl = fib_path_list_create_special(FIB_PROTOCOL_MPLS, + src->fes_pl = fib_path_list_create_special(DPO_PROTO_MPLS, FIB_PATH_LIST_FLAG_DROP, NULL); fib_path_list_lock(src->fes_pl); diff --git a/src/vnet/fib/fib_entry_src_special.c b/src/vnet/fib/fib_entry_src_special.c index 75605d7f..e979e18f 100644 --- a/src/vnet/fib/fib_entry_src_special.c +++ b/src/vnet/fib/fib_entry_src_special.c @@ -43,7 +43,7 @@ static void fib_entry_src_special_add (fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t *dpo) { src->fes_pl = diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index 3a67a544..58050ccb 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -193,7 +193,7 @@ typedef struct fib_path_t_ { * next-hop's address. 
We can't derive this from the address itself * since the address can be all zeros */ - fib_protocol_t fp_nh_proto; + dpo_proto_t fp_nh_proto; /** * UCMP [unnormalised] weigth @@ -381,7 +381,7 @@ format_fib_path (u8 * s, va_list * args) s = format (s, " index:%d ", fib_path_get_index(path)); s = format (s, "pl-index:%d ", path->fp_pl_index); - s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto); + s = format (s, "%U ", format_dpo_proto, path->fp_nh_proto); s = format (s, "weight=%d ", path->fp_weight); s = format (s, "pref=%d ", path->fp_preference); s = format (s, "%s: ", fib_path_type_names[path->fp_type]); @@ -454,7 +454,7 @@ format_fib_path (u8 * s, va_list * args) } break; case FIB_PATH_TYPE_RECURSIVE: - if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + if (DPO_PROTO_MPLS == path->fp_nh_proto) { s = format (s, "via %U %U", format_mpls_unicast_label, @@ -552,14 +552,14 @@ fib_path_attached_next_hop_get_adj (fib_path_t *path, * the subnet address (the attached route) links to the * auto-adj (see below), we want that adj here too. */ - return (adj_nbr_add_or_lock(path->fp_nh_proto, + return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto), link, &zero_addr, path->attached_next_hop.fp_interface)); } else { - return (adj_nbr_add_or_lock(path->fp_nh_proto, + return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto), link, &path->attached_next_hop.fp_nh, path->attached_next_hop.fp_interface)); @@ -575,10 +575,10 @@ fib_path_attached_next_hop_set (fib_path_t *path) */ dpo_set(&path->fp_dpo, DPO_ADJACENCY, - fib_proto_to_dpo(path->fp_nh_proto), + path->fp_nh_proto, fib_path_attached_next_hop_get_adj( path, - fib_proto_to_link(path->fp_nh_proto))); + dpo_proto_to_link(path->fp_nh_proto))); /* * become a child of the adjacency so we receive updates @@ -607,14 +607,14 @@ fib_path_attached_get_adj (fib_path_t *path, * point-2-point interfaces do not require a glean, since * there is nothing to ARP. 
Install a rewrite/nbr adj instead */ - return (adj_nbr_add_or_lock(path->fp_nh_proto, + return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto), link, &zero_addr, path->attached.fp_interface)); } else { - return (adj_glean_add_or_lock(path->fp_nh_proto, + return (adj_glean_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto), path->attached.fp_interface, NULL)); } @@ -650,7 +650,7 @@ fib_path_recursive_adj_update (fib_path_t *path, if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP) { path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; - dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto)); } else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST) { @@ -668,7 +668,7 @@ fib_path_recursive_adj_update (fib_path_t *path, if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR) { path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; - dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto)); /* * PIC edge trigger. let the load-balance maps know @@ -685,7 +685,7 @@ fib_path_recursive_adj_update (fib_path_t *path, if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib))) { path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; - dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto)); /* * PIC edge trigger. let the load-balance maps know @@ -699,7 +699,7 @@ fib_path_recursive_adj_update (fib_path_t *path, if (!fib_entry_is_resolved(path->fp_via_fib)) { path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; - dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto)); /* * PIC edge trigger. 
let the load-balance maps know @@ -720,9 +720,7 @@ fib_path_recursive_adj_update (fib_path_t *path, */ dpo_copy(dpo, &via_dpo); - FIB_PATH_DBG(path, "recursive update: %U", - fib_get_lookup_main(path->fp_nh_proto), - &path->fp_dpo, 2); + FIB_PATH_DBG(path, "recursive update:"); dpo_reset(&via_dpo); } @@ -804,13 +802,8 @@ fib_path_unresolve (fib_path_t *path) static fib_forward_chain_type_t fib_path_to_chain_type (const fib_path_t *path) { - switch (path->fp_nh_proto) + if (DPO_PROTO_MPLS == path->fp_nh_proto) { - case FIB_PROTOCOL_IP4: - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); - case FIB_PROTOCOL_IP6: - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); - case FIB_PROTOCOL_MPLS: if (FIB_PATH_TYPE_RECURSIVE == path->fp_type && MPLS_EOS == path->recursive.fp_nh.fp_eos) { @@ -821,7 +814,10 @@ fib_path_to_chain_type (const fib_path_t *path) return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); } } - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + else + { + return (fib_forw_chain_type_from_dpo_proto(path->fp_nh_proto)); + } } /* @@ -927,7 +923,7 @@ FIXME comment ai = fib_path_attached_next_hop_get_adj( path, - fib_proto_to_link(path->fp_nh_proto)); + dpo_proto_to_link(path->fp_nh_proto)); path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; if (if_is_up && adj_is_up(ai)) @@ -935,9 +931,7 @@ FIXME comment path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; } - dpo_set(&path->fp_dpo, DPO_ADJACENCY, - fib_proto_to_dpo(path->fp_nh_proto), - ai); + dpo_set(&path->fp_dpo, DPO_ADJACENCY, path->fp_nh_proto, ai); adj_unlock(ai); if (!if_is_up) @@ -1141,7 +1135,7 @@ fib_path_create (fib_node_index_t pl_index, else { path->fp_type = FIB_PATH_TYPE_RECURSIVE; - if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + if (DPO_PROTO_MPLS == path->fp_nh_proto) { path->recursive.fp_nh.fp_local_label = rpath->frp_local_label; path->recursive.fp_nh.fp_eos = rpath->frp_eos; @@ -1167,7 +1161,7 @@ fib_path_create (fib_node_index_t pl_index, */ fib_node_index_t fib_path_create_special (fib_node_index_t pl_index, - 
fib_protocol_t nh_proto, + dpo_proto_t nh_proto, fib_path_cfg_flags_t flags, const dpo_id_t *dpo) { @@ -1433,7 +1427,7 @@ fib_path_cmp_w_route_path (fib_node_index_t path_index, res = (path->attached.fp_interface - rpath->frp_sw_if_index); break; case FIB_PATH_TYPE_RECURSIVE: - if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + if (DPO_PROTO_MPLS == path->fp_nh_proto) { res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label; @@ -1535,8 +1529,7 @@ fib_path_recursive_loop_detect (fib_node_index_t path_index, FIB_PATH_DBG(path, "recursive loop formed"); path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP; - dpo_copy(&path->fp_dpo, - drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&path->fp_dpo, drop_dpo_get(path->fp_nh_proto)); } else { @@ -1590,8 +1583,7 @@ fib_path_resolve (fib_node_index_t path_index) */ if (fib_path_is_permanent_drop(path)) { - dpo_copy(&path->fp_dpo, - drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&path->fp_dpo, drop_dpo_get(path->fp_nh_proto)); path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; return (fib_path_is_resolved(path_index)); } @@ -1612,9 +1604,9 @@ fib_path_resolve (fib_node_index_t path_index) } dpo_set(&path->fp_dpo, DPO_ADJACENCY, - fib_proto_to_dpo(path->fp_nh_proto), + path->fp_nh_proto, fib_path_attached_get_adj(path, - fib_proto_to_link(path->fp_nh_proto))); + dpo_proto_to_link(path->fp_nh_proto))); /* * become a child of the adjacency so we receive updates @@ -1639,7 +1631,7 @@ fib_path_resolve (fib_node_index_t path_index) ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib); - if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + if (DPO_PROTO_MPLS == path->fp_nh_proto) { fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, path->recursive.fp_nh.fp_eos, @@ -1680,8 +1672,7 @@ fib_path_resolve (fib_node_index_t path_index) /* * Resolve via the drop */ - dpo_copy(&path->fp_dpo, - drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&path->fp_dpo, 
drop_dpo_get(path->fp_nh_proto)); break; case FIB_PATH_TYPE_DEAG: { @@ -1696,7 +1687,7 @@ fib_path_resolve (fib_node_index_t path_index) LOOKUP_UNICAST); lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id, - fib_proto_to_dpo(path->fp_nh_proto), + path->fp_nh_proto, cast, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_CONFIG, @@ -1707,7 +1698,7 @@ fib_path_resolve (fib_node_index_t path_index) /* * Resolve via a receive DPO. */ - receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto), + receive_dpo_add_or_lock(path->fp_nh_proto, path->receive.fp_interface, &path->receive.fp_addr, &path->fp_dpo); @@ -1716,7 +1707,7 @@ fib_path_resolve (fib_node_index_t path_index) /* * Resolve via a receive DPO. */ - interface_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto), + interface_dpo_add_or_lock(path->fp_nh_proto, path->intf_rx.fp_interface, &path->fp_dpo); break; @@ -2035,7 +2026,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, /* * Create the adj needed for sending IP multicast traffic */ - ai = adj_mcast_add_or_lock(path->fp_nh_proto, + ai = adj_mcast_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto), fib_forw_chain_type_to_link_type(fct), path->attached.fp_interface); dpo_set(dpo, DPO_ADJACENCY, @@ -2187,7 +2178,7 @@ fib_path_encode (fib_node_index_t path_list_index, return (FIB_PATH_LIST_WALK_CONTINUE); } -fib_protocol_t +dpo_proto_t fib_path_get_proto (fib_node_index_t path_index) { fib_path_t *path; diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h index a34cb43f..f986e437 100644 --- a/src/vnet/fib/fib_path.h +++ b/src/vnet/fib/fib_path.h @@ -78,6 +78,11 @@ typedef enum fib_path_cfg_attribute_t_ { * The path is an interface recieve */ FIB_PATH_CFG_ATTRIBUTE_LOCAL, + /** + * The path is L2. i.e. the parameters therein are to be interpreted as + * pertaining to L2 config. + */ + FIB_PATH_CFG_ATTRIBUTE_L2, /** * Marker. Add new types before this one, then update it. 
*/ @@ -98,6 +103,7 @@ typedef enum fib_path_cfg_attribute_t_ { [FIB_PATH_CFG_ATTRIBUTE_ATTACHED] = "attached", \ [FIB_PATH_CFG_ATTRIBUTE_INTF_RX] = "interface-rx", \ [FIB_PATH_CFG_ATTRIBUTE_RPF_ID] = "rpf-id", \ + [FIB_PATH_CFG_ATTRIBUTE_L2] = "l2", \ } #define FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(_item) \ @@ -118,6 +124,7 @@ typedef enum fib_path_cfg_flags_t_ { FIB_PATH_CFG_FLAG_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_ATTACHED), FIB_PATH_CFG_FLAG_INTF_RX = (1 << FIB_PATH_CFG_ATTRIBUTE_INTF_RX), FIB_PATH_CFG_FLAG_RPF_ID = (1 << FIB_PATH_CFG_ATTRIBUTE_RPF_ID), + FIB_PATH_CFG_FLAG_L2 = (1 << FIB_PATH_CFG_ATTRIBUTE_L2), } __attribute__ ((packed)) fib_path_cfg_flags_t; @@ -131,7 +138,7 @@ extern u8 * format_fib_path(u8 * s, va_list * args); extern fib_node_index_t fib_path_create(fib_node_index_t pl_index, const fib_route_path_t *path); extern fib_node_index_t fib_path_create_special(fib_node_index_t pl_index, - fib_protocol_t nh_proto, + dpo_proto_t nh_proto, fib_path_cfg_flags_t flags, const dpo_id_t *dpo); @@ -148,7 +155,7 @@ extern int fib_path_is_recursive_constrained(fib_node_index_t path_index); extern int fib_path_is_exclusive(fib_node_index_t path_index); extern int fib_path_is_deag(fib_node_index_t path_index); extern int fib_path_is_looped(fib_node_index_t path_index); -extern fib_protocol_t fib_path_get_proto(fib_node_index_t path_index); +extern dpo_proto_t fib_path_get_proto(fib_node_index_t path_index); extern void fib_path_destroy(fib_node_index_t path_index); extern uword fib_path_hash(fib_node_index_t path_index); extern load_balance_path_t * fib_path_append_nh_for_multipath_hash( diff --git a/src/vnet/fib/fib_path_ext.c b/src/vnet/fib/fib_path_ext.c index 26f2b9b6..4438671b 100644 --- a/src/vnet/fib/fib_path_ext.c +++ b/src/vnet/fib/fib_path_ext.c @@ -191,6 +191,9 @@ fib_path_ext_stack (fib_path_ext_t *path_ext, case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: parent_fct = child_fct; break; + case FIB_FORW_CHAIN_TYPE_ETHERNET: + parent_fct = 
FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS; + break; default: return (nhs); break; diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c index 7a9c328c..f30fd7ea 100644 --- a/src/vnet/fib/fib_path_list.c +++ b/src/vnet/fib/fib_path_list.c @@ -611,7 +611,7 @@ fib_path_list_get_resolving_interface (fib_node_index_t path_list_index) return (sw_if_index); } -fib_protocol_t +dpo_proto_t fib_path_list_get_proto (fib_node_index_t path_list_index) { fib_path_list_t *path_list; @@ -753,7 +753,7 @@ fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf) } fib_node_index_t -fib_path_list_create_special (fib_protocol_t nh_proto, +fib_path_list_create_special (dpo_proto_t nh_proto, fib_path_list_flags_t flags, const dpo_id_t *dpo) { diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h index b4b6985b..a54b79e2 100644 --- a/src/vnet/fib/fib_path_list.h +++ b/src/vnet/fib/fib_path_list.h @@ -106,7 +106,7 @@ typedef enum fib_path_list_flags_t_ { extern fib_node_index_t fib_path_list_create(fib_path_list_flags_t flags, const fib_route_path_t *paths); -extern fib_node_index_t fib_path_list_create_special(fib_protocol_t nh_proto, +extern fib_node_index_t fib_path_list_create_special(dpo_proto_t nh_proto, fib_path_list_flags_t flags, const dpo_id_t *dpo); @@ -150,7 +150,7 @@ extern int fib_path_list_recursive_loop_detect(fib_node_index_t path_list_index, extern u32 fib_path_list_get_resolving_interface(fib_node_index_t path_list_index); extern int fib_path_list_is_looped(fib_node_index_t path_list_index); extern int fib_path_list_is_popular(fib_node_index_t path_list_index); -extern fib_protocol_t fib_path_list_get_proto(fib_node_index_t path_list_index); +extern dpo_proto_t fib_path_list_get_proto(fib_node_index_t path_list_index); extern u8 * fib_path_list_format(fib_node_index_t pl_index, u8 * s); extern index_t fib_path_list_lb_map_add_or_lock(fib_node_index_t pl_index, diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 
5aa02dd0..6b6cc5cb 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -505,7 +505,7 @@ fib_table_entry_path_add (u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 next_hop_fib_index, @@ -664,7 +664,7 @@ void fib_table_entry_path_remove (u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 next_hop_fib_index, @@ -755,7 +755,7 @@ fib_table_entry_update_one_path (u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 next_hop_fib_index, diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index a65fea74..579740e9 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -288,7 +288,7 @@ extern fib_node_index_t fib_table_entry_path_add(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 next_hop_fib_index, @@ -364,7 +364,7 @@ extern fib_node_index_t fib_table_entry_path_add2(u32 fib_index, extern void fib_table_entry_path_remove(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 next_hop_fib_index, @@ -471,7 +471,7 @@ extern fib_node_index_t fib_table_entry_update_one_path(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 
next_hop_fib_index, diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 4c891667..59d5da2a 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -833,7 +833,7 @@ fib_test_v4 (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -861,7 +861,7 @@ fib_test_v4 (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -911,7 +911,7 @@ fib_test_v4 (void) fib_table_entry_path_add(fib_index, &pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -963,7 +963,7 @@ fib_test_v4 (void) pfx.fp_len = 0; fib_table_entry_path_remove(fib_index, &pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // non-recursive path, so no FIB index @@ -1029,7 +1029,7 @@ fib_test_v4 (void) &pfx_11_11_11_11_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_1_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1095,7 +1095,7 @@ fib_test_v4 (void) &pfx_10_10_10_1_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_1_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1110,7 +1110,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_11_11_11_11_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_1_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1144,7 +1144,7 @@ fib_test_v4 (void) &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_2_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1181,7 +1181,7 @@ fib_test_v4 (void) &pfx_1_1_1_1_s_32, 
FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1214,7 +1214,7 @@ fib_test_v4 (void) &pfx_1_1_2_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1241,7 +1241,7 @@ fib_test_v4 (void) &pfx_1_1_2_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1280,7 +1280,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_2_0_s_24, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, @@ -1327,7 +1327,7 @@ fib_test_v4 (void) &bgp_100_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_1_1_1_1, ~0, // no index provided. fib_index, // nexthop in same fib as route @@ -1363,7 +1363,7 @@ fib_test_v4 (void) &bgp_101_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_1_1_1_1, ~0, // no index provided. fib_index, // nexthop in same fib as route @@ -1487,7 +1487,7 @@ fib_test_v4 (void) &bgp_200_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, // no index provided. 
fib_index, // nexthop in same fib as route @@ -1534,7 +1534,7 @@ fib_test_v4 (void) &pfx_1_2_3_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -1545,7 +1545,7 @@ fib_test_v4 (void) &pfx_1_2_3_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, ~0, @@ -1586,7 +1586,7 @@ fib_test_v4 (void) &pfx_1_2_3_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, ~0, @@ -1597,7 +1597,7 @@ fib_test_v4 (void) &pfx_1_2_3_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -1669,7 +1669,7 @@ fib_test_v4 (void) &pfx_6_6_6_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1688,7 +1688,7 @@ fib_test_v4 (void) &pfx_6_6_6_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1770,7 +1770,7 @@ fib_test_v4 (void) &pfx_6_6_6_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -1915,7 +1915,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_6_6_6_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -1995,7 +1995,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_6_6_6_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -2026,7 +2026,7 @@ fib_test_v4 (void) &bgp_44_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_2_3_4_s_32.fp_addr, ~0, fib_index, @@ -2037,7 +2037,7 @@ fib_test_v4 
(void) &bgp_44_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_2_3_5_s_32.fp_addr, ~0, fib_index, @@ -2107,7 +2107,7 @@ fib_test_v4 (void) &bgp_201_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_200_s_32.fp_addr, ~0, // no index provided. fib_index, // nexthop in same fib as route @@ -2151,7 +2151,7 @@ fib_test_v4 (void) &pfx_1_1_1_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -2209,7 +2209,7 @@ fib_test_v4 (void) &pfx_1_1_1_0_s_28, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -2244,7 +2244,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_0_s_28, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, @@ -2275,7 +2275,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_0_s_24, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -2316,7 +2316,7 @@ fib_test_v4 (void) &pfx_1_1_1_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -2351,7 +2351,7 @@ fib_test_v4 (void) &bgp_201_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -2362,7 +2362,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_201_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -2375,7 +2375,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_201_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_200_s_32.fp_addr, ~0, // no index provided. 
fib_index, @@ -2405,7 +2405,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_200_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2446,7 +2446,7 @@ fib_test_v4 (void) &bgp_102, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route @@ -2457,7 +2457,7 @@ fib_test_v4 (void) &bgp_102, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2483,7 +2483,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_102, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2492,7 +2492,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_102, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2507,7 +2507,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_100_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2516,7 +2516,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_101_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2546,7 +2546,7 @@ fib_test_v4 (void) &bgp_200_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, ~0, // no index provided. fib_index, // Same as route's FIB @@ -2593,7 +2593,7 @@ fib_test_v4 (void) &bgp_201_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, ~0, // no index provided. 
fib_index, @@ -2639,7 +2639,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_200_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, ~0, // no index provided. fib_index, // same as route's FIB @@ -2648,7 +2648,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_201_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, ~0, // no index provided. fib_index, // same as route's FIB @@ -2707,7 +2707,7 @@ fib_test_v4 (void) &pfx_5_5_5_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_6_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2718,7 +2718,7 @@ fib_test_v4 (void) &pfx_5_5_5_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_7_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2729,7 +2729,7 @@ fib_test_v4 (void) &pfx_5_5_5_7_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_5_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2768,7 +2768,7 @@ fib_test_v4 (void) &pfx_5_5_5_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -2801,7 +2801,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -2826,7 +2826,7 @@ fib_test_v4 (void) &pfx_5_5_5_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -2868,7 +2868,7 @@ fib_test_v4 (void) &pfx_5_5_5_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_6_s_32.fp_addr, ~0, // no index provided. 
fib_index, @@ -2892,7 +2892,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_5_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_6_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2901,7 +2901,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_7_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2910,7 +2910,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_7_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_5_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2919,7 +2919,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, ~0, // no index provided. fib_index, // same as route's FIB @@ -2943,7 +2943,7 @@ fib_test_v4 (void) &pfx_5_5_5_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_6_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2957,7 +2957,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_6_s_32.fp_addr, ~0, // no index provided. 
fib_index, // same as route's FIB @@ -2991,7 +2991,7 @@ fib_test_v4 (void) &pfx_23_23_23_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_23_23_23_23_s_32.fp_addr, ~0, // recursive fib_index, @@ -3021,7 +3021,7 @@ fib_test_v4 (void) &pfx_0_0_0_0_s_0, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_23_23_23_23_s_32.fp_addr, ~0, // recursive fib_index, @@ -3051,7 +3051,7 @@ fib_test_v4 (void) &bgp_200_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_1_1_1_1, ~0, fib_index, @@ -3081,7 +3081,7 @@ fib_test_v4 (void) &pfx_1_1_1_0_s_28, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3099,7 +3099,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3116,7 +3116,7 @@ fib_test_v4 (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3140,7 +3140,7 @@ fib_test_v4 (void) &pfx_1_1_1_3_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3152,7 +3152,7 @@ fib_test_v4 (void) &bgp_200_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_3_s_32.fp_addr, ~0, fib_index, @@ -3177,7 +3177,7 @@ fib_test_v4 (void) &bgp_78s[ii], FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_3_s_32.fp_addr, ~0, fib_index, @@ -3188,7 +3188,7 @@ fib_test_v4 (void) &bgp_78s[ii], FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_1_1_1_1, ~0, fib_index, @@ -3238,7 +3238,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, 
FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3277,7 +3277,7 @@ fib_test_v4 (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3307,7 +3307,7 @@ fib_test_v4 (void) &bgp_200_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -3320,7 +3320,7 @@ fib_test_v4 (void) &bgp_78s[ii], FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -3354,7 +3354,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3391,7 +3391,7 @@ fib_test_v4 (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3412,7 +3412,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_200_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -3421,7 +3421,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_200_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_1_1_1_1, ~0, fib_index, @@ -3430,7 +3430,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_200_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_3_s_32.fp_addr, ~0, fib_index, @@ -3481,7 +3481,7 @@ fib_test_v4 (void) &pfx_4_4_4_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3492,7 +3492,7 @@ fib_test_v4 (void) &pfx_4_4_4_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, @@ -3503,7 +3503,7 @@ fib_test_v4 (void) 
&pfx_4_4_4_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, tm->hw[0]->sw_if_index, ~0, @@ -3539,7 +3539,7 @@ fib_test_v4 (void) for (ii = 0; ii < 4; ii++) { fib_route_path_t r_path = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_addr = { .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii), }, @@ -3588,7 +3588,7 @@ fib_test_v4 (void) &pfx_4_4_4_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &zero_addr, ~0, fib_index, @@ -3648,7 +3648,7 @@ fib_test_v4 (void) &pfx_34_34_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, 0, @@ -3659,7 +3659,7 @@ fib_test_v4 (void) &pfx_34_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_34_34_1_1_s_32.fp_addr, ~0, fib_index, @@ -3670,7 +3670,7 @@ fib_test_v4 (void) &pfx_34_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_34_34_1_1_s_32.fp_addr, ~0, fib_index, @@ -3691,7 +3691,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_2_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3700,7 +3700,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3709,7 +3709,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_2_0_s_24, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3751,7 +3751,7 @@ fib_test_v4 (void) &pfx_4_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &zero_addr, tm->hw[0]->sw_if_index, fib_index, @@ -3805,7 +3805,7 @@ fib_test_v4 (void) &pfx_2001_s_64, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, 
tm->hw[0]->sw_if_index, fib_index, @@ -3863,7 +3863,7 @@ fib_test_v4 (void) &pfx_12_10_10_2_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_12_10_10_2_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3897,7 +3897,7 @@ fib_test_v4 (void) &pfx_10_10_10_127_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_127_s_32.fp_addr, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -3945,7 +3945,7 @@ fib_test_v4 (void) &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, tm->hw[0]->sw_if_index, fib_index, @@ -3956,7 +3956,7 @@ fib_test_v4 (void) &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, fib_index, @@ -3975,7 +3975,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, tm->hw[0]->sw_if_index, fib_index, @@ -3992,7 +3992,7 @@ fib_test_v4 (void) &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, tm->hw[0]->sw_if_index, fib_index, @@ -4011,7 +4011,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, fib_index, @@ -4030,7 +4030,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, tm->hw[0]->sw_if_index, fib_index, @@ -4269,7 +4269,7 @@ fib_test_v6 (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, NULL, tm->hw[0]->sw_if_index, ~0, @@ -4300,7 +4300,7 @@ fib_test_v6 (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP6, + 
DPO_PROTO_IP6, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -4345,7 +4345,7 @@ fib_test_v6 (void) fib_table_entry_path_add(fib_index, &pfx_0_0, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh_2001_2, tm->hw[0]->sw_if_index, ~0, @@ -4389,7 +4389,7 @@ fib_test_v6 (void) */ fib_table_entry_path_remove(fib_index, &pfx_0_0, FIB_SOURCE_API, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh_2001_2, tm->hw[0]->sw_if_index, ~0, @@ -4466,7 +4466,7 @@ fib_test_v6 (void) &pfx_2001_1_2_s_128, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &pfx_2001_1_2_s_128.fp_addr, tm->hw[0]->sw_if_index, ~0, @@ -4505,7 +4505,7 @@ fib_test_v6 (void) &pfx_2001_1_3_s_128, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &pfx_2001_1_3_s_128.fp_addr, tm->hw[0]->sw_if_index, ~0, @@ -4559,7 +4559,7 @@ fib_test_v6 (void) &pfx_2001_a_s_64, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh_2001_2, tm->hw[0]->sw_if_index, ~0, @@ -4573,7 +4573,7 @@ fib_test_v6 (void) &pfx_2001_b_s_64, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh_2001_2, tm->hw[0]->sw_if_index, ~0, @@ -4608,7 +4608,7 @@ fib_test_v6 (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh_2001_2, tm->hw[0]->sw_if_index, ~0, @@ -4646,7 +4646,7 @@ fib_test_v6 (void) &pfx_2001_c_s_64, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, NULL, tm->hw[0]->sw_if_index, ~0, @@ -4663,7 +4663,7 @@ fib_test_v6 (void) fib_table_entry_path_remove(fib_index, &pfx_2001_c_s_64, FIB_SOURCE_CLI, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, NULL, tm->hw[0]->sw_if_index, ~0, @@ -4748,7 +4748,7 @@ fib_test_v6 (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, NULL, tm->hw[1]->sw_if_index, ~0, @@ -4767,7 +4767,7 @@ fib_test_v6 (void) FIB_SOURCE_INTERFACE, 
(FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5095,7 +5095,7 @@ fib_test_ae (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, @@ -5111,7 +5111,7 @@ fib_test_ae (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5140,7 +5140,7 @@ fib_test_ae (void) &pfx_10_10_10_1_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_1_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5167,7 +5167,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5209,7 +5209,7 @@ fib_test_ae (void) &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_2_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5243,7 +5243,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5280,7 +5280,7 @@ fib_test_ae (void) &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_3_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5352,7 +5352,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_2_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5375,7 +5375,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5407,7 +5407,7 @@ fib_test_ae (void) 
&pfx_10_0_0_0_s_8, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_3_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5463,7 +5463,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_1_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5499,7 +5499,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5538,7 +5538,7 @@ fib_test_ae (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, @@ -5626,7 +5626,7 @@ fib_test_pref (void) * 2 high, 2 medium and 2 low preference non-recursive paths */ fib_route_path_t nr_path_hi_1 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[0]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5637,7 +5637,7 @@ fib_test_pref (void) }, }; fib_route_path_t nr_path_hi_2 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[0]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5648,7 +5648,7 @@ fib_test_pref (void) }, }; fib_route_path_t nr_path_med_1 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[1]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5659,7 +5659,7 @@ fib_test_pref (void) }, }; fib_route_path_t nr_path_med_2 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[1]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5670,7 +5670,7 @@ fib_test_pref (void) }, }; fib_route_path_t nr_path_low_1 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[2]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5681,7 +5681,7 @@ fib_test_pref (void) }, }; 
fib_route_path_t nr_path_low_2 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[2]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5897,7 +5897,7 @@ fib_test_pref (void) }, }; fib_route_path_t r_path_hi = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = 1, @@ -5906,7 +5906,7 @@ fib_test_pref (void) .frp_addr = pfx_1_1_1_1_s_32.fp_addr, }; fib_route_path_t r_path_med = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = 1, @@ -5915,7 +5915,7 @@ fib_test_pref (void) .frp_addr = pfx_1_1_1_2_s_32.fp_addr, }; fib_route_path_t r_path_low = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = 1, @@ -6099,7 +6099,7 @@ fib_test_label (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, @@ -6115,7 +6115,7 @@ fib_test_label (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -6145,7 +6145,7 @@ fib_test_label (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[1]->sw_if_index, ~0, @@ -6161,7 +6161,7 @@ fib_test_label (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6243,7 +6243,7 @@ fib_test_label (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -6282,7 +6282,7 @@ fib_test_label (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, 
&nh_10_10_11_1, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6360,7 +6360,7 @@ fib_test_label (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_11_2, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6440,7 +6440,7 @@ fib_test_label (void) &pfx_2_2_2_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, fib_index, @@ -6612,7 +6612,7 @@ fib_test_label (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -6669,7 +6669,7 @@ fib_test_label (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_11_1, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6711,7 +6711,7 @@ fib_test_label (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -6842,7 +6842,7 @@ fib_test_label (void) &pfx_1_1_1_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -6884,7 +6884,7 @@ fib_test_label (void) &pfx_2_2_2_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -6912,7 +6912,7 @@ fib_test_label (void) &pfx_1_1_1_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_11_1, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6945,7 +6945,7 @@ fib_test_label (void) &pfx_1_1_1_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_11_1, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6987,7 +6987,7 @@ fib_test_label (void) &pfx_2_2_2_3_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + 
DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, fib_index, @@ -7031,7 +7031,7 @@ fib_test_label (void) &pfx_2_2_2_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, fib_index, @@ -7081,7 +7081,7 @@ fib_test_label (void) &pfx_2_2_5_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_11_1, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -7689,7 +7689,7 @@ fib_test_bfd (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -7706,7 +7706,7 @@ fib_test_bfd (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -7780,7 +7780,7 @@ fib_test_bfd (void) &pfx_10_10_10_1_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -7819,7 +7819,7 @@ fib_test_bfd (void) &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -7851,7 +7851,7 @@ fib_test_bfd (void) &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -7907,7 +7907,7 @@ fib_test_bfd (void) &pfx_200_0_0_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, ~0, // recursive 0, // default fib index @@ -7926,7 +7926,7 @@ fib_test_bfd (void) &pfx_200_0_0_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, ~0, // recursive 0, // default fib index @@ -8065,7 +8065,7 @@ fib_test_bfd (void) &pfx_5_5_5_5_s_32, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, 
&nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8096,7 +8096,7 @@ fib_test_bfd (void) &pfx_5_5_5_5_s_32, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8234,7 +8234,7 @@ lfib_test (void) &pfx, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &zero_addr, ~0, fib_index, @@ -8285,7 +8285,7 @@ lfib_test (void) &pfx, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &zero_addr, ~0, lfib_index, @@ -8363,7 +8363,7 @@ lfib_test (void) &pfx_1200, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8389,7 +8389,7 @@ lfib_test (void) }, }; fib_route_path_t *rpaths = NULL, rpath = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, .frp_local_label = 1200, .frp_eos = MPLS_NON_EOS, .frp_sw_if_index = ~0, // recurive @@ -8545,7 +8545,7 @@ lfib_test (void) &pfx_2500, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8590,7 +8590,7 @@ lfib_test (void) &pfx_3500, FIB_SOURCE_API, FIB_ENTRY_FLAG_MULTICAST, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8610,7 +8610,7 @@ lfib_test (void) &pfx_3500, FIB_SOURCE_API, FIB_ENTRY_FLAG_MULTICAST, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8637,7 +8637,7 @@ lfib_test (void) &pfx_3500, FIB_SOURCE_API, FIB_ENTRY_FLAG_MULTICAST, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, 5, // rpf-id 0, // default table diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h index a209ff3c..f11a55da 100644 --- a/src/vnet/fib/fib_types.h +++ b/src/vnet/fib/fib_types.h @@ -32,9 +32,9 @@ typedef u32 fib_node_index_t; * Protocol Type. 
packed so it consumes a u8 only */ typedef enum fib_protocol_t_ { - FIB_PROTOCOL_IP4 = 0, - FIB_PROTOCOL_IP6, - FIB_PROTOCOL_MPLS, + FIB_PROTOCOL_IP4 = DPO_PROTO_IP4, + FIB_PROTOCOL_IP6 = DPO_PROTO_IP6, + FIB_PROTOCOL_MPLS = DPO_PROTO_MPLS, } __attribute__ ((packed)) fib_protocol_t; #define FIB_PROTOCOLS { \ @@ -338,7 +338,7 @@ typedef struct fib_route_path_t_ { * The protocol of the address below. We need this since the all * zeros address is ambiguous. */ - fib_protocol_t frp_proto; + dpo_proto_t frp_proto; union { /** diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c index df7e9388..5694bb2f 100644 --- a/src/vnet/interface_format.c +++ b/src/vnet/interface_format.c @@ -165,9 +165,15 @@ format_vnet_sw_if_index_name (u8 * s, va_list * args) { vnet_main_t *vnm = va_arg (*args, vnet_main_t *); u32 sw_if_index = va_arg (*args, u32); - return format (s, "%U", - format_vnet_sw_interface_name, vnm, - vnet_get_sw_interface (vnm, sw_if_index)); + vnet_sw_interface_t *si; + + si = vnet_get_sw_interface_safe (vnm, sw_if_index); + + if (NULL == si) + { + return format (s, "DELETED"); + } + return format (s, "%U", format_vnet_sw_interface_name, vnm, si); } u8 * diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index ee17ea88..7a8d7a0c 100755 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -724,7 +724,7 @@ ip4_add_interface_routes (u32 sw_if_index, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, /* No next-hop address */ NULL, sw_if_index, @@ -767,7 +767,7 @@ ip4_add_interface_routes (u32 sw_if_index, fib_table_entry_update_one_path (fib_index, &net_pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &net_pfx.fp_addr, sw_if_index, // invalid FIB index @@ -803,7 +803,7 @@ ip4_add_interface_routes (u32 sw_if_index, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + 
DPO_PROTO_IP4, &pfx.fp_addr, sw_if_index, // invalid FIB index diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index bc66416e..8ae08a01 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -355,7 +355,7 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, /* No next-hop address */ NULL, sw_if_index, /* invalid FIB index */ @@ -390,7 +390,7 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &pfx.fp_addr, sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index e8eebd4e..6a9139ab 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -284,7 +284,7 @@ ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, (ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index), &pfx, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &pfx.fp_addr, n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); pool_put (nm->neighbor_pool, n); @@ -645,7 +645,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, n->fib_entry_index = fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP6, &pfx.fp_addr, + DPO_PROTO_IP6, &pfx.fp_addr, n->key.sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); } @@ -776,7 +776,7 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, (ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index), &pfx, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &pfx.fp_addr, n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); } pool_put (nm->neighbor_pool, n); @@ -4110,7 +4110,7 @@ ip6_neighbor_proxy_add_del (u32 sw_if_index, ip6_address_t * addr, u8 is_del) fib_table_entry_path_remove (fib_index, &pfx, FIB_SOURCE_IP6_ND_PROXY, - 
FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh, sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); @@ -4124,7 +4124,7 @@ ip6_neighbor_proxy_add_del (u32 sw_if_index, ip6_address_t * addr, u8 is_del) &pfx, FIB_SOURCE_IP6_ND_PROXY, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh, sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index 4cbf75a3..0676a387 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -156,9 +156,9 @@ copy_fib_next_hop (fib_route_path_encode_t * api_rpath, void *fp_arg) int is_ip4; vl_api_fib_path_t *fp = (vl_api_fib_path_t *) fp_arg; - if (api_rpath->rpath.frp_proto == FIB_PROTOCOL_IP4) + if (api_rpath->rpath.frp_proto == DPO_PROTO_IP4) fp->afi = IP46_TYPE_IP4; - else if (api_rpath->rpath.frp_proto == FIB_PROTOCOL_IP6) + else if (api_rpath->rpath.frp_proto == DPO_PROTO_IP6) fp->afi = IP46_TYPE_IP6; else { @@ -714,7 +714,7 @@ add_del_route_t_handler (u8 is_multipath, u8 is_rpf_id, u32 fib_index, const fib_prefix_t * prefix, - u8 next_hop_proto_is_ip4, + dpo_proto_t next_hop_proto, const ip46_address_t * next_hop, u32 next_hop_sw_if_index, u8 next_hop_fib_index, @@ -726,8 +726,7 @@ add_del_route_t_handler (u8 is_multipath, vnet_classify_main_t *cm = &vnet_classify_main; fib_route_path_flags_t path_flags = FIB_ROUTE_PATH_FLAG_NONE; fib_route_path_t path = { - .frp_proto = (next_hop_proto_is_ip4 ? - FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6), + .frp_proto = next_hop_proto, .frp_addr = (NULL == next_hop ? 
zero_addr : *next_hop), .frp_sw_if_index = next_hop_sw_if_index, .frp_fib_index = next_hop_fib_index, @@ -740,7 +739,7 @@ add_del_route_t_handler (u8 is_multipath, if (MPLS_LABEL_INVALID != next_hop_via_label) { - path.frp_proto = FIB_PROTOCOL_MPLS; + path.frp_proto = DPO_PROTO_MPLS; path.frp_local_label = next_hop_via_label; path.frp_eos = MPLS_NON_EOS; } @@ -855,7 +854,7 @@ int add_del_route_check (fib_protocol_t table_proto, u32 table_id, u32 next_hop_sw_if_index, - fib_protocol_t next_hop_table_proto, + dpo_proto_t next_hop_table_proto, u32 next_hop_table_id, u8 create_missing_tables, u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index) @@ -887,11 +886,18 @@ add_del_route_check (fib_protocol_t table_proto, } else { + fib_protocol_t fib_nh_proto; + + if (next_hop_table_proto > DPO_PROTO_MPLS) + return (0); + + fib_nh_proto = dpo_proto_to_fib (next_hop_table_proto); + if (is_rpf_id) - *next_hop_fib_index = mfib_table_find (next_hop_table_proto, + *next_hop_fib_index = mfib_table_find (fib_nh_proto, ntohl (next_hop_table_id)); else - *next_hop_fib_index = fib_table_find (next_hop_table_proto, + *next_hop_fib_index = fib_table_find (fib_nh_proto, ntohl (next_hop_table_id)); if (~0 == *next_hop_fib_index) @@ -900,12 +906,12 @@ add_del_route_check (fib_protocol_t table_proto, { if (is_rpf_id) *next_hop_fib_index = - mfib_table_find_or_create_and_lock (next_hop_table_proto, + mfib_table_find_or_create_and_lock (fib_nh_proto, ntohl (next_hop_table_id)); else *next_hop_fib_index = - fib_table_find_or_create_and_lock (next_hop_table_proto, + fib_table_find_or_create_and_lock (fib_nh_proto, ntohl (next_hop_table_id)); } @@ -930,7 +936,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) rv = add_del_route_check (FIB_PROTOCOL_IP4, mp->table_id, mp->next_hop_sw_if_index, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, mp->next_hop_table_id, mp->create_vrf_if_needed, 0, &fib_index, &next_hop_fib_index); @@ -970,7 +976,7 @@ ip4_add_del_route_t_handler 
(vl_api_ip_add_del_route_t * mp) mp->classify_table_index, mp->is_resolve_host, mp->is_resolve_attached, 0, 0, - fib_index, &pfx, 1, + fib_index, &pfx, DPO_PROTO_IP4, &nh, ntohl (mp->next_hop_sw_if_index), next_hop_fib_index, @@ -990,7 +996,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) rv = add_del_route_check (FIB_PROTOCOL_IP6, mp->table_id, mp->next_hop_sw_if_index, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, mp->next_hop_table_id, mp->create_vrf_if_needed, 0, &fib_index, &next_hop_fib_index); @@ -1030,7 +1036,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->classify_table_index, mp->is_resolve_host, mp->is_resolve_attached, 0, 0, - fib_index, &pfx, 0, + fib_index, &pfx, DPO_PROTO_IP6, &nh, ntohl (mp->next_hop_sw_if_index), next_hop_fib_index, mp->next_hop_weight, @@ -1106,7 +1112,7 @@ mroute_add_del_handler (u8 is_add, fib_route_path_t path = { .frp_sw_if_index = next_hop_sw_if_index, - .frp_proto = prefix->fp_proto, + .frp_proto = fib_proto_to_dpo (prefix->fp_proto), }; if (is_local) diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 533d010a..41e46070 100755 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -423,7 +423,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, { rpath.frp_weight = 1; rpath.frp_eos = MPLS_NON_EOS; - rpath.frp_proto = FIB_PROTOCOL_MPLS; + rpath.frp_proto = DPO_PROTO_MPLS; rpath.frp_sw_if_index = ~0; vec_add1 (rpaths, rpath); } @@ -449,7 +449,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1 (rpaths, rpath); } @@ -460,7 +460,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1 (rpaths, rpath); } else if (unformat (line_input, "weight %u", &weight)) @@ -479,7 +479,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, { rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; 
- rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1 (rpaths, rpath); } else if (unformat (line_input, "via %U next-hop-table %d", @@ -488,7 +488,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, { rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1 (rpaths, rpath); } else if (unformat (line_input, "via %U", @@ -501,7 +501,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, rpath.frp_fib_index = table_id; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1 (rpaths, rpath); } else if (unformat (line_input, "via %U", @@ -510,13 +510,13 @@ vnet_ip_route_cmd (vlib_main_t * vm, rpath.frp_fib_index = table_id; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1 (rpaths, rpath); } else if (unformat (line_input, "lookup in table %d", &rpath.frp_fib_index)) { - rpath.frp_proto = pfx.fp_proto; + rpath.frp_proto = fib_proto_to_dpo (pfx.fp_proto); rpath.frp_sw_if_index = ~0; vec_add1 (rpaths, rpath); } @@ -526,7 +526,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = prefixs[0].fp_proto; + rpath.frp_proto = fib_proto_to_dpo (prefixs[0].fp_proto); vec_add1 (rpaths, rpath); } else if (vec_len (prefixs) > 0 && diff --git a/src/vnet/lisp-gpe/lisp_gpe.c b/src/vnet/lisp-gpe/lisp_gpe.c index 0acc7349..018895ad 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.c +++ b/src/vnet/lisp-gpe/lisp_gpe.c @@ -454,7 +454,7 @@ vnet_gpe_add_del_native_fwd_rpath (vnet_gpe_native_fwd_rpath_args_t * a) fib_route_path_t *rpath; u8 ip_version; - ip_version = a->rpath.frp_proto == FIB_PROTOCOL_IP4 ? IP4 : IP6; + ip_version = a->rpath.frp_proto == DPO_PROTO_IP4 ? 
IP4 : IP6; if (a->is_add) { @@ -511,7 +511,7 @@ gpe_native_forward_command_fn (vlib_main_t * vm, unformat_input_t * input, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; } else if (unformat (line_input, "via %U %U", unformat_ip6_address, @@ -520,21 +520,21 @@ gpe_native_forward_command_fn (vlib_main_t * vm, unformat_input_t * input, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; } else if (unformat (line_input, "via %U", unformat_ip4_address, &rpath.frp_addr.ip4)) { rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; } else if (unformat (line_input, "via %U", unformat_ip6_address, &rpath.frp_addr.ip6)) { rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; } else { @@ -549,7 +549,8 @@ gpe_native_forward_command_fn (vlib_main_t * vm, unformat_input_t * input, } else { - rpath.frp_fib_index = fib_table_find (rpath.frp_proto, table_id); + rpath.frp_fib_index = + fib_table_find (dpo_proto_to_fib (rpath.frp_proto), table_id); if ((u32) ~ 0 == rpath.frp_fib_index) { error = clib_error_return (0, "Nonexistent table id %d", table_id); diff --git a/src/vnet/lisp-gpe/lisp_gpe_api.c b/src/vnet/lisp-gpe/lisp_gpe_api.c index f1663699..4367a719 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_api.c +++ b/src/vnet/lisp-gpe/lisp_gpe_api.c @@ -455,10 +455,10 @@ static void clib_memcpy (&a->rpath.frp_addr.ip6, mp->nh_addr, sizeof (ip6_address_t)); a->is_add = mp->is_add; - a->rpath.frp_proto = mp->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; - a->rpath.frp_fib_index = fib_table_find (a->rpath.frp_proto, - clib_net_to_host_u32 - (mp->table_id)); + a->rpath.frp_proto = mp->is_ip4 ? 
DPO_PROTO_IP4 : DPO_PROTO_IP6; + a->rpath.frp_fib_index = + fib_table_find (dpo_proto_to_fib (a->rpath.frp_proto), + clib_net_to_host_u32 (mp->table_id)); if (~0 == a->rpath.frp_fib_index) { rv = VNET_API_ERROR_INVALID_VALUE; @@ -484,7 +484,7 @@ gpe_native_fwd_rpaths_copy (vl_api_gpe_native_fwd_rpath_t * dst, vec_foreach (e, src) { memset (&dst[i], 0, sizeof (*dst)); - table = fib_table_get (e->frp_fib_index, e->frp_proto); + table = fib_table_get (e->frp_fib_index, dpo_proto_to_fib (e->frp_proto)); dst[i].fib_index = table->ft_table_id; dst[i].nh_sw_if_index = e->frp_sw_if_index; dst[i].is_ip4 = is_ip4; diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c index 395b493a..ac048149 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c +++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c @@ -225,6 +225,7 @@ lisp_gpe_mk_fib_paths (const lisp_fwd_path_t * paths) { const lisp_gpe_adjacency_t *ladj; fib_route_path_t *rpaths = NULL; + fib_protocol_t fp; u8 best_priority; u32 ii; @@ -239,9 +240,9 @@ lisp_gpe_mk_fib_paths (const lisp_fwd_path_t * paths) ladj = lisp_gpe_adjacency_get (paths[ii].lisp_adj); - ip_address_to_46 (&ladj->remote_rloc, - &rpaths[ii].frp_addr, &rpaths[ii].frp_proto); + ip_address_to_46 (&ladj->remote_rloc, &rpaths[ii].frp_addr, &fp); + rpaths[ii].frp_proto = fib_proto_to_dpo (fp); rpaths[ii].frp_sw_if_index = ladj->sw_if_index; rpaths[ii].frp_weight = (paths[ii].weight ? 
paths[ii].weight : 1); } diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c index 5c6f8126..5e48e919 100644 --- a/src/vnet/mfib/ip6_mfib.c +++ b/src/vnet/mfib/ip6_mfib.c @@ -158,7 +158,7 @@ ip6_create_mfib_with_table_id (u32 table_id) .fp_proto = FIB_PROTOCOL_IP6, }; const fib_route_path_t path_for_us = { - .frp_proto = FIB_PROTOCOL_IP6, + .frp_proto = DPO_PROTO_IP6, .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, @@ -222,7 +222,7 @@ ip6_mfib_table_destroy (ip6_mfib_t *mfib) .fp_proto = FIB_PROTOCOL_IP6, }; const fib_route_path_t path_for_us = { - .frp_proto = FIB_PROTOCOL_IP6, + .frp_proto = DPO_PROTO_IP6, .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, @@ -259,7 +259,7 @@ void ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable) { const fib_route_path_t path = { - .frp_proto = FIB_PROTOCOL_IP6, + .frp_proto = DPO_PROTO_IP6, .frp_addr = zero_addr, .frp_sw_if_index = sw_if_index, .frp_fib_index = ~0, diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c index cf25b67a..b37f8825 100644 --- a/src/vnet/mfib/mfib_entry.c +++ b/src/vnet/mfib/mfib_entry.c @@ -764,18 +764,16 @@ mfib_entry_update (fib_node_index_t mfib_entry_index, * entry */ fib_node_index_t old_pl_index; - fib_protocol_t fp; + dpo_proto_t dp; dpo_id_t dpo = DPO_INVALID; - fp = mfib_entry_get_proto(mfib_entry); + dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry)); old_pl_index = msrc->mfes_pl; - dpo_set(&dpo, DPO_REPLICATE, - fib_proto_to_dpo(fp), - repi); + dpo_set(&dpo, DPO_REPLICATE, dp, repi); msrc->mfes_pl = - fib_path_list_create_special(fp, + fib_path_list_create_special(dp, FIB_PATH_LIST_FLAG_EXCLUSIVE, &dpo); diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c index 7c92ae99..57787eca 100644 --- a/src/vnet/mfib/mfib_test.c +++ b/src/vnet/mfib/mfib_test.c @@ -387,7 +387,7 @@ mfib_test_i (fib_protocol_t PROTO, fib_route_path_t path_via_if0 = { - .frp_proto = PROTO, + .frp_proto = 
fib_proto_to_dpo(PROTO), .frp_addr = zero_addr, .frp_sw_if_index = tm->hw[0]->sw_if_index, .frp_fib_index = ~0, @@ -411,7 +411,7 @@ mfib_test_i (fib_protocol_t PROTO, MFIB_ITF_FLAG_ACCEPT)); fib_route_path_t path_via_if1 = { - .frp_proto = PROTO, + .frp_proto = fib_proto_to_dpo(PROTO), .frp_addr = zero_addr, .frp_sw_if_index = tm->hw[1]->sw_if_index, .frp_fib_index = ~0, @@ -419,7 +419,7 @@ mfib_test_i (fib_protocol_t PROTO, .frp_flags = 0, }; fib_route_path_t path_via_if2 = { - .frp_proto = PROTO, + .frp_proto = fib_proto_to_dpo(PROTO), .frp_addr = zero_addr, .frp_sw_if_index = tm->hw[2]->sw_if_index, .frp_fib_index = ~0, @@ -427,7 +427,7 @@ mfib_test_i (fib_protocol_t PROTO, .frp_flags = 0, }; fib_route_path_t path_via_if3 = { - .frp_proto = PROTO, + .frp_proto = fib_proto_to_dpo(PROTO), .frp_addr = zero_addr, .frp_sw_if_index = tm->hw[3]->sw_if_index, .frp_fib_index = ~0, @@ -435,7 +435,7 @@ mfib_test_i (fib_protocol_t PROTO, .frp_flags = 0, }; fib_route_path_t path_for_us = { - .frp_proto = PROTO, + .frp_proto = fib_proto_to_dpo(PROTO), .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, @@ -1121,7 +1121,7 @@ mfib_test_i (fib_protocol_t PROTO, &pfx_3500, FIB_SOURCE_API, FIB_ENTRY_FLAG_MULTICAST, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1138,7 +1138,7 @@ mfib_test_i (fib_protocol_t PROTO, * An (S,G) that resolves via the mLDP head-end */ fib_route_path_t path_via_mldp = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, .frp_local_label = pfx_3500.fp_label, .frp_eos = MPLS_EOS, .frp_sw_if_index = 0xffffffff, diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api index 67f1045d..5973a0a6 100644 --- a/src/vnet/mpls/mpls.api +++ b/src/vnet/mpls/mpls.api @@ -156,7 +156,7 @@ manual_endian manual_print define mpls_tunnel_details @param mr_is_interface_rx - Interface Receive path @param mr_is_interface_rx - RPF-ID Receive path. 
The next-hop interface is used as the RPF-ID - @param mr_next_hop_proto_is_ip4 - The next-hop is IPV4 + @param mr_next_hop_proto - The next-hop protocol, of type dpo_proto_t @param mr_next_hop_weight - The weight, for UCMP @param mr_next_hop[16] - the nextop address @param mr_next_hop_sw_if_index - the next-hop SW interface @@ -182,7 +182,7 @@ autoreply define mpls_route_add_del u8 mr_is_resolve_attached; u8 mr_is_interface_rx; u8 mr_is_rpf_id; - u8 mr_next_hop_proto_is_ip4; + u8 mr_next_hop_proto; u8 mr_next_hop_weight; u8 mr_next_hop_preference; u8 mr_next_hop[16]; diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 068d31f4..266ba42c 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -261,7 +261,7 @@ vnet_mpls_local_label (vlib_main_t * vm, &rpath.frp_sw_if_index, &rpath.frp_weight)) { - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1(rpaths, rpath); } @@ -272,7 +272,7 @@ vnet_mpls_local_label (vlib_main_t * vm, &rpath.frp_sw_if_index, &rpath.frp_weight)) { - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1(rpaths, rpath); } @@ -283,7 +283,7 @@ vnet_mpls_local_label (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1(rpaths, rpath); } else if (unformat (line_input, "rx-ip4 %U", @@ -291,7 +291,7 @@ vnet_mpls_local_label (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX; vec_add1(rpaths, rpath); } @@ -302,7 +302,7 @@ vnet_mpls_local_label (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1(rpaths, rpath); } else if (unformat (line_input, "via %U next-hop-table %d", @@ -312,7 +312,7 @@ vnet_mpls_local_label (vlib_main_t * vm, { rpath.frp_weight = 1; 
rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1(rpaths, rpath); } else if (unformat (line_input, "via %U next-hop-table %d", @@ -322,7 +322,7 @@ vnet_mpls_local_label (vlib_main_t * vm, { rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1(rpaths, rpath); } else if (unformat (line_input, "via %U", @@ -336,7 +336,7 @@ vnet_mpls_local_label (vlib_main_t * vm, rpath.frp_fib_index = table_id; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1(rpaths, rpath); } else if (unformat (line_input, "via %U", @@ -346,7 +346,7 @@ vnet_mpls_local_label (vlib_main_t * vm, rpath.frp_fib_index = table_id; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1(rpaths, rpath); } else if (unformat (line_input, "%d", &local_label)) @@ -355,7 +355,7 @@ vnet_mpls_local_label (vlib_main_t * vm, "ip4-lookup-in-table %d", &rpath.frp_fib_index)) { - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; pfx.fp_payload_proto = DPO_PROTO_IP4; vec_add1(rpaths, rpath); @@ -364,7 +364,7 @@ vnet_mpls_local_label (vlib_main_t * vm, "ip6-lookup-in-table %d", &rpath.frp_fib_index)) { - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; vec_add1(rpaths, rpath); pfx.fp_payload_proto = DPO_PROTO_IP6; @@ -373,11 +373,21 @@ vnet_mpls_local_label (vlib_main_t * vm, "mpls-lookup-in-table %d", &rpath.frp_fib_index)) { - rpath.frp_proto = FIB_PROTOCOL_MPLS; + rpath.frp_proto = DPO_PROTO_MPLS; rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; pfx.fp_payload_proto = DPO_PROTO_MPLS; vec_add1(rpaths, rpath); } + else if (unformat (line_input, + "l2-input-on %U", + unformat_vnet_sw_interface, 
vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_proto = DPO_PROTO_ETHERNET; + pfx.fp_payload_proto = DPO_PROTO_ETHERNET; + rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "out-label %U", unformat_mpls_unicast_label, &out_label)) @@ -440,7 +450,7 @@ vnet_mpls_local_label (vlib_main_t * vm, pfx.fp_proto = FIB_PROTOCOL_MPLS; pfx.fp_len = 21; pfx.fp_label = local_label; - pfx.fp_payload_proto = fib_proto_to_dpo(rpaths[0].frp_proto); + pfx.fp_payload_proto = rpaths[0].frp_proto; /* * the CLI parsing stored table Ids, swap to FIB indicies diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index 92fb24a6..737299e6 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -144,14 +144,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, }; if (pfx.fp_eos) { - if (mp->mr_next_hop_proto_is_ip4) - { - pfx.fp_payload_proto = DPO_PROTO_IP4; - } - else - { - pfx.fp_payload_proto = DPO_PROTO_IP6; - } + pfx.fp_payload_proto = mp->mr_next_hop_proto; } else { @@ -161,7 +154,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, rv = add_del_route_check (FIB_PROTOCOL_MPLS, mp->mr_table_id, mp->mr_next_hop_sw_if_index, - dpo_proto_to_fib (pfx.fp_payload_proto), + pfx.fp_payload_proto, mp->mr_next_hop_table_id, mp->mr_create_table_if_needed, mp->mr_is_rpf_id, @@ -173,9 +166,9 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, ip46_address_t nh; memset (&nh, 0, sizeof (nh)); - if (mp->mr_next_hop_proto_is_ip4) + if (DPO_PROTO_IP4 == mp->mr_next_hop_proto) memcpy (&nh.ip4, mp->mr_next_hop, sizeof (nh.ip4)); - else + else if (DPO_PROTO_IP6 == mp->mr_next_hop_proto) memcpy (&nh.ip6, mp->mr_next_hop, sizeof (nh.ip6)); n_labels = mp->mr_next_hop_n_out_labels; @@ -202,7 +195,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, mp->mr_is_interface_rx, mp->mr_is_rpf_id, fib_index, &pfx, - mp->mr_next_hop_proto_is_ip4, + mp->mr_next_hop_proto, &nh, ntohl (mp->mr_next_hop_sw_if_index), next_hop_fib_index, 
mp->mr_next_hop_weight, @@ -243,13 +236,13 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) if (mp->mt_next_hop_proto_is_ip4) { - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; clib_memcpy (&rpath.frp_addr.ip4, mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); } else { - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; clib_memcpy (&rpath.frp_addr.ip6, mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); } diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c index c025cc58..6452a60b 100644 --- a/src/vnet/mpls/mpls_tunnel.c +++ b/src/vnet/mpls/mpls_tunnel.c @@ -171,7 +171,7 @@ mpls_tunnel_mk_lb (mpls_tunnel_t *mt, vec_validate(ctx.next_hops, fib_path_list_get_n_paths(mt->mt_path_list)); vec_reset_length(ctx.next_hops); - lb_proto = vnet_link_to_dpo_proto(linkt); + lb_proto = fib_forw_chain_type_to_dpo_proto(fct); fib_path_list_walk(mt->mt_path_list, mpls_tunnel_collect_forwarding, @@ -313,12 +313,34 @@ mpls_tunnel_restack (mpls_tunnel_t *mt) /* * walk all the adjacencies on the MPLS interface and restack them */ - FOR_EACH_FIB_PROTOCOL(proto) + if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2) { - adj_nbr_walk(mt->mt_sw_if_index, - proto, - mpls_adj_walk_cb, - NULL); + /* + * Stack a load-balance that drops, whilst we have no paths + */ + vnet_hw_interface_t * hi; + dpo_id_t dpo = DPO_INVALID; + + mpls_tunnel_mk_lb(mt, + VNET_LINK_MPLS, + FIB_FORW_CHAIN_TYPE_ETHERNET, + &dpo); + + hi = vnet_get_hw_interface(vnet_get_main(), mt->mt_hw_if_index); + dpo_stack_from_node(hi->tx_node_index, + &mt->mt_l2_lb, + &dpo); + dpo_reset(&dpo); + } + else + { + FOR_EACH_FIB_PROTOCOL(proto) + { + adj_nbr_walk(mt->mt_sw_if_index, + proto, + mpls_adj_walk_cb, + NULL); + } } } @@ -495,7 +517,7 @@ mpls_tunnel_tx (vlib_main_t * vm, b0 = vlib_get_buffer(vm, bi0); - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_lb.dpoi_index; if (PREDICT_FALSE(b0->flags & 
VLIB_BUFFER_IS_TRACED)) { @@ -506,7 +528,7 @@ mpls_tunnel_tx (vlib_main_t * vm, vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, - bi0, mt->mt_l2_tx_arc); + bi0, mt->mt_l2_lb.dpoi_next_node); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -565,8 +587,7 @@ vnet_mpls_tunnel_del (u32 sw_if_index) if (FIB_NODE_INDEX_INVALID != mt->mt_path_list) fib_path_list_child_remove(mt->mt_path_list, mt->mt_sibling_index); - if (ADJ_INDEX_INVALID != mt->mt_l2_adj) - adj_unlock(mt->mt_l2_adj); + dpo_reset(&mt->mt_l2_lb); vec_add1 (mpls_tunnel_free_hw_if_indices, mt->mt_hw_if_index); pool_put(mpls_tunnel_pool, mt); @@ -587,12 +608,13 @@ vnet_mpls_tunnel_create (u8 l2_only, memset (mt, 0, sizeof (*mt)); mti = mt - mpls_tunnel_pool; fib_node_init(&mt->mt_node, FIB_NODE_TYPE_MPLS_TUNNEL); - mt->mt_l2_adj = ADJ_INDEX_INVALID; mt->mt_path_list = FIB_NODE_INDEX_INVALID; mt->mt_sibling_index = FIB_NODE_INDEX_INVALID; if (is_multicast) mt->mt_flags |= MPLS_TUNNEL_FLAG_MCAST; + if (l2_only) + mt->mt_flags |= MPLS_TUNNEL_FLAG_L2; /* * Create a new, or re=use and old, tunnel HW interface @@ -614,7 +636,7 @@ vnet_mpls_tunnel_create (u8 l2_only, mti, mpls_tunnel_hw_interface_class.index, mti); - hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index); + hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index); } /* @@ -624,19 +646,6 @@ vnet_mpls_tunnel_create (u8 l2_only, vec_validate_init_empty(mpls_tunnel_db, mt->mt_sw_if_index, ~0); mpls_tunnel_db[mt->mt_sw_if_index] = mti; - if (l2_only) - { - mt->mt_l2_adj = - adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list), - VNET_LINK_ETHERNET, - &zero_addr, - mt->mt_sw_if_index); - - mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(), - hi->tx_node_index, - "adj-l2-midchain"); - } - return (mt->mt_sw_if_index); } @@ -803,7 +812,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = 
DPO_PROTO_IP4; } else if (unformat (line_input, "via %U %U", @@ -813,7 +822,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; } else if (unformat (line_input, "via %U", unformat_ip6_address, @@ -822,7 +831,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, rpath.frp_fib_index = 0; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; } else if (unformat (line_input, "via %U", unformat_ip4_address, @@ -831,7 +840,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, rpath.frp_fib_index = 0; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; } else if (unformat (line_input, "l2-only")) l2_only = 1; @@ -915,6 +924,14 @@ format_mpls_tunnel (u8 * s, va_list * args) s = format(s, "%U", format_fib_path_ext_list, &mt->mt_path_exts); s = format(s, "\n"); + if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2) + { + s = format(s, " forwarding: %U\n", + format_fib_forw_chain_type, + FIB_FORW_CHAIN_TYPE_ETHERNET); + s = format(s, " %U\n", format_dpo_id, &mt->mt_l2_lb, 2); + } + return (s); } diff --git a/src/vnet/mpls/mpls_tunnel.h b/src/vnet/mpls/mpls_tunnel.h index 4cb0a860..285817c3 100644 --- a/src/vnet/mpls/mpls_tunnel.h +++ b/src/vnet/mpls/mpls_tunnel.h @@ -22,15 +22,20 @@ typedef enum mpls_tunnel_attribute_t_ { MPLS_TUNNEL_ATTRIBUTE_FIRST = 0, + /** + * @brief The tunnel is L2 only + */ + MPLS_TUNNEL_ATTRIBUTE_L2 = MPLS_TUNNEL_ATTRIBUTE_FIRST, /** * @brief The tunnel has an underlying multicast LSP */ - MPLS_TUNNEL_ATTRIBUTE_MCAST = MPLS_TUNNEL_ATTRIBUTE_FIRST, + MPLS_TUNNEL_ATTRIBUTE_MCAST, MPLS_TUNNEL_ATTRIBUTE_LAST = MPLS_TUNNEL_ATTRIBUTE_MCAST, } mpls_tunnel_attribute_t; #define MPLS_TUNNEL_ATTRIBUTES { \ [MPLS_TUNNEL_ATTRIBUTE_MCAST] = "multicast", \ + [MPLS_TUNNEL_ATTRIBUTE_L2] = "L2", \ } #define 
FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(_item) \ for (_item = MPLS_TUNNEL_ATTRIBUTE_FIRST; \ @@ -39,6 +44,7 @@ typedef enum mpls_tunnel_attribute_t_ typedef enum mpls_tunnel_flag_t_ { MPLS_TUNNEL_FLAG_NONE = 0, + MPLS_TUNNEL_FLAG_L2 = (1 << MPLS_TUNNEL_ATTRIBUTE_L2), MPLS_TUNNEL_FLAG_MCAST = (1 << MPLS_TUNNEL_ATTRIBUTE_MCAST), } __attribute__ ((packed)) mpls_tunnel_flags_t; @@ -60,14 +66,19 @@ typedef struct mpls_tunnel_t_ /** * @brief If the tunnel is an L2 tunnel, this is the link type ETHERNET - * adjacency + * load-balance + */ + dpo_id_t mt_l2_lb; + + /** + * @brief The HW interface index of the tunnel interfaces */ - adj_index_t mt_l2_adj; + u32 mt_hw_if_index; /** - * @brief on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain + * @brief The SW interface index of the tunnel interfaces */ - u32 mt_l2_tx_arc; + u32 mt_sw_if_index; /** * @brief The path-list over which the tunnel's destination is reachable @@ -83,23 +94,6 @@ typedef struct mpls_tunnel_t_ * A vector of path extensions o hold the label stack for each path */ fib_path_ext_list_t mt_path_exts; - - /** - * @brief Flag to indicate the tunnel is only for L2 traffic, that is - * this tunnel belongs in a bridge domain. 
- */ - u8 mt_l2_only; - - /** - * @brief The HW interface index of the tunnel interfaces - */ - u32 mt_hw_if_index; - - /** - * @brief The SW interface index of the tunnel interfaces - */ - u32 mt_sw_if_index; - } mpls_tunnel_t; /** diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/vnet/srmpls/sr_mpls_policy.c index 5ebbc60d..db4ad2a7 100755 --- a/src/vnet/srmpls/sr_mpls_policy.c +++ b/src/vnet/srmpls/sr_mpls_policy.c @@ -75,7 +75,7 @@ create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight) segment_list->segments = vec_dup (sl); fib_route_path_t path = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = segment_list->weight, @@ -203,7 +203,7 @@ sr_mpls_policy_del (mpls_label_t bsid, u32 index) segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); fib_route_path_t path = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = segment_list->weight, @@ -308,7 +308,7 @@ sr_mpls_policy_mod (mpls_label_t bsid, u32 index, u8 operation, mpls_eos_bit_t eos; fib_route_path_t path = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = segment_list->weight, diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/vnet/srmpls/sr_mpls_steering.c index 37707049..3a9aea2d 100755 --- a/src/vnet/srmpls/sr_mpls_steering.c +++ b/src/vnet/srmpls/sr_mpls_steering.c @@ -218,7 +218,7 @@ sr_mpls_steering_policy (int is_del, mpls_label_t bsid, u32 sr_policy_index, update_fib:; fib_route_path_t path = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, .frp_local_label = sr_policy->bsid, .frp_eos = MPLS_EOS, .frp_sw_if_index = ~0, diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c index a7903751..704adaa7 100755 --- a/src/vnet/srv6/sr_steering.c +++ b/src/vnet/srv6/sr_steering.c @@ -310,7 +310,7 @@ update_fib: table_id : 0)), 
&pfx, FIB_SOURCE_SR, FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, (ip46_address_t *) & sr_policy->bsid, ~0, sm->fib_table_ip6, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); @@ -327,7 +327,7 @@ update_fib: table_id : 0)), &pfx, FIB_SOURCE_SR, FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, (ip46_address_t *) & sr_policy->bsid, ~0, sm->fib_table_ip4, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c index 97bb1b15..462c79a0 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.c +++ b/src/vnet/vxlan-gpe/vxlan_gpe.c @@ -638,7 +638,7 @@ int vnet_vxlan_gpe_add_del_tunnel fib_node_index_t mfei; adj_index_t ai; fib_route_path_t path = { - .frp_proto = fp, + .frp_proto = fib_proto_to_dpo(fp), .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c index 1b3df2a8..dc973372 100644 --- a/src/vnet/vxlan/vxlan.c +++ b/src/vnet/vxlan/vxlan.c @@ -505,7 +505,7 @@ int vnet_vxlan_add_del_tunnel fib_node_index_t mfei; adj_index_t ai; fib_route_path_t path = { - .frp_proto = fp, + .frp_proto = fib_proto_to_dpo(fp), .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, diff --git a/src/vpp/app/vpe_cli.c b/src/vpp/app/vpe_cli.c index 94bdc84c..fcc496ad 100644 --- a/src/vpp/app/vpe_cli.c +++ b/src/vpp/app/vpe_cli.c @@ -98,7 +98,7 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, vec_add2 (rpaths, rpath, 1); - rpath->frp_proto = FIB_PROTOCOL_IP4; + rpath->frp_proto = DPO_PROTO_IP4; rpath->frp_addr = next_hops[i]; rpath->frp_sw_if_index = sw_if_index; rpath->frp_fib_index = ~0; diff --git a/test/test_bfd.py b/test/test_bfd.py index be42cdad..4cb6d379 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -20,7 +20,7 @@ from vpp_pg_interface import CaptureTimeoutError, is_ipv6_misc from vpp_lo_interface import VppLoInterface from util import ppp from vpp_papi_provider import 
UnexpectedApiReturnValueError -from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto USEC_IN_SEC = 1000000 @@ -1678,12 +1678,12 @@ class BFDFIBTestCase(VppTestCase): ip_2001_s_64 = VppIpRoute(self, "2001::", 64, [VppRoutePath(self.pg0.remote_ip6, self.pg0.sw_if_index, - is_ip6=1)], + proto=DPO_PROTO_IP6)], is_ip6=1) ip_2002_s_64 = VppIpRoute(self, "2002::", 64, [VppRoutePath(self.pg0.remote_ip6, 0xffffffff, - is_ip6=1)], + proto=DPO_PROTO_IP6)], is_ip6=1) ip_2001_s_64.add_vpp_config() ip_2002_s_64.add_vpp_config() diff --git a/test/test_gre.py b/test/test_gre.py index 18b67dbd..1afc44fb 100644 --- a/test/test_gre.py +++ b/test/test_gre.py @@ -6,7 +6,7 @@ from logging import * from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppDot1QSubint from vpp_gre_interface import VppGreInterface, VppGre6Interface -from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto from vpp_papi_provider import L2_VTR_OP from scapy.packet import Raw @@ -516,11 +516,12 @@ class TestGRE(VppTestCase): gre_if.admin_up() gre_if.config_ip6() - route_via_tun = VppIpRoute(self, "4004::1", 128, - [VppRoutePath("0::0", - gre_if.sw_if_index, - is_ip6=1)], - is_ip6=1) + route_via_tun = VppIpRoute( + self, "4004::1", 128, + [VppRoutePath("0::0", + gre_if.sw_if_index, + proto=DpoProto.DPO_PROTO_IP6)], + is_ip6=1) route_via_tun.add_vpp_config() @@ -542,11 +543,12 @@ class TestGRE(VppTestCase): # # Add a route that resolves the tunnel's destination # - route_tun_dst = VppIpRoute(self, "1002::1", 128, - [VppRoutePath(self.pg2.remote_ip6, - self.pg2.sw_if_index, - is_ip6=1)], - is_ip6=1) + route_tun_dst = VppIpRoute( + self, "1002::1", 128, + [VppRoutePath(self.pg2.remote_ip6, + self.pg2.sw_if_index, + proto=DpoProto.DPO_PROTO_IP6)], + is_ip6=1) route_tun_dst.add_vpp_config() # diff --git a/test/test_ip6.py b/test/test_ip6.py index 593f6868..285ce181 100644 --- 
a/test/test_ip6.py +++ b/test/test_ip6.py @@ -8,7 +8,7 @@ from vpp_sub_interface import VppSubInterface, VppDot1QSubint from vpp_pg_interface import is_ipv6_misc from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, VppIpMRoute, \ VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind, \ - VppMplsRoute + VppMplsRoute, DpoProto from vpp_neighbor import find_nbr, VppNeighbor from scapy.packet import Raw @@ -490,7 +490,7 @@ class TestIPv6(TestIPv6ND): inet=AF_INET6)) def test_ns_duplicates(self): - """ ARP Duplicates""" + """ ND Duplicates""" # # Generate some hosts on the LAN @@ -537,7 +537,7 @@ class TestIPv6(TestIPv6ND): # # remove the duplicate on pg1 - # packet stream shoud generate ARPs out of pg1 + # packet stream shoud generate NSs out of pg1 # ns_pg1.remove_vpp_config() @@ -1347,10 +1347,10 @@ class TestIP6LoadBalance(VppTestCase): route_3000_1 = VppIpRoute(self, "3000::1", 128, [VppRoutePath(self.pg1.remote_ip6, self.pg1.sw_if_index, - is_ip6=1), + proto=DpoProto.DPO_PROTO_IP6), VppRoutePath(self.pg2.remote_ip6, self.pg2.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_3000_1.add_vpp_config() @@ -1367,11 +1367,11 @@ class TestIP6LoadBalance(VppTestCase): [VppRoutePath(self.pg1.remote_ip6, self.pg1.sw_if_index, labels=[67], - is_ip6=1), + proto=DpoProto.DPO_PROTO_IP6), VppRoutePath(self.pg2.remote_ip6, self.pg2.sw_if_index, labels=[67], - is_ip6=1)]) + proto=DpoProto.DPO_PROTO_IP6)]) route_67.add_vpp_config() # @@ -1441,20 +1441,20 @@ class TestIP6LoadBalance(VppTestCase): route_3000_2 = VppIpRoute(self, "3000::2", 128, [VppRoutePath(self.pg3.remote_ip6, self.pg3.sw_if_index, - is_ip6=1), + proto=DpoProto.DPO_PROTO_IP6), VppRoutePath(self.pg4.remote_ip6, self.pg4.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_3000_2.add_vpp_config() route_4000_1 = VppIpRoute(self, "4000::1", 128, [VppRoutePath("3000::1", 0xffffffff, - is_ip6=1), + proto=DpoProto.DPO_PROTO_IP6), VppRoutePath("3000::2", 
0xffffffff, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_4000_1.add_vpp_config() @@ -1485,14 +1485,14 @@ class TestIP6LoadBalance(VppTestCase): route_5000_2 = VppIpRoute(self, "5000::2", 128, [VppRoutePath(self.pg3.remote_ip6, self.pg3.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_5000_2.add_vpp_config() route_6000_1 = VppIpRoute(self, "6000::1", 128, [VppRoutePath("5000::2", 0xffffffff, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_6000_1.add_vpp_config() diff --git a/test/test_map.py b/test/test_map.py index 9ac3948a..bbf4aec2 100644 --- a/test/test_map.py +++ b/test/test_map.py @@ -4,7 +4,7 @@ import unittest import socket from framework import VppTestCase, VppTestRunner -from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto from scapy.layers.l2 import Ether, Raw from scapy.layers.inet import IP, UDP, ICMP @@ -75,7 +75,7 @@ class TestMAP(VppTestCase): map_br_pfx_len, [VppRoutePath(self.pg1.remote_ip6, self.pg1.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) map_route.add_vpp_config() @@ -138,13 +138,12 @@ class TestMAP(VppTestCase): # Add a route to 4001::1. 
Expect the encapped traffic to be # sent via that routes next-hop # - pre_res_route = VppIpRoute(self, - "4001::1", - 128, - [VppRoutePath(self.pg1.remote_hosts[2].ip6, - self.pg1.sw_if_index, - is_ip6=1)], - is_ip6=1) + pre_res_route = VppIpRoute( + self, "4001::1", 128, + [VppRoutePath(self.pg1.remote_hosts[2].ip6, + self.pg1.sw_if_index, + proto=DpoProto.DPO_PROTO_IP6)], + is_ip6=1) pre_res_route.add_vpp_config() self.send_and_assert_encapped(v4, map_src, @@ -156,7 +155,7 @@ class TestMAP(VppTestCase): # pre_res_route.modify([VppRoutePath(self.pg1.remote_hosts[3].ip6, self.pg1.sw_if_index, - is_ip6=1)]) + proto=DpoProto.DPO_PROTO_IP6)]) pre_res_route.add_vpp_config() self.send_and_assert_encapped(v4, map_src, diff --git a/test/test_mpls.py b/test/test_mpls.py index e3d013af..b2226a74 100644 --- a/test/test_mpls.py +++ b/test/test_mpls.py @@ -6,7 +6,7 @@ import socket from framework import VppTestCase, VppTestRunner from vpp_ip_route import VppIpRoute, VppRoutePath, VppMplsRoute, \ VppMplsIpBind, VppIpMRoute, VppMRoutePath, \ - MRouteItfFlags, MRouteEntryFlags + MRouteItfFlags, MRouteEntryFlags, DpoProto from vpp_mpls_tunnel_interface import VppMPLSTunnelInterface from scapy.packet import Raw @@ -16,6 +16,38 @@ from scapy.layers.inet6 import IPv6 from scapy.contrib.mpls import MPLS +def verify_filter(capture, sent): + if not len(capture) == len(sent): + # filter out any IPv6 RAs from the capture + for p in capture: + if p.haslayer(IPv6): + capture.remove(p) + return capture + + +def verify_mpls_stack(tst, rx, mpls_labels, ttl=255, num=0): + # the rx'd packet has the MPLS label popped + eth = rx[Ether] + tst.assertEqual(eth.type, 0x8847) + + rx_mpls = rx[MPLS] + + for ii in range(len(mpls_labels)): + tst.assertEqual(rx_mpls.label, mpls_labels[ii]) + tst.assertEqual(rx_mpls.cos, 0) + if ii == num: + tst.assertEqual(rx_mpls.ttl, ttl) + else: + tst.assertEqual(rx_mpls.ttl, 255) + if ii == len(mpls_labels) - 1: + tst.assertEqual(rx_mpls.s, 1) + else: + # not end of 
stack + tst.assertEqual(rx_mpls.s, 0) + # pop the label to expose the next + rx_mpls = rx_mpls[MPLS].payload + + class TestMPLS(VppTestCase): """ MPLS Test Case """ @@ -120,18 +152,9 @@ class TestMPLS(VppTestCase): pkts.append(p) return pkts - @staticmethod - def verify_filter(capture, sent): - if not len(capture) == len(sent): - # filter out any IPv6 RAs from the capture - for p in capture: - if p.haslayer(IPv6): - capture.remove(p) - return capture - def verify_capture_ip4(self, src_if, capture, sent, ping_resp=0): try: - capture = self.verify_filter(capture, sent) + capture = verify_filter(capture, sent) self.assertEqual(len(capture), len(sent)) @@ -158,33 +181,10 @@ class TestMPLS(VppTestCase): except: raise - def verify_mpls_stack(self, rx, mpls_labels, ttl=255, num=0): - # the rx'd packet has the MPLS label popped - eth = rx[Ether] - self.assertEqual(eth.type, 0x8847) - - rx_mpls = rx[MPLS] - - for ii in range(len(mpls_labels)): - self.assertEqual(rx_mpls.label, mpls_labels[ii]) - self.assertEqual(rx_mpls.cos, 0) - if ii == num: - self.assertEqual(rx_mpls.ttl, ttl) - else: - self.assertEqual(rx_mpls.ttl, 255) - - if ii == len(mpls_labels) - 1: - self.assertEqual(rx_mpls.s, 1) - else: - # not end of stack - self.assertEqual(rx_mpls.s, 0) - # pop the label to expose the next - rx_mpls = rx_mpls[MPLS].payload - def verify_capture_labelled_ip4(self, src_if, capture, sent, mpls_labels): try: - capture = self.verify_filter(capture, sent) + capture = verify_filter(capture, sent) self.assertEqual(len(capture), len(sent)) @@ -195,8 +195,8 @@ class TestMPLS(VppTestCase): rx_ip = rx[IP] # the MPLS TTL is copied from the IP - self.verify_mpls_stack( - rx, mpls_labels, rx_ip.ttl, len(mpls_labels) - 1) + verify_mpls_stack(self, rx, mpls_labels, rx_ip.ttl, + len(mpls_labels) - 1) self.assertEqual(rx_ip.src, tx_ip.src) self.assertEqual(rx_ip.dst, tx_ip.dst) @@ -211,7 +211,7 @@ class TestMPLS(VppTestCase): if top is None: top = len(mpls_labels) - 1 try: - capture = 
self.verify_filter(capture, sent) + capture = verify_filter(capture, sent) self.assertEqual(len(capture), len(sent)) @@ -222,8 +222,7 @@ class TestMPLS(VppTestCase): rx_ip = rx[IP] # the MPLS TTL is 255 since it enters a new tunnel - self.verify_mpls_stack( - rx, mpls_labels, ttl, top) + verify_mpls_stack(self, rx, mpls_labels, ttl, top) self.assertEqual(rx_ip.src, tx_ip.src) self.assertEqual(rx_ip.dst, tx_ip.dst) @@ -236,13 +235,13 @@ class TestMPLS(VppTestCase): def verify_capture_labelled(self, src_if, capture, sent, mpls_labels, ttl=254, num=0): try: - capture = self.verify_filter(capture, sent) + capture = verify_filter(capture, sent) self.assertEqual(len(capture), len(sent)) for i in range(len(capture)): rx = capture[i] - self.verify_mpls_stack(rx, mpls_labels, ttl, num) + verify_mpls_stack(self, rx, mpls_labels, ttl, num) except: raise @@ -1049,7 +1048,7 @@ class TestMPLS(VppTestCase): self.pg1.sw_if_index, nh_table_id=1, rpf_id=55, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_multicast=1) route_34_eos.add_vpp_config() @@ -1440,19 +1439,20 @@ class TestMPLSPIC(VppTestCase): for ii in range(64): dst = "3000::%d" % ii local_label = 1600 + ii - vpn_routes.append(VppIpRoute(self, dst, 128, - [VppRoutePath(self.pg2.remote_ip6, - 0xffffffff, - nh_table_id=1, - is_resolve_attached=1, - is_ip6=1), - VppRoutePath(self.pg3.remote_ip6, - 0xffffffff, - nh_table_id=1, - is_ip6=1, - is_resolve_attached=1)], - table_id=1, - is_ip6=1)) + vpn_routes.append(VppIpRoute( + self, dst, 128, + [VppRoutePath(self.pg2.remote_ip6, + 0xffffffff, + nh_table_id=1, + is_resolve_attached=1, + proto=DpoProto.DPO_PROTO_IP6), + VppRoutePath(self.pg3.remote_ip6, + 0xffffffff, + nh_table_id=1, + proto=DpoProto.DPO_PROTO_IP6, + is_resolve_attached=1)], + table_id=1, + is_ip6=1)) vpn_routes[ii].add_vpp_config() vpn_bindings.append(VppMplsIpBind(self, local_label, dst, 128, @@ -1525,5 +1525,211 @@ class TestMPLSPIC(VppTestCase): self.assertNotEqual(0, len(rx1)) +class 
TestMPLSL2(VppTestCase): + """ MPLS-L2 """ + + def setUp(self): + super(TestMPLSL2, self).setUp() + + # create 2 pg interfaces + self.create_pg_interfaces(range(2)) + + # use pg0 as the core facing interface + self.pg0.admin_up() + self.pg0.config_ip4() + self.pg0.resolve_arp() + self.pg0.enable_mpls() + + # use the other 2 for customer facg L2 links + for i in self.pg_interfaces[1:]: + i.admin_up() + + def tearDown(self): + super(TestMPLSL2, self).tearDown() + for i in self.pg_interfaces[1:]: + i.admin_down() + + self.pg0.disable_mpls() + self.pg0.unconfig_ip4() + self.pg0.admin_down() + + def verify_capture_tunneled_ethernet(self, capture, sent, mpls_labels, + ttl=255, top=None): + if top is None: + top = len(mpls_labels) - 1 + + capture = verify_filter(capture, sent) + + self.assertEqual(len(capture), len(sent)) + + for i in range(len(capture)): + tx = sent[i] + rx = capture[i] + + # the MPLS TTL is 255 since it enters a new tunnel + verify_mpls_stack(self, rx, mpls_labels, ttl, top) + + tx_eth = tx[Ether] + rx_eth = Ether(str(rx[MPLS].payload)) + + self.assertEqual(rx_eth.src, tx_eth.src) + self.assertEqual(rx_eth.dst, tx_eth.dst) + + def test_vpws(self): + """ Virtual Private Wire Service """ + + # + # Create an MPLS tunnel that pushes 1 label + # + mpls_tun_1 = VppMPLSTunnelInterface(self, + [VppRoutePath(self.pg0.remote_ip4, + self.pg0.sw_if_index, + labels=[42])], + is_l2=1) + mpls_tun_1.add_vpp_config() + mpls_tun_1.admin_up() + + # + # Create a label entry to for 55 that does L2 input to the tunnel + # + route_55_eos = VppMplsRoute( + self, 55, 1, + [VppRoutePath("0.0.0.0", + mpls_tun_1.sw_if_index, + is_interface_rx=1, + proto=DpoProto.DPO_PROTO_ETHERNET)]) + route_55_eos.add_vpp_config() + + # + # Cross-connect the tunnel with one of the customers L2 interfaces + # + self.vapi.sw_interface_set_l2_xconnect(self.pg1.sw_if_index, + mpls_tun_1.sw_if_index, + enable=1) + self.vapi.sw_interface_set_l2_xconnect(mpls_tun_1.sw_if_index, + self.pg1.sw_if_index, + 
enable=1) + + # + # inject a packet from the core + # + pcore = (Ether(dst=self.pg0.local_mac, + src=self.pg0.remote_mac) / + MPLS(label=55, ttl=64) / + Ether(dst="00:00:de:ad:ba:be", + src="00:00:de:ad:be:ef") / + IP(src="10.10.10.10", dst="11.11.11.11") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + + self.pg0.add_stream(pcore * 65) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx0 = self.pg1.get_capture(65) + tx = pcore[MPLS].payload + + self.assertEqual(rx0[0][Ether].dst, tx[Ether].dst) + self.assertEqual(rx0[0][Ether].src, tx[Ether].src) + + # + # Inject a packet from the custoer/L2 side + # + self.pg1.add_stream(tx * 65) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx0 = self.pg0.get_capture(65) + + self.verify_capture_tunneled_ethernet(rx0, tx*65, [42]) + + def test_vpls(self): + """ Virtual Private LAN Service """ + # + # Create an L2 MPLS tunnel + # + mpls_tun = VppMPLSTunnelInterface(self, + [VppRoutePath(self.pg0.remote_ip4, + self.pg0.sw_if_index, + labels=[42])], + is_l2=1) + mpls_tun.add_vpp_config() + mpls_tun.admin_up() + + # + # Create a label entry to for 55 that does L2 input to the tunnel + # + route_55_eos = VppMplsRoute( + self, 55, 1, + [VppRoutePath("0.0.0.0", + mpls_tun.sw_if_index, + is_interface_rx=1, + proto=DpoProto.DPO_PROTO_ETHERNET)]) + route_55_eos.add_vpp_config() + + # + # add to tunnel to the customers bridge-domain + # + self.vapi.sw_interface_set_l2_bridge(mpls_tun.sw_if_index, + bd_id=1) + self.vapi.sw_interface_set_l2_bridge(self.pg1.sw_if_index, + bd_id=1) + + # + # Packet from the customer interface and from the core + # + p_cust = (Ether(dst="00:00:de:ad:ba:be", + src="00:00:de:ad:be:ef") / + IP(src="10.10.10.10", dst="11.11.11.11") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + p_core = (Ether(src="00:00:de:ad:ba:be", + dst="00:00:de:ad:be:ef") / + IP(dst="10.10.10.10", src="11.11.11.11") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + + # + # The BD 
is learning, so send in one of each packet to learn + # + p_core_encap = (Ether(dst=self.pg0.local_mac, + src=self.pg0.remote_mac) / + MPLS(label=55, ttl=64) / + p_core) + + self.pg1.add_stream(p_cust) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg0.add_stream(p_core_encap) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # we've learnt this so expect it be be forwarded + rx0 = self.pg1.get_capture(1) + + self.assertEqual(rx0[0][Ether].dst, p_core[Ether].dst) + self.assertEqual(rx0[0][Ether].src, p_core[Ether].src) + + # + # now a stream in each direction + # + self.pg1.add_stream(p_cust * 65) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx0 = self.pg0.get_capture(65) + + self.verify_capture_tunneled_ethernet(rx0, p_cust*65, [42]) + + # + # remove interfaces from customers bridge-domain + # + self.vapi.sw_interface_set_l2_bridge(mpls_tun.sw_if_index, + bd_id=1, + enable=0) + self.vapi.sw_interface_set_l2_bridge(self.pg1.sw_if_index, + bd_id=1, + enable=0) + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/test_p2p_ethernet.py b/test/test_p2p_ethernet.py index 37a1d18b..8688f7e6 100644 --- a/test/test_p2p_ethernet.py +++ b/test/test_p2p_ethernet.py @@ -11,7 +11,7 @@ from scapy.layers.inet6 import IPv6 from framework import VppTestCase, VppTestRunner, running_extended_tests from vpp_sub_interface import VppP2PSubint -from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto from util import mactobinary @@ -219,7 +219,7 @@ class P2PEthernetIPV6(VppTestCase): route_8000 = VppIpRoute(self, "8000::", 64, [VppRoutePath(self.pg0.remote_ip6, self.pg0.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_8000.add_vpp_config() @@ -239,7 +239,7 @@ class P2PEthernetIPV6(VppTestCase): route_9001 = VppIpRoute(self, "9001::", 64, [VppRoutePath(self.pg1.remote_ip6, self.pg1.sw_if_index, - is_ip6=1)], + 
proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_9001.add_vpp_config() @@ -264,7 +264,7 @@ class P2PEthernetIPV6(VppTestCase): route_3 = VppIpRoute(self, "9000::", 64, [VppRoutePath(self.pg1._remote_hosts[0].ip6, self.pg1.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_3.add_vpp_config() @@ -289,7 +289,7 @@ class P2PEthernetIPV6(VppTestCase): route_9001 = VppIpRoute(self, "9000::", 64, [VppRoutePath(self.pg1._remote_hosts[0].ip6, self.pg1.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_9001.add_vpp_config() @@ -310,19 +310,19 @@ class P2PEthernetIPV6(VppTestCase): route_8000 = VppIpRoute(self, "8000::", 64, [VppRoutePath(self.pg0.remote_ip6, self.pg0.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_8000.add_vpp_config() route_8001 = VppIpRoute(self, "8001::", 64, [VppRoutePath(self.p2p_sub_ifs[0].remote_ip6, self.p2p_sub_ifs[0].sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_8001.add_vpp_config() route_8002 = VppIpRoute(self, "8002::", 64, [VppRoutePath(self.p2p_sub_ifs[1].remote_ip6, self.p2p_sub_ifs[1].sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_8002.add_vpp_config() diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py index badb3102..2c489e3c 100644 --- a/test/vpp_ip_route.py +++ b/test/vpp_ip_route.py @@ -29,6 +29,14 @@ class MRouteEntryFlags: MFIB_ENTRY_FLAG_INHERIT_ACCEPT = 8 +class DpoProto: + DPO_PROTO_IP4 = 0 + DPO_PROTO_IP6 = 1 + DPO_PROTO_MPLS = 2 + DPO_PROTO_ETHERNET = 3 + DPO_PROTO_NSH = 4 + + def find_route(test, ip_addr, len, table_id=0, inet=AF_INET): if inet == AF_INET: s = 4 @@ -55,22 +63,24 @@ class VppRoutePath(object): nh_table_id=0, labels=[], nh_via_label=MPLS_LABEL_INVALID, - is_ip6=0, rpf_id=0, is_interface_rx=0, is_resolve_host=0, - is_resolve_attached=0): + is_resolve_attached=0, + proto=DpoProto.DPO_PROTO_IP4): self.nh_itf = nh_sw_if_index self.nh_table_id = nh_table_id self.nh_via_label = 
nh_via_label self.nh_labels = labels self.weight = 1 self.rpf_id = rpf_id - self.is_ip4 = 1 if is_ip6 == 0 else 0 - if self.is_ip4: + self.proto = proto + if self.proto is DpoProto.DPO_PROTO_IP6: + self.nh_addr = inet_pton(AF_INET6, nh_addr) + elif self.proto is DpoProto.DPO_PROTO_IP4: self.nh_addr = inet_pton(AF_INET, nh_addr) else: - self.nh_addr = inet_pton(AF_INET6, nh_addr) + self.nh_addr = inet_pton(AF_INET6, "::") self.is_resolve_host = is_resolve_host self.is_resolve_attached = is_resolve_attached self.is_interface_rx = is_interface_rx @@ -401,7 +411,7 @@ class VppMplsRoute(VppObject): self._test.vapi.mpls_route_add_del( self.local_label, self.eos_bit, - path.is_ip4, + path.proto, path.nh_addr, path.nh_itf, is_multicast=self.is_multicast, @@ -420,7 +430,7 @@ class VppMplsRoute(VppObject): for path in self.paths: self._test.vapi.mpls_route_add_del(self.local_label, self.eos_bit, - 1, + path.proto, path.nh_addr, path.nh_itf, is_rpf_id=path.is_rpf_id, diff --git a/test/vpp_mpls_tunnel_interface.py b/test/vpp_mpls_tunnel_interface.py index f2001574..0542b05c 100644 --- a/test/vpp_mpls_tunnel_interface.py +++ b/test/vpp_mpls_tunnel_interface.py @@ -9,13 +9,14 @@ class VppMPLSTunnelInterface(VppInterface): VPP MPLS Tunnel interface """ - def __init__(self, test, paths, is_multicast=0): + def __init__(self, test, paths, is_multicast=0, is_l2=0): """ Create MPLS Tunnel interface """ self._sw_if_index = 0 super(VppMPLSTunnelInterface, self).__init__(test) self._test = test self.t_paths = paths self.is_multicast = is_multicast + self.is_l2 = is_l2 def add_vpp_config(self): self._sw_if_index = 0xffffffff @@ -29,7 +30,8 @@ class VppMPLSTunnelInterface(VppInterface): path.weight, next_hop_out_label_stack=path.nh_labels, next_hop_n_out_labels=len(path.nh_labels), - is_multicast=self.is_multicast) + is_multicast=self.is_multicast, + l2_only=self.is_l2) self._sw_if_index = reply.sw_if_index def remove_vpp_config(self): diff --git a/test/vpp_papi_provider.py 
b/test/vpp_papi_provider.py index 801a6c2d..3ba2ad4a 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -921,7 +921,7 @@ class VppPapiProvider(object): self, label, eos, - next_hop_proto_is_ip4, + next_hop_proto, next_hop_address, next_hop_sw_if_index=0xFFFFFFFF, table_id=0, @@ -982,7 +982,7 @@ class VppPapiProvider(object): 'mr_is_resolve_attached': is_resolve_attached, 'mr_is_interface_rx': is_interface_rx, 'mr_is_rpf_id': is_rpf_id, - 'mr_next_hop_proto_is_ip4': next_hop_proto_is_ip4, + 'mr_next_hop_proto': next_hop_proto, 'mr_next_hop_weight': next_hop_weight, 'mr_next_hop': next_hop_address, 'mr_next_hop_n_out_labels': next_hop_n_out_labels, -- cgit 1.2.3-korg From 1500254bee11355bbd69cc1dd9705be4f002f2bd Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Sun, 10 Sep 2017 04:39:11 -0700 Subject: FIB table add/delete API part 2; - this adds the code to create an IP and MPLS table via the API. - but the enforcement that the table must be created before it is used is still missing, this is so that CSIT can pass. 
Change-Id: Id124d884ade6cb7da947225200e3bb193454c555 Signed-off-by: Neale Ranns --- src/plugins/nat/nat.c | 17 +- src/plugins/nat/nat64.c | 13 +- src/vnet/classify/vnet_classify.c | 16 +- src/vnet/dhcp/dhcp4_proxy_node.c | 9 +- src/vnet/dhcp/dhcp6_proxy_node.c | 9 +- src/vnet/dhcp/dhcp_proxy.c | 19 ++- src/vnet/dpo/lookup_dpo.c | 20 ++- src/vnet/dpo/mpls_label_dpo.c | 12 +- src/vnet/ethernet/arp.c | 127 +++++++++++---- src/vnet/fib/fib_api.h | 1 - src/vnet/fib/fib_entry.c | 15 +- src/vnet/fib/fib_entry.h | 1 + src/vnet/fib/fib_entry_src_mpls.c | 7 +- src/vnet/fib/fib_table.c | 43 +++-- src/vnet/fib/fib_table.h | 32 +++- src/vnet/fib/fib_test.c | 27 ++-- src/vnet/fib/ip4_fib.c | 41 +++-- src/vnet/fib/ip4_fib.h | 5 +- src/vnet/fib/ip6_fib.c | 41 +++-- src/vnet/fib/ip6_fib.h | 5 +- src/vnet/fib/mpls_fib.c | 16 +- src/vnet/fib/mpls_fib.h | 5 +- src/vnet/interface_api.c | 177 ++++++++++++++++---- src/vnet/ip/ip.h | 7 + src/vnet/ip/ip4.h | 13 ++ src/vnet/ip/ip4_forward.c | 101 +----------- src/vnet/ip/ip4_source_and_port_range_check.c | 11 +- src/vnet/ip/ip6.h | 13 ++ src/vnet/ip/ip6_forward.c | 103 +----------- src/vnet/ip/ip6_neighbor.c | 108 +++++++++---- src/vnet/ip/ip_api.c | 122 +++++++++++--- src/vnet/ip/lookup.c | 225 ++++++++++++++++++++++++++ src/vnet/lisp-gpe/interface.c | 11 +- src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c | 9 +- src/vnet/lisp-gpe/lisp_gpe_sub_interface.c | 11 +- src/vnet/mfib/ip4_mfib.c | 12 +- src/vnet/mfib/ip4_mfib.h | 5 +- src/vnet/mfib/ip6_mfib.c | 12 +- src/vnet/mfib/ip6_mfib.h | 5 +- src/vnet/mfib/mfib_entry.c | 11 ++ src/vnet/mfib/mfib_entry.h | 2 + src/vnet/mfib/mfib_table.c | 88 ++++++++-- src/vnet/mfib/mfib_table.h | 29 +++- src/vnet/mfib/mfib_test.c | 11 +- src/vnet/mfib/mfib_types.h | 8 +- src/vnet/mpls/interface.c | 26 ++- src/vnet/mpls/mpls.c | 76 ++++++++- src/vnet/mpls/mpls.h | 16 +- src/vnet/mpls/mpls_api.c | 66 ++++++-- src/vnet/srv6/sr_policy_rewrite.c | 6 +- src/vnet/srv6/sr_steering.c | 6 +- src/vpp/api/api.c | 5 +- 
src/vpp/api/custom_dump.c | 3 - test/test_dhcp.py | 24 ++- test/test_gre.py | 8 +- test/test_ip4.py | 11 +- test/test_ip4_vrf_multi_instance.py | 4 +- test/test_ip6.py | 7 +- test/test_ip6_vrf_multi_instance.py | 4 +- test/test_ip_mcast.py | 98 ++++++++++- test/test_mpls.py | 48 +++++- test/test_nat.py | 13 ++ test/test_neighbor.py | 66 +++++++- test/vpp_ip_route.py | 73 +++++++++ test/vpp_papi_provider.py | 46 ++++-- 65 files changed, 1643 insertions(+), 538 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index aa7ef10a..8aecac6d 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -167,7 +167,8 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id) ap->addr = *addr; if (vrf_id != ~0) ap->fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + FIB_SOURCE_PLUGIN_HI); else ap->fib_index = ~0; #define _(N, i, n, s) \ @@ -625,7 +626,8 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, return VNET_API_ERROR_INVALID_VALUE; fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - vrf_id); + vrf_id, + FIB_SOURCE_PLUGIN_HI); /* Find external address in allocated addresses and reserve port for address and port pair mapping when dynamic translations enabled */ @@ -754,7 +756,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, if (!m) return VNET_API_ERROR_NO_SUCH_ENTRY; - fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_PLUGIN_HI); /* Free external address port */ if (!sm->static_mapping_only) @@ -874,7 +876,8 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) } if (a->fib_index != ~0) - fib_table_unlock(a->fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock(a->fib_index, FIB_PROTOCOL_IP4, + FIB_SOURCE_PLUGIN_HI); /* Delete sessions using address */ if 
(a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports) @@ -2151,10 +2154,12 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) sm->max_translations_per_user = max_translations_per_user; sm->outside_vrf_id = outside_vrf_id; sm->outside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - outside_vrf_id); + outside_vrf_id, + FIB_SOURCE_PLUGIN_HI); sm->inside_vrf_id = inside_vrf_id; sm->inside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - inside_vrf_id); + inside_vrf_id, + FIB_SOURCE_PLUGIN_HI); sm->static_mapping_only = static_mapping_only; sm->static_mapping_connection_tracking = static_mapping_connection_tracking; diff --git a/src/plugins/nat/nat64.c b/src/plugins/nat/nat64.c index b04901fa..bfcfa9b3 100644 --- a/src/plugins/nat/nat64.c +++ b/src/plugins/nat/nat64.c @@ -107,7 +107,8 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) a->fib_index = 0; if (vrf_id != ~0) a->fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + FIB_SOURCE_PLUGIN_HI); #define _(N, i, n, s) \ clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); foreach_snat_protocol @@ -119,7 +120,8 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) return VNET_API_ERROR_NO_SUCH_ENTRY; if (a->fib_index) - fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6); + fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, + FIB_SOURCE_PLUGIN_HI); #define _(N, id, n, s) \ clib_bitmap_free (a->busy_##n##_port_bitmap); @@ -353,8 +355,8 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr, { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; - u32 fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); + u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + FIB_SOURCE_PLUGIN_HI); snat_protocol_t p = ip_proto_to_snat_proto (proto); ip46_address_t addr; int i; @@ -644,7 +646,8 @@ 
nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add) { vec_add2 (nm->pref64, p, 1); p->fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + FIB_SOURCE_PLUGIN_HI); p->vrf_id = vrf_id; } diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c index 879fba3c..57d86748 100644 --- a/src/vnet/classify/vnet_classify.c +++ b/src/vnet/classify/vnet_classify.c @@ -368,10 +368,10 @@ vnet_classify_entry_claim_resource (vnet_classify_entry_t *e) switch (e->action) { case CLASSIFY_ACTION_SET_IP4_FIB_INDEX: - fib_table_lock (e->metadata, FIB_PROTOCOL_IP4); + fib_table_lock (e->metadata, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY); break; case CLASSIFY_ACTION_SET_IP6_FIB_INDEX: - fib_table_lock (e->metadata, FIB_PROTOCOL_IP6); + fib_table_lock (e->metadata, FIB_PROTOCOL_IP6, FIB_SOURCE_CLASSIFY); break; } } @@ -382,10 +382,10 @@ vnet_classify_entry_release_resource (vnet_classify_entry_t *e) switch (e->action) { case CLASSIFY_ACTION_SET_IP4_FIB_INDEX: - fib_table_unlock (e->metadata, FIB_PROTOCOL_IP4); + fib_table_unlock (e->metadata, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY); break; case CLASSIFY_ACTION_SET_IP6_FIB_INDEX: - fib_table_unlock (e->metadata, FIB_PROTOCOL_IP6); + fib_table_unlock (e->metadata, FIB_PROTOCOL_IP6, FIB_SOURCE_CLASSIFY); break; } } @@ -2096,9 +2096,13 @@ int vnet_classify_add_del_session (vnet_classify_main_t * cm, e->flags = 0; e->action = action; if (e->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX) - e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, metadata); + e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + metadata, + FIB_SOURCE_CLASSIFY); else if (e->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX) - e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, metadata); + e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, + metadata, + FIB_SOURCE_CLASSIFY); else 
e->metadata = 0; diff --git a/src/vnet/dhcp/dhcp4_proxy_node.c b/src/vnet/dhcp/dhcp4_proxy_node.c index 1b59cdea..339a7885 100644 --- a/src/vnet/dhcp/dhcp4_proxy_node.c +++ b/src/vnet/dhcp/dhcp4_proxy_node.c @@ -785,7 +785,8 @@ dhcp4_proxy_set_server (ip46_address_t *addr, return VNET_API_ERROR_INVALID_SRC_ADDRESS; rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, - rx_table_id); + rx_table_id, + FIB_SOURCE_DHCP); if (is_del) { @@ -795,7 +796,7 @@ dhcp4_proxy_set_server (ip46_address_t *addr, fib_table_entry_special_remove(rx_fib_index, &all_1s, FIB_SOURCE_DHCP); - fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP); } } else @@ -808,10 +809,10 @@ dhcp4_proxy_set_server (ip46_address_t *addr, &all_1s, FIB_SOURCE_DHCP, FIB_ENTRY_FLAG_LOCAL); - fib_table_lock (rx_fib_index, FIB_PROTOCOL_IP4); + fib_table_lock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP); } } - fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP); return (rc); } diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index 9c2f5220..ce7a8fca 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -841,7 +841,8 @@ dhcp6_proxy_set_server (ip46_address_t *addr, return VNET_API_ERROR_INVALID_SRC_ADDRESS; rx_fib_index = mfib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, - rx_table_id); + rx_table_id, + MFIB_SOURCE_DHCP); if (is_del) { @@ -851,7 +852,7 @@ dhcp6_proxy_set_server (ip46_address_t *addr, mfib_table_entry_delete(rx_fib_index, &all_dhcp_servers, MFIB_SOURCE_DHCP); - mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP); } } else @@ -885,11 +886,11 @@ dhcp6_proxy_set_server (ip46_address_t *addr, MFIB_SOURCE_DHCP, MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); - mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6); + 
mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP); } } - mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP); return (rc); } diff --git a/src/vnet/dhcp/dhcp_proxy.c b/src/vnet/dhcp/dhcp_proxy.c index ba7f354e..1784906b 100644 --- a/src/vnet/dhcp/dhcp_proxy.c +++ b/src/vnet/dhcp/dhcp_proxy.c @@ -29,9 +29,9 @@ dhcp_proxy_rx_table_lock (fib_protocol_t proto, u32 fib_index) { if (FIB_PROTOCOL_IP4 == proto) - fib_table_lock(fib_index, proto); + fib_table_lock(fib_index, proto, FIB_SOURCE_DHCP); else - mfib_table_lock(fib_index, proto); + mfib_table_lock(fib_index, proto, MFIB_SOURCE_DHCP); } static void @@ -39,9 +39,9 @@ dhcp_proxy_rx_table_unlock (fib_protocol_t proto, u32 fib_index) { if (FIB_PROTOCOL_IP4 == proto) - fib_table_unlock(fib_index, proto); + fib_table_unlock(fib_index, proto, FIB_SOURCE_DHCP); else - mfib_table_unlock(fib_index, proto); + mfib_table_unlock(fib_index, proto, MFIB_SOURCE_DHCP); } u32 @@ -169,7 +169,7 @@ dhcp_proxy_server_del (fib_protocol_t proto, if (~0 != index) { server = &proxy->dhcp_servers[index]; - fib_table_unlock (server->server_fib_index, proto); + fib_table_unlock (server->server_fib_index, proto, FIB_SOURCE_DHCP); vec_del1(proxy->dhcp_servers, index); @@ -228,7 +228,8 @@ dhcp_proxy_server_add (fib_protocol_t proto, dhcp_server_t server = { .dhcp_server = *addr, .server_fib_index = fib_table_find_or_create_and_lock(proto, - server_table_id), + server_table_id, + FIB_SOURCE_DHCP), }; vec_add1(proxy->dhcp_servers, server); @@ -297,9 +298,11 @@ int dhcp_proxy_set_vss (fib_protocol_t proto, int rc = 0; if (proto == FIB_PROTOCOL_IP4) - rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id); + rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id, + FIB_SOURCE_DHCP); else - rx_fib_index = mfib_table_find_or_create_and_lock(proto, tbl_id); + rx_fib_index = mfib_table_find_or_create_and_lock(proto, tbl_id, + MFIB_SOURCE_DHCP); v = 
dhcp_get_vss_info(dm, rx_fib_index, proto); if (NULL != v) diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index 26363a2f..af189eda 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -135,11 +135,15 @@ lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index, { if (LOOKUP_UNICAST == cast) { - fib_table_lock(fib_index, dpo_proto_to_fib(proto)); + fib_table_lock(fib_index, + dpo_proto_to_fib(proto), + FIB_SOURCE_RR); } else { - mfib_table_lock(fib_index, dpo_proto_to_fib(proto)); + mfib_table_lock(fib_index, + dpo_proto_to_fib(proto), + MFIB_SOURCE_RR); } } lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo); @@ -161,13 +165,15 @@ lookup_dpo_add_or_lock_w_table_id (u32 table_id, { fib_index = fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), - table_id); + table_id, + FIB_SOURCE_RR); } else { fib_index = mfib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), - table_id); + table_id, + MFIB_SOURCE_RR); } } @@ -238,12 +244,14 @@ lookup_dpo_unlock (dpo_id_t *dpo) if (LOOKUP_UNICAST == lkd->lkd_cast) { fib_table_unlock(lkd->lkd_fib_index, - dpo_proto_to_fib(lkd->lkd_proto)); + dpo_proto_to_fib(lkd->lkd_proto), + FIB_SOURCE_RR); } else { mfib_table_unlock(lkd->lkd_fib_index, - dpo_proto_to_fib(lkd->lkd_proto)); + dpo_proto_to_fib(lkd->lkd_proto), + MFIB_SOURCE_RR); } } pool_put(lookup_dpo_pool, lkd); diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index b178a902..2a6e7dd5 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -105,10 +105,18 @@ format_mpls_label_dpo (u8 *s, va_list *args) mpls_label_dpo_t *mld; u32 ii; - mld = mpls_label_dpo_get(index); - s = format(s, "mpls-label:[%d]:", index); + if (pool_is_free_index(mpls_label_dpo_pool, index)) + { + /* + * the packet trace can be printed after the DPO has been deleted + */ + return (s); + } + + mld = mpls_label_dpo_get(index); + for (ii = 0; ii < mld->mld_n_labels; ii++) { 
hdr.label_exp_s_ttl = diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index c84ff47b..08e91373 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -522,6 +522,24 @@ arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) } } +static void +arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, uint32_t fib_index) +{ + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = e->ip4_address, + }; + + e->fib_entry_index = + fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + DPO_PROTO_IP4, &pfx.fp_addr, + e->sw_if_index, ~0, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ); +} + int vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, vnet_arp_set_ip4_over_ethernet_rpc_args_t @@ -576,21 +594,9 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, if (!is_no_fib_entry) { - fib_prefix_t pfx = { - .fp_len = 32, - .fp_proto = FIB_PROTOCOL_IP4, - .fp_addr.ip4 = a->ip4, - }; - u32 fib_index; - - fib_index = - ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); - e->fib_entry_index = - fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, - FIB_ENTRY_FLAG_ATTACHED, - DPO_PROTO_IP4, &pfx.fp_addr, - e->sw_if_index, ~0, 1, NULL, - FIB_ROUTE_PATH_FLAG_NONE); + arp_adj_fib_add (e, + ip4_fib_table_get_index_for_sw_if_index + (e->sw_if_index)); } else { @@ -1561,6 +1567,65 @@ arp_add_del_interface_address (ip4_main_t * im, } } +void +arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, uint32_t fib_index) +{ + if (FIB_NODE_INDEX_INVALID != e->fib_entry_index) + { + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = e->ip4_address, + }; + u32 fib_index; + + fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); + + fib_table_entry_path_remove (fib_index, &pfx, + FIB_SOURCE_ADJ, + DPO_PROTO_IP4, + &pfx.fp_addr, + e->sw_if_index, ~0, 1, + 
FIB_ROUTE_PATH_FLAG_NONE); + fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ); + } +} + +static void +arp_table_bind (ip4_main_t * im, + uword opaque, + u32 sw_if_index, u32 new_fib_index, u32 old_fib_index) +{ + ethernet_arp_main_t *am = &ethernet_arp_main; + ethernet_arp_interface_t *eai; + ethernet_arp_ip4_entry_t *e; + hash_pair_t *pair; + + /* + * the IP table that the interface is bound to has changed. + * reinstall all the adj fibs. + */ + + if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index) + return; + + eai = &am->ethernet_arp_by_sw_if_index[sw_if_index]; + + /* *INDENT-OFF* */ + hash_foreach_pair (pair, eai->arp_entries, + ({ + e = pool_elt_at_index(am->ip4_entry_pool, + pair->value[0]); + /* + * remove the adj-fib from the old table and add to the new + */ + arp_adj_fib_remove(e, old_fib_index); + arp_adj_fib_add(e, new_fib_index); + })); + /* *INDENT-ON* */ + +} + static clib_error_t * ethernet_arp_init (vlib_main_t * vm) { @@ -1606,6 +1671,11 @@ ethernet_arp_init (vlib_main_t * vm) cb.function_opaque = 0; vec_add1 (im->add_del_interface_address_callbacks, cb); + ip4_table_bind_callback_t cbt; + cbt.function = arp_table_bind; + cbt.function_opaque = 0; + vec_add1 (im->table_bind_callbacks, cbt); + return 0; } @@ -1616,24 +1686,9 @@ arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e) { ethernet_arp_main_t *am = &ethernet_arp_main; - if (FIB_NODE_INDEX_INVALID != e->fib_entry_index) - { - fib_prefix_t pfx = { - .fp_len = 32, - .fp_proto = FIB_PROTOCOL_IP4, - .fp_addr.ip4 = e->ip4_address, - }; - u32 fib_index; - - fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); - - fib_table_entry_path_remove (fib_index, &pfx, - FIB_SOURCE_ADJ, - DPO_PROTO_IP4, - &pfx.fp_addr, - e->sw_if_index, ~0, 1, - FIB_ROUTE_PATH_FLAG_NONE); - } + arp_adj_fib_remove (e, + ip4_fib_table_get_index_for_sw_if_index + (e->sw_if_index)); hash_unset (eai->arp_entries, e->ip4_address.as_u32); pool_put (am->ip4_entry_pool, e); 
} @@ -1693,7 +1748,11 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm, * does in response to interface events. unset is only done * by the control plane. */ - if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) + if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) + { + e->flags &= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC; + } + else if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) { arp_entry_free (eai, e); } diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h index d07d6cae..f5a107ca 100644 --- a/src/vnet/fib/fib_api.h +++ b/src/vnet/fib/fib_api.h @@ -23,7 +23,6 @@ add_del_route_check (fib_protocol_t table_proto, u32 next_hop_sw_if_index, dpo_proto_t next_hop_table_proto, u32 next_hop_table_id, - u8 create_missing_tables, u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index); diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index 2027f2be..4cb6cf60 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -89,6 +89,17 @@ fib_entry_get_default_chain_type (const fib_entry_t *fib_entry) return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); } +u8 * +format_fib_source (u8 * s, va_list * args) +{ + fib_source_t source = va_arg (*args, int); + + s = format (s, "\n src:%s ", + fib_source_names[source]); + + return (s); +} + u8 * format_fib_entry (u8 * s, va_list * args) { @@ -114,8 +125,8 @@ format_fib_entry (u8 * s, va_list * args) FOR_EACH_SRC_ADDED(fib_entry, src, source, ({ - s = format (s, "\n src:%s ", - fib_source_names[source]); + s = format (s, "\n src:%U ", + format_fib_source, source); s = fib_entry_src_format(fib_entry, source, s); s = format (s, " refs:%d ", src->fes_ref_count); if (FIB_ENTRY_FLAG_NONE != src->fes_entry_flags) { diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index 93b8016d..2f6e37fe 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -431,6 +431,7 @@ typedef struct fib_entry_t_ { #define FIB_ENTRY_FORMAT_DETAIL2 (0x2) extern u8 *format_fib_entry (u8 * s, va_list 
* args); +extern u8 *format_fib_source (u8 * s, va_list * args); extern fib_node_index_t fib_entry_create_special(u32 fib_index, const fib_prefix_t *prefix, diff --git a/src/vnet/fib/fib_entry_src_mpls.c b/src/vnet/fib/fib_entry_src_mpls.c index a616458f..6fdd5c0a 100644 --- a/src/vnet/fib/fib_entry_src_mpls.c +++ b/src/vnet/fib/fib_entry_src_mpls.c @@ -94,7 +94,9 @@ fib_entry_src_mpls_set_data (fib_entry_src_t *src, fib_table_entry_delete_index(src->mpls.fesm_lfes[eos], FIB_SOURCE_SPECIAL); } - fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID, FIB_PROTOCOL_MPLS); + fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID, + FIB_PROTOCOL_MPLS, + FIB_SOURCE_MPLS); src->mpls.fesm_label = label; } else @@ -113,7 +115,8 @@ fib_entry_src_mpls_set_data (fib_entry_src_t *src, { fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS, - MPLS_FIB_DEFAULT_TABLE_ID); + MPLS_FIB_DEFAULT_TABLE_ID, + FIB_SOURCE_MPLS); } else { diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 6b6cc5cb..75d15628 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -1039,7 +1039,8 @@ fib_table_find (fib_protocol_t proto, u32 fib_table_find_or_create_and_lock (fib_protocol_t proto, - u32 table_id) + u32 table_id, + fib_source_t src) { fib_table_t *fib_table; fib_node_index_t fi; @@ -1047,13 +1048,13 @@ fib_table_find_or_create_and_lock (fib_protocol_t proto, switch (proto) { case FIB_PROTOCOL_IP4: - fi = ip4_fib_table_find_or_create_and_lock(table_id); + fi = ip4_fib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_IP6: - fi = ip6_fib_table_find_or_create_and_lock(table_id); + fi = ip6_fib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_MPLS: - fi = mpls_fib_table_find_or_create_and_lock(table_id); + fi = mpls_fib_table_find_or_create_and_lock(table_id, src); break; default: return (~0); @@ -1070,6 +1071,7 @@ fib_table_find_or_create_and_lock (fib_protocol_t proto, u32 fib_table_create_and_lock (fib_protocol_t proto, + 
fib_source_t src, const char *const fmt, ...) { @@ -1082,13 +1084,13 @@ fib_table_create_and_lock (fib_protocol_t proto, switch (proto) { case FIB_PROTOCOL_IP4: - fi = ip4_fib_table_create_and_lock(); + fi = ip4_fib_table_create_and_lock(src); break; case FIB_PROTOCOL_IP6: - fi = ip6_fib_table_create_and_lock(); + fi = ip6_fib_table_create_and_lock(src); break; case FIB_PROTOCOL_MPLS: - fi = mpls_fib_table_create_and_lock(); + fi = mpls_fib_table_create_and_lock(src); break; default: return (~0); @@ -1143,26 +1145,43 @@ fib_table_walk (u32 fib_index, void fib_table_unlock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + fib_source_t source) { fib_table_t *fib_table; fib_table = fib_table_get(fib_index, proto); - fib_table->ft_locks--; + fib_table->ft_locks[source]--; + fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]--; - if (0 == fib_table->ft_locks) + if (0 == fib_table->ft_locks[source]) { + /* + * The source no longer needs the table. flush any routes + * from it just in case + */ + fib_table_flush(fib_index, proto, source); + } + + if (0 == fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]) + { + /* + * no more locks from any source - kill it + */ fib_table_destroy(fib_table); } } + void fib_table_lock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + fib_source_t source) { fib_table_t *fib_table; fib_table = fib_table_get(fib_index, proto); - fib_table->ft_locks++; + fib_table->ft_locks[source]++; + fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]++; } u32 diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index 579740e9..6b7011b3 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -22,6 +22,12 @@ #include #include +/** + * Keep a lock per-source and a total + */ +#define FIB_TABLE_N_LOCKS (FIB_SOURCE_MAX+1) +#define FIB_TABLE_TOTAL_LOCKS FIB_SOURCE_MAX + /** * @brief * A protocol Independent FIB table @@ -34,9 +40,9 @@ typedef struct fib_table_t_ fib_protocol_t ft_proto; /** - * number of locks on the table + * 
per-source number of locks on the table */ - u16 ft_locks; + u16 ft_locks[FIB_TABLE_N_LOCKS]; /** * Table ID (hash key) for this FIB. @@ -628,9 +634,13 @@ extern u32 fib_table_find(fib_protocol_t proto, u32 table_id); * * @return fib_index * The index of the FIB + * + * @param source + * The ID of the client/source. */ extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto, - u32 table_id); + u32 table_id, + fib_source_t source); /** * @brief @@ -643,10 +653,14 @@ extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto, * @param fmt * A string to describe the table * + * @param source + * The ID of the client/source. + * * @return fib_index * The index of the FIB */ extern u32 fib_table_create_and_lock(fib_protocol_t proto, + fib_source_t source, const char *const fmt, ...); @@ -704,9 +718,13 @@ extern void fib_table_set_flow_hash_config(u32 fib_index, * * @param proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. */ extern void fib_table_unlock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + fib_source_t source); /** * @brief @@ -718,9 +736,13 @@ extern void fib_table_unlock(u32 fib_index, * * @param proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. 
*/ extern void fib_table_lock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + fib_source_t source); /** * @brief diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 6867cca8..572d7f0d 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -739,7 +739,8 @@ fib_test_v4 (void) lb_count = pool_elts(load_balance_pool); /* Find or create FIB table 11 */ - fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11); + fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11, + FIB_SOURCE_API); for (ii = 0; ii < 4; ii++) { @@ -4150,7 +4151,7 @@ fib_test_v4 (void) FIB_SOURCE_INTERFACE)), "NO INterface Source'd prefixes"); - fib_table_unlock(fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock(fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_API); FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); @@ -4201,7 +4202,8 @@ fib_test_v6 (void) dpo_drop = drop_dpo_get(DPO_PROTO_IP6); /* Find or create FIB table 11 */ - fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11); + fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11, + FIB_SOURCE_API); for (ii = 0; ii < 4; ii++) { @@ -5025,7 +5027,7 @@ fib_test_v6 (void) /* * now remove the VRF */ - fib_table_unlock(fib_index, FIB_PROTOCOL_IP6); + fib_table_unlock(fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_API); FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); @@ -5157,7 +5159,9 @@ fib_test_ae (void) */ u32 import_fib_index1; - import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11); + import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, + 11, + FIB_SOURCE_CLI); /* * Add an attached route in the import FIB @@ -5233,7 +5237,8 @@ fib_test_ae (void) */ u32 import_fib_index2; - import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12); + import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12, + 
FIB_SOURCE_CLI); /* * Add an attached route in the import FIB @@ -5595,8 +5600,8 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API); - fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4); - fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4); + fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4, FIB_SOURCE_CLI); + fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4, FIB_SOURCE_CLI); FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", adj_nbr_db_size()); @@ -8168,9 +8173,10 @@ lfib_test (void) /* * MPLS enable an interface so we get the MPLS table created */ + mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 1); + 1, 1); ip46_address_t nh_10_10_10_1 = { .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01), @@ -8662,7 +8668,8 @@ lfib_test (void) */ mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 0); + 0, 1); + mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); FIB_TEST(lb_count == pool_elts(load_balance_pool), "Load-balance resources freed %d of %d", diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index d563bafd..865e2dd5 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -101,7 +101,8 @@ static const ip4_fib_table_special_prefix_t ip4_specials[] = { static u32 -ip4_create_fib_with_table_id (u32 table_id) +ip4_create_fib_with_table_id (u32 table_id, + fib_source_t src) { fib_table_t *fib_table; ip4_fib_t *v4_fib; @@ -128,7 +129,7 @@ ip4_create_fib_with_table_id (u32 table_id) v4_fib->fwd_classify_table_index = ~0; v4_fib->rev_classify_table_index = ~0; - fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4); + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4, src); ip4_mtrie_init(&v4_fib->mtrie); @@ -198,23 +199,24 @@ ip4_fib_table_destroy (u32 fib_index) u32 -ip4_fib_table_find_or_create_and_lock (u32 table_id) +ip4_fib_table_find_or_create_and_lock (u32 table_id, + fib_source_t src) { u32 index; index = 
ip4_fib_index_from_table_id(table_id); if (~0 == index) - return ip4_create_fib_with_table_id(table_id); + return ip4_create_fib_with_table_id(table_id, src); - fib_table_lock(index, FIB_PROTOCOL_IP4); + fib_table_lock(index, FIB_PROTOCOL_IP4, src); return (index); } u32 -ip4_fib_table_create_and_lock (void) +ip4_fib_table_create_and_lock (fib_source_t src) { - return (ip4_create_fib_with_table_id(~0)); + return (ip4_create_fib_with_table_id(~0, src)); } u32 @@ -525,17 +527,32 @@ ip4_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im4->fibs, ({ ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index); + fib_source_t source; + u8 *s = NULL; if (table_id >= 0 && table_id != (int)fib->table_id) continue; if (fib_index != ~0 && fib_index != (int)fib->index) continue; - vlib_cli_output (vm, "%U, fib_index:%d, flow hash:[%U] locks:%d", - format_fib_table_name, fib->index, FIB_PROTOCOL_IP4, - fib->index, - format_ip_flow_hash_config, fib_table->ft_flow_hash_config, - fib_table->ft_locks); + s = format(s, "%U, fib_index:%d, flow hash:[%U] locks:[", + format_fib_table_name, fib->index, + FIB_PROTOCOL_IP4, + fib->index, + format_ip_flow_hash_config, + fib_table->ft_flow_hash_config); + FOR_EACH_FIB_SOURCE(source) + { + if (0 != fib_table->ft_locks[source]) + { + s = format(s, "%U:%d, ", + format_fib_source, source, + fib_table->ft_locks[source]); + } + } + s = format (s, "]"); + vlib_cli_output (vm, "%V", s); + vec_free(s); /* Show summary? */ if (! verbose) diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h index 006163b4..495b45cc 100644 --- a/src/vnet/fib/ip4_fib.h +++ b/src/vnet/fib/ip4_fib.h @@ -127,8 +127,9 @@ ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst) * @returns A pointer to the retrieved or created fib. 
* */ -extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip4_fib_table_create_and_lock(void); +extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id, + fib_source_t src); +extern u32 ip4_fib_table_create_and_lock(fib_source_t src); static inline diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 8fde6f9f..3ddb8453 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -50,7 +50,8 @@ vnet_ip6_fib_init (u32 fib_index) } static u32 -create_fib_with_table_id (u32 table_id) +create_fib_with_table_id (u32 table_id, + fib_source_t src) { fib_table_t *fib_table; ip6_fib_t *v6_fib; @@ -77,29 +78,30 @@ create_fib_with_table_id (u32 table_id) fib_table->ft_flow_hash_config = IP_FLOW_HASH_DEFAULT; vnet_ip6_fib_init(fib_table->ft_index); - fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6); + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6, src); return (fib_table->ft_index); } u32 -ip6_fib_table_find_or_create_and_lock (u32 table_id) +ip6_fib_table_find_or_create_and_lock (u32 table_id, + fib_source_t src) { uword * p; p = hash_get (ip6_main.fib_index_by_table_id, table_id); if (NULL == p) - return create_fib_with_table_id(table_id); + return create_fib_with_table_id(table_id, src); - fib_table_lock(p[0], FIB_PROTOCOL_IP6); + fib_table_lock(p[0], FIB_PROTOCOL_IP6, src); return (p[0]); } u32 -ip6_fib_table_create_and_lock (void) +ip6_fib_table_create_and_lock (fib_source_t src) { - return (create_fib_with_table_id(~0)); + return (create_fib_with_table_id(~0, src)); } void @@ -588,16 +590,33 @@ ip6_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im6->fibs, ({ + fib_source_t source; + u8 *s = NULL; + fib = pool_elt_at_index(im6->v6_fibs, fib_table->ft_index); if (table_id >= 0 && table_id != (int)fib->table_id) continue; if (fib_index != ~0 && fib_index != (int)fib->index) continue; - vlib_cli_output (vm, "%s, fib_index:%d, flow hash:[%U] locks:%d", - fib_table->ft_desc, fib->index, - 
format_ip_flow_hash_config, fib_table->ft_flow_hash_config, - fib_table->ft_locks); + s = format(s, "%U, fib_index:%d, flow hash:[%U] locks:[", + format_fib_table_name, fib->index, + FIB_PROTOCOL_IP6, + fib->index, + format_ip_flow_hash_config, + fib_table->ft_flow_hash_config); + FOR_EACH_FIB_SOURCE(source) + { + if (0 != fib_table->ft_locks[source]) + { + s = format(s, "%U:%d, ", + format_fib_source, source, + fib_table->ft_locks[source]); + } + } + s = format (s, "]"); + vlib_cli_output (vm, "%V", s); + vec_free(s); /* Show summary? */ if (! verbose) diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h index aad8305c..9728eecc 100644 --- a/src/vnet/fib/ip6_fib.h +++ b/src/vnet/fib/ip6_fib.h @@ -144,8 +144,9 @@ ip6_src_lookup_for_packet (ip6_main_t * im, * \returns A pointer to the retrieved or created fib. * */ -extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip6_fib_table_create_and_lock(void); +extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id, + fib_source_t src); +extern u32 ip6_fib_table_create_and_lock(fib_source_t src); static inline ip6_fib_t * ip6_fib_get (fib_node_index_t index) diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c index ca6271fe..4eeef7ab 100644 --- a/src/vnet/fib/mpls_fib.c +++ b/src/vnet/fib/mpls_fib.c @@ -83,7 +83,8 @@ mpls_fib_index_from_table_id (u32 table_id) } static u32 -mpls_fib_create_with_table_id (u32 table_id) +mpls_fib_create_with_table_id (u32 table_id, + fib_source_t src) { dpo_id_t dpo = DPO_INVALID; fib_table_t *fib_table; @@ -107,7 +108,7 @@ mpls_fib_create_with_table_id (u32 table_id) fib_table->ft_table_id = table_id; fib_table->ft_flow_hash_config = MPLS_FLOW_HASH_DEFAULT; - fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS); + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS, src); if (INDEX_INVALID == mpls_fib_drop_dpo_index) { @@ -220,22 +221,23 @@ mpls_fib_create_with_table_id (u32 table_id) } u32 -mpls_fib_table_find_or_create_and_lock (u32 
table_id) +mpls_fib_table_find_or_create_and_lock (u32 table_id, + fib_source_t src) { u32 index; index = mpls_fib_index_from_table_id(table_id); if (~0 == index) - return mpls_fib_create_with_table_id(table_id); + return mpls_fib_create_with_table_id(table_id, src); - fib_table_lock(index, FIB_PROTOCOL_MPLS); + fib_table_lock(index, FIB_PROTOCOL_MPLS, src); return (index); } u32 -mpls_fib_table_create_and_lock (void) +mpls_fib_table_create_and_lock (fib_source_t src) { - return (mpls_fib_create_with_table_id(~0)); + return (mpls_fib_create_with_table_id(~0, src)); } void diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h index dfb8b7fc..29cd1d20 100644 --- a/src/vnet/fib/mpls_fib.h +++ b/src/vnet/fib/mpls_fib.h @@ -59,8 +59,9 @@ mpls_fib_get (fib_node_index_t index) return (pool_elt_at_index(mpls_main.mpls_fibs, index)); } -extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id); -extern u32 mpls_fib_table_create_and_lock(void); +extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id, + fib_source_t src); +extern u32 mpls_fib_table_create_and_lock(fib_source_t src); // extern mpls_fib_t * mpls_fib_find(u32 table_id); extern u32 mpls_fib_index_from_table_id(u32 table_id); diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 113728cd..419fef94 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -320,68 +320,189 @@ stats_dsunlock (void) static void vl_api_sw_interface_set_table_t_handler (vl_api_sw_interface_set_table_t * mp) { - int rv = 0; - u32 table_id = ntohl (mp->vrf_id); - u32 sw_if_index = ntohl (mp->sw_if_index); vl_api_sw_interface_set_table_reply_t *rmp; - CLIB_UNUSED (ip_interface_address_t * ia); - u32 fib_index; + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 table_id = ntohl (mp->vrf_id); + int rv = 0; VALIDATE_SW_IF_INDEX (mp); stats_dslock_with_hint (1 /* release hint */ , 4 /* tag */ ); if (mp->is_ipv6) + rv = ip_table_bind (FIB_PROTOCOL_IP6, sw_if_index, table_id, 1); + else + rv 
= ip_table_bind (FIB_PROTOCOL_IP4, sw_if_index, table_id, 1); + + stats_dsunlock (); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_TABLE_REPLY); +} + +int +ip_table_bind (fib_protocol_t fproto, + uint32_t sw_if_index, uint32_t table_id, u8 is_api) +{ + CLIB_UNUSED (ip_interface_address_t * ia); + u32 fib_index, mfib_index; + fib_source_t src; + mfib_source_t msrc; + + if (is_api) + { + src = FIB_SOURCE_API; + msrc = MFIB_SOURCE_API; + } + else + { + src = FIB_SOURCE_CLI; + msrc = MFIB_SOURCE_CLI; + } + + /* + * This is temporary whilst I do the song and dance with the CSIT version + */ + if (0 != table_id) { + fib_index = fib_table_find_or_create_and_lock (fproto, table_id, src); + mfib_index = + mfib_table_find_or_create_and_lock (fproto, table_id, msrc); + } + else + { + fib_index = 0; + mfib_index = 0; + } + + /* + * Returning an error when the table does not exist is what we want in the end. + */ + /* fib_index = fib_table_find (fproto, table_id); */ + /* mfib_index = mfib_table_find (fproto, table_id); */ + + /* if (~0 == fib_index || ~0 == mfib_index) */ + /* { */ + /* return (VNET_API_ERROR_NO_SUCH_FIB); */ + /* } */ + + if (FIB_PROTOCOL_IP6 == fproto) + { + /* + * If the interface already has an IP address, then a change in + * VRF is not allowed. The IP address applied must first be removed. + * We do not do that automatically here, since VPP has no knowledge + * of whether those subnets are valid in the destination VRF. 
+ */ /* *INDENT-OFF* */ foreach_ip_interface_address (&ip6_main.lookup_main, ia, sw_if_index, 1 /* honor unnumbered */ , ({ - rv = VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE; - goto done; + return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE); })); /* *INDENT-ON* */ - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); - ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index); - ip6_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index; + + /* + * tell those that are interested that the binding is changing. + */ + ip6_table_bind_callback_t *cb; + vec_foreach (cb, ip6_main.table_bind_callbacks) + cb->function (&ip6_main, cb->function_opaque, + sw_if_index, + fib_index, + ip6_main.fib_index_by_sw_if_index[sw_if_index]); + + if (0 == table_id) + { + /* reset back to default */ + if (0 != ip6_main.fib_index_by_sw_if_index[sw_if_index]) + fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP6, src); + if (0 != ip6_main.mfib_index_by_sw_if_index[sw_if_index]) + mfib_table_unlock (ip6_main.mfib_index_by_sw_if_index + [sw_if_index], FIB_PROTOCOL_IP6, msrc); + + } + else + { + /* we need to lock the table now it's inuse */ + fib_table_lock (fib_index, FIB_PROTOCOL_IP6, src); + mfib_table_lock (mfib_index, FIB_PROTOCOL_IP6, msrc); + } + + ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + ip6_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index; } else { + /* + * If the interface already has in IP address, then a change int + * VRF is not allowed. The IP address applied must first be removed. + * We do not do that automatically here, since VPP has no knowledge + * of whether thoses subnets are valid in the destination VRF. 
+ */ /* *INDENT-OFF* */ foreach_ip_interface_address (&ip4_main.lookup_main, ia, sw_if_index, 1 /* honor unnumbered */ , ({ - rv = VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE; - goto done; + return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE); })); /* *INDENT-ON* */ - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - table_id); vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); - ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - table_id); vec_validate (ip4_main.mfib_index_by_sw_if_index, sw_if_index); - ip4_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index; - } -done: - stats_dsunlock (); + /* + * tell those that are interested that the binding is changing. + */ + ip4_table_bind_callback_t *cb; + vec_foreach (cb, ip4_main.table_bind_callbacks) + cb->function (&ip4_main, cb->function_opaque, + sw_if_index, + fib_index, + ip4_main.fib_index_by_sw_if_index[sw_if_index]); + + if (0 == table_id) + { + /* reset back to default */ + if (0 != ip4_main.fib_index_by_sw_if_index[sw_if_index]) + fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP4, src); + if (0 != ip4_main.mfib_index_by_sw_if_index[sw_if_index]) + mfib_table_unlock (ip4_main.mfib_index_by_sw_if_index + [sw_if_index], FIB_PROTOCOL_IP4, msrc); - BAD_SW_IF_INDEX_LABEL; + } + else + { + /* we need to lock the table now it's inuse */ + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + table_id, src); - REPLY_MACRO (VL_API_SW_INTERFACE_SET_TABLE_REPLY); + mfib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + table_id, msrc); + } + + ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + ip4_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index; + } + + /* + * Temporary. 
undo the locks from the find and create at the staart + */ + if (0 != table_id) + { + fib_table_unlock (fib_index, fproto, src); + mfib_table_unlock (mfib_index, fproto, msrc); + } + + return (0); } static void diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h index 70b4ccd8..7aae73ff 100644 --- a/src/vnet/ip/ip.h +++ b/src/vnet/ip/ip.h @@ -184,6 +184,13 @@ void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index); extern vlib_node_registration_t ip4_inacl_node; extern vlib_node_registration_t ip6_inacl_node; +void ip_table_create (fib_protocol_t fproto, uint32_t table_id, u8 is_api); + +void ip_table_delete (fib_protocol_t fproto, uint32_t table_id, u8 is_api); + +int ip_table_bind (fib_protocol_t fproto, + uint32_t sw_if_index, uint32_t table_id, u8 is_api); + #endif /* included_ip_main_h */ /* diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index 8f9a8e27..decb840b 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -72,6 +72,16 @@ typedef struct uword function_opaque; } ip4_add_del_interface_address_callback_t; +typedef void (ip4_table_bind_function_t) + (struct ip4_main_t * im, + uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index); + +typedef struct +{ + ip4_table_bind_function_t *function; + uword function_opaque; +} ip4_table_bind_callback_t; + /** * @brief IPv4 main type. * @@ -117,6 +127,9 @@ typedef struct ip4_main_t ip4_add_del_interface_address_callback_t * add_del_interface_address_callbacks; + /** Functions to call when interface to table biding changes. */ + ip4_table_bind_callback_t *table_bind_callbacks; + /** Template used to generate IP4 ARP packets. 
*/ vlib_packet_template_t ip4_arp_request_packet_template; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 2d48e8a9..ec4287bb 100755 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1198,8 +1198,10 @@ ip4_lookup_init (vlib_main_t * vm) ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0); /* Create FIB with index 0 and table id of 0. */ - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0); - mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, + FIB_SOURCE_DEFAULT_ROUTE); + mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, + MFIB_SOURCE_DEFAULT_ROUTE); { pg_node_t *pn; @@ -2794,101 +2796,6 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = { VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain); /* *INDENT-ON */ -static clib_error_t * -add_del_interface_table (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - vnet_main_t *vnm = vnet_get_main (); - ip_interface_address_t *ia; - clib_error_t *error = 0; - u32 sw_if_index, table_id; - - sw_if_index = ~0; - - if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) - { - error = clib_error_return (0, "unknown interface `%U'", - format_unformat_error, input); - goto done; - } - - if (unformat (input, "%d", &table_id)) - ; - else - { - error = clib_error_return (0, "expected table id `%U'", - format_unformat_error, input); - goto done; - } - - /* - * If the interface already has in IP address, then a change int - * VRF is not allowed. The IP address applied must first be removed. - * We do not do that automatically here, since VPP has no knowledge - * of whether thoses subnets are valid in the destination VRF. 
- */ - /* *INDENT-OFF* */ - foreach_ip_interface_address (&ip4_main.lookup_main, - ia, sw_if_index, - 1 /* honor unnumbered */, - ({ - ip4_address_t * a; - - a = ip_interface_address_get_address (&ip4_main.lookup_main, ia); - error = clib_error_return (0, "interface %U has address %U", - format_vnet_sw_if_index_name, vnm, - sw_if_index, - format_ip4_address, a); - goto done; - })); - /* *INDENT-ON* */ - -{ - ip4_main_t *im = &ip4_main; - u32 fib_index; - - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); - - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - im->fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); - vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); - im->mfib_index_by_sw_if_index[sw_if_index] = fib_index; -} - -done: -return error; -} - -/*? - * Place the indicated interface into the supplied IPv4 FIB table (also known - * as a VRF). If the FIB table does not exist, this command creates it. To - * display the current IPv4 FIB table, use the command 'show ip fib'. - * FIB table will only be displayed if a route has been added to the table, or - * an IP Address is assigned to an interface in the table (which adds a route - * automatically). - * - * @note IP addresses added after setting the interface IP table are added to - * the indicated FIB table. If an IP address is added prior to changing the - * table then this is an error. The control plane must remove these addresses - * first and then change the table. VPP will not automatically move the - * addresses from the old to the new table as it does not know the validity - * of such a change. 
- * - * @cliexpar - * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id): - * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2} - ?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = -{ - .path = "set interface ip table", - .function = add_del_interface_table, - .short_help = "set interface ip table ", -}; -/* *INDENT-ON* */ - int ip4_lookup_validate (ip4_address_t * a, u32 fib_index0) { diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c index ae836a11..9aa880ae 100644 --- a/src/vnet/ip/ip4_source_and_port_range_check.c +++ b/src/vnet/ip/ip4_source_and_port_range_check.c @@ -1126,6 +1126,14 @@ ip6_source_and_port_range_check_add_del (ip6_address_t * address, u16 * low_ports, u16 * high_ports, int is_add) { + uint32_t fib_index; + + fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id); + + ASSERT (~0 != fib_index); + + fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY); + return 0; } @@ -1138,7 +1146,8 @@ ip4_source_and_port_range_check_add_del (ip4_address_t * address, { u32 fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + FIB_SOURCE_CLASSIFY); if (is_add == 0) { diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index fa922725..8aef53a9 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -103,6 +103,16 @@ typedef struct uword function_opaque; } ip6_add_del_interface_address_callback_t; +typedef void (ip6_table_bind_function_t) + (struct ip6_main_t * im, + uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index); + +typedef struct +{ + ip6_table_bind_function_t *function; + uword function_opaque; +} ip6_table_bind_callback_t; + /** * Enumeration of the FIB table instance types */ @@ -183,6 +193,9 @@ typedef struct ip6_main_t ip6_add_del_interface_address_callback_t * 
add_del_interface_address_callbacks; + /** Functions to call when interface to table biding changes. */ + ip6_table_bind_callback_t *table_bind_callbacks; + /* Template used to generate IP6 neighbor solicitation packets. */ vlib_packet_template_t discover_neighbor_packet_template; diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 5832bd0b..1002f6b6 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -2999,8 +2999,10 @@ ip6_lookup_init (vlib_main_t * vm) im->lookup_table_nbuckets, im->lookup_table_size); /* Create FIB with index 0 and table id of 0. */ - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0); - mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0, + FIB_SOURCE_DEFAULT_ROUTE); + mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0, + MFIB_SOURCE_DEFAULT_ROUTE); { pg_node_t *pn; @@ -3045,103 +3047,6 @@ ip6_lookup_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (ip6_lookup_init); -static clib_error_t * -add_del_ip6_interface_table (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vnet_main_t *vnm = vnet_get_main (); - ip_interface_address_t *ia; - clib_error_t *error = 0; - u32 sw_if_index, table_id; - - sw_if_index = ~0; - - if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) - { - error = clib_error_return (0, "unknown interface `%U'", - format_unformat_error, input); - goto done; - } - - if (unformat (input, "%d", &table_id)) - ; - else - { - error = clib_error_return (0, "expected table id `%U'", - format_unformat_error, input); - goto done; - } - - /* - * If the interface already has in IP address, then a change int - * VRF is not allowed. The IP address applied must first be removed. - * We do not do that automatically here, since VPP has no knowledge - * of whether thoses subnets are valid in the destination VRF. 
- */ - /* *INDENT-OFF* */ - foreach_ip_interface_address (&ip6_main.lookup_main, - ia, sw_if_index, - 1 /* honor unnumbered */, - ({ - ip4_address_t * a; - - a = ip_interface_address_get_address (&ip6_main.lookup_main, ia); - error = clib_error_return (0, "interface %U has address %U", - format_vnet_sw_if_index_name, vnm, - sw_if_index, - format_ip6_address, a); - goto done; - })); - /* *INDENT-ON* */ - - { - u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); - - vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); - ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); - - vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index); - ip6_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index; - } - - -done: - return error; -} - -/*? - * Place the indicated interface into the supplied IPv6 FIB table (also known - * as a VRF). If the FIB table does not exist, this command creates it. To - * display the current IPv6 FIB table, use the command 'show ip6 fib'. - * FIB table will only be displayed if a route has been added to the table, or - * an IP Address is assigned to an interface in the table (which adds a route - * automatically). - * - * @note IP addresses added after setting the interface IP table are added to - * the indicated FIB table. If an IP address is added prior to changing the - * table then this is an error. The control plane must remove these addresses - * first and then change the table. VPP will not automatically move the - * addresses from the old to the new table as it does not know the validity - * of such a change. 
- * - * @cliexpar - * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id): - * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2} - ?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = -{ - .path = "set interface ip6 table", - .function = add_del_ip6_interface_table, - .short_help = "set interface ip6 table " -}; -/* *INDENT-ON* */ - void ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip, u8 * mac) diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 62cf23ac..56f33ac8 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -250,6 +250,26 @@ format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) return s; } +static void +ip6_neighbor_adj_fib_remove (ip6_neighbor_t * n, uint32_t fib_index) +{ + if (FIB_NODE_INDEX_INVALID != n->fib_entry_index) + { + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = n->key.ip6_address, + }; + fib_table_entry_path_remove (fib_index, + &pfx, + FIB_SOURCE_ADJ, + DPO_PROTO_IP6, + &pfx.fp_addr, + n->key.sw_if_index, ~0, + 1, FIB_ROUTE_PATH_FLAG_NONE); + } +} + static clib_error_t * ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) @@ -273,22 +293,10 @@ ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, { n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]); mhash_unset (&nm->neighbor_index_by_key, &n->key, 0); - if (FIB_NODE_INDEX_INVALID != n->fib_entry_index) - { - fib_prefix_t pfx = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr.ip6 = n->key.ip6_address, - }; - fib_table_entry_path_remove - (ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index), - &pfx, - FIB_SOURCE_ADJ, - DPO_PROTO_IP6, - &pfx.fp_addr, - n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); - pool_put (nm->neighbor_pool, n); - } + ip6_neighbor_adj_fib_remove (n, + ip6_fib_table_get_index_for_sw_if_index + (n->key.sw_if_index)); + pool_put 
(nm->neighbor_pool, n); } vec_free (to_delete); } @@ -579,6 +587,24 @@ ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) } } + +static void +ip6_neighbor_adj_fib_add (ip6_neighbor_t * n, uint32_t fib_index) +{ + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = n->key.ip6_address, + }; + + n->fib_entry_index = + fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + DPO_PROTO_IP6, &pfx.fp_addr, + n->key.sw_if_index, ~0, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); +} + int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, @@ -633,21 +659,9 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, */ if (!is_no_fib_entry) { - fib_prefix_t pfx = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr.ip6 = k.ip6_address, - }; - u32 fib_index; - - fib_index = - ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index); - n->fib_entry_index = - fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, - FIB_ENTRY_FLAG_ATTACHED, - DPO_PROTO_IP6, &pfx.fp_addr, - n->key.sw_if_index, ~0, 1, NULL, - FIB_ROUTE_PATH_FLAG_NONE); + ip6_neighbor_adj_fib_add (n, + ip6_fib_table_get_index_for_sw_if_index + (n->key.sw_if_index)); } else { @@ -3843,6 +3857,33 @@ ip6_set_neighbor_limit (u32 neighbor_limit) return 0; } +static void +ip6_neighbor_table_bind (ip6_main_t * im, + uword opaque, + u32 sw_if_index, + u32 new_fib_index, u32 old_fib_index) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_neighbor_t *n = NULL; + u32 i, *to_re_add = 0; + + /* *INDENT-OFF* */ + pool_foreach (n, nm->neighbor_pool, + ({ + if (n->key.sw_if_index == sw_if_index) + vec_add1 (to_re_add, n - nm->neighbor_pool); + })); + /* *INDENT-ON* */ + + for (i = 0; i < vec_len (to_re_add); i++) + { + n = pool_elt_at_index (nm->neighbor_pool, to_re_add[i]); + ip6_neighbor_adj_fib_remove (n, old_fib_index); + ip6_neighbor_adj_fib_add (n, new_fib_index); + } + vec_free (to_re_add); +} + static 
clib_error_t * ip6_neighbor_init (vlib_main_t * vm) { @@ -3874,6 +3915,11 @@ ip6_neighbor_init (vlib_main_t * vm) cb.function_opaque = 0; vec_add1 (im->add_del_interface_address_callbacks, cb); + ip6_table_bind_callback_t cbt; + cbt.function = ip6_neighbor_table_bind; + cbt.function_opaque = 0; + vec_add1 (im->table_bind_callbacks, cbt); + mhash_init (&nm->pending_resolutions_by_address, /* value size */ sizeof (uword), /* key size */ sizeof (ip6_address_t)); diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index bba65ab4..384ec3e0 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -699,12 +699,58 @@ vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp, REPLY_MACRO (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY); } +void +ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api) +{ + u32 fib_index, mfib_index; + + /* + * ignore action on the default table - this is always present + * and cannot be added nor deleted from the API + */ + if (0 != table_id) + { + /* + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + * The FIB index for unicast and multicast is not necessarily the + * same, since internal VPP systesm (like LISP and SR) create + * their own unicast tables. + */ + fib_index = fib_table_find (fproto, table_id); + mfib_index = mfib_table_find (fproto, table_id); + + if (~0 != fib_index) + { + fib_table_unlock (fib_index, fproto, + (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI)); + } + if (~0 != mfib_index) + { + mfib_table_unlock (mfib_index, fproto, + (is_api ? MFIB_SOURCE_API : MFIB_SOURCE_CLI)); + } + } +} + void vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp) { vl_api_ip_table_add_del_reply_t *rmp; + fib_protocol_t fproto = (mp->is_ipv6 ? 
FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4); + u32 table_id = ntohl (mp->table_id); int rv = 0; + if (mp->is_add) + { + ip_table_create (fproto, table_id, 1); + } + else + { + ip_table_delete (fproto, table_id, 1); + } + REPLY_MACRO (VL_API_IP_TABLE_ADD_DEL_REPLY); } @@ -866,18 +912,21 @@ add_del_route_check (fib_protocol_t table_proto, u32 next_hop_sw_if_index, dpo_proto_t next_hop_table_proto, u32 next_hop_table_id, - u8 create_missing_tables, u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index) { vnet_main_t *vnm = vnet_get_main (); + /* Temporaray whilst I do the CSIT dance */ + u8 create_missing_tables = 1; + *fib_index = fib_table_find (table_proto, ntohl (table_id)); if (~0 == *fib_index) { if (create_missing_tables) { *fib_index = fib_table_find_or_create_and_lock (table_proto, - ntohl (table_id)); + ntohl (table_id), + FIB_SOURCE_API); } else { @@ -918,12 +967,14 @@ add_del_route_check (fib_protocol_t table_proto, *next_hop_fib_index = mfib_table_find_or_create_and_lock (fib_nh_proto, ntohl - (next_hop_table_id)); + (next_hop_table_id), + MFIB_SOURCE_API); else *next_hop_fib_index = fib_table_find_or_create_and_lock (fib_nh_proto, ntohl - (next_hop_table_id)); + (next_hop_table_id), + FIB_SOURCE_API); } else { @@ -948,8 +999,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->next_hop_sw_if_index, DPO_PROTO_IP4, mp->next_hop_table_id, - mp->create_vrf_if_needed, 0, - &fib_index, &next_hop_fib_index); + 0, &fib_index, &next_hop_fib_index); if (0 != rv) return (rv); @@ -1008,8 +1058,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->next_hop_sw_if_index, DPO_PROTO_IP6, mp->next_hop_table_id, - mp->create_vrf_if_needed, 0, - &fib_index, &next_hop_fib_index); + 0, &fib_index, &next_hop_fib_index); if (0 != rv) return (rv); @@ -1074,27 +1123,57 @@ vl_api_ip_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) REPLY_MACRO (VL_API_IP_ADD_DEL_ROUTE_REPLY); } +void +ip_table_create (fib_protocol_t fproto, u32 table_id, u8 
is_api) +{ + u32 fib_index, mfib_index; + + /* + * ignore action on the default table - this is always present + * and cannot be added nor deleted from the API + */ + if (0 != table_id) + { + /* + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + * The FIB index for unicast and multicast is not necessarily the + * same, since internal VPP systesm (like LISP and SR) create + * their own unicast tables. + */ + fib_index = fib_table_find (fproto, table_id); + mfib_index = mfib_table_find (fproto, table_id); + + if (~0 == fib_index) + { + fib_table_find_or_create_and_lock (fproto, table_id, + (is_api ? + FIB_SOURCE_API : + FIB_SOURCE_CLI)); + } + if (~0 == mfib_index) + { + mfib_table_find_or_create_and_lock (fproto, table_id, + (is_api ? + MFIB_SOURCE_API : + MFIB_SOURCE_CLI)); + } + } +} + static int add_del_mroute_check (fib_protocol_t table_proto, u32 table_id, - u32 next_hop_sw_if_index, - u8 is_local, u8 create_missing_tables, u32 * fib_index) + u32 next_hop_sw_if_index, u8 is_local, u32 * fib_index) { vnet_main_t *vnm = vnet_get_main (); *fib_index = mfib_table_find (table_proto, ntohl (table_id)); if (~0 == *fib_index) { - if (create_missing_tables) - { - *fib_index = mfib_table_find_or_create_and_lock (table_proto, - ntohl (table_id)); - } - else - { - /* No such VRF, and we weren't asked to create one */ - return VNET_API_ERROR_NO_SUCH_FIB; - } + /* No such table */ + return VNET_API_ERROR_NO_SUCH_FIB; } if (~0 != ntohl (next_hop_sw_if_index)) @@ -1163,8 +1242,7 @@ api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp) rv = add_del_mroute_check (fproto, mp->table_id, mp->next_hop_sw_if_index, - mp->is_local, - mp->create_vrf_if_needed, &fib_index); + mp->is_local, &fib_index); if (0 != rv) return (rv); diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 5537bb04..667c6791 100755 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -687,6 +687,78 @@ done: 
return error; } +clib_error_t * +vnet_ip_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, + vlib_cli_command_t * cmd, fib_protocol_t fproto) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + u32 table_id, is_add; + + is_add = 1; + table_id = ~0; + + /* Get a line of input. */ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "add")) + is_add = 1; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == table_id) + { + error = clib_error_return (0, "No table id"); + goto done; + } + else if (0 == table_id) + { + error = clib_error_return (0, "Can't change the default table"); + goto done; + } + else + { + if (is_add) + { + ip_table_create (fproto, table_id, 0); + } + else + { + ip_table_delete (fproto, table_id, 0); + } + } + +done: + unformat_free (line_input); + return error; +} + +clib_error_t * +vnet_ip4_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP4)); +} + +clib_error_t * +vnet_ip6_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6)); +} + /* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = { .path = "ip", @@ -749,6 +821,159 @@ VLIB_CLI_COMMAND (ip_route_command, static) = { .function = vnet_ip_route_cmd, .is_mp_safe = 1, }; + +/* *INDENT-ON* */ +/*? + * This command is used to add or delete IPv4 Tables. All + * Tables must be explicitly added before that can be used. 
Creating a + * table will add both unicast and multicast FIBs + * + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip4_table_command, static) = { + .path = "ip table", + .short_help = "ip table [add|del] ", + .function = vnet_ip4_table_cmd, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +/* *INDENT-ON* */ +/*? + * This command is used to add or delete IPv4 Tables. All + * Tables must be explicitly added before that can be used. Creating a + * table will add both unicast and multicast FIBs + * + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_table_command, static) = { + .path = "ip6 table", + .short_help = "ip6 table [add|del] ", + .function = vnet_ip6_table_cmd, + .is_mp_safe = 1, +}; + +static clib_error_t * +ip_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd, + fib_protocol_t fproto) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index, table_id; + int rv; + + sw_if_index = ~0; + + if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat (input, "%d", &table_id)) + ; + else + { + error = clib_error_return (0, "expected table id `%U'", + format_unformat_error, input); + goto done; + } + + rv = ip_table_bind (fproto, sw_if_index, table_id, 0); + + if (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE == rv) + { + error = clib_error_return (0, "IP addresses are still present on %U", + format_vnet_sw_if_index_name, + vnet_get_main(), + sw_if_index); + } + else if (VNET_API_ERROR_NO_SUCH_FIB == rv) + { + error = clib_error_return (0, "no such table %d", table_id); + } + else if (0 != rv) + { + error = clib_error_return (0, "unknown error"); + } + + done: + return error; +} + +static clib_error_t * +ip4_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + return (ip_table_bind_cmd (vm , input, cmd, FIB_PROTOCOL_IP4)); +} + 
+static clib_error_t * +ip6_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + return (ip_table_bind_cmd (vm , input, cmd, FIB_PROTOCOL_IP6)); +} + +/*? + * Place the indicated interface into the supplied IPv4 FIB table (also known + * as a VRF). If the FIB table does not exist, this command creates it. To + * display the current IPv4 FIB table, use the command 'show ip fib'. + * FIB table will only be displayed if a route has been added to the table, or + * an IP Address is assigned to an interface in the table (which adds a route + * automatically). + * + * @note IP addresses added after setting the interface IP table are added to + * the indicated FIB table. If an IP address is added prior to changing the + * table then this is an error. The control plane must remove these addresses + * first and then change the table. VPP will not automatically move the + * addresses from the old to the new table as it does not know the validity + * of such a change. + * + * @cliexpar + * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id): + * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2} + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = +{ + .path = "set interface ip table", + .function = ip4_table_bind_cmd, + .short_help = "set interface ip table ", +}; +/* *INDENT-ON* */ + +/*? + * Place the indicated interface into the supplied IPv6 FIB table (also known + * as a VRF). If the FIB table does not exist, this command creates it. To + * display the current IPv6 FIB table, use the command 'show ip6 fib'. + * FIB table will only be displayed if a route has been added to the table, or + * an IP Address is assigned to an interface in the table (which adds a route + * automatically). + * + * @note IP addresses added after setting the interface IP table are added to + * the indicated FIB table. 
If an IP address is added prior to changing the + * table then this is an error. The control plane must remove these addresses + * first and then change the table. VPP will not automatically move the + * addresses from the old to the new table as it does not know the validity + * of such a change. + * + * @cliexpar + * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id): + * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2} + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = +{ + .path = "set interface ip6 table", + .function = ip6_table_bind_cmd, + .short_help = "set interface ip6 table " +}; /* *INDENT-ON* */ clib_error_t * diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index e832c23f..a0c05e85 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -505,12 +505,14 @@ lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id) { fib_node_index_t fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id, + FIB_SOURCE_LISP); vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; ip4_sw_interface_enable_disable (sw_if_index, 1); - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id, + FIB_SOURCE_LISP); vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; ip6_sw_interface_enable_disable (sw_if_index, 1); @@ -530,7 +532,7 @@ lisp_gpe_tenant_del_default_routes (u32 table_id) fib_index = fib_table_find (prefix.fp_proto, table_id); fib_table_entry_special_remove (fib_index, &prefix, FIB_SOURCE_LISP); - fib_table_unlock (fib_index, prefix.fp_proto); + fib_table_unlock (fib_index, prefix.fp_proto, 
FIB_SOURCE_LISP); } } @@ -549,7 +551,8 @@ lisp_gpe_tenant_add_default_routes (u32 table_id) /* * Add a deafult route that results in a control plane punt DPO */ - fib_index = fib_table_find_or_create_and_lock (prefix.fp_proto, table_id); + fib_index = fib_table_find_or_create_and_lock (prefix.fp_proto, table_id, + FIB_SOURCE_LISP); fib_table_entry_special_dpo_add (fib_index, &prefix, FIB_SOURCE_LISP, FIB_ENTRY_FLAG_EXCLUSIVE, lisp_cp_dpo_get (fib_proto_to_dpo diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c index d7d3cb86..0a8dc039 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c +++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c @@ -66,6 +66,7 @@ ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix) /* create a new src FIB. */ src_fib_index = fib_table_create_and_lock (dst_fib_prefix.fp_proto, + FIB_SOURCE_LISP, "LISP-src for [%d,%U]", dst_fib_index, format_fib_prefix, &dst_fib_prefix); @@ -180,7 +181,8 @@ ip_src_dst_fib_del_route (u32 src_fib_index, */ fib_table_entry_special_remove (dst_fib_index, &dst_fib_prefix, FIB_SOURCE_LISP); - fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto); + fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto, + FIB_SOURCE_LISP); } } @@ -544,7 +546,8 @@ add_ip_fwd_entry (lisp_gpe_main_t * lgm, lfe->tenant = lisp_gpe_tenant_find_or_create (lfe->key->vni); lfe->eid_table_id = a->table_id; lfe->eid_fib_index = fib_table_find_or_create_and_lock (fproto, - lfe->eid_table_id); + lfe->eid_table_id, + FIB_SOURCE_LISP); lfe->is_src_dst = a->is_src_dst; if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type) @@ -578,7 +581,7 @@ del_ip_fwd_entry_i (lisp_gpe_main_t * lgm, lisp_gpe_fwd_entry_t * lfe) fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ? 
FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); - fib_table_unlock (lfe->eid_fib_index, fproto); + fib_table_unlock (lfe->eid_fib_index, fproto, FIB_SOURCE_LISP); hash_unset_mem (lgm->lisp_gpe_fwd_entries, lfe->key); clib_mem_free (lfe->key); diff --git a/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c index b234d9dc..26664f53 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c +++ b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c @@ -89,13 +89,15 @@ lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id) { fib_node_index_t fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id, + FIB_SOURCE_LISP); ASSERT (FIB_NODE_INDEX_INVALID != fib_index); vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id, + FIB_SOURCE_LISP); ASSERT (FIB_NODE_INDEX_INVALID != fib_index); vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); @@ -105,9 +107,13 @@ lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id) static void lisp_gpe_sub_interface_unset_table (u32 sw_if_index, u32 table_id) { + fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP4, FIB_SOURCE_LISP); ip4_main.fib_index_by_sw_if_index[sw_if_index] = 0; ip4_sw_interface_enable_disable (sw_if_index, 0); + fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP6, FIB_SOURCE_LISP); ip6_main.fib_index_by_sw_if_index[sw_if_index] = 0; ip6_sw_interface_enable_disable (sw_if_index, 0); } @@ -185,6 +191,7 @@ lisp_gpe_sub_interface_unlock (index_t l3si) l3s = lisp_gpe_sub_interface_get_i (l3si); + ASSERT (0 != l3s->locks); l3s->locks--; if (0 == l3s->locks) diff --git 
a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c index 1849a3a4..b2482580 100644 --- a/src/vnet/mfib/ip4_mfib.c +++ b/src/vnet/mfib/ip4_mfib.c @@ -33,7 +33,8 @@ static const mfib_prefix_t ip4_specials[] = { }; static u32 -ip4_create_mfib_with_table_id (u32 table_id) +ip4_create_mfib_with_table_id (u32 table_id, + mfib_source_t src) { mfib_table_t *mfib_table; @@ -53,7 +54,7 @@ ip4_create_mfib_with_table_id (u32 table_id) mfib_table->v4.table_id = table_id; - mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP4); + mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP4, src); /* * add the special entries into the new FIB @@ -113,14 +114,15 @@ ip4_mfib_table_destroy (ip4_mfib_t *mfib) } u32 -ip4_mfib_table_find_or_create_and_lock (u32 table_id) +ip4_mfib_table_find_or_create_and_lock (u32 table_id, + mfib_source_t src) { u32 index; index = ip4_mfib_index_from_table_id(table_id); if (~0 == index) - return ip4_create_mfib_with_table_id(table_id); - mfib_table_lock(index, FIB_PROTOCOL_IP4); + return ip4_create_mfib_with_table_id(table_id, src); + mfib_table_lock(index, FIB_PROTOCOL_IP4, src); return (index); } diff --git a/src/vnet/mfib/ip4_mfib.h b/src/vnet/mfib/ip4_mfib.h index ea682651..e31fb744 100644 --- a/src/vnet/mfib/ip4_mfib.h +++ b/src/vnet/mfib/ip4_mfib.h @@ -72,8 +72,9 @@ ip4_mfib_get (u32 index) * @returns A pointer to the retrieved or created fib. 
* */ -extern u32 ip4_mfib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip4_mfib_table_create_and_lock(void); +extern u32 ip4_mfib_table_find_or_create_and_lock(u32 table_id, + mfib_source_t src); +extern u32 ip4_mfib_table_create_and_lock(mfib_source_t src); static inline u32 ip4_mfib_index_from_table_id (u32 table_id) diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c index 5e48e919..e4861330 100644 --- a/src/vnet/mfib/ip6_mfib.c +++ b/src/vnet/mfib/ip6_mfib.c @@ -151,7 +151,8 @@ static const ip6_mfib_special_t ip6_mfib_specials[] = static u32 -ip6_create_mfib_with_table_id (u32 table_id) +ip6_create_mfib_with_table_id (u32 table_id, + mfib_source_t src) { mfib_table_t *mfib_table; mfib_prefix_t pfx = { @@ -182,7 +183,7 @@ ip6_create_mfib_with_table_id (u32 table_id) mfib_table->v6.table_id = table_id; - mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6); + mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6, src); mfib_table->v6.rhead = clib_mem_alloc_aligned (sizeof(*mfib_table->v6.rhead), @@ -297,14 +298,15 @@ ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable) } u32 -ip6_mfib_table_find_or_create_and_lock (u32 table_id) +ip6_mfib_table_find_or_create_and_lock (u32 table_id, + mfib_source_t src) { u32 index; index = ip6_mfib_index_from_table_id(table_id); if (~0 == index) - return ip6_create_mfib_with_table_id(table_id); - mfib_table_lock(index, FIB_PROTOCOL_IP6); + return ip6_create_mfib_with_table_id(table_id, src); + mfib_table_lock(index, FIB_PROTOCOL_IP6, src); return (index); } diff --git a/src/vnet/mfib/ip6_mfib.h b/src/vnet/mfib/ip6_mfib.h index adaa7ec2..ea81b553 100644 --- a/src/vnet/mfib/ip6_mfib.h +++ b/src/vnet/mfib/ip6_mfib.h @@ -79,8 +79,9 @@ ip6_mfib_get (u32 index) * @returns A pointer to the retrieved or created fib. 
* */ -extern u32 ip6_mfib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip6_mfib_table_create_and_lock(void); +extern u32 ip6_mfib_table_find_or_create_and_lock(u32 table_id, + mfib_source_t src); +extern u32 ip6_mfib_table_create_and_lock(mfib_source_t src); static inline diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c index 804e10ab..2302b9a1 100644 --- a/src/vnet/mfib/mfib_entry.c +++ b/src/vnet/mfib/mfib_entry.c @@ -334,6 +334,17 @@ mfib_entry_get_best_src (const mfib_entry_t *mfib_entry) return (bsrc); } +int +mfib_entry_is_sourced (fib_node_index_t mfib_entry_index, + mfib_source_t source) +{ + mfib_entry_t *mfib_entry; + + mfib_entry = mfib_entry_get(mfib_entry_index); + + return (NULL != mfib_entry_src_find(mfib_entry, source, NULL)); +} + static void mfib_entry_src_flush (mfib_entry_src_t *msrc) { diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h index d4377878..96ee49f7 100644 --- a/src/vnet/mfib/mfib_entry.h +++ b/src/vnet/mfib/mfib_entry.h @@ -130,6 +130,8 @@ extern void mfib_entry_unlock(fib_node_index_t fib_entry_index); extern void mfib_entry_get_prefix(fib_node_index_t fib_entry_index, mfib_prefix_t *pfx); extern u32 mfib_entry_get_fib_index(fib_node_index_t fib_entry_index); +extern int mfib_entry_is_sourced(fib_node_index_t fib_entry_index, + mfib_source_t source); extern void mfib_entry_contribute_forwarding( fib_node_index_t mfib_entry_index, diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c index 7ffe8941..e5550adc 100644 --- a/src/vnet/mfib/mfib_table.c +++ b/src/vnet/mfib/mfib_table.c @@ -424,7 +424,8 @@ mfib_table_find (fib_protocol_t proto, u32 mfib_table_find_or_create_and_lock (fib_protocol_t proto, - u32 table_id) + u32 table_id, + mfib_source_t src) { mfib_table_t *mfib_table; fib_node_index_t fi; @@ -432,10 +433,10 @@ mfib_table_find_or_create_and_lock (fib_protocol_t proto, switch (proto) { case FIB_PROTOCOL_IP4: - fi = 
ip4_mfib_table_find_or_create_and_lock(table_id); + fi = ip4_mfib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_IP6: - fi = ip6_mfib_table_find_or_create_and_lock(table_id); + fi = ip6_mfib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_MPLS: default: @@ -451,6 +452,59 @@ mfib_table_find_or_create_and_lock (fib_protocol_t proto, return (fi); } +/** + * @brief Table flush context. Store the indicies of matching FIB entries + * that need to be removed. + */ +typedef struct mfib_table_flush_ctx_t_ +{ + /** + * The list of entries to flush + */ + fib_node_index_t *mftf_entries; + + /** + * The source we are flushing + */ + mfib_source_t mftf_source; +} mfib_table_flush_ctx_t; + +static int +mfib_table_flush_cb (fib_node_index_t mfib_entry_index, + void *arg) +{ + mfib_table_flush_ctx_t *ctx = arg; + + if (mfib_entry_is_sourced(mfib_entry_index, ctx->mftf_source)) + { + vec_add1(ctx->mftf_entries, mfib_entry_index); + } + return (1); +} + +void +mfib_table_flush (u32 mfib_index, + fib_protocol_t proto, + mfib_source_t source) +{ + fib_node_index_t *mfib_entry_index; + mfib_table_flush_ctx_t ctx = { + .mftf_entries = NULL, + .mftf_source = source, + }; + + mfib_table_walk(mfib_index, proto, + mfib_table_flush_cb, + &ctx); + + vec_foreach(mfib_entry_index, ctx.mftf_entries) + { + mfib_table_entry_delete_index(*mfib_entry_index, source); + } + + vec_free(ctx.mftf_entries); +} + static void mfib_table_destroy (mfib_table_t *mfib_table) { @@ -472,27 +526,43 @@ mfib_table_destroy (mfib_table_t *mfib_table) void mfib_table_unlock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + mfib_source_t source) { mfib_table_t *mfib_table; mfib_table = mfib_table_get(fib_index, proto); - mfib_table->mft_locks--; + mfib_table->mft_locks[source]--; + mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]--; + + if (0 == mfib_table->mft_locks[source]) + { + /* + * The source no longer needs the table. 
flush any routes + * from it just in case + */ + mfib_table_flush(fib_index, proto, source); + } - if (0 == mfib_table->mft_locks) + if (0 == mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]) { - mfib_table_destroy(mfib_table); + /* + * no more locak from any source - kill it + */ + mfib_table_destroy(mfib_table); } } void mfib_table_lock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + mfib_source_t source) { mfib_table_t *mfib_table; mfib_table = mfib_table_get(fib_index, proto); - mfib_table->mft_locks++; + mfib_table->mft_locks[source]++; + mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]++; } void diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h index 83aa04ef..c6b0b097 100644 --- a/src/vnet/mfib/mfib_table.h +++ b/src/vnet/mfib/mfib_table.h @@ -22,6 +22,12 @@ #include +/** + * Keep a lock per-source and a total + */ +#define MFIB_TABLE_N_LOCKS (MFIB_N_SOURCES+1) +#define MFIB_TABLE_TOTAL_LOCKS MFIB_N_SOURCES + /** * @brief * A protocol Independent IP multicast FIB table @@ -47,7 +53,7 @@ typedef struct mfib_table_t_ /** * number of locks on the table */ - u16 mft_locks; + u16 mft_locks[MFIB_TABLE_N_LOCKS]; /** * Table ID (hash key) for this FIB. @@ -259,7 +265,8 @@ extern fib_node_index_t mfib_table_entry_special_add(u32 fib_index, * the source to flush */ extern void mfib_table_flush(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + mfib_source_t source); /** * @brief @@ -307,9 +314,13 @@ extern u32 mfib_table_find(fib_protocol_t proto, u32 table_id); * * @return fib_index * The index of the FIB + * + * @param source + * The ID of the client/source. */ extern u32 mfib_table_find_or_create_and_lock(fib_protocol_t proto, - u32 table_id); + u32 table_id, + mfib_source_t source); /** @@ -321,9 +332,13 @@ extern u32 mfib_table_find_or_create_and_lock(fib_protocol_t proto, * * @paran proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. 
*/ extern void mfib_table_unlock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + mfib_source_t source); /** * @brief @@ -335,9 +350,13 @@ extern void mfib_table_unlock(u32 fib_index, * * @paran proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. */ extern void mfib_table_lock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + mfib_source_t source); /** * @brief diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c index 57787eca..3055844d 100644 --- a/src/vnet/mfib/mfib_test.c +++ b/src/vnet/mfib/mfib_test.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -366,7 +367,7 @@ mfib_test_i (fib_protocol_t PROTO, MFIB_TEST(3 == adj_mcast_db_size(), "3 MCAST adjs"); /* Find or create FIB table 11 */ - fib_index = mfib_table_find_or_create_and_lock(PROTO, 11); + fib_index = mfib_table_find_or_create_and_lock(PROTO, 11, MFIB_SOURCE_API); mfib_prefix_t pfx_dft = { .fp_len = 0, @@ -1113,9 +1114,10 @@ mfib_test_i (fib_protocol_t PROTO, /* * MPLS enable an interface so we get the MPLS table created */ + mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 1); + 1, 0); lfei = fib_table_entry_update_one_path(0, // default MPLS Table &pfx_3500, @@ -1192,7 +1194,7 @@ mfib_test_i (fib_protocol_t PROTO, /* * Unlock the table - it's the last lock so should be gone thereafter */ - mfib_table_unlock(fib_index, PROTO); + mfib_table_unlock(fib_index, PROTO, MFIB_SOURCE_API); MFIB_TEST((FIB_NODE_INDEX_INVALID == mfib_table_find(PROTO, fib_index)), @@ -1207,7 +1209,8 @@ mfib_test_i (fib_protocol_t PROTO, */ mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 0); + 0, 0); + mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); /* * test we've leaked no resources diff --git a/src/vnet/mfib/mfib_types.h b/src/vnet/mfib/mfib_types.h index 
863fad16..50aede04 100644 --- a/src/vnet/mfib/mfib_types.h +++ b/src/vnet/mfib/mfib_types.h @@ -166,9 +166,10 @@ typedef enum mfib_source_t_ MFIB_SOURCE_VXLAN, MFIB_SOURCE_DHCP, MFIB_SOURCE_SRv6, - MFIB_SOURCE_DEFAULT_ROUTE, MFIB_SOURCE_GTPU, MFIB_SOURCE_VXLAN_GPE, + MFIB_SOURCE_RR, + MFIB_SOURCE_DEFAULT_ROUTE, } mfib_source_t; #define MFIB_SOURCE_NAMES { \ @@ -178,11 +179,14 @@ typedef enum mfib_source_t_ [MFIB_SOURCE_DHCP] = "DHCP", \ [MFIB_SOURCE_VXLAN] = "VXLAN", \ [MFIB_SOURCE_SRv6] = "SRv6", \ - [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \ [MFIB_SOURCE_GTPU] = "GTPU", \ [MFIB_SOURCE_VXLAN_GPE] = "VXLAN-GPE", \ + [MFIB_SOURCE_RR] = "Recursive-resolution", \ + [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \ } +#define MFIB_N_SOURCES (MFIB_SOURCE_DEFAULT_ROUTE) + /** * \brief Compare two prefixes for equality */ diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c index a085aaa2..d7c8e7d3 100644 --- a/src/vnet/mpls/interface.c +++ b/src/vnet/mpls/interface.c @@ -35,25 +35,33 @@ mpls_sw_interface_is_enabled (u32 sw_if_index) return (mm->mpls_enabled_by_sw_if_index[sw_if_index]); } -void +int mpls_sw_interface_enable_disable (mpls_main_t * mm, u32 sw_if_index, - u8 is_enable) + u8 is_enable, + u8 is_api) { fib_node_index_t lfib_index; vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0); + lfib_index = fib_table_find(FIB_PROTOCOL_MPLS, + MPLS_FIB_DEFAULT_TABLE_ID); + + if (~0 == lfib_index) + return VNET_API_ERROR_NO_SUCH_FIB; + /* * enable/disable only on the 1<->0 transition */ if (is_enable) { if (1 != ++mm->mpls_enabled_by_sw_if_index[sw_if_index]) - return; + return (0); + + fib_table_lock(lfib_index, FIB_PROTOCOL_MPLS, + (is_api? 
FIB_SOURCE_API: FIB_SOURCE_CLI)); - lfib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS, - MPLS_FIB_DEFAULT_TABLE_ID); vec_validate(mm->fib_index_by_sw_if_index, 0); mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index; } @@ -61,15 +69,17 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm, { ASSERT(mm->mpls_enabled_by_sw_if_index[sw_if_index] > 0); if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index]) - return; + return (0); fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index], - FIB_PROTOCOL_MPLS); + FIB_PROTOCOL_MPLS, + (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI)); } vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled", sw_if_index, !is_enable, 0, 0); + return (0); } static clib_error_t * @@ -101,7 +111,7 @@ mpls_interface_enable_disable (vlib_main_t * vm, goto done; } - mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable); + mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable, 0); done: return error; diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 5021ac23..7bdfd8c7 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -536,6 +536,78 @@ VLIB_CLI_COMMAND (mpls_local_label_command, static) = { .short_help = "Create/Delete MPL local labels", }; +clib_error_t * +vnet_mpls_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, + vlib_cli_command_t * cmdo) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + u32 table_id, is_add; + + is_add = 1; + table_id = ~0; + + /* Get a line of input. 
*/ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "add")) + is_add = 1; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == table_id) + { + error = clib_error_return (0, "No table id"); + goto done; + } + else if (0 == table_id) + { + error = clib_error_return (0, "Can't change the default table"); + goto done; + } + else + { + if (is_add) + { + mpls_table_create (table_id, 0); + } + else + { + mpls_table_delete (table_id, 0); + } + } + + done: + unformat_free (line_input); + return error; +} + +/* *INDENT-ON* */ +/*? + * This command is used to add or delete MPLS Tables. All + * Tables must be explicitly added before that can be used, + * Including the default table. + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_table_command, static) = { + .path = "mpla table", + .short_help = "mpls table [add|del] ", + .function = vnet_mpls_table_cmd, + .is_mp_safe = 1, +}; + int mpls_fib_reset_labels (u32 fib_id) { @@ -546,12 +618,8 @@ mpls_fib_reset_labels (u32 fib_id) static clib_error_t * mpls_init (vlib_main_t * vm) { - mpls_main_t * mm = &mpls_main; clib_error_t * error; - mm->vlib_main = vm; - mm->vnet_main = vnet_get_main(); - if ((error = vlib_call_init_function (vm, ip_main_init))) return error; diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h index b0125e60..31cb1746 100644 --- a/src/vnet/mpls/mpls.h +++ b/src/vnet/mpls/mpls.h @@ -56,10 +56,6 @@ typedef struct { /* IP4 enabled count by software interface */ u8 * mpls_enabled_by_sw_if_index; - - /* convenience */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; } mpls_main_t; extern mpls_main_t mpls_main; @@ -77,8 +73,6 @@ extern vlib_node_registration_t mpls_midchain_node; /* Parse mpls protocol as 0xXXXX or protocol name. 
In either host or network byte order. */ -unformat_function_t unformat_mpls_protocol_host_byte_order; -unformat_function_t unformat_mpls_protocol_net_byte_order; unformat_function_t unformat_mpls_label_net_byte_order; unformat_function_t unformat_mpls_unicast_label; @@ -86,9 +80,10 @@ unformat_function_t unformat_mpls_unicast_label; unformat_function_t unformat_mpls_header; unformat_function_t unformat_pg_mpls_header; -void mpls_sw_interface_enable_disable (mpls_main_t * mm, - u32 sw_if_index, - u8 is_enable); +int mpls_sw_interface_enable_disable (mpls_main_t * mm, + u32 sw_if_index, + u8 is_enable, + u8 is_api); u8 mpls_sw_interface_is_enabled (u32 sw_if_index); @@ -103,4 +98,7 @@ mpls_fib_index_cmp(void * a1, void * a2); int mpls_label_cmp(void * a1, void * a2); +void mpls_table_create(uint32_t table_id, u8 is_api); +void mpls_table_delete(uint32_t table_id, u8 is_api); + #endif /* included_vnet_mpls_h */ diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index a44b1a25..38f5b014 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -58,6 +58,29 @@ _(MPLS_FIB_DUMP, mpls_fib_dump) extern void stats_dslock_with_hint (int hint, int tag); extern void stats_dsunlock (void); +void +mpls_table_delete (u32 table_id, u8 is_api) +{ + u32 fib_index; + + /* + * The MPLS defult table must also be explicitly created via the API. + * So in contrast to IP, it gets no special treatment here. + * + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + */ + fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id); + + if (~0 != fib_index) + { + fib_table_unlock (fib_index, + FIB_PROTOCOL_MPLS, + (is_api ? 
FIB_SOURCE_API : FIB_SOURCE_CLI)); + } +} + void vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp) { @@ -68,6 +91,13 @@ vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp) vnm = vnet_get_main (); vnm->api_errno = 0; + if (mp->mt_is_add) + mpls_table_create (ntohl (mp->mt_table_id), 1); + else + mpls_table_delete (ntohl (mp->mt_table_id), 1); + + rv = (rv == 0) ? vnm->api_errno : rv; + REPLY_MACRO (VL_API_MPLS_TABLE_ADD_DEL_REPLY); } @@ -82,14 +112,7 @@ mpls_ip_bind_unbind_handler (vnet_main_t * vnm, if (~0 == mpls_fib_index) { - if (mp->mb_create_table_if_needed) - { - mpls_fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS, - ntohl (mp->mb_mpls_table_id)); - } - else - return VNET_API_ERROR_NO_SUCH_FIB; + return VNET_API_ERROR_NO_SUCH_FIB; } ip_fib_index = fib_table_find ((mp->mb_is_ip4 ? @@ -170,7 +193,6 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, mp->mr_next_hop_sw_if_index, pfx.fp_payload_proto, mp->mr_next_hop_table_id, - mp->mr_create_table_if_needed, mp->mr_is_rpf_id, &fib_index, &next_hop_fib_index); @@ -235,6 +257,32 @@ vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp) REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY); } +void +mpls_table_create (u32 table_id, u8 is_api) +{ + u32 fib_index; + + /* + * The MPLS defult table must also be explicitly created via the API. + * So in contrast to IP, it gets no special treatment here. + */ + + /* + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + */ + fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id); + + if (~0 == fib_index) + { + fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS, + table_id, + (is_api ? 
+ FIB_SOURCE_API : FIB_SOURCE_CLI)); + } +} + static void vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) { diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c index f427bbf3..2f90993a 100755 --- a/src/vnet/srv6/sr_policy_rewrite.c +++ b/src/vnet/srv6/sr_policy_rewrite.c @@ -595,8 +595,10 @@ sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments, if (sm->fib_table_ip6 == (u32) ~ 0) { sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + FIB_SOURCE_SR, "SRv6 steering of IP6 prefixes through BSIDs"); sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + FIB_SOURCE_SR, "SRv6 steering of IP4 prefixes through BSIDs"); } @@ -684,8 +686,8 @@ sr_policy_del (ip6_address_t * bsid, u32 index) /* If FIB empty unlock it */ if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies)) { - fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); - fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); + fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6, FIB_SOURCE_SR); + fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6, FIB_SOURCE_SR); sm->fib_table_ip6 = (u32) ~ 0; sm->fib_table_ip4 = (u32) ~ 0; } diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c index 57fe21f6..cf4e81ab 100755 --- a/src/vnet/srv6/sr_steering.c +++ b/src/vnet/srv6/sr_steering.c @@ -159,8 +159,10 @@ sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index, /* If no more SR policies or steering policies */ if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies)) { - fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); - fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); + fib_table_unlock (sm->fib_table_ip6, + FIB_PROTOCOL_IP6, FIB_SOURCE_SR); + fib_table_unlock (sm->fib_table_ip4, + FIB_PROTOCOL_IP6, FIB_SOURCE_SR); sm->fib_table_ip6 = (u32) ~ 0; sm->fib_table_ip4 = (u32) ~ 0; } diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index f9c3129c..044ddb5b 
100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -699,8 +699,9 @@ static void VALIDATE_SW_IF_INDEX (mp); - mpls_sw_interface_enable_disable (&mpls_main, - ntohl (mp->sw_if_index), mp->enable); + rv = mpls_sw_interface_enable_disable (&mpls_main, + ntohl (mp->sw_if_index), + mp->enable, 1); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_SW_INTERFACE_SET_MPLS_ENABLE_REPLY); diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 1353fe28..be74b83a 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -560,9 +560,6 @@ static void *vl_api_ip_add_del_route_t_print if (mp->table_id != 0) s = format (s, "vrf %d ", ntohl (mp->table_id)); - if (mp->create_vrf_if_needed) - s = format (s, "create-vrf "); - if (mp->next_hop_weight != 1) s = format (s, "weight %d ", mp->next_hop_weight); diff --git a/test/test_dhcp.py b/test/test_dhcp.py index 6fc29182..fe97f6c9 100644 --- a/test/test_dhcp.py +++ b/test/test_dhcp.py @@ -6,7 +6,7 @@ import struct from framework import VppTestCase, VppTestRunner from vpp_neighbor import VppNeighbor -from vpp_ip_route import find_route +from vpp_ip_route import find_route, VppIpTable from util import mk_ll_addr from scapy.layers.l2 import Ether, getmacbyip, ARP @@ -34,9 +34,19 @@ class TestDHCP(VppTestCase): # create 3 pg interfaces self.create_pg_interfaces(range(4)) + self.tables = [] # pg0 and 1 are IP configured in VRF 0 and 1. 
# pg2 and 3 are non IP-configured in VRF 0 and 1 + table_id = 0 + for table_id in range(1, 4): + tbl4 = VppIpTable(self, table_id) + tbl4.add_vpp_config() + self.tables.append(tbl4) + tbl6 = VppIpTable(self, table_id, is_ip6=1) + tbl6.add_vpp_config() + self.tables.append(tbl6) + table_id = 0 for i in self.pg_interfaces[:2]: i.admin_up() @@ -56,11 +66,15 @@ class TestDHCP(VppTestCase): table_id += 1 def tearDown(self): - super(TestDHCP, self).tearDown() - for i in self.pg_interfaces: + for i in self.pg_interfaces[:2]: i.unconfig_ip4() i.unconfig_ip6() + + for i in self.pg_interfaces: + i.set_table_ip4(0) + i.set_table_ip6(0) i.admin_down() + super(TestDHCP, self).tearDown() def send_and_assert_no_replies(self, intf, pkts, remark): intf.add_stream(pkts) @@ -667,6 +681,8 @@ class TestDHCP(VppTestCase): "DHCP cleanup VRF 0") self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1, "DHCP cleanup VRF 1") + self.pg2.unconfig_ip4() + self.pg3.unconfig_ip4() def test_dhcp6_proxy(self): """ DHCPv6 Proxy""" @@ -1045,6 +1061,8 @@ class TestDHCP(VppTestCase): server_table_id=0, is_ipv6=1, is_add=0) + self.pg2.unconfig_ip6() + self.pg3.unconfig_ip6() def test_dhcp_client(self): """ DHCP Client""" diff --git a/test/test_gre.py b/test/test_gre.py index 1afc44fb..9046b05f 100644 --- a/test/test_gre.py +++ b/test/test_gre.py @@ -6,7 +6,7 @@ from logging import * from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppDot1QSubint from vpp_gre_interface import VppGreInterface, VppGre6Interface -from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto, VppIpTable from vpp_papi_provider import L2_VTR_OP from scapy.packet import Raw @@ -30,6 +30,9 @@ class TestGRE(VppTestCase): # create 3 pg interfaces - set one in a non-default table. 
self.create_pg_interfaces(range(3)) + + self.tbl = VppIpTable(self, 1) + self.tbl.add_vpp_config() self.pg1.set_table_ip4(1) for i in self.pg_interfaces: @@ -43,11 +46,12 @@ class TestGRE(VppTestCase): self.pg2.resolve_ndp() def tearDown(self): - super(TestGRE, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip4() i.unconfig_ip6() i.admin_down() + self.pg1.set_table_ip4(0) + super(TestGRE, self).tearDown() def create_stream_ip4(self, src_if, src_ip, dst_ip): pkts = [] diff --git a/test/test_ip4.py b/test/test_ip4.py index 7a7098c3..55d16735 100644 --- a/test/test_ip4.py +++ b/test/test_ip4.py @@ -6,7 +6,8 @@ import unittest from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint from vpp_ip_route import VppIpRoute, VppRoutePath, VppIpMRoute, \ - VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind + VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind, \ + VppMplsTable from scapy.packet import Raw from scapy.layers.l2 import Ether, Dot1Q, ARP @@ -774,6 +775,8 @@ class TestIPLoadBalance(VppTestCase): super(TestIPLoadBalance, self).setUp() self.create_pg_interfaces(range(5)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() for i in self.pg_interfaces: i.admin_up() @@ -782,11 +785,11 @@ class TestIPLoadBalance(VppTestCase): i.enable_mpls() def tearDown(self): - super(TestIPLoadBalance, self).tearDown() for i in self.pg_interfaces: i.disable_mpls() i.unconfig_ip4() i.admin_down() + super(TestIPLoadBalance, self).tearDown() def send_and_expect_load_balancing(self, input, pkts, outputs): input.add_stream(pkts) @@ -966,6 +969,8 @@ class TestIPVlan0(VppTestCase): super(TestIPVlan0, self).setUp() self.create_pg_interfaces(range(2)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() for i in self.pg_interfaces: i.admin_up() @@ -974,11 +979,11 @@ class TestIPVlan0(VppTestCase): i.enable_mpls() def tearDown(self): - super(TestIPVlan0, self).tearDown() 
for i in self.pg_interfaces: i.disable_mpls() i.unconfig_ip4() i.admin_down() + super(TestIPVlan0, self).tearDown() def send_and_expect(self, input, pkts, output): input.add_stream(pkts) diff --git a/test/test_ip4_vrf_multi_instance.py b/test/test_ip4_vrf_multi_instance.py index b73ac948..5a8d6760 100644 --- a/test/test_ip4_vrf_multi_instance.py +++ b/test/test_ip4_vrf_multi_instance.py @@ -172,9 +172,10 @@ class TestIp4VrfMultiInst(VppTestCase): pg_if = self.pg_if_by_vrf_id[vrf_id][0] dest_addr = pg_if.remote_hosts[0].ip4n dest_addr_len = 24 + self.vapi.ip_table_add_del(vrf_id, is_add=1) self.vapi.ip_add_del_route( dest_addr, dest_addr_len, pg_if.local_ip4n, - table_id=vrf_id, create_vrf_if_needed=1, is_multipath=1) + table_id=vrf_id, is_multipath=1) self.logger.info("IPv4 VRF ID %d created" % vrf_id) if vrf_id not in self.vrf_list: self.vrf_list.append(vrf_id) @@ -216,6 +217,7 @@ class TestIp4VrfMultiInst(VppTestCase): self.logger.info("IPv4 VRF ID %d reset" % vrf_id) self.logger.debug(self.vapi.ppcli("show ip fib")) self.logger.debug(self.vapi.ppcli("show ip arp")) + self.vapi.ip_table_add_del(vrf_id, is_add=0) def create_stream(self, src_if, packet_sizes): """ diff --git a/test/test_ip6.py b/test/test_ip6.py index 285ce181..aad3713c 100644 --- a/test/test_ip6.py +++ b/test/test_ip6.py @@ -8,7 +8,7 @@ from vpp_sub_interface import VppSubInterface, VppDot1QSubint from vpp_pg_interface import is_ipv6_misc from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, VppIpMRoute, \ VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind, \ - VppMplsRoute, DpoProto + VppMplsRoute, DpoProto, VppMplsTable from vpp_neighbor import find_nbr, VppNeighbor from scapy.packet import Raw @@ -1260,6 +1260,9 @@ class TestIP6LoadBalance(VppTestCase): self.create_pg_interfaces(range(5)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() + for i in self.pg_interfaces: i.admin_up() i.config_ip6() @@ -1267,11 +1270,11 @@ class TestIP6LoadBalance(VppTestCase): 
i.enable_mpls() def tearDown(self): - super(TestIP6LoadBalance, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip6() i.admin_down() i.disable_mpls() + super(TestIP6LoadBalance, self).tearDown() def send_and_expect_load_balancing(self, input, pkts, outputs): input.add_stream(pkts) diff --git a/test/test_ip6_vrf_multi_instance.py b/test/test_ip6_vrf_multi_instance.py index af80b5ba..769cb2e5 100644 --- a/test/test_ip6_vrf_multi_instance.py +++ b/test/test_ip6_vrf_multi_instance.py @@ -187,9 +187,10 @@ class TestIP6VrfMultiInst(VppTestCase): pg_if = self.pg_if_by_vrf_id[vrf_id][0] dest_addr = pg_if.remote_hosts[0].ip6n dest_addr_len = 64 + self.vapi.ip_table_add_del(vrf_id, is_add=1, is_ipv6=1) self.vapi.ip_add_del_route( dest_addr, dest_addr_len, pg_if.local_ip6n, is_ipv6=1, - table_id=vrf_id, create_vrf_if_needed=1, is_multipath=1) + table_id=vrf_id, is_multipath=1) self.logger.info("IPv6 VRF ID %d created" % vrf_id) if vrf_id not in self.vrf_list: self.vrf_list.append(vrf_id) @@ -232,6 +233,7 @@ class TestIP6VrfMultiInst(VppTestCase): self.logger.info("IPv6 VRF ID %d reset" % vrf_id) self.logger.debug(self.vapi.ppcli("show ip6 fib")) self.logger.debug(self.vapi.ppcli("show ip6 neighbors")) + self.vapi.ip_table_add_del(vrf_id, is_add=0, is_ipv6=1) def create_stream(self, src_if, packet_sizes): """ diff --git a/test/test_ip_mcast.py b/test/test_ip_mcast.py index 276555d6..7cad683c 100644 --- a/test/test_ip_mcast.py +++ b/test/test_ip_mcast.py @@ -5,7 +5,7 @@ import unittest from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint from vpp_ip_route import VppIpMRoute, VppMRoutePath, VppMFibSignal, \ - MRouteItfFlags, MRouteEntryFlags + MRouteItfFlags, MRouteEntryFlags, VppIpTable from scapy.packet import Raw from scapy.layers.l2 import Ether @@ -44,16 +44,37 @@ class TestIPMcast(VppTestCase): super(TestIPMcast, self).setUp() # create 8 pg interfaces - self.create_pg_interfaces(range(8)) + 
self.create_pg_interfaces(range(9)) # setup interfaces - for i in self.pg_interfaces: + for i in self.pg_interfaces[:8]: i.admin_up() i.config_ip4() i.config_ip6() i.resolve_arp() i.resolve_ndp() + # one more in a vrf + tbl4 = VppIpTable(self, 10) + tbl4.add_vpp_config() + self.pg8.set_table_ip4(10) + self.pg8.config_ip4() + + tbl6 = VppIpTable(self, 10, is_ip6=1) + tbl6.add_vpp_config() + self.pg8.set_table_ip6(10) + self.pg8.config_ip6() + + def tearDown(self): + for i in self.pg_interfaces: + i.unconfig_ip4() + i.unconfig_ip6() + i.admin_down() + + self.pg8.set_table_ip4(0) + self.pg8.set_table_ip6(0) + super(TestIPMcast, self).tearDown() + def create_stream_ip4(self, src_if, src_ip, dst_ip, payload_size=0): pkts = [] # default to small packet sizes @@ -663,6 +684,77 @@ class TestIPMcast(VppTestCase): # route_232_1_1_1.remove_vpp_config() + def test_ip_mcast_vrf(self): + """ IP Multicast Replication in non-default table""" + + # + # An (S,G). + # one accepting interface, pg0, 2 forwarding interfaces + # + route_1_1_1_1_232_1_1_1 = VppIpMRoute( + self, + "1.1.1.1", + "232.1.1.1", 64, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + [VppMRoutePath(self.pg8.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT), + VppMRoutePath(self.pg1.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD), + VppMRoutePath(self.pg2.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)], + table_id=10) + route_1_1_1_1_232_1_1_1.add_vpp_config() + + # + # a stream that matches the route for (1.1.1.1,232.1.1.1) + # small packets + # + self.vapi.cli("clear trace") + tx = self.create_stream_ip4(self.pg8, "1.1.1.1", "232.1.1.1") + self.pg8.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # We expect replications on Pg1 & 2 + self.verify_capture_ip4(self.pg1, tx) + self.verify_capture_ip4(self.pg2, tx) + + def test_ip6_mcast_vrf(self): + """ IPv6 Multicast Replication in non-default table""" + + # + # An (S,G). 
+ # one accepting interface, pg8, 2 forwarding interfaces + # + route_2001_ff01_1 = VppIpMRoute( + self, + "2001::1", + "ff01::1", 256, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + [VppMRoutePath(self.pg8.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT), + VppMRoutePath(self.pg1.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD), + VppMRoutePath(self.pg2.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)], + table_id=10, + is_ip6=1) + route_2001_ff01_1.add_vpp_config() + + # + # a stream that matches the route for (2001::1, ff01::1) + # + self.vapi.cli("clear trace") + tx = self.create_stream_ip6(self.pg8, "2001::1", "ff01::1") + self.pg8.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # We expect replications on Pg1, 2, + self.verify_capture_ip6(self.pg1, tx) + self.verify_capture_ip6(self.pg2, tx) if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/test_mpls.py b/test/test_mpls.py index b2226a74..460a32d1 100644 --- a/test/test_mpls.py +++ b/test/test_mpls.py @@ -6,7 +6,7 @@ import socket from framework import VppTestCase, VppTestRunner from vpp_ip_route import VppIpRoute, VppRoutePath, VppMplsRoute, \ VppMplsIpBind, VppIpMRoute, VppMRoutePath, \ - MRouteItfFlags, MRouteEntryFlags, DpoProto + MRouteItfFlags, MRouteEntryFlags, DpoProto, VppIpTable, VppMplsTable from vpp_mpls_tunnel_interface import VppMPLSTunnelInterface from scapy.packet import Raw @@ -60,9 +60,23 @@ class TestMPLS(VppTestCase): # setup both interfaces # assign them different tables. 
table_id = 0 + self.tables = [] + + tbl = VppMplsTable(self, 0) + tbl.add_vpp_config() + self.tables.append(tbl) for i in self.pg_interfaces: i.admin_up() + + if table_id != 0: + tbl = VppIpTable(self, table_id) + tbl.add_vpp_config() + self.tables.append(tbl) + tbl = VppIpTable(self, table_id, is_ip6=1) + tbl.add_vpp_config() + self.tables.append(tbl) + i.set_table_ip4(table_id) i.set_table_ip6(table_id) i.config_ip4() @@ -73,12 +87,15 @@ class TestMPLS(VppTestCase): table_id += 1 def tearDown(self): - super(TestMPLS, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip4() i.unconfig_ip6() i.ip6_disable() + i.set_table_ip4(0) + i.set_table_ip6(0) + i.disable_mpls() i.admin_down() + super(TestMPLS, self).tearDown() # the default of 64 matches the IP packet TTL default def create_stream_labelled_ip4( @@ -1092,6 +1109,9 @@ class TestMPLSDisabled(VppTestCase): # create 2 pg interfaces self.create_pg_interfaces(range(2)) + self.tbl = VppMplsTable(self, 0) + self.tbl.add_vpp_config() + # PG0 is MPLS enalbed self.pg0.admin_up() self.pg0.config_ip4() @@ -1102,11 +1122,13 @@ class TestMPLSDisabled(VppTestCase): self.pg1.admin_up() def tearDown(self): - super(TestMPLSDisabled, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip4() i.admin_down() + self.pg0.disable_mpls() + super(TestMPLSDisabled, self).tearDown() + def send_and_assert_no_replies(self, intf, pkts, remark): intf.add_stream(pkts) self.pg_enable_capture(self.pg_interfaces) @@ -1174,6 +1196,13 @@ class TestMPLSPIC(VppTestCase): # create 2 pg interfaces self.create_pg_interfaces(range(4)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() + tbl4 = VppIpTable(self, 1) + tbl4.add_vpp_config() + tbl6 = VppIpTable(self, 1, is_ip6=1) + tbl6.add_vpp_config() + # core links self.pg0.admin_up() self.pg0.config_ip4() @@ -1201,14 +1230,15 @@ class TestMPLSPIC(VppTestCase): self.pg3.resolve_ndp() def tearDown(self): - super(TestMPLSPIC, self).tearDown() self.pg0.disable_mpls() + 
self.pg1.disable_mpls() for i in self.pg_interfaces: i.unconfig_ip4() i.unconfig_ip6() i.set_table_ip4(0) i.set_table_ip6(0) i.admin_down() + super(TestMPLSPIC, self).tearDown() def test_mpls_ibgp_pic(self): """ MPLS iBGP PIC edge convergence @@ -1534,24 +1564,30 @@ class TestMPLSL2(VppTestCase): # create 2 pg interfaces self.create_pg_interfaces(range(2)) + # create the default MPLS table + self.tables = [] + tbl = VppMplsTable(self, 0) + tbl.add_vpp_config() + self.tables.append(tbl) + # use pg0 as the core facing interface self.pg0.admin_up() self.pg0.config_ip4() self.pg0.resolve_arp() self.pg0.enable_mpls() - # use the other 2 for customer facg L2 links + # use the other 2 for customer facing L2 links for i in self.pg_interfaces[1:]: i.admin_up() def tearDown(self): - super(TestMPLSL2, self).tearDown() for i in self.pg_interfaces[1:]: i.admin_down() self.pg0.disable_mpls() self.pg0.unconfig_ip4() self.pg0.admin_down() + super(TestMPLSL2, self).tearDown() def verify_capture_tunneled_ethernet(self, capture, sent, mpls_labels, ttl=255, top=None): diff --git a/test/test_nat.py b/test/test_nat.py index 1f2d17ab..73e9e217 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -549,6 +549,8 @@ class TestNAT44(MethodHolder): cls.pg0.configure_ipv4_neighbors() cls.overlapping_interfaces = list(list(cls.pg_interfaces[4:7])) + cls.vapi.ip_table_add_del(10, is_add=1) + cls.vapi.ip_table_add_del(20, is_add=1) cls.pg4._local_ip4 = "172.16.255.1" cls.pg4._local_ip4n = socket.inet_pton(socket.AF_INET, i.local_ip4) @@ -1797,6 +1799,8 @@ class TestNAT44(MethodHolder): self.pg0.unconfig_ip4() self.pg1.unconfig_ip4() + self.vapi.ip_table_add_del(vrf_id1, is_add=1) + self.vapi.ip_table_add_del(vrf_id2, is_add=1) self.pg0.set_table_ip4(vrf_id1) self.pg1.set_table_ip4(vrf_id2) self.pg0.config_ip4() @@ -1825,6 +1829,13 @@ class TestNAT44(MethodHolder): capture = self.pg2.get_capture(len(pkts)) self.verify_capture_out(capture, nat_ip2) + self.pg0.unconfig_ip4() + 
self.pg1.unconfig_ip4() + self.pg0.set_table_ip4(0) + self.pg1.set_table_ip4(0) + self.vapi.ip_table_add_del(vrf_id1, is_add=0) + self.vapi.ip_table_add_del(vrf_id2, is_add=0) + def test_vrf_feature_independent(self): """ NAT44 tenant VRF independent address pool mode """ @@ -3042,6 +3053,8 @@ class TestNAT64(MethodHolder): cls.ip6_interfaces.append(cls.pg_interfaces[2]) cls.ip4_interfaces = list(cls.pg_interfaces[1:2]) + cls.vapi.ip_table_add_del(cls.vrf1_id, is_add=1, is_ipv6=1) + cls.pg_interfaces[2].set_table_ip6(cls.vrf1_id) cls.pg0.generate_remote_hosts(2) diff --git a/test/test_neighbor.py b/test/test_neighbor.py index 1c7cc267..68dde2fb 100644 --- a/test/test_neighbor.py +++ b/test/test_neighbor.py @@ -5,7 +5,8 @@ from socket import AF_INET, AF_INET6, inet_pton from framework import VppTestCase, VppTestRunner from vpp_neighbor import VppNeighbor, find_nbr -from vpp_ip_route import VppIpRoute, VppRoutePath, find_route +from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, \ + VppIpTable from scapy.packet import Raw from scapy.layers.l2 import Ether, ARP, Dot1Q @@ -39,11 +40,13 @@ class ARPTestCase(VppTestCase): self.pg1.config_ip6() # pg3 in a different VRF + self.tbl = VppIpTable(self, 1) + self.tbl.add_vpp_config() + self.pg3.set_table_ip4(1) self.pg3.config_ip4() def tearDown(self): - super(ARPTestCase, self).tearDown() self.pg0.unconfig_ip4() self.pg0.unconfig_ip6() @@ -51,10 +54,13 @@ class ARPTestCase(VppTestCase): self.pg1.unconfig_ip6() self.pg3.unconfig_ip4() + self.pg3.set_table_ip4(0) for i in self.pg_interfaces: i.admin_down() + super(ARPTestCase, self).tearDown() + def verify_arp_req(self, rx, smac, sip, dip): ether = rx[Ether] self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff") @@ -1080,6 +1086,62 @@ class ARPTestCase(VppTestCase): self.pg0.remote_ip4, self.pg1.remote_hosts[1].ip4) + def test_arp_static(self): + """ ARP Static""" + self.pg2.generate_remote_hosts(3) + + # + # Add a static ARP entry + # + static_arp = VppNeighbor(self, + 
self.pg2.sw_if_index, + self.pg2.remote_hosts[1].mac, + self.pg2.remote_hosts[1].ip4, + is_static=1) + static_arp.add_vpp_config() + + # + # Add the connected prefix to the interface + # + self.pg2.config_ip4() + + # + # We should now find the adj-fib + # + self.assertTrue(find_nbr(self, + self.pg2.sw_if_index, + self.pg2.remote_hosts[1].ip4, + is_static=1)) + self.assertTrue(find_route(self, + self.pg2.remote_hosts[1].ip4, + 32)) + + # + # remove the connected + # + self.pg2.unconfig_ip4() + + # + # put the interface into table 1 + # + self.pg2.set_table_ip4(1) + + # + # configure the same connected and expect to find the + # adj fib in the new table + # + self.pg2.config_ip4() + self.assertTrue(find_route(self, + self.pg2.remote_hosts[1].ip4, + 32, + table_id=1)) + + # + # clean-up + # + self.pg2.unconfig_ip4() + self.pg2.set_table_ip4(0) + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py index 2c489e3c..b7993793 100644 --- a/test/vpp_ip_route.py +++ b/test/vpp_ip_route.py @@ -54,6 +54,46 @@ def find_route(test, ip_addr, len, table_id=0, inet=AF_INET): return False +class VppIpTable(VppObject): + + def __init__(self, + test, + table_id, + is_ip6=0): + self._test = test + self.table_id = table_id + self.is_ip6 = is_ip6 + + def add_vpp_config(self): + self._test.vapi.ip_table_add_del( + self.table_id, + is_ipv6=self.is_ip6, + is_add=1) + self._test.registry.register(self, self._test.logger) + + def remove_vpp_config(self): + self._test.vapi.ip_table_add_del( + self.table_id, + is_ipv6=self.is_ip6, + is_add=0) + + def query_vpp_config(self): + # find the default route + return find_route(self._test, + "::" if self.is_ip6 else "0.0.0.0", + 0, + self.table_id, + inet=AF_INET6 if self.is_ip6 == 1 else AF_INET) + + def __str__(self): + return self.object_id() + + def object_id(self): + return ("table-%s-%d" % + ("v6" if self.is_ip6 == 1 else "v4", + self.table_id)) + + class VppRoutePath(object): 
def __init__( @@ -391,6 +431,39 @@ class VppMplsIpBind(VppObject): self.dest_addr_len)) +class VppMplsTable(VppObject): + + def __init__(self, + test, + table_id): + self._test = test + self.table_id = table_id + + def add_vpp_config(self): + self._test.vapi.mpls_table_add_del( + self.table_id, + is_add=1) + self._test.registry.register(self, self._test.logger) + + def remove_vpp_config(self): + self._test.vapi.mpls_table_add_del( + self.table_id, + is_add=0) + + def query_vpp_config(self): + # find the default route + dump = self._test.vapi.mpls_fib_dump() + if len(dump): + return True + return False + + def __str__(self): + return self.object_id() + + def object_id(self): + return ("table-mpls-%d" % (self.table_id)) + + class VppMplsRoute(VppObject): """ MPLS Route/LSP diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index b70da026..519aff80 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -652,6 +652,24 @@ class VppPapiProvider(object): return self.api(self.papi.delete_loopback, {'sw_if_index': sw_if_index, }) + def ip_table_add_del(self, + table_id, + is_add=1, + is_ipv6=0): + """ + + :param table_id + :param is_add: (Default value = 1) + :param is_ipv6: (Default value = 0) + + """ + + return self.api( + self.papi.ip_table_add_del, + {'table_id': table_id, + 'is_add': is_add, + 'is_ipv6': is_ipv6}) + def ip_add_del_route( self, dst_address, @@ -664,7 +682,6 @@ class VppPapiProvider(object): next_hop_n_out_labels=0, next_hop_out_label_stack=[], next_hop_via_label=MPLS_LABEL_INVALID, - create_vrf_if_needed=0, is_resolve_host=0, is_resolve_attached=0, classify_table_index=0xFFFFFFFF, @@ -687,7 +704,6 @@ class VppPapiProvider(object): :param vrf_id: (Default value = 0) :param lookup_in_vrf: (Default value = 0) :param classify_table_index: (Default value = 0xFFFFFFFF) - :param create_vrf_if_needed: (Default value = 0) :param is_add: (Default value = 1) :param is_drop: (Default value = 0) :param is_ipv6: (Default value = 0) 
@@ -707,7 +723,6 @@ class VppPapiProvider(object): 'table_id': table_id, 'classify_table_index': classify_table_index, 'next_hop_table_id': next_hop_table_id, - 'create_vrf_if_needed': create_vrf_if_needed, 'is_add': is_add, 'is_drop': is_drop, 'is_unreach': is_unreach, @@ -912,6 +927,22 @@ class VppPapiProvider(object): def mpls_fib_dump(self): return self.api(self.papi.mpls_fib_dump, {}) + def mpls_table_add_del( + self, + table_id, + is_add=1): + """ + + :param table_id + :param is_add: (Default value = 1) + + """ + + return self.api( + self.papi.mpls_table_add_del, + {'mt_table_id': table_id, + 'mt_is_add': is_add}) + def mpls_route_add_del( self, label, @@ -925,7 +956,6 @@ class VppPapiProvider(object): next_hop_n_out_labels=0, next_hop_out_label_stack=[], next_hop_via_label=MPLS_LABEL_INVALID, - create_vrf_if_needed=0, is_resolve_host=0, is_resolve_attached=0, is_interface_rx=0, @@ -947,7 +977,6 @@ class VppPapiProvider(object): :param vrf_id: (Default value = 0) :param lookup_in_vrf: (Default value = 0) :param classify_table_index: (Default value = 0xFFFFFFFF) - :param create_vrf_if_needed: (Default value = 0) :param is_add: (Default value = 1) :param is_drop: (Default value = 0) :param is_ipv6: (Default value = 0) @@ -968,7 +997,6 @@ class VppPapiProvider(object): 'mr_eos': eos, 'mr_table_id': table_id, 'mr_classify_table_index': classify_table_index, - 'mr_create_table_if_needed': create_vrf_if_needed, 'mr_is_add': is_add, 'mr_is_classify': is_classify, 'mr_is_multipath': is_multipath, @@ -994,7 +1022,6 @@ class VppPapiProvider(object): table_id=0, ip_table_id=0, is_ip4=1, - create_vrf_if_needed=0, is_bind=1): """ """ @@ -1003,7 +1030,6 @@ class VppPapiProvider(object): {'mb_mpls_table_id': table_id, 'mb_label': label, 'mb_ip_table_id': ip_table_id, - 'mb_create_table_if_needed': create_vrf_if_needed, 'mb_is_bind': is_bind, 'mb_is_ip4': is_ip4, 'mb_address_length': dst_address_length, @@ -1020,7 +1046,6 @@ class VppPapiProvider(object): 
next_hop_n_out_labels=0, next_hop_out_label_stack=[], next_hop_via_label=MPLS_LABEL_INVALID, - create_vrf_if_needed=0, is_add=1, l2_only=0, is_multicast=0): @@ -1034,7 +1059,6 @@ class VppPapiProvider(object): :param vrf_id: (Default value = 0) :param lookup_in_vrf: (Default value = 0) :param classify_table_index: (Default value = 0xFFFFFFFF) - :param create_vrf_if_needed: (Default value = 0) :param is_add: (Default value = 1) :param is_drop: (Default value = 0) :param is_ipv6: (Default value = 0) @@ -1844,7 +1868,6 @@ class VppPapiProvider(object): i_flags, rpf_id=0, table_id=0, - create_vrf_if_needed=0, is_add=1, is_ipv6=0, is_local=0): @@ -1857,7 +1880,6 @@ class VppPapiProvider(object): 'itf_flags': i_flags, 'table_id': table_id, 'rpf_id': rpf_id, - 'create_vrf_if_needed': create_vrf_if_needed, 'is_add': is_add, 'is_ipv6': is_ipv6, 'is_local': is_local, -- cgit 1.2.3-korg From 609707ea530de6a0f9fa989b8269b973dd89174e Mon Sep 17 00:00:00 2001 From: John Lo Date: Tue, 19 Sep 2017 21:45:10 -0400 Subject: Fix DHCP client so it works for worker threads Fix dhcp_client_for_us() function to utilize rpc_call_main_thread to call vlib_process_signal_event() to ensure proper handling irrespective of it being called in main thread or worker thread. Added ASSERT to vlib_process_signal.. path to make sure it is called in main thread. 
Change-Id: I4109cc049d8e4225d896ce492ce201011dc9c911 Signed-off-by: John Lo --- src/vlib/node_funcs.h | 3 +++ src/vnet/dhcp/client.c | 16 ++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'src/vnet/dhcp') diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index 0059b9be..3ae4e541 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -851,6 +851,9 @@ vlib_process_signal_event_data (vlib_main_t * vm, vlib_process_t *p = vec_elt (nm->processes, n->runtime_index); uword *h, t; + /* Must be in main thread */ + ASSERT (vlib_get_thread_index () == 0); + h = hash_get (p->event_type_index_by_type_opaque, type_opaque); if (!h) { diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c index dd5e99f2..5986438b 100644 --- a/src/vnet/dhcp/client.c +++ b/src/vnet/dhcp/client.c @@ -120,6 +120,17 @@ set_l2_rewrite (dhcp_client_main_t * dcm, dhcp_client_t * c) 0 /* broadcast */); } +void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); + +static void +dhcp_client_proc_callback (uword * client_index) +{ + vlib_main_t *vm = vlib_get_main (); + ASSERT (vlib_get_thread_index () == 0); + vlib_process_signal_event (vm, dhcp_client_process_node.index, + EVENT_DHCP_CLIENT_WAKEUP, *client_index); +} + /* * dhcp_client_for_us - server-to-client callback. * Called from proxy_node.c:dhcp_proxy_to_client_input(). @@ -251,8 +262,9 @@ int dhcp_client_for_us (u32 bi, vlib_buffer_t * b, c->retry_count = 0; c->next_transmit = 0; /* send right now... */ /* Poke the client process, which will send the request */ - vlib_process_signal_event (vm, dhcp_client_process_node.index, - EVENT_DHCP_CLIENT_WAKEUP, c - dcm->clients); + uword client_id = c - dcm->clients; + vl_api_rpc_call_main_thread (dhcp_client_proc_callback, + (u8 *) &client_id, sizeof (uword)); break; case DHCP_BOUND: -- cgit 1.2.3-korg