diff options
Diffstat (limited to 'src/vnet/unix')
-rw-r--r-- | src/vnet/unix/gdb_funcs.c | 171 | ||||
-rw-r--r-- | src/vnet/unix/pcap.c | 241 | ||||
-rw-r--r-- | src/vnet/unix/pcap.h | 230 | ||||
-rw-r--r-- | src/vnet/unix/pcap2pg.c | 182 | ||||
-rw-r--r-- | src/vnet/unix/tap.api | 123 | ||||
-rw-r--r-- | src/vnet/unix/tap_api.c | 257 | ||||
-rw-r--r-- | src/vnet/unix/tapcli.c | 1328 | ||||
-rw-r--r-- | src/vnet/unix/tapcli.h | 52 | ||||
-rw-r--r-- | src/vnet/unix/tuntap.c | 1000 | ||||
-rw-r--r-- | src/vnet/unix/tuntap.h | 36 |
10 files changed, 3620 insertions, 0 deletions
diff --git a/src/vnet/unix/gdb_funcs.c b/src/vnet/unix/gdb_funcs.c new file mode 100644 index 00000000000..cfb4b247800 --- /dev/null +++ b/src/vnet/unix/gdb_funcs.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief Host utility functions + */ +#include <vppinfra/format.h> +#include <vlib/vlib.h> + +#include <vlib/threads.h> + + + +/** + * @brief GDB callable function: vl - Return vector length of vector + * + * @param *p - void - address of vector + * + * @return length - u32 - result of vec_len (p) + * + */ +u32 vl(void *p) +{ + return vec_len (p); +} + +/** + * @brief GDB callable function: pe - call pool_elts - number of elements in a pool + * + * @param *v - void - address of pool + * + * @return number - uword - result of pool_elts (v) + * + */ +uword pe (void *v) +{ + return (pool_elts(v)); +} + +/** + * @brief GDB callable function: pifi - call pool_is_free_index - is passed index free? 
+ * + * @param *p - void - address of pool + * @param index - u32 - pool index to test (a value, not a pointer) + * + * @return int - result of pool_is_free_index (p, index) + * + */ +int pifi (void *p, u32 index) +{ + return pool_is_free_index (p, index); +} + +/** + * @brief GDB callable function: debug_hex_bytes - print bytes as hex on stderr + * + * Nothing is returned; the formatted bytes followed by a newline are + * written to stderr via format_hex_bytes. + * + * @param *s - u8 - address of the first byte to print + * @param n - u32 - number of bytes to format + * + */ +void debug_hex_bytes (u8 *s, u32 n) +{ + fformat (stderr, "%U\n", format_hex_bytes, s, n); +} + +/** + * @brief GDB callable function: vlib_dump_frame_ownership + * + * Walks every node runtime in nodes_by_type[VLIB_NODE_TYPE_INTERNAL] and, + * for each of its n_next_nodes next frames starting at next_frame_index, + * prints a report on stderr when the next frame has VLIB_FRAME_OWNER set: + * the name of the node, the next index relative to the node's first next + * frame, the name of the node found through nf->node_runtime_index, and + * the next-frame index together with nf->frame_index. + * + * Takes no arguments and returns nothing. + * + */ +void vlib_dump_frame_ownership (void) +{ + vlib_main_t * vm = vlib_get_main(); + vlib_node_main_t * nm = &vm->node_main; + vlib_node_runtime_t * this_node_runtime; + vlib_next_frame_t * nf; + u32 first_nf_index; + u32 index; + + vec_foreach(this_node_runtime, nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) + { + first_nf_index = this_node_runtime->next_frame_index; + + for (index = first_nf_index; index < first_nf_index + + this_node_runtime->n_next_nodes; index++) + { + vlib_node_runtime_t * owned_runtime; + nf = vec_elt_at_index (vm->node_main.next_frames, index); + if (nf->flags & VLIB_FRAME_OWNER) + { + owned_runtime = vec_elt_at_index (nm->nodes_by_type[0], + nf->node_runtime_index); + fformat(stderr, + "%s next index %d owns enqueue rights to %s\n", + nm->nodes[this_node_runtime->node_index]->name, + index - first_nf_index, + nm->nodes[owned_runtime->node_index]->name); + fformat (stderr, " nf index %d nf->frame_index %d\n", + nf - vm->node_main.next_frames, + nf->frame_index); + } + } + } +} + +/** + * @brief GDB callable function: vlib_runtime_index_to_node_name + * + * Takes node index and prints the node name on stderr. 
+ * + * @param index - u32 + */ +void vlib_runtime_index_to_node_name (u32 index) +{ + vlib_main_t * vm = vlib_get_main(); + vlib_node_main_t * nm = &vm->node_main; + + if (index > vec_len (nm->nodes)) + { + fformat(stderr, "%d out of range, max %d\n", vec_len(nm->nodes)); + return; + } + + fformat(stderr, "node runtime index %d name %s\n", index, nm->nodes[index]->name); +} + + +/** + * @brief GDB callable function: show_gdb_command_fn - show gdb + * + * Shows list of functions for VPP available in GDB + * + * @return error - clib_error_t + */ +static clib_error_t * +show_gdb_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_cli_output (vm, "vl(p) returns vec_len(p)"); + vlib_cli_output (vm, "pe(p) returns pool_elts(p)"); + vlib_cli_output (vm, "pifi(p, i) returns pool_is_free_index(p, i)"); + vlib_cli_output (vm, "debug_hex_bytes (ptr, n_bytes) dumps n_bytes in hex"); + vlib_cli_output (vm, "vlib_dump_frame_ownership() does what it says"); + vlib_cli_output (vm, "vlib_runtime_index_to_node_name (index) prints NN"); + + return 0; +} + +VLIB_CLI_COMMAND (show_gdb_funcs_command, static) = { + .path = "show gdb", + .short_help = "Describe functions which can be called from gdb", + .function = show_gdb_command_fn, +}; + +/* Cafeteria plan, maybe you don't want these functions */ +clib_error_t * +gdb_func_init (vlib_main_t * vm) { return 0; } + +VLIB_INIT_FUNCTION (gdb_func_init); diff --git a/src/vnet/unix/pcap.c b/src/vnet/unix/pcap.c new file mode 100644 index 00000000000..bba225f74ab --- /dev/null +++ b/src/vnet/unix/pcap.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * pcap.c: libpcap packet capture format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/unix/pcap.h> +#include <sys/fcntl.h> + +/** + * @file + * @brief PCAP function. 
+ * + * Usage: + * + * <code><pre> + * \#include <vnet/unix/pcap.h> + * + * static pcap_main_t pcap = { + * .file_name = "/tmp/ip4", + * .n_packets_to_capture = 2, + * .packet_type = PCAP_PACKET_TYPE_ip, + * }; + * </pre></code> + * + * To add a buffer: + * + * <code><pre>pcap_add_buffer (&pcap, vm, pi0, 128);</pre></code> + * + * File will be written after @c n_packets_to_capture or call to pcap_write (&pcap). + * +*/ + +/** + * @brief Close PCAP file + * + * @return rc - clib_error_t + * + */ +clib_error_t * +pcap_close (pcap_main_t * pm) +{ + close (pm->file_descriptor); + pm->flags &= ~PCAP_MAIN_INIT_DONE; + pm->file_descriptor = -1; + return 0; +} + +/** + * @brief Write PCAP file + * + * @return rc - clib_error_t + * + */ +clib_error_t * +pcap_write (pcap_main_t * pm) +{ + clib_error_t * error = 0; + + if (! (pm->flags & PCAP_MAIN_INIT_DONE)) + { + pcap_file_header_t fh; + int n; + + if (! pm->file_name) + pm->file_name = "/tmp/vnet.pcap"; + + pm->file_descriptor = open (pm->file_name, O_CREAT | O_TRUNC | O_WRONLY, 0664); + if (pm->file_descriptor < 0) + { + error = clib_error_return_unix (0, "failed to open `%s'", pm->file_name); + goto done; + } + + pm->flags |= PCAP_MAIN_INIT_DONE; + pm->n_packets_captured = 0; + pm->n_pcap_data_written = 0; + + /* Write file header. 
*/ + memset (&fh, 0, sizeof (fh)); + fh.magic = 0xa1b2c3d4; + fh.major_version = 2; + fh.minor_version = 4; + fh.time_zone = 0; + fh.max_packet_size_in_bytes = 1 << 16; + fh.packet_type = pm->packet_type; + n = write (pm->file_descriptor, &fh, sizeof (fh)); + if (n != sizeof (fh)) + { + if (n < 0) + error = clib_error_return_unix (0, "write file header `%s'", pm->file_name); + else + error = clib_error_return (0, "short write of file header `%s'", pm->file_name); + goto done; + } + } + + while (vec_len (pm->pcap_data) > pm->n_pcap_data_written) + { + int n = vec_len (pm->pcap_data) - pm->n_pcap_data_written; + + n = write (pm->file_descriptor, + vec_elt_at_index (pm->pcap_data, pm->n_pcap_data_written), n); + + if (n < 0 && unix_error_is_fatal (errno)) + { + error = clib_error_return_unix (0, "write `%s'", pm->file_name); + goto done; + } + pm->n_pcap_data_written += n; + } + + if (pm->n_pcap_data_written >= vec_len (pm->pcap_data)) + { + vec_reset_length (pm->pcap_data); + pm->n_pcap_data_written = 0; + } + + if (pm->n_packets_captured >= pm->n_packets_to_capture) + pcap_close(pm); + + done: + if (error) + { + if (pm->file_descriptor >= 0) + close (pm->file_descriptor); + } + return error; +} + +/** + * @brief Read PCAP file + * + * @return rc - clib_error_t + * + */ +clib_error_t * pcap_read (pcap_main_t * pm) +{ + clib_error_t * error = 0; + int fd, need_swap, n; + pcap_file_header_t fh; + pcap_packet_header_t ph; + + fd = open (pm->file_name, O_RDONLY); + if (fd < 0) + { + error = clib_error_return_unix (0, "open `%s'", pm->file_name); + goto done; + } + + if (read (fd, &fh, sizeof (fh)) != sizeof (fh)) + { + error = clib_error_return_unix (0, "read file header `%s'", pm->file_name); + goto done; + } + + need_swap = 0; + if (fh.magic == 0xd4c3b2a1) + { + need_swap = 1; +#define _(t,f) fh.f = clib_byte_swap_##t (fh.f); + foreach_pcap_file_header; +#undef _ + } + + if (fh.magic != 0xa1b2c3d4) + { + error = clib_error_return (0, "bad magic `%s'", pm->file_name); + 
goto done; + } + + pm->min_packet_bytes = 0; + pm->max_packet_bytes = 0; + while ((n = read (fd, &ph, sizeof (ph))) != 0) + { + u8 * data; + + if (need_swap) + { +#define _(t,f) ph.f = clib_byte_swap_##t (ph.f); + foreach_pcap_packet_header; +#undef _ + } + + data = vec_new (u8, ph.n_bytes_in_packet); + if (read (fd, data, ph.n_packet_bytes_stored_in_file) != ph.n_packet_bytes_stored_in_file) + { + error = clib_error_return (0, "short read `%s'", pm->file_name); + goto done; + } + + if (vec_len (pm->packets_read) == 0) + pm->min_packet_bytes = pm->max_packet_bytes = ph.n_bytes_in_packet; + else + { + pm->min_packet_bytes = clib_min (pm->min_packet_bytes, ph.n_bytes_in_packet); + pm->max_packet_bytes = clib_max (pm->max_packet_bytes, ph.n_bytes_in_packet); + } + + vec_add1 (pm->packets_read, data); + } + + done: + if (fd >= 0) + close (fd); + return error; + +} diff --git a/src/vnet/unix/pcap.h b/src/vnet/unix/pcap.h new file mode 100644 index 00000000000..6aaf32bef7e --- /dev/null +++ b/src/vnet/unix/pcap.h @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * pcap.h: libpcap packet capture format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +/** + * @file + * @brief PCAP utility definitions + */ +#ifndef included_vnet_pcap_h +#define included_vnet_pcap_h + +#include <vlib/vlib.h> + +/** + * @brief Packet types supported by PCAP + * + * null 0 + * ethernet 1 + * ppp 9 + * ip 12 + * hdlc 104 + */ +#define foreach_vnet_pcap_packet_type \ + _ (null, 0) \ + _ (ethernet, 1) \ + _ (ppp, 9) \ + _ (ip, 12) \ + _ (hdlc, 104) + +typedef enum { +#define _(f,n) PCAP_PACKET_TYPE_##f = (n), + foreach_vnet_pcap_packet_type +#undef _ +} pcap_packet_type_t; + +#define foreach_pcap_file_header \ + /** 0xa1b2c3d4 host byte order. \ + 0xd4c3b2a1 => need to byte swap everything. */ \ + _ (u32, magic) \ + \ + /** Currently major 2 minor 4. */ \ + _ (u16, major_version) \ + _ (u16, minor_version) \ + \ + /** 0 for GMT. 
*/ \ + _ (u32, time_zone) \ + \ + /** Accuracy of timestamps. Typically set to 0. */ \ + _ (u32, sigfigs) \ + \ + /** Size of largest packet in file. */ \ + _ (u32, max_packet_size_in_bytes) \ + \ + /** One of vnet_pcap_packet_type_t. */ \ + _ (u32, packet_type) + +/** File header struct */ +typedef struct { +#define _(t, f) t f; + foreach_pcap_file_header +#undef _ +} pcap_file_header_t; + +#define foreach_pcap_packet_header \ + /** Time stamp in seconds */ \ + _ (u32, time_in_sec) \ + /** Time stamp in microseconds. */ \ + _ (u32, time_in_usec) \ + \ + /** Number of bytes stored in file. */ \ + _ (u32, n_packet_bytes_stored_in_file) \ + /** Number of bytes in actual packet. */ \ + _ (u32, n_bytes_in_packet) + +/** Packet header. */ +typedef struct { +#define _(t, f) t f; + foreach_pcap_packet_header +#undef _ + + /** Packet data follows. */ + u8 data[0]; +} pcap_packet_header_t; + +/** + * @brief PCAP main state data structure + */ +typedef struct { + /** File name of pcap output. */ + char * file_name; + + /** Number of packets to capture. */ + u32 n_packets_to_capture; + + /** Packet type */ + pcap_packet_type_t packet_type; + + /** Number of packets currently captured. */ + u32 n_packets_captured; + + /** flags */ + u32 flags; +#define PCAP_MAIN_INIT_DONE (1 << 0) + + /** File descriptor for reading/writing. */ + int file_descriptor; + + /** Bytes written */ + u32 n_pcap_data_written; + + /** Vector of pcap data. */ + u8 * pcap_data; + + /** Packets read from file. */ + u8 ** packets_read; + + /** Min/Max Packet bytes */ + u32 min_packet_bytes, max_packet_bytes; +} pcap_main_t; + +/** Write out data to output file. */ +clib_error_t * pcap_write (pcap_main_t * pm); + +/** Read data from file. 
*/ +clib_error_t * pcap_read (pcap_main_t * pm); + +/** + * @brief Add packet + * + * @param *pm - pcap_main_t + * @param time_now - f64 + * @param n_bytes_in_trace - u32 + * @param n_bytes_in_packet - u32 + * + * @return Packet Data + * + */ +static inline void * +pcap_add_packet (pcap_main_t * pm, + f64 time_now, + u32 n_bytes_in_trace, + u32 n_bytes_in_packet) +{ + pcap_packet_header_t * h; + u8 * d; + + vec_add2 (pm->pcap_data, d, sizeof (h[0]) + n_bytes_in_trace); + h = (void *) (d); + h->time_in_sec = time_now; + h->time_in_usec = 1e6*(time_now - h->time_in_sec); + h->n_packet_bytes_stored_in_file = n_bytes_in_trace; + h->n_bytes_in_packet = n_bytes_in_packet; + pm->n_packets_captured++; + return h->data; +} + +/** + * @brief Add buffer (vlib_buffer_t) to the trace + * + * @param *pm - pcap_main_t + * @param *vm - vlib_main_t + * @param buffer_index - u32 + * @param n_bytes_in_trace - u32 + * + */ +static inline void +pcap_add_buffer (pcap_main_t * pm, + vlib_main_t * vm, u32 buffer_index, + u32 n_bytes_in_trace) +{ + vlib_buffer_t * b = vlib_get_buffer (vm, buffer_index); + u32 n = vlib_buffer_length_in_chain (vm, b); + i32 n_left = clib_min (n_bytes_in_trace, n); + f64 time_now = vlib_time_now (vm); + void * d; + + d = pcap_add_packet (pm, time_now, n_left, n); + while (1) + { + u32 copy_length = clib_min ((u32) n_left, b->current_length); + clib_memcpy (d, b->data + b->current_data, copy_length); + n_left -= b->current_length; + if (n_left <= 0) + break; + d += b->current_length; + ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT); + b = vlib_get_buffer (vm, b->next_buffer); + } + + /** Flush output vector. 
*/ + if (vec_len (pm->pcap_data) >= 64*1024 + || pm->n_packets_captured >= pm->n_packets_to_capture) + pcap_write (pm); +} + +#endif /* included_vnet_pcap_h */ diff --git a/src/vnet/unix/pcap2pg.c b/src/vnet/unix/pcap2pg.c new file mode 100644 index 00000000000..217a61f4cb4 --- /dev/null +++ b/src/vnet/unix/pcap2pg.c @@ -0,0 +1,182 @@ +/* + * pcap2pg.c: convert pcap input to pg input + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief Functions to convert PCAP file format to VPP PG (Packet Generator) + * + */ +#include <vnet/unix/pcap.h> +#include <vnet/ethernet/packet.h> +#include <stdio.h> + +pcap_main_t pcap_main; + +/** + * @brief char * to seed a PG file + */ +static char * pg_fmt = + "packet-generator new {\n" + " name s%d\n" + " limit 1\n" + " size %d-%d\n" + " node ethernet-input\n"; + + +/** + * @brief Packet Generator Stream boilerplate + * + * @param *ofp - FILE + * @param i - int + * @param *pkt - u8 + */ +void stream_boilerplate (FILE *ofp, int i, u8 * pkt) +{ + fformat(ofp, pg_fmt, i, vec_len(pkt), vec_len(pkt)); +} + +/** + * @brief Conversion of PCAP file to PG file format + * + * @param *pm - pcap_main_t + * @param *ofp - FILE + * + * @return rc - int + * + */ +int pcap2pg (pcap_main_t * pm, FILE *ofp) +{ + int i, j; + u8 *pkt; + + for (i = 0; i < vec_len (pm->packets_read); i++) + { + int offset; + ethernet_header_t * h; + u64 ethertype; + + pkt = pm->packets_read[i]; + h = (ethernet_header_t *)pkt; + + stream_boilerplate (ofp, i, pkt); + + fformat (ofp, " data {\n"); + + ethertype = clib_net_to_host_u16 (h->type); + + /** + * In vnet terms, packet generator interfaces are not ethernets. + * They don't have vlan tables. + * This transforms captured 802.1q VLAN packets into + * regular Ethernet packets. 
+ */ + if (ethertype == 0x8100 /* 802.1q vlan */) + { + u16 * vlan_ethertype = (u16 *)(h+1); + ethertype = clib_net_to_host_u16(vlan_ethertype[0]); + offset = 18; + } + else + offset = 14; + + fformat (ofp, + " 0x%04x: %02x%02x.%02x%02x.%02x%02x" + " -> %02x%02x.%02x%02x.%02x%02x\n", + ethertype, + h->src_address[0], + h->src_address[1], + h->src_address[2], + h->src_address[3], + h->src_address[4], + h->src_address[5], + h->dst_address[0], + h->dst_address[1], + h->dst_address[2], + h->dst_address[3], + h->dst_address[4], + h->dst_address[5]); + + fformat (ofp, " hex 0x"); + + for (j = offset; j < vec_len (pkt); j++) + fformat (ofp, "%02x", pkt[j]); + + fformat (ofp, " }\n"); + fformat (ofp, "}\n\n"); + } + return 0; +} + +/** + * @brief pcap2pg. + * usage: pcap2pg -i <input-file> [-o <output-file>] + */ +int main (int argc, char **argv) +{ + unformat_input_t input; + pcap_main_t * pm = &pcap_main; + u8 * input_file = 0, * output_file = 0; + FILE * ofp; + clib_error_t * error; + + unformat_init_command_line (&input, argv); + + while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) + { + if (unformat(&input, "-i %s", &input_file) + || unformat (&input, "input %s", &input_file)) + ; + else if (unformat (&input, "-o %s", &output_file) + || unformat (&input, "output %s", &output_file)) + ; + else + { + usage: + fformat(stderr, + "usage: pcap2pg -i <input-file> [-o <output-file>]\n"); + exit (1); + } + } + + if (input_file == 0) + goto usage; + + pm->file_name = (char *)input_file; + error = pcap_read (pm); + + if (error) + { + clib_error_report (error); + exit (1); + } + + if (output_file) + { + ofp = fopen ((char *)output_file, "rw"); + if (ofp == NULL) + clib_unix_warning ("Couldn't create '%s'", output_file); + exit (1); + } + else + { + ofp = stdout; + } + + pcap2pg (pm, ofp); + + fclose (ofp); + exit (0); +} diff --git a/src/vnet/unix/tap.api b/src/vnet/unix/tap.api new file mode 100644 index 00000000000..9b16eadbf86 --- /dev/null +++ 
b/src/vnet/unix/tap.api @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \file + + This file defines vpe control-plane API messages for + the Linux kernel TAP device driver +*/ + +/** \brief Initialize a new tap interface with the given parameters + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param use_random_mac - let the system generate a unique mac address + @param tap_name - name to associate with the new interface + @param mac_address - mac addr to assign to the interface if use_random_mac not set + @param renumber - forwarded to the tap connect call together with custom_dev_instance (presumably requests that instance number - confirm) + @param custom_dev_instance - device instance value, network byte order + @param tag - optional interface tag; not applied when its first byte is 0 +*/ +define tap_connect +{ + u32 client_index; + u32 context; + u8 use_random_mac; + u8 tap_name[64]; + u8 mac_address[6]; + u8 renumber; + u32 custom_dev_instance; + u8 tag[64]; +}; + +/** \brief Reply for tap connect request + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param sw_if_index - software index allocated for the new tap interface +*/ +define tap_connect_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Modify a tap interface with the given parameters + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface index of existing tap interface + @param use_random_mac - let the system generate a unique mac address + 
@param tap_name - name to associate with the new interface + @param mac_address - mac addr to assign to the interface if use_random_mac not set + @param renumber - forwarded to the tap modify call together with custom_dev_instance (presumably requests that instance number - confirm) + @param custom_dev_instance - device instance value, network byte order +*/ +define tap_modify +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 use_random_mac; + u8 tap_name[64]; + u8 mac_address[6]; + u8 renumber; + u32 custom_dev_instance; +}; + +/** \brief Reply for tap modify request + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param sw_if_index - software index of the modified tap interface +*/ +define tap_modify_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Delete tap interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface index of existing tap interface +*/ +define tap_delete +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Reply for tap delete request + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define tap_delete_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Dump tap interfaces request + @param client_index - opaque cookie to identify the sender + @param context - sender context, copied into each details reply +*/ +define sw_interface_tap_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply for tap dump request, one message per tap interface + @param context - sender context taken from the dump request + @param sw_if_index - software index of tap interface + @param dev_name - Linux tap device name +*/ +define sw_interface_tap_details +{ + u32 context; + u32 sw_if_index; + u8 dev_name[64]; +}; diff --git a/src/vnet/unix/tap_api.c b/src/vnet/unix/tap_api.c new file mode 100644 index 00000000000..99b79ba2e70 --- /dev/null +++ b/src/vnet/unix/tap_api.c @@ -0,0 +1,257 @@ +/* + *------------------------------------------------------------------ + * tap_api.c - vnet tap device driver API support + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip.h> +#include <vnet/unix/tuntap.h> +#include <vnet/unix/tapcli.h> + +#include <vnet/vnet_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vnet/vnet_all_api_h.h> +#undef vl_printfun + +#include <vlibapi/api_helper_macros.h> + +#define foreach_tap_api_msg \ +_(TAP_CONNECT, tap_connect) \ +_(TAP_MODIFY, tap_modify) \ +_(TAP_DELETE, tap_delete) \ +_(SW_INTERFACE_TAP_DUMP, sw_interface_tap_dump) + +#define vl_msg_name_crc_list +#include <vnet/unix/tap.api.h> +#undef vl_msg_name_crc_list + +/* + * WARNING: replicated pending api refactor completion + */ +static void +send_sw_interface_flags_deleted (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + u32 sw_if_index) +{ + vl_api_sw_interface_set_flags_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_FLAGS); + mp->sw_if_index = ntohl (sw_if_index); + + mp->admin_up_down = 0; + mp->link_up_down = 0; + mp->deleted = 1; + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_tap_connect_t_handler (vl_api_tap_connect_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + int rv; + vl_api_tap_connect_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + unix_shared_memory_queue_t *q; + u32 sw_if_index = (u32) ~ 0; + u8 *tag; + + rv = vnet_tap_connect_renumber (vm, mp->tap_name, + mp->use_random_mac ? 
0 : mp->mac_address, + &sw_if_index, mp->renumber, + ntohl (mp->custom_dev_instance)); + + /* Add tag if supplied */ + if (rv == 0 && mp->tag[0]) + { + mp->tag[ARRAY_LEN (mp->tag) - 1] = 0; + tag = format (0, "%s%c", mp->tag, 0); + vnet_set_sw_interface_tag (vnm, tag, sw_if_index); + } + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_TAP_CONNECT_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (rv); + rmp->sw_if_index = ntohl (sw_if_index); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_tap_modify_t_handler (vl_api_tap_modify_t * mp) +{ + int rv; + vl_api_tap_modify_reply_t *rmp; + unix_shared_memory_queue_t *q; + u32 sw_if_index = (u32) ~ 0; + vlib_main_t *vm = vlib_get_main (); + + rv = vnet_tap_modify (vm, ntohl (mp->sw_if_index), mp->tap_name, + mp->use_random_mac ? 0 : mp->mac_address, + &sw_if_index, mp->renumber, + ntohl (mp->custom_dev_instance)); + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_TAP_MODIFY_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (rv); + rmp->sw_if_index = ntohl (sw_if_index); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_tap_delete_t_handler (vl_api_tap_delete_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + int rv; + vpe_api_main_t *vam = &vpe_api_main; + vl_api_tap_delete_reply_t *rmp; + unix_shared_memory_queue_t *q; + u32 sw_if_index = ntohl (mp->sw_if_index); + + rv = vnet_tap_delete (vm, sw_if_index); + if (!rv) + { + vnet_main_t *vnm = vnet_get_main (); + vnet_clear_sw_interface_tag (vnm, sw_if_index); + } + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_TAP_DELETE_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl 
(rv); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); + + if (!rv) + send_sw_interface_flags_deleted (vam, q, sw_if_index); +} + +static void +send_sw_interface_tap_details (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + tapcli_interface_details_t * tap_if, + u32 context) +{ + vl_api_sw_interface_tap_details_t *mp; + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_TAP_DETAILS); + mp->sw_if_index = ntohl (tap_if->sw_if_index); + strncpy ((char *) mp->dev_name, + (char *) tap_if->dev_name, ARRAY_LEN (mp->dev_name) - 1); + mp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_sw_interface_tap_dump_t_handler (vl_api_sw_interface_tap_dump_t * mp) +{ + int rv = 0; + vpe_api_main_t *am = &vpe_api_main; + unix_shared_memory_queue_t *q; + tapcli_interface_details_t *tapifs = NULL; + tapcli_interface_details_t *tap_if = NULL; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + rv = vnet_tap_dump_ifs (&tapifs); + if (rv) + return; + + vec_foreach (tap_if, tapifs) + { + send_sw_interface_tap_details (am, q, tap_if, mp->context); + } + + vec_free (tapifs); +} + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_tap; +#undef _ +} + +static clib_error_t * +tap_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_tap_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (tap_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git 
a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c new file mode 100644 index 00000000000..9862a2bda50 --- /dev/null +++ b/src/vnet/unix/tapcli.c @@ -0,0 +1,1328 @@ +/* + *------------------------------------------------------------------ + * tapcli.c - dynamic tap interface hookup + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +/** + * @file + * @brief dynamic tap interface hookup + */ + +#include <fcntl.h> /* for open */ +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/uio.h> /* for iovec */ +#include <netinet/in.h> + +#include <linux/if_arp.h> +#include <linux/if_tun.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> + +#include <vnet/ip/ip.h> + +#include <vnet/ethernet/ethernet.h> + +#include <vnet/feature/feature.h> +#include <vnet/devices/devices.h> +#include <vnet/unix/tapcli.h> + +static vnet_device_class_t tapcli_dev_class; +static vnet_hw_interface_class_t tapcli_interface_class; +static vlib_node_registration_t tapcli_rx_node; + +static void tapcli_nopunt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame); +/** + * @brief Struct for the tapcli interface + */ +typedef struct { + u32 unix_fd; + u32 unix_file_index; + u32 provision_fd; + /** For counters */ + u32 sw_if_index; + u32 hw_if_index; + u32 is_promisc; + struct ifreq 
ifr; + u32 per_interface_next_index; + /** for delete */ + u8 active; +} tapcli_interface_t; + +/** + * @brief Struct for RX trace + */ +typedef struct { + u16 sw_if_index; +} tapcli_rx_trace_t; + +/** + * @brief Function to format TAP CLI trace + * + * @param *s - u8 - formatting string + * @param *va - va_list + * + * @return *s - u8 - formatted string + * + */ +u8 * format_tapcli_rx_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + vnet_main_t * vnm = vnet_get_main(); + tapcli_rx_trace_t * t = va_arg (*va, tapcli_rx_trace_t *); + s = format (s, "%U", format_vnet_sw_if_index_name, + vnm, t->sw_if_index); + return s; +} + +/** + * @brief TAPCLI main state struct + */ +typedef struct { + /** Vector of iovecs for readv/writev calls. */ + struct iovec * iovecs; + + /** Vector of VLIB rx buffers to use. We allocate them in blocks + of VLIB_FRAME_SIZE (256). */ + u32 * rx_buffers; + + /** tap device destination MAC address. Required, or Linux drops pkts */ + u8 ether_dst_mac[6]; + + /** Interface MTU in bytes and # of default sized buffers. 
*/ + u32 mtu_bytes, mtu_buffers; + + /** Vector of tap interfaces */ + tapcli_interface_t * tapcli_interfaces; + + /** Vector of deleted tap interfaces */ + u32 * tapcli_inactive_interfaces; + + /** Bitmap of tap interfaces with pending reads */ + uword * pending_read_bitmap; + + /** Hash table to find tapcli interface given hw_if_index */ + uword * tapcli_interface_index_by_sw_if_index; + + /** Hash table to find tapcli interface given unix fd */ + uword * tapcli_interface_index_by_unix_fd; + + /** renumbering table */ + u32 * show_dev_instance_by_real_dev_instance; + + /** 1 => disable CLI */ + int is_disabled; + + /** convenience - vlib_main_t */ + vlib_main_t * vlib_main; + /** convenience - vnet_main_t */ + vnet_main_t * vnet_main; + /** convenience - unix_main_t */ + unix_main_t * unix_main; +} tapcli_main_t; + +static tapcli_main_t tapcli_main; + +/** + * @brief tapcli TX node function + * @node tap-cli-tx + * + * Output node, writes the buffers comprising the incoming frame + * to the tun/tap device, aka hands them to the Linux kernel stack. 
+ * + * @param *vm - vlib_main_t + * @param *node - vlib_node_runtime_t + * @param *frame - vlib_frame_t + * + * @return n_packets - uword + * + */ +static uword +tapcli_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * buffers = vlib_frame_args (frame); + uword n_packets = frame->n_vectors; + tapcli_main_t * tm = &tapcli_main; + tapcli_interface_t * ti; + int i; + + for (i = 0; i < n_packets; i++) + { + struct iovec * iov; + vlib_buffer_t * b; + uword l; + vnet_hw_interface_t * hw; + uword * p; + u32 tx_sw_if_index; + + b = vlib_get_buffer (vm, buffers[i]); + + tx_sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_TX]; + if (tx_sw_if_index == (u32)~0) + tx_sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_RX]; + + ASSERT(tx_sw_if_index != (u32)~0); + + /* Use the sup intfc to finesse vlan subifs */ + hw = vnet_get_sup_hw_interface (tm->vnet_main, tx_sw_if_index); + tx_sw_if_index = hw->sw_if_index; + + p = hash_get (tm->tapcli_interface_index_by_sw_if_index, + tx_sw_if_index); + if (p == 0) + { + clib_warning ("sw_if_index %d unknown", tx_sw_if_index); + /* $$$ leak, but this should never happen... */ + continue; + } + else + ti = vec_elt_at_index (tm->tapcli_interfaces, p[0]); + + /* Re-set iovecs if present. */ + if (tm->iovecs) + _vec_len (tm->iovecs) = 0; + + /* VLIB buffer chain -> Unix iovec(s). 
*/ + vec_add2 (tm->iovecs, iov, 1); + iov->iov_base = b->data + b->current_data; + iov->iov_len = l = b->current_length; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + do { + b = vlib_get_buffer (vm, b->next_buffer); + + vec_add2 (tm->iovecs, iov, 1); + + iov->iov_base = b->data + b->current_data; + iov->iov_len = b->current_length; + l += b->current_length; + } while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + } + + if (writev (ti->unix_fd, tm->iovecs, vec_len (tm->iovecs)) < l) + clib_unix_warning ("writev"); + } + + vlib_buffer_free(vm, vlib_frame_vector_args(frame), frame->n_vectors); + + return n_packets; +} + +VLIB_REGISTER_NODE (tapcli_tx_node,static) = { + .function = tapcli_tx, + .name = "tapcli-tx", + .type = VLIB_NODE_TYPE_INTERNAL, + .vector_size = 4, +}; + +/** + * @brief Dispatch tapcli RX node function for node tap_cli_rx + * + * + * @param *vm - vlib_main_t + * @param *node - vlib_node_runtime_t + * @param *ti - tapcli_interface_t + * + * @return n_packets - uword + * + */ +static uword tapcli_rx_iface(vlib_main_t * vm, + vlib_node_runtime_t * node, + tapcli_interface_t * ti) +{ + tapcli_main_t * tm = &tapcli_main; + const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + u32 n_trace = vlib_get_trace_count (vm, node); + u8 set_trace = 0; + + vnet_main_t *vnm; + vnet_sw_interface_t * si; + u8 admin_down; + u32 next = node->cached_next_index; + u32 n_left_to_next, next_index; + u32 *to_next; + + vnm = vnet_get_main(); + si = vnet_get_sw_interface (vnm, ti->sw_if_index); + admin_down = !(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + vlib_get_next_frame(vm, node, next, to_next, n_left_to_next); + + while (n_left_to_next) { // Fill at most one vector + vlib_buffer_t *b_first, *b, *prev; + u32 bi_first, bi; + word n_bytes_in_packet; + int j, n_bytes_left; + + if (PREDICT_FALSE(vec_len(tm->rx_buffers) < tm->mtu_buffers)) { + uword len = vec_len(tm->rx_buffers); + _vec_len(tm->rx_buffers) += + vlib_buffer_alloc_from_free_list(vm, 
&tm->rx_buffers[len], + VLIB_FRAME_SIZE - len, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + if (PREDICT_FALSE(vec_len(tm->rx_buffers) < tm->mtu_buffers)) { + vlib_node_increment_counter(vm, tapcli_rx_node.index, + TAPCLI_ERROR_BUFFER_ALLOC, + tm->mtu_buffers - vec_len(tm->rx_buffers)); + break; + } + } + + uword i_rx = vec_len (tm->rx_buffers) - 1; + + /* Allocate RX buffers from end of rx_buffers. + Turn them into iovecs to pass to readv. */ + vec_validate (tm->iovecs, tm->mtu_buffers - 1); + for (j = 0; j < tm->mtu_buffers; j++) { + b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - j]); + tm->iovecs[j].iov_base = b->data; + tm->iovecs[j].iov_len = buffer_size; + } + + n_bytes_left = readv (ti->unix_fd, tm->iovecs, tm->mtu_buffers); + n_bytes_in_packet = n_bytes_left; + if (n_bytes_left <= 0) { + if (errno != EAGAIN) { + vlib_node_increment_counter(vm, tapcli_rx_node.index, + TAPCLI_ERROR_READ, 1); + } + break; + } + + bi_first = tm->rx_buffers[i_rx]; + b = b_first = vlib_get_buffer (vm, tm->rx_buffers[i_rx]); + prev = NULL; + + while (1) { + b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size; + n_bytes_left -= buffer_size; + + if (prev) { + prev->next_buffer = bi; + prev->flags |= VLIB_BUFFER_NEXT_PRESENT; + } + prev = b; + + /* last segment */ + if (n_bytes_left <= 0) + break; + + i_rx--; + bi = tm->rx_buffers[i_rx]; + b = vlib_get_buffer (vm, bi); + } + + _vec_len (tm->rx_buffers) = i_rx; + + b_first->total_length_not_including_first_buffer = + (n_bytes_in_packet > buffer_size) ? n_bytes_in_packet - buffer_size : 0; + b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b_first); + + vnet_buffer (b_first)->sw_if_index[VLIB_RX] = ti->sw_if_index; + vnet_buffer (b_first)->sw_if_index[VLIB_TX] = (u32)~0; + + b_first->error = node->errors[TAPCLI_ERROR_NONE]; + next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + next_index = (ti->per_interface_next_index != ~0) ? 
+ ti->per_interface_next_index : next_index; + next_index = admin_down ? VNET_DEVICE_INPUT_NEXT_DROP : next_index; + + to_next[0] = bi_first; + to_next++; + n_left_to_next--; + + vnet_feature_start_device_input_x1 (ti->sw_if_index, &next_index, + b_first, 0); + + vlib_validate_buffer_enqueue_x1 (vm, node, next, + to_next, n_left_to_next, + bi_first, next_index); + + /* Interface counters for tapcli interface. */ + if (PREDICT_TRUE(!admin_down)) { + vlib_increment_combined_counter ( + vnet_main.interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + os_get_cpu_number(), ti->sw_if_index, + 1, n_bytes_in_packet); + + if (PREDICT_FALSE(n_trace > 0)) { + vlib_trace_buffer (vm, node, next_index, + b_first, /* follow_chain */ 1); + n_trace--; + set_trace = 1; + tapcli_rx_trace_t *t0 = vlib_add_trace (vm, node, b_first, sizeof (*t0)); + t0->sw_if_index = si->sw_if_index; + } + } + } + vlib_put_next_frame (vm, node, next, n_left_to_next); + if (set_trace) + vlib_set_trace_count (vm, node, n_trace); + return VLIB_FRAME_SIZE - n_left_to_next; +} + +/** + * @brief tapcli RX node function + * @node tap-cli-rx + * + * Input node from the Kernel tun/tap device + * + * @param *vm - vlib_main_t + * @param *node - vlib_node_runtime_t + * @param *frame - vlib_frame_t + * + * @return n_packets - uword + * + */ +static uword +tapcli_rx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + tapcli_main_t * tm = &tapcli_main; + static u32 * ready_interface_indices; + tapcli_interface_t * ti; + int i; + u32 total_count = 0; + + vec_reset_length (ready_interface_indices); + clib_bitmap_foreach (i, tm->pending_read_bitmap, + ({ + vec_add1 (ready_interface_indices, i); + })); + + if (vec_len (ready_interface_indices) == 0) + return 0; + + for (i = 0; i < vec_len(ready_interface_indices); i++) + { + tm->pending_read_bitmap = + clib_bitmap_set (tm->pending_read_bitmap, + ready_interface_indices[i], 0); + + ti = vec_elt_at_index (tm->tapcli_interfaces, 
ready_interface_indices[i]); + total_count += tapcli_rx_iface(vm, node, ti); + } + return total_count; //This might return more than 256. +} + +/** TAPCLI error strings */ +static char * tapcli_rx_error_strings[] = { +#define _(sym,string) string, + foreach_tapcli_error +#undef _ +}; + +VLIB_REGISTER_NODE (tapcli_rx_node, static) = { + .function = tapcli_rx, + .name = "tapcli-rx", + .sibling_of = "device-input", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .vector_size = 4, + .n_errors = TAPCLI_N_ERROR, + .error_strings = tapcli_rx_error_strings, + .format_trace = format_tapcli_rx_trace, +}; + + +/** + * @brief Gets called when file descriptor is ready from epoll. + * + * @param *uf - unix_file_t + * + * @return error - clib_error_t + * + */ +static clib_error_t * tapcli_read_ready (unix_file_t * uf) +{ + vlib_main_t * vm = vlib_get_main(); + tapcli_main_t * tm = &tapcli_main; + uword * p; + + /** Schedule the rx node */ + vlib_node_set_interrupt_pending (vm, tapcli_rx_node.index); + + p = hash_get (tm->tapcli_interface_index_by_unix_fd, uf->file_descriptor); + + /** Mark the specific tap interface ready-to-read */ + if (p) + tm->pending_read_bitmap = clib_bitmap_set (tm->pending_read_bitmap, + p[0], 1); + else + clib_warning ("fd %d not in hash table", uf->file_descriptor); + + return 0; +} + +/** + * @brief CLI function for TAPCLI configuration + * + * @param *vm - vlib_main_t + * @param *input - unformat_input_t + * + * @return error - clib_error_t + * + */ +static clib_error_t * +tapcli_config (vlib_main_t * vm, unformat_input_t * input) +{ + tapcli_main_t *tm = &tapcli_main; + const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "mtu %d", &tm->mtu_bytes)) + ; + else if (unformat (input, "disable")) + tm->is_disabled = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (tm->is_disabled) + 
return 0; + + if (geteuid()) + { + clib_warning ("tapcli disabled: must be superuser"); + tm->is_disabled = 1; + return 0; + } + + tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size; + + return 0; +} + +/** + * @brief Renumber TAPCLI interface + * + * @param *hi - vnet_hw_interface_t + * @param new_dev_instance - u32 + * + * @return rc - int + * + */ +static int tap_name_renumber (vnet_hw_interface_t * hi, + u32 new_dev_instance) +{ + tapcli_main_t *tm = &tapcli_main; + + vec_validate_init_empty (tm->show_dev_instance_by_real_dev_instance, + hi->dev_instance, ~0); + + tm->show_dev_instance_by_real_dev_instance [hi->dev_instance] = + new_dev_instance; + + return 0; +} + +VLIB_CONFIG_FUNCTION (tapcli_config, "tapcli"); + +/** + * @brief Free "no punt" frame + * + * @param *vm - vlib_main_t + * @param *node - vlib_node_runtime_t + * @param *frame - vlib_frame_t + * + */ +static void +tapcli_nopunt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * buffers = vlib_frame_args (frame); + uword n_packets = frame->n_vectors; + vlib_buffer_free (vm, buffers, n_packets); + vlib_frame_free (vm, node, frame); +} + +VNET_HW_INTERFACE_CLASS (tapcli_interface_class,static) = { + .name = "tapcli", + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, +}; + +/** + * @brief Formatter for TAPCLI interface name + * + * @param *s - formatter string + * @param *args - va_list + * + * @return *s - formatted string + * + */ +static u8 * format_tapcli_interface_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u32 show_dev_instance = ~0; + tapcli_main_t * tm = &tapcli_main; + + if (i < vec_len (tm->show_dev_instance_by_real_dev_instance)) + show_dev_instance = tm->show_dev_instance_by_real_dev_instance[i]; + + if (show_dev_instance != ~0) + i = show_dev_instance; + + s = format (s, "tap-%d", i); + return s; +} + +/** + * @brief Modify interface flags for TAPCLI interface + * + * @param *vnm - vnet_main_t + * @param *hw - 
vnet_hw_interface_t + * @param flags - u32 + * + * @return rc - u32 + * + */ +static u32 tapcli_flag_change (vnet_main_t * vnm, + vnet_hw_interface_t * hw, + u32 flags) +{ + tapcli_main_t *tm = &tapcli_main; + tapcli_interface_t *ti; + + ti = vec_elt_at_index (tm->tapcli_interfaces, hw->dev_instance); + + if (flags & ETHERNET_INTERFACE_FLAG_MTU) + { + const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + tm->mtu_bytes = hw->max_packet_bytes; + tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size; + } + else + { + struct ifreq ifr; + u32 want_promisc; + + memcpy (&ifr, &ti->ifr, sizeof (ifr)); + + /* get flags, modify to bring up interface... */ + if (ioctl (ti->provision_fd, SIOCGIFFLAGS, &ifr) < 0) + { + clib_unix_warning ("Couldn't get interface flags for %s", hw->name); + return 0; + } + + want_promisc = (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0; + + if (want_promisc == ti->is_promisc) + return 0; + + if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) + ifr.ifr_flags |= IFF_PROMISC; + else + ifr.ifr_flags &= ~(IFF_PROMISC); + + /* get flags, modify to bring up interface... 
*/ + if (ioctl (ti->provision_fd, SIOCSIFFLAGS, &ifr) < 0) + { + clib_unix_warning ("Couldn't set interface flags for %s", hw->name); + return 0; + } + + ti->is_promisc = want_promisc; + } + + return 0; +} + +/** + * @brief Setting the TAP interface's next processing node + * + * @param *vnm - vnet_main_t + * @param hw_if_index - u32 + * @param node_index - u32 + * + */ +static void tapcli_set_interface_next_node (vnet_main_t *vnm, + u32 hw_if_index, + u32 node_index) +{ + tapcli_main_t *tm = &tapcli_main; + tapcli_interface_t *ti; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + + ti = vec_elt_at_index (tm->tapcli_interfaces, hw->dev_instance); + + /** Shut off redirection */ + if (node_index == ~0) + { + ti->per_interface_next_index = node_index; + return; + } + + ti->per_interface_next_index = + vlib_node_add_next (tm->vlib_main, tapcli_rx_node.index, node_index); +} + +/** + * @brief Set link_state == admin_state otherwise things like ip6 neighbor discovery breaks + * + * @param *vnm - vnet_main_t + * @param hw_if_index - u32 + * @param flags - u32 + * + * @return error - clib_error_t + */ +static clib_error_t * +tapcli_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + uword is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + u32 hw_flags; + u32 speed_duplex = VNET_HW_INTERFACE_FLAG_FULL_DUPLEX + | VNET_HW_INTERFACE_FLAG_SPEED_1G; + + if (is_admin_up) + hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP | speed_duplex; + else + hw_flags = speed_duplex; + + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + return 0; +} + +VNET_DEVICE_CLASS (tapcli_dev_class,static) = { + .name = "tapcli", + .tx_function = tapcli_tx, + .format_device_name = format_tapcli_interface_name, + .rx_redirect_to_node = tapcli_set_interface_next_node, + .name_renumber = tap_name_renumber, + .admin_up_down_function = tapcli_interface_admin_up_down, +}; + +/** + * @brief Dump TAP interfaces + * + * @param **out_tapids - 
tapcli_interface_details_t + * + * @return rc - int + * + */ +int vnet_tap_dump_ifs (tapcli_interface_details_t **out_tapids) +{ + tapcli_main_t * tm = &tapcli_main; + tapcli_interface_t * ti; + + tapcli_interface_details_t * r_tapids = NULL; + tapcli_interface_details_t * tapid = NULL; + + vec_foreach (ti, tm->tapcli_interfaces) { + if (!ti->active) + continue; + vec_add2(r_tapids, tapid, 1); + tapid->sw_if_index = ti->sw_if_index; + strncpy((char *)tapid->dev_name, ti->ifr.ifr_name, sizeof (ti->ifr.ifr_name)-1); + } + + *out_tapids = r_tapids; + + return 0; +} + +/** + * @brief Get tap interface from inactive interfaces or create new + * + * @return interface - tapcli_interface_t + * + */ +static tapcli_interface_t *tapcli_get_new_tapif() +{ + tapcli_main_t * tm = &tapcli_main; + tapcli_interface_t *ti = NULL; + + int inactive_cnt = vec_len(tm->tapcli_inactive_interfaces); + // if there are any inactive ifaces + if (inactive_cnt > 0) { + // take last + u32 ti_idx = tm->tapcli_inactive_interfaces[inactive_cnt - 1]; + if (vec_len(tm->tapcli_interfaces) > ti_idx) { + ti = vec_elt_at_index (tm->tapcli_interfaces, ti_idx); + clib_warning("reusing tap interface"); + } + // "remove" from inactive list + _vec_len(tm->tapcli_inactive_interfaces) -= 1; + } + + // ti was not retrieved from inactive ifaces - create new + if (!ti) + vec_add2 (tm->tapcli_interfaces, ti, 1); + + return ti; +} + +/** + * @brief Connect a TAP interface + * + * @param vm - vlib_main_t + * @param intfc_name - u8 + * @param hwaddr_arg - u8 + * @param sw_if_indexp - u32 + * + * @return rc - int + * + */ +int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, u8 *hwaddr_arg, + u32 * sw_if_indexp) +{ + tapcli_main_t * tm = &tapcli_main; + tapcli_interface_t * ti = NULL; + struct ifreq ifr; + int flags; + int dev_net_tun_fd; + int dev_tap_fd = -1; + clib_error_t * error; + u8 hwaddr [6]; + int rv = 0; + + if (tm->is_disabled) + { + return VNET_API_ERROR_FEATURE_DISABLED; + } + + flags = IFF_TAP | 
IFF_NO_PI; + + if ((dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0) + return VNET_API_ERROR_SYSCALL_ERROR_1; + + memset (&ifr, 0, sizeof (ifr)); + strncpy(ifr.ifr_name, (char *) intfc_name, sizeof (ifr.ifr_name)-1); + ifr.ifr_flags = flags; + if (ioctl (dev_net_tun_fd, TUNSETIFF, (void *)&ifr) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_2; + goto error; + } + + /* Open a provisioning socket */ + if ((dev_tap_fd = socket(PF_PACKET, SOCK_RAW, + htons(ETH_P_ALL))) < 0 ) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_3; + goto error; + } + + /* Find the interface index. */ + { + struct ifreq ifr; + struct sockaddr_ll sll; + + memset (&ifr, 0, sizeof(ifr)); + strncpy (ifr.ifr_name, (char *) intfc_name, sizeof (ifr.ifr_name)-1); + if (ioctl (dev_tap_fd, SIOCGIFINDEX, &ifr) < 0 ) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_4; + goto error; + } + + /* Bind the provisioning socket to the interface. */ + memset(&sll, 0, sizeof(sll)); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = ifr.ifr_ifindex; + sll.sll_protocol = htons(ETH_P_ALL); + + if (bind(dev_tap_fd, (struct sockaddr*) &sll, sizeof(sll)) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_5; + goto error; + } + } + + /* non-blocking I/O on /dev/tapX */ + { + int one = 1; + if (ioctl (dev_net_tun_fd, FIONBIO, &one) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_6; + goto error; + } + } + ifr.ifr_mtu = tm->mtu_bytes; + if (ioctl (dev_tap_fd, SIOCSIFMTU, &ifr) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_7; + goto error; + } + + /* get flags, modify to bring up interface... 
*/ + if (ioctl (dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_8; + goto error; + } + + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + + if (ioctl (dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_9; + goto error; + } + + ti = tapcli_get_new_tapif(); + ti->per_interface_next_index = ~0; + + if (hwaddr_arg != 0) + clib_memcpy(hwaddr, hwaddr_arg, 6); + else + { + f64 now = vlib_time_now(vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (hwaddr+2, &rnd, sizeof(rnd)); + hwaddr[0] = 2; + hwaddr[1] = 0xfe; + } + + error = ethernet_register_interface + (tm->vnet_main, + tapcli_dev_class.index, + ti - tm->tapcli_interfaces /* device instance */, + hwaddr /* ethernet address */, + &ti->hw_if_index, + tapcli_flag_change); + + if (error) + { + clib_error_report (error); + rv = VNET_API_ERROR_INVALID_REGISTRATION; + goto error; + } + + { + unix_file_t template = {0}; + template.read_function = tapcli_read_ready; + template.file_descriptor = dev_net_tun_fd; + ti->unix_file_index = unix_file_add (&unix_main, &template); + ti->unix_fd = dev_net_tun_fd; + ti->provision_fd = dev_tap_fd; + clib_memcpy (&ti->ifr, &ifr, sizeof (ifr)); + } + + { + vnet_hw_interface_t * hw; + hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index); + hw->min_supported_packet_bytes = TAP_MTU_MIN; + hw->max_supported_packet_bytes = TAP_MTU_MAX; + hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] = hw->max_supported_packet_bytes - sizeof(ethernet_header_t); + ti->sw_if_index = hw->sw_if_index; + if (sw_if_indexp) + *sw_if_indexp = hw->sw_if_index; + } + + ti->active = 1; + + hash_set (tm->tapcli_interface_index_by_sw_if_index, ti->sw_if_index, + ti - tm->tapcli_interfaces); + + hash_set (tm->tapcli_interface_index_by_unix_fd, ti->unix_fd, + ti - tm->tapcli_interfaces); + + return rv; + + error: + close (dev_net_tun_fd); + if (dev_tap_fd >= 0) + close (dev_tap_fd); + + return rv; +} + +/** + * @brief 
Renumber a TAP interface + * + * @param *vm - vlib_main_t + * @param *intfc_name - u8 + * @param *hwaddr_arg - u8 + * @param *sw_if_indexp - u32 + * @param renumber - u8 + * @param custom_dev_instance - u32 + * + * @return rc - int + * + */ +int vnet_tap_connect_renumber (vlib_main_t * vm, u8 * intfc_name, + u8 *hwaddr_arg, u32 * sw_if_indexp, + u8 renumber, u32 custom_dev_instance) +{ + int rv = vnet_tap_connect(vm, intfc_name, hwaddr_arg, sw_if_indexp); + + if (!rv && renumber) + vnet_interface_name_renumber (*sw_if_indexp, custom_dev_instance); + + return rv; +} + +/** + * @brief Disconnect TAP CLI interface + * + * @param *ti - tapcli_interface_t + * + * @return rc - int + * + */ +static int tapcli_tap_disconnect (tapcli_interface_t *ti) +{ + int rv = 0; + vnet_main_t * vnm = vnet_get_main(); + tapcli_main_t * tm = &tapcli_main; + u32 sw_if_index = ti->sw_if_index; + + // bring interface down + vnet_sw_interface_set_flags (vnm, sw_if_index, 0); + + if (ti->unix_file_index != ~0) { + unix_file_del (&unix_main, unix_main.file_pool + ti->unix_file_index); + ti->unix_file_index = ~0; + } + else + close(ti->unix_fd); + + hash_unset (tm->tapcli_interface_index_by_unix_fd, ti->unix_fd); + hash_unset (tm->tapcli_interface_index_by_sw_if_index, ti->sw_if_index); + close(ti->provision_fd); + ti->unix_fd = -1; + ti->provision_fd = -1; + + return rv; +} + +/** + * @brief Delete TAP interface + * + * @param *vm - vlib_main_t + * @param sw_if_index - u32 + * + * @return rc - int + * + */ +int vnet_tap_delete(vlib_main_t *vm, u32 sw_if_index) +{ + int rv = 0; + tapcli_main_t * tm = &tapcli_main; + tapcli_interface_t *ti; + uword *p = NULL; + + p = hash_get (tm->tapcli_interface_index_by_sw_if_index, + sw_if_index); + if (p == 0) { + clib_warning ("sw_if_index %d unknown", sw_if_index); + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + } + ti = vec_elt_at_index (tm->tapcli_interfaces, p[0]); + + // inactive + ti->active = 0; + tapcli_tap_disconnect(ti); + // add to inactive list 
+ vec_add1(tm->tapcli_inactive_interfaces, ti - tm->tapcli_interfaces); + + // reset renumbered iface + if (p[0] < vec_len (tm->show_dev_instance_by_real_dev_instance)) + tm->show_dev_instance_by_real_dev_instance[p[0]] = ~0; + + ethernet_delete_interface (tm->vnet_main, ti->hw_if_index); + return rv; +} + +/** + * @brief CLI function to delete TAP interface + * + * @param *vm - vlib_main_t + * @param *input - unformat_input_t + * @param *cmd - vlib_cli_command_t + * + * @return error - clib_error_t + * + */ +static clib_error_t * +tap_delete_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + tapcli_main_t * tm = &tapcli_main; + u32 sw_if_index = ~0; + + if (tm->is_disabled) + { + return clib_error_return (0, "device disabled..."); + } + + if (unformat (input, "%U", unformat_vnet_sw_interface, tm->vnet_main, + &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + + int rc = vnet_tap_delete (vm, sw_if_index); + + if (!rc) { + vlib_cli_output (vm, "Deleted."); + } else { + vlib_cli_output (vm, "Error during deletion of tap interface. 
(rc: %d)", rc); + } + + return 0; +} + +VLIB_CLI_COMMAND (tap_delete_command, static) = { + .path = "tap delete", + .short_help = "tap delete <vpp-tap-intfc-name>", + .function = tap_delete_command_fn, +}; + +/** + * @brief Modifies tap interface - can result in new interface being created + * + * @param *vm - vlib_main_t + * @param orig_sw_if_index - u32 + * @param *intfc_name - u8 + * @param *hwaddr_arg - u8 + * @param *sw_if_indexp - u32 + * @param renumber - u8 + * @param custom_dev_instance - u32 + * + * @return rc - int + * + */ +int vnet_tap_modify (vlib_main_t * vm, u32 orig_sw_if_index, + u8 * intfc_name, u8 *hwaddr_arg, + u32 * sw_if_indexp, + u8 renumber, u32 custom_dev_instance) +{ + int rv = vnet_tap_delete (vm, orig_sw_if_index); + + if (rv) + return rv; + + rv = vnet_tap_connect_renumber(vm, intfc_name, hwaddr_arg, sw_if_indexp, + renumber, custom_dev_instance); + + return rv; +} + +/** + * @brief CLI function to modify TAP interface + * + * @param *vm - vlib_main_t + * @param *input - unformat_input_t + * @param *cmd - vlib_cli_command_t + * + * @return error - clib_error_t + * + */ +static clib_error_t * +tap_modify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 * intfc_name; + tapcli_main_t * tm = &tapcli_main; + u32 sw_if_index = ~0; + u32 new_sw_if_index = ~0; + int user_hwaddr = 0; + u8 hwaddr[6]; + + if (tm->is_disabled) + { + return clib_error_return (0, "device disabled..."); + } + + if (unformat (input, "%U", unformat_vnet_sw_interface, tm->vnet_main, + &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + if (unformat (input, "%s", &intfc_name)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + if (unformat(input, "hwaddr %U", unformat_ethernet_address, + &hwaddr)) + user_hwaddr = 1; + + + int rc = vnet_tap_modify (vm, sw_if_index, intfc_name, + (user_hwaddr == 1 ? 
hwaddr : 0), + &new_sw_if_index, 0, 0); + + if (!rc) { + vlib_cli_output (vm, "Modified %U for Linux tap '%s'", + format_vnet_sw_if_index_name, tm->vnet_main, + new_sw_if_index, intfc_name); + } else { + vlib_cli_output (vm, "Error during modification of tap interface. (rc: %d)", rc); + } + + return 0; +} + +VLIB_CLI_COMMAND (tap_modify_command, static) = { + .path = "tap modify", + .short_help = "tap modify <vpp-tap-intfc-name> <linux-intfc-name> [hwaddr <addr>]", + .function = tap_modify_command_fn, +}; + +/** + * @brief CLI function to connect TAP interface + * + * @param *vm - vlib_main_t + * @param *input - unformat_input_t + * @param *cmd - vlib_cli_command_t + * + * @return error - clib_error_t + * + */ +static clib_error_t * +tap_connect_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 * intfc_name; + tapcli_main_t * tm = &tapcli_main; + u8 hwaddr[6]; + u8 *hwaddr_arg = 0; + u32 sw_if_index; + + if (tm->is_disabled) + { + return clib_error_return (0, "device disabled..."); + } + + if (unformat (input, "%s", &intfc_name)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + if (unformat(input, "hwaddr %U", unformat_ethernet_address, + &hwaddr)) + hwaddr_arg = hwaddr; + + /* It is here for backward compatibility */ + if (unformat(input, "hwaddr random")) + ; + + int rv = vnet_tap_connect(vm, intfc_name, hwaddr_arg, &sw_if_index); + if (rv) { + switch (rv) { + case VNET_API_ERROR_SYSCALL_ERROR_1: + vlib_cli_output (vm, "Couldn't open /dev/net/tun"); + break; + + case VNET_API_ERROR_SYSCALL_ERROR_2: + vlib_cli_output (vm, "Error setting flags on '%s'", intfc_name); + break; + + case VNET_API_ERROR_SYSCALL_ERROR_3: + vlib_cli_output (vm, "Couldn't open provisioning socket"); + break; + + case VNET_API_ERROR_SYSCALL_ERROR_4: + vlib_cli_output (vm, "Couldn't get if_index"); + break; + + case VNET_API_ERROR_SYSCALL_ERROR_5: + vlib_cli_output (vm, "Couldn't bind provisioning 
socket"); + break; + + case VNET_API_ERROR_SYSCALL_ERROR_6: + vlib_cli_output (0, "Couldn't set device non-blocking flag"); + break; + + case VNET_API_ERROR_SYSCALL_ERROR_7: + vlib_cli_output (0, "Couldn't set device MTU"); + break; + + case VNET_API_ERROR_SYSCALL_ERROR_8: + vlib_cli_output (0, "Couldn't get interface flags"); + break; + + case VNET_API_ERROR_SYSCALL_ERROR_9: + vlib_cli_output (0, "Couldn't set intfc admin state up"); + break; + + case VNET_API_ERROR_INVALID_REGISTRATION: + vlib_cli_output (0, "Invalid registration"); + break; + default: + vlib_cli_output (0, "Unknown error: %d", rv); + break; + } + return 0; + } + + vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index); + return 0; + } + +VLIB_CLI_COMMAND (tap_connect_command, static) = { + .path = "tap connect", + .short_help = "tap connect <intfc-name> [hwaddr <addr>]", + .function = tap_connect_command_fn, +}; + +/** + * @brief TAPCLI main init + * + * @param *vm - vlib_main_t + * + * @return error - clib_error_t + * + */ +clib_error_t * +tapcli_init (vlib_main_t * vm) +{ + tapcli_main_t * tm = &tapcli_main; + + tm->vlib_main = vm; + tm->vnet_main = vnet_get_main(); + tm->unix_main = &unix_main; + tm->mtu_bytes = TAP_MTU_DEFAULT; + tm->tapcli_interface_index_by_sw_if_index = hash_create (0, sizeof(uword)); + tm->tapcli_interface_index_by_unix_fd = hash_create (0, sizeof (uword)); + tm->rx_buffers = 0; + vec_alloc(tm->rx_buffers, VLIB_FRAME_SIZE); + vec_reset_length(tm->rx_buffers); + vm->os_punt_frame = tapcli_nopunt_frame; + return 0; +} + +VLIB_INIT_FUNCTION (tapcli_init); diff --git a/src/vnet/unix/tapcli.h b/src/vnet/unix/tapcli.h new file mode 100644 index 00000000000..fcd82dbf25d --- /dev/null +++ b/src/vnet/unix/tapcli.h @@ -0,0 +1,52 @@ +/* + * tapcli.h : tap support + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief TAPCLI definitions + */ + +#ifndef __included_tapcli_h__ +#define __included_tapcli_h__ + +/** TAP CLI errors */ +#define foreach_tapcli_error \ + /* Must be first. */ \ + _(NONE, "no error") \ + _(READ, "read error") \ + _(BUFFER_ALLOC, "buffer allocation error") \ + _(UNKNOWN, "unknown error") + +typedef enum { +#define _(sym,str) TAPCLI_ERROR_##sym, + foreach_tapcli_error +#undef _ + TAPCLI_N_ERROR, + } tapcli_error_t; + +/** TAP CLI interface details struct */ +typedef struct { + u32 sw_if_index; + u8 dev_name[64]; +} tapcli_interface_details_t; + +int vnet_tap_dump_ifs (tapcli_interface_details_t **out_tapids); + +#define TAP_MTU_MIN 68 +#define TAP_MTU_MAX 65535 +#define TAP_MTU_DEFAULT 1500 + +#endif /* __included_tapcli_h__ */ diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c new file mode 100644 index 00000000000..4a5dd676a68 --- /dev/null +++ b/src/vnet/unix/tuntap.c @@ -0,0 +1,1000 @@ +/* + *------------------------------------------------------------------ + * tuntap.c - kernel stack (reverse) punt/inject path + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +/** + * @file + * @brief TunTap Kernel stack (reverse) punt/inject path. + * + * This driver runs in one of two distinct modes: + * - "punt/inject" mode, where we send pkts not otherwise processed + * by the forwarding to the Linux kernel stack, and + * + * - "normal interface" mode, where we treat the Linux kernel stack + * as a peer. + * + * By default, we select punt/inject mode. + */ + +#include <fcntl.h> /* for open */ +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/uio.h> /* for iovec */ +#include <netinet/in.h> + +#include <linux/if_arp.h> +#include <linux/if_tun.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> + +#include <vnet/ip/ip.h> + +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/devices.h> +#include <vnet/feature/feature.h> + +static vnet_device_class_t tuntap_dev_class; +static vnet_hw_interface_class_t tuntap_interface_class; + +static void tuntap_punt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame); +static void tuntap_nopunt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame); + +typedef struct { + u32 sw_if_index; + u8 is_v6; + u8 addr[16]; +} subif_address_t; + +/** + * @brief TUNTAP node main state + */ +typedef struct { + /** Vector of iovecs for readv/writev calls. */ + struct iovec * iovecs; + + /** Vector of VLIB rx buffers to use. We allocate them in blocks + of VLIB_FRAME_SIZE (256). 
*/ + u32 * rx_buffers; + + /** File descriptors for /dev/net/tun and provisioning socket. */ + int dev_net_tun_fd, dev_tap_fd; + + /** Create a "tap" [ethernet] encaps device */ + int is_ether; + + /** 1 if a "normal" routed intfc, 0 if a punt/inject interface */ + + int have_normal_interface; + + /** tap device destination MAC address. Required, or Linux drops pkts */ + u8 ether_dst_mac[6]; + + /** Interface MTU in bytes and # of default sized buffers. */ + u32 mtu_bytes, mtu_buffers; + + /** Linux interface name for tun device. */ + char * tun_name; + + /** Pool of subinterface addresses */ + subif_address_t *subifs; + + /** Hash for subif addresses */ + mhash_t subif_mhash; + + /** Unix file index */ + u32 unix_file_index; + + /** For the "normal" interface, if configured */ + u32 hw_if_index, sw_if_index; + +} tuntap_main_t; + +static tuntap_main_t tuntap_main = { + .tun_name = "vnet", + + /** Suitable defaults for an Ethernet-like tun/tap device */ + .mtu_bytes = 4096 + 256, +}; + +/** + * @brief tuntap_tx + * @node tuntap-tx + * + * Output node, writes the buffers comprising the incoming frame + * to the tun/tap device, aka hands them to the Linux kernel stack. + * + * @param *vm - vlib_main_t + * @param *node - vlib_node_runtime_t + * @param *frame - vlib_frame_t + * + * @return rc - uword + * + */ +static uword +tuntap_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * buffers = vlib_frame_args (frame); + uword n_packets = frame->n_vectors; + tuntap_main_t * tm = &tuntap_main; + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + u32 n_bytes = 0; + int i; + + for (i = 0; i < n_packets; i++) + { + struct iovec * iov; + vlib_buffer_t * b; + uword l; + + b = vlib_get_buffer (vm, buffers[i]); + + if (tm->is_ether && (!tm->have_normal_interface)) + { + vlib_buffer_reset(b); + clib_memcpy (vlib_buffer_get_current (b), tm->ether_dst_mac, 6); + } + + /* Re-set iovecs if present. 
*/ + if (tm->iovecs) + _vec_len (tm->iovecs) = 0; + + /** VLIB buffer chain -> Unix iovec(s). */ + vec_add2 (tm->iovecs, iov, 1); + iov->iov_base = b->data + b->current_data; + iov->iov_len = l = b->current_length; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + do { + b = vlib_get_buffer (vm, b->next_buffer); + + vec_add2 (tm->iovecs, iov, 1); + + iov->iov_base = b->data + b->current_data; + iov->iov_len = b->current_length; + l += b->current_length; + } while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + } + + if (writev (tm->dev_net_tun_fd, tm->iovecs, vec_len (tm->iovecs)) < l) + clib_unix_warning ("writev"); + + n_bytes += l; + } + + /* Update tuntap interface output stats. */ + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + vm->cpu_index, + tm->sw_if_index, n_packets, n_bytes); + + + /** The normal interface path flattens the buffer chain */ + if (tm->have_normal_interface) + vlib_buffer_free_no_next (vm, buffers, n_packets); + else + vlib_buffer_free (vm, buffers, n_packets); + + return n_packets; +} + +VLIB_REGISTER_NODE (tuntap_tx_node,static) = { + .function = tuntap_tx, + .name = "tuntap-tx", + .type = VLIB_NODE_TYPE_INTERNAL, + .vector_size = 4, +}; + +/** + * @brief TUNTAP receive node + * @node tuntap-rx + * + * @param *vm - vlib_main_t + * @param *node - vlib_node_runtime_t + * @param *frame - vlib_frame_t + * + * @return rc - uword + * + */ +static uword +tuntap_rx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + tuntap_main_t * tm = &tuntap_main; + vlib_buffer_t * b; + u32 bi; + const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + + /** Make sure we have some RX buffers. */ + { + uword n_left = vec_len (tm->rx_buffers); + uword n_alloc; + + if (n_left < VLIB_FRAME_SIZE / 2) + { + if (! 
tm->rx_buffers) + vec_alloc (tm->rx_buffers, VLIB_FRAME_SIZE); + + n_alloc = vlib_buffer_alloc (vm, tm->rx_buffers + n_left, VLIB_FRAME_SIZE - n_left); + _vec_len (tm->rx_buffers) = n_left + n_alloc; + } + } + + /** Allocate RX buffers from end of rx_buffers. + Turn them into iovecs to pass to readv. */ + { + uword i_rx = vec_len (tm->rx_buffers) - 1; + vlib_buffer_t * b; + word i, n_bytes_left, n_bytes_in_packet; + + /** We should have enough buffers left for an MTU sized packet. */ + ASSERT (vec_len (tm->rx_buffers) >= tm->mtu_buffers); + + vec_validate (tm->iovecs, tm->mtu_buffers - 1); + for (i = 0; i < tm->mtu_buffers; i++) + { + b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - i]); + tm->iovecs[i].iov_base = b->data; + tm->iovecs[i].iov_len = buffer_size; + } + + n_bytes_left = readv (tm->dev_net_tun_fd, tm->iovecs, tm->mtu_buffers); + n_bytes_in_packet = n_bytes_left; + if (n_bytes_left <= 0) + { + if (errno != EAGAIN) + clib_unix_warning ("readv %d", n_bytes_left); + return 0; + } + + bi = tm->rx_buffers[i_rx]; + + while (1) + { + b = vlib_get_buffer (vm, tm->rx_buffers[i_rx]); + b->flags = 0; + b->current_data = 0; + b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size; + + n_bytes_left -= buffer_size; + + if (n_bytes_left <= 0) + { + break; + } + + i_rx--; + b->flags |= VLIB_BUFFER_NEXT_PRESENT; + b->next_buffer = tm->rx_buffers[i_rx]; + } + + /** Interface counters for tuntap interface. 
*/ + vlib_increment_combined_counter + (vnet_main.interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + os_get_cpu_number(), + tm->sw_if_index, + 1, n_bytes_in_packet); + + _vec_len (tm->rx_buffers) = i_rx; + } + + b = vlib_get_buffer (vm, bi); + + { + u32 next_index; + uword n_trace = vlib_get_trace_count (vm, node); + + vnet_buffer (b)->sw_if_index[VLIB_RX] = tm->sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32)~0; + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... + */ + if (VLIB_BUFFER_TRACE_TRAJECTORY) + b->pre_data[0] = 0; + + b->error = node->errors[0]; + + if (tm->is_ether) + { + next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + } + else + switch (b->data[0] & 0xf0) + { + case 0x40: + next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT; + break; + case 0x60: + next_index = VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + break; + default: + next_index = VNET_DEVICE_INPUT_NEXT_DROP; + break; + } + + /* The linux kernel couldn't care less if our interface is up */ + if (tm->have_normal_interface) + { + vnet_main_t *vnm = vnet_get_main(); + vnet_sw_interface_t * si; + si = vnet_get_sw_interface (vnm, tm->sw_if_index); + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + next_index = VNET_DEVICE_INPUT_NEXT_DROP; + } + + vnet_feature_start_device_input_x1 (tm->sw_if_index, &next_index, b, 0); + + vlib_set_next_frame_buffer (vm, node, next_index, bi); + + if (n_trace > 0) + { + vlib_trace_buffer (vm, node, next_index, + b, /* follow_chain */ 1); + vlib_set_trace_count (vm, node, n_trace - 1); + } + } + + return 1; +} + +/** + * @brief TUNTAP_RX error strings + */ +static char * tuntap_rx_error_strings[] = { + "unknown packet type", +}; + +VLIB_REGISTER_NODE (tuntap_rx_node,static) = { + .function = tuntap_rx, + .name = "tuntap-rx", + .sibling_of = "device-input", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .vector_size = 4, + .n_errors = 1, 
+ .error_strings = tuntap_rx_error_strings, +}; + +/** + * @brief Gets called when file descriptor is ready from epoll. + * + * @param *uf - unix_file_t + * + * @return error - clib_error_t + */ +static clib_error_t * tuntap_read_ready (unix_file_t * uf) +{ + vlib_main_t * vm = vlib_get_main(); + vlib_node_set_interrupt_pending (vm, tuntap_rx_node.index); + return 0; +} + +/** + * @brief Clean up the tun/tap device + * + * @param *vm - vlib_main_t + * + * @return error - clib_error_t + * + */ +static clib_error_t * +tuntap_exit (vlib_main_t * vm) +{ + tuntap_main_t *tm = &tuntap_main; + struct ifreq ifr; + int sfd; + + /* Not present. */ + if (! tm->dev_net_tun_fd || tm->dev_net_tun_fd < 0) + return 0; + + sfd = socket (AF_INET, SOCK_STREAM, 0); + if (sfd < 0) + clib_unix_warning("provisioning socket"); + + memset(&ifr, 0, sizeof (ifr)); + strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name)-1); + + /* get flags, modify to bring down interface... */ + if (ioctl (sfd, SIOCGIFFLAGS, &ifr) < 0) + clib_unix_warning ("SIOCGIFFLAGS"); + + ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING); + + if (ioctl (sfd, SIOCSIFFLAGS, &ifr) < 0) + clib_unix_warning ("SIOCSIFFLAGS"); + + /* Turn off persistence */ + if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 0) < 0) + clib_unix_warning ("TUNSETPERSIST"); + close(tm->dev_tap_fd); + if (tm->dev_net_tun_fd >= 0) + close(tm->dev_net_tun_fd); + if (sfd >= 0) + close (sfd); + + return 0; +} + +VLIB_MAIN_LOOP_EXIT_FUNCTION (tuntap_exit); + +/** + * @brief CLI function for tun/tap config + * + * @param *vm - vlib_main_t + * @param *input - unformat_input_t + * + * @return error - clib_error_t + * + */ +static clib_error_t * +tuntap_config (vlib_main_t * vm, unformat_input_t * input) +{ + tuntap_main_t *tm = &tuntap_main; + clib_error_t * error = 0; + struct ifreq ifr; + u8 * name; + int flags = IFF_TUN | IFF_NO_PI; + int is_enabled = 0, is_ether = 0, have_normal_interface = 0; + const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + + while 
(unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "mtu %d", &tm->mtu_bytes)) + ; + else if (unformat (input, "enable")) + is_enabled = 1; + else if (unformat (input, "disable")) + is_enabled = 0; + else if (unformat (input, "ethernet") || + unformat (input, "ether")) + is_ether = 1; + else if (unformat (input, "have-normal-interface") || + unformat (input, "have-normal")) + have_normal_interface = 1; + else if (unformat (input, "name %s", &name)) + tm->tun_name = (char *) name; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + tm->dev_net_tun_fd = -1; + tm->dev_tap_fd = -1; + + if (is_enabled == 0) + return 0; + + if (geteuid()) + { + clib_warning ("tuntap disabled: must be superuser"); + return 0; + } + + tm->is_ether = is_ether; + tm->have_normal_interface = have_normal_interface; + + if (is_ether) + flags = IFF_TAP | IFF_NO_PI; + + if ((tm->dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0) + { + error = clib_error_return_unix (0, "open /dev/net/tun"); + goto done; + } + + memset (&ifr, 0, sizeof (ifr)); + strncpy(ifr.ifr_name, tm->tun_name, sizeof(ifr.ifr_name)-1); + ifr.ifr_flags = flags; + if (ioctl (tm->dev_net_tun_fd, TUNSETIFF, (void *)&ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl TUNSETIFF"); + goto done; + } + + /* Make it persistent, at least until we split. */ + if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 1) < 0) + { + error = clib_error_return_unix (0, "TUNSETPERSIST"); + goto done; + } + + /* Open a provisioning socket */ + if ((tm->dev_tap_fd = socket(PF_PACKET, SOCK_RAW, + htons(ETH_P_ALL))) < 0 ) + { + error = clib_error_return_unix (0, "socket"); + goto done; + } + + /* Find the interface index. 
*/ + { + struct ifreq ifr; + struct sockaddr_ll sll; + + memset (&ifr, 0, sizeof(ifr)); + strncpy (ifr.ifr_name, tm->tun_name, sizeof(ifr.ifr_name)-1); + if (ioctl (tm->dev_tap_fd, SIOCGIFINDEX, &ifr) < 0 ) + { + error = clib_error_return_unix (0, "ioctl SIOCGIFINDEX"); + goto done; + } + + /* Bind the provisioning socket to the interface. */ + memset(&sll, 0, sizeof(sll)); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = ifr.ifr_ifindex; + sll.sll_protocol = htons(ETH_P_ALL); + + if (bind(tm->dev_tap_fd, (struct sockaddr*) &sll, sizeof(sll)) < 0) + { + error = clib_error_return_unix (0, "bind"); + goto done; + } + } + + /* non-blocking I/O on /dev/tapX */ + { + int one = 1; + if (ioctl (tm->dev_net_tun_fd, FIONBIO, &one) < 0) + { + error = clib_error_return_unix (0, "ioctl FIONBIO"); + goto done; + } + } + + tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size; + + ifr.ifr_mtu = tm->mtu_bytes; + if (ioctl (tm->dev_tap_fd, SIOCSIFMTU, &ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl SIOCSIFMTU"); + goto done; + } + + /* get flags, modify to bring up interface... 
*/ + if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl SIOCGIFFLAGS"); + goto done; + } + + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + + if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl SIOCSIFFLAGS"); + goto done; + } + + if (is_ether) + { + if (ioctl (tm->dev_tap_fd, SIOCGIFHWADDR, &ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl SIOCGIFHWADDR"); + goto done; + } + else + clib_memcpy (tm->ether_dst_mac, ifr.ifr_hwaddr.sa_data, 6); + } + + if (have_normal_interface) + { + vnet_main_t *vnm = vnet_get_main(); + error = ethernet_register_interface + (vnm, + tuntap_dev_class.index, + 0 /* device instance */, + tm->ether_dst_mac /* ethernet address */, + &tm->hw_if_index, + 0 /* flag change */); + if (error) + clib_error_report (error); + tm->sw_if_index = tm->hw_if_index; + vm->os_punt_frame = tuntap_nopunt_frame; + } + else + { + vnet_main_t *vnm = vnet_get_main(); + vnet_hw_interface_t * hi; + + vm->os_punt_frame = tuntap_punt_frame; + + tm->hw_if_index = vnet_register_interface + (vnm, + tuntap_dev_class.index, 0 /* device instance */, + tuntap_interface_class.index, 0); + hi = vnet_get_hw_interface (vnm, tm->hw_if_index); + tm->sw_if_index = hi->sw_if_index; + + /* Interface is always up. 
*/ + vnet_hw_interface_set_flags (vnm, tm->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + vnet_sw_interface_set_flags (vnm, tm->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + } + + { + unix_file_t template = {0}; + template.read_function = tuntap_read_ready; + template.file_descriptor = tm->dev_net_tun_fd; + tm->unix_file_index = unix_file_add (&unix_main, &template); + } + + done: + if (error) + { + if (tm->dev_net_tun_fd >= 0) + close (tm->dev_net_tun_fd); + if (tm->dev_tap_fd >= 0) + close (tm->dev_tap_fd); + } + + return error; +} + +VLIB_CONFIG_FUNCTION (tuntap_config, "tuntap"); + +/** + * @brief Add or Del IP4 address to tun/tap interface + * + * @param *im - ip4_main_t + * @param opaque - uword + * @param sw_if_index - u32 + * @param *address - ip4_address_t + * @param is_delete - u32 + * + */ +void +tuntap_ip4_add_del_interface_address (ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, + u32 is_delete) +{ + tuntap_main_t * tm = &tuntap_main; + struct ifreq ifr; + subif_address_t subif_addr, * ap; + uword * p; + + /** Tuntap disabled, or using a "normal" interface. */ + if (tm->have_normal_interface || tm->dev_tap_fd < 0) + return; + + /** See if we already know about this subif */ + memset (&subif_addr, 0, sizeof (subif_addr)); + subif_addr.sw_if_index = sw_if_index; + clib_memcpy (&subif_addr.addr, address, sizeof (*address)); + + p = mhash_get (&tm->subif_mhash, &subif_addr); + + if (p) + ap = pool_elt_at_index (tm->subifs, p[0]); + else + { + pool_get (tm->subifs, ap); + *ap = subif_addr; + mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0); + } + + /* Use subif pool index to select alias device. */ + memset (&ifr, 0, sizeof (ifr)); + snprintf (ifr.ifr_name, sizeof(ifr.ifr_name), + "%s:%d", tm->tun_name, (int)(ap - tm->subifs)); + + /* the tuntap punt/inject is enabled for IPv4 RX so long as + * any vpp interface has an IPv4 address. + * this is also ref counted. 
+ */ + ip4_sw_interface_enable_disable (tm->sw_if_index, !is_delete); + + if (! is_delete) + { + struct sockaddr_in * sin; + + sin = (struct sockaddr_in *)&ifr.ifr_addr; + + /* Set ipv4 address, netmask. */ + sin->sin_family = AF_INET; + clib_memcpy (&sin->sin_addr.s_addr, address, 4); + if (ioctl (tm->dev_tap_fd, SIOCSIFADDR, &ifr) < 0) + clib_unix_warning ("ioctl SIOCSIFADDR"); + + sin->sin_addr.s_addr = im->fib_masks[address_length]; + if (ioctl (tm->dev_tap_fd, SIOCSIFNETMASK, &ifr) < 0) + clib_unix_warning ("ioctl SIOCSIFNETMASK"); + } + else + { + mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */); + pool_put (tm->subifs, ap); + } + + /* get flags, modify to bring up interface... */ + if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0) + clib_unix_warning ("ioctl SIOCGIFFLAGS"); + + if (is_delete) + ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING); + else + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + + if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0) + clib_unix_warning ("ioctl SIOCSIFFLAGS"); +} + +/** + * @brief workaround for a known include file bug. + * including @c <linux/ipv6.h> causes multiple definitions if + * @c <netinet/in.h is also included. + */ +struct in6_ifreq { + struct in6_addr ifr6_addr; + u32 ifr6_prefixlen; + int ifr6_ifindex; +}; + +/** + * @brief Add or Del tun/tap interface address. + * + * Both the v6 interface address API and the way ifconfig + * displays subinterfaces differ from their v4 couterparts. + * The code given here seems to work but YMMV. 
+ * + * @param *im - ip6_main_t + * @param opaque - uword + * @param sw_if_index - u32 + * @param *address - ip6_address_t + * @param address_length - u32 + * @param if_address_index - u32 + * @param is_delete - u32 + */ +void +tuntap_ip6_add_del_interface_address (ip6_main_t * im, + uword opaque, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, + u32 if_address_index, + u32 is_delete) +{ + tuntap_main_t * tm = &tuntap_main; + struct ifreq ifr; + struct in6_ifreq ifr6; + subif_address_t subif_addr, * ap; + uword * p; + + /* Tuntap disabled, or using a "normal" interface. */ + if (tm->have_normal_interface || tm->dev_tap_fd < 0) + return; + + /* See if we already know about this subif */ + memset (&subif_addr, 0, sizeof (subif_addr)); + subif_addr.sw_if_index = sw_if_index; + subif_addr.is_v6 = 1; + clib_memcpy (&subif_addr.addr, address, sizeof (*address)); + + p = mhash_get (&tm->subif_mhash, &subif_addr); + + if (p) + ap = pool_elt_at_index (tm->subifs, p[0]); + else + { + pool_get (tm->subifs, ap); + *ap = subif_addr; + mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0); + } + + /* Use subif pool index to select alias device. */ + memset (&ifr, 0, sizeof (ifr)); + memset (&ifr6, 0, sizeof (ifr6)); + snprintf (ifr.ifr_name, sizeof(ifr.ifr_name), + "%s:%d", tm->tun_name, (int)(ap - tm->subifs)); + + /* the tuntap punt/inject is enabled for IPv6 RX so long as + * any vpp interface has an IPv6 address. + * this is also ref counted. + */ + ip6_sw_interface_enable_disable (tm->sw_if_index, !is_delete); + + if (! 
is_delete) + { + int sockfd = socket (AF_INET6, SOCK_STREAM, 0); + if (sockfd < 0) + clib_unix_warning ("get ifindex socket"); + + if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0) + clib_unix_warning ("get ifindex"); + + ifr6.ifr6_ifindex = ifr.ifr_ifindex; + ifr6.ifr6_prefixlen = address_length; + clib_memcpy (&ifr6.ifr6_addr, address, 16); + + if (ioctl (sockfd, SIOCSIFADDR, &ifr6) < 0) + clib_unix_warning ("set address"); + + if (sockfd >= 0) + close (sockfd); + } + else + { + int sockfd = socket (AF_INET6, SOCK_STREAM, 0); + if (sockfd < 0) + clib_unix_warning ("get ifindex socket"); + + if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0) + clib_unix_warning ("get ifindex"); + + ifr6.ifr6_ifindex = ifr.ifr_ifindex; + ifr6.ifr6_prefixlen = address_length; + clib_memcpy (&ifr6.ifr6_addr, address, 16); + + if (ioctl (sockfd, SIOCDIFADDR, &ifr6) < 0) + clib_unix_warning ("del address"); + + if (sockfd >= 0) + close (sockfd); + + mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */); + pool_put (tm->subifs, ap); + } +} + +/** + * @brief TX the tun/tap frame + * + * @param *vm - vlib_main_t + * @param *node - vlib_node_runtime_t + * @param *frame - vlib_frame_t + * + */ +static void +tuntap_punt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + tuntap_tx (vm, node, frame); + vlib_frame_free (vm, node, frame); +} + +/** + * @brief Free the tun/tap frame + * + * @param *vm - vlib_main_t + * @param *node - vlib_node_runtime_t + * @param *frame - vlib_frame_t + * + */ +static void +tuntap_nopunt_frame (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * buffers = vlib_frame_args (frame); + uword n_packets = frame->n_vectors; + vlib_buffer_free (vm, buffers, n_packets); + vlib_frame_free (vm, node, frame); +} + +VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = { + .name = "tuntap", + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, +}; + +/** + * @brief Format tun/tap interface name + * + * @param *s - u8 - 
formatter string + * @param *args - va_list + * + * @return *s - u8 - formatted string + * + */ +static u8 * format_tuntap_interface_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + + s = format (s, "tuntap-%d", i); + return s; +} + +/** + * @brief TX packet out tun/tap + * + * @param *vm - vlib_main_t + * @param *node - vlib_node_runtime_t + * @param *frame - vlib_frame_t + * + * @return n_buffers - uword - Packets transmitted + * + */ +static uword +tuntap_intfc_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + tuntap_main_t * tm = &tuntap_main; + u32 * buffers = vlib_frame_args (frame); + uword n_buffers = frame->n_vectors; + + /* Normal interface transmit happens only on the normal interface... */ + if (tm->have_normal_interface) + return tuntap_tx (vm, node, frame); + + vlib_buffer_free (vm, buffers, n_buffers); + return n_buffers; +} + +VNET_DEVICE_CLASS (tuntap_dev_class,static) = { + .name = "tuntap", + .tx_function = tuntap_intfc_tx, + .format_device_name = format_tuntap_interface_name, +}; + +/** + * @brief tun/tap node init + * + * @param *vm - vlib_main_t + * + * @return error - clib_error_t + * + */ +static clib_error_t * +tuntap_init (vlib_main_t * vm) +{ + clib_error_t * error; + ip4_main_t * im4 = &ip4_main; + ip6_main_t * im6 = &ip6_main; + ip4_add_del_interface_address_callback_t cb4; + ip6_add_del_interface_address_callback_t cb6; + tuntap_main_t * tm = &tuntap_main; + + error = vlib_call_init_function (vm, ip4_init); + if (error) + return error; + + mhash_init (&tm->subif_mhash, sizeof (u32), sizeof(subif_address_t)); + + cb4.function = tuntap_ip4_add_del_interface_address; + cb4.function_opaque = 0; + vec_add1 (im4->add_del_interface_address_callbacks, cb4); + + cb6.function = tuntap_ip6_add_del_interface_address; + cb6.function_opaque = 0; + vec_add1 (im6->add_del_interface_address_callbacks, cb6); + + return 0; +} + +VLIB_INIT_FUNCTION (tuntap_init); diff --git a/src/vnet/unix/tuntap.h 
b/src/vnet/unix/tuntap.h new file mode 100644 index 00000000000..d7f96caeaf0 --- /dev/null +++ b/src/vnet/unix/tuntap.h @@ -0,0 +1,36 @@ +/* + *------------------------------------------------------------------ + * tuntap.h - kernel stack (reverse) punt/inject path + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +/** + * @file + * @brief Call from VLIB_INIT_FUNCTION to set the Linux kernel inject node name. + */ +void register_tuntap_inject_node_name (char *name); + +int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, + u8 *hwaddr_arg, u32 * sw_if_indexp); +int vnet_tap_connect_renumber (vlib_main_t * vm, u8 * intfc_name, + u8 *hwaddr_arg, u32 * sw_if_indexp, + u8 renumber, u32 custom_dev_instance); + +int vnet_tap_delete(vlib_main_t *vm, u32 sw_if_index); + +int vnet_tap_modify (vlib_main_t * vm, u32 orig_sw_if_index, + u8 * intfc_name, u8 *hwaddr_arg, + u32 * sw_if_indexp, + u8 renumber, u32 custom_dev_instance); |