/* *------------------------------------------------------------------ * tuntap.c - kernel stack (reverse) punt/inject path * * Copyright (c) 2009 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *------------------------------------------------------------------ */ /** * @file * @brief TunTap Kernel stack (reverse) punt/inject path. * * This driver runs in one of two distinct modes: * - "punt/inject" mode, where we send pkts not otherwise processed * by the forwarding to the Linux kernel stack, and * * - "normal interface" mode, where we treat the Linux kernel stack * as a peer. * * By default, we select punt/inject mode. */ #include /* for open */ #include #include #include #include #include /* for iovec */ #include #include #include #include #include #include #include #include #include #include static vnet_device_class_t tuntap_dev_class; static vnet_hw_interface_class_t tuntap_interface_class; static void tuntap_punt_frame (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame); static void tuntap_nopunt_frame (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame); typedef struct { u32 sw_if_index; u8 is_v6; u8 addr[16]; } subif_address_t; /** * @brief TUNTAP node main state */ typedef struct { /** Vector of iovecs for readv/writev calls. */ struct iovec * iovecs; /** Vector of VLIB rx buffers to use. We allocate them in blocks of VLIB_FRAME_SIZE (256). 
*/ u32 * rx_buffers; /** File descriptors for /dev/net/tun and provisioning socket. */ int dev_net_tun_fd, dev_tap_fd; /** Create a "tap" [ethernet] encaps device */ int is_ether; /** 1 if a "normal" routed intfc, 0 if a punt/inject interface */ int have_normal_interface; /** tap device destination MAC address. Required, or Linux drops pkts */ u8 ether_dst_mac[6]; /** Interface MTU in bytes and # of default sized buffers. */ u32 mtu_bytes, mtu_buffers; /** Linux interface name for tun device. */ char * tun_name; /** Pool of subinterface addresses */ subif_address_t *subifs; /** Hash for subif addresses */ mhash_t subif_mhash; /** Unix file index */ u32 clib_file_index; /** For the "normal" interface, if configured */ u32 hw_if_index, sw_if_index; } tuntap_main_t; static tuntap_main_t tuntap_main = { .tun_name = "vnet", /** Suitable defaults for an Ethernet-like tun/tap device */ .mtu_bytes = 4096 + 256, }; /** * @brief tuntap_tx * @node tuntap-tx * * Output node, writes the buffers comprising the incoming frame * to the tun/tap device, aka hands them to the Linux kernel stack. * * @param *vm - vlib_main_t * @param *node - vlib_node_runtime_t * @param *frame - vlib_frame_t * * @return rc - uword * */ static uword tuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 * buffers = vlib_frame_args (frame); uword n_packets = frame->n_vectors; tuntap_main_t * tm = &tuntap_main; vnet_main_t *vnm = vnet_get_main (); vnet_interface_main_t *im = &vnm->interface_main; u32 n_bytes = 0; int i; for (i = 0; i < n_packets; i++) { struct iovec * iov; vlib_buffer_t * b; uword l; b = vlib_get_buffer (vm, buffers[i]); if (tm->is_ether && (!tm->have_normal_interface)) { vlib_buffer_reset(b); clib_memcpy (vlib_buffer_get_current (b), tm->ether_dst_mac, 6); } /* Re-set iovecs if present. */ if (tm->iovecs) _vec_len (tm->iovecs) = 0; /** VLIB buffer chain -> Unix iovec(s). 
*/ vec_add2 (tm->iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = l = b->current_length; if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) { do { b = vlib_get_buffer (vm, b->next_buffer); vec_add2 (tm->iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = b->current_length; l += b->current_length; } while (b->flags & VLIB_BUFFER_NEXT_PRESENT); } if (writev (tm->dev_net_tun_fd, tm->iovecs, vec_len (tm->iovecs)) < l) clib_unix_warning ("writev"); n_bytes += l; } /* Update tuntap interface output stats. */ vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, vm->thread_index, tm->sw_if_index, n_packets, n_bytes); /** The normal interface path flattens the buffer chain */ if (tm->have_normal_interface) vlib_buffer_free_no_next (vm, buffers, n_packets); else vlib_buffer_free (vm, buffers, n_packets); return n_packets; } VLIB_REGISTER_NODE (tuntap_tx_node,static) = { .function = tuntap_tx, .name = "tuntap-tx", .type = VLIB_NODE_TYPE_INTERNAL, .vector_size = 4, }; /** * @brief TUNTAP receive node * @node tuntap-rx * * @param *vm - vlib_main_t * @param *node - vlib_node_runtime_t * @param *frame - vlib_frame_t * * @return rc - uword * */ static uword tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { tuntap_main_t * tm = &tuntap_main; vlib_buffer_t * b; u32 bi; const uword buffer_size = VLIB_BUFFER_DATA_SIZE; /** Make sure we have some RX buffers. */ { uword n_left = vec_len (tm->rx_buffers);
# Copyright (c) 2017 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build description for the memif library target named by ${LIBMEMIF}.
# Abort configuration on CMake versions older than 3.5.
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)

# Headers are looked up in the directory that contains this file.
set(HEADERS_DIR ${CMAKE_CURRENT_SOURCE_DIR})

# Public headers; handed to add_vpp_library() as INSTALL_HEADERS below.
list(APPEND MEMIF_HEADERS
  libmemif.h
  memif.h
)

# Private headers. This list is not referenced again in this script.
list(APPEND MEMIF_PRIVATE_HEADERS
  memif_private.h
  socket.h
)

# Translation units that make up the library.
list(APPEND MEMIF_SOURCES
  main.c
  socket.c
)

# Make the headers above reachable by the compiler.
include_directories(${HEADERS_DIR})

# Declare the library through the add_vpp_library() helper.
# NOTE(review): LIBMEMIF and CMAKE_THREAD_LIBS_INIT are not set in this
# script; presumably they come from an enclosing CMake scope (the latter is
# normally produced by find_package(Threads)) -- confirm against the parent.
add_vpp_library(${LIBMEMIF}
  SOURCES ${MEMIF_SOURCES}

  INSTALL_HEADERS ${MEMIF_HEADERS}

  LINK_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}

  COMPONENT libmemif
)
/*
 * NOTE(review): this copy of the file resumes in the middle of a function.
 * Judging by the IPv4 calls below and by the callback registered in
 * tuntap_init(), it is presumably tuntap_ip4_add_del_interface_address();
 * its signature, locals and first statements are not visible here.
 * The statements are kept exactly as found.
 */
pool_elt_at_index (tm->subifs, p[0]);
  else
    {
      /* Unknown address: take a pool slot and record its pool index in the
         hash. */
      pool_get (tm->subifs, ap);
      *ap = subif_addr;
      mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
    }

  /* Use subif pool index to select alias device. */
  memset (&ifr, 0, sizeof (ifr));
  snprintf (ifr.ifr_name, sizeof(ifr.ifr_name),
            "%s:%d", tm->tun_name, (int)(ap - tm->subifs));

  /* the tuntap punt/inject is enabled for IPv4 RX so long as
   * any vpp interface has an IPv4 address.
   * this is also ref counted.
   */
  ip4_sw_interface_enable_disable (tm->sw_if_index, !is_delete);

  if (! is_delete)
    {
      struct sockaddr_in * sin;

      sin = (struct sockaddr_in *)&ifr.ifr_addr;

      /* Set ipv4 address, netmask. */
      sin->sin_family = AF_INET;
      clib_memcpy (&sin->sin_addr.s_addr, address, 4);
      if (ioctl (tm->dev_tap_fd, SIOCSIFADDR, &ifr) < 0)
        clib_unix_warning ("ioctl SIOCSIFADDR");

      /* The same ifreq is reused: only the address field is replaced by
         the mask looked up for this prefix length. */
      sin->sin_addr.s_addr = im->fib_masks[address_length];
      if (ioctl (tm->dev_tap_fd, SIOCSIFNETMASK, &ifr) < 0)
        clib_unix_warning ("ioctl SIOCSIFNETMASK");
    }
  else
    {
      /* Delete: forget the address and return its pool slot. */
      mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */);
      pool_put (tm->subifs, ap);
    }

  /* get flags, modify to bring up interface... */
  if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
    clib_unix_warning ("ioctl SIOCGIFFLAGS");

  /* ...or to bring it down when the address is being deleted. */
  if (is_delete)
    ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
  else
    ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);

  if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
    clib_unix_warning ("ioctl SIOCSIFFLAGS");
}

/*
 * NOTE(review): the comment that follows is damaged in this copy of the
 * file.  Text has been lost inside it (header names and, apparently, the
 * code between this comment and the IPv6 table check further down), so the
 * comment now runs on and swallows what used to be code.  The declaration
 * of the function that owns the statements after it -- presumably
 * tuntap_ip6_add_del_interface_address(), which tuntap_init() registers --
 * and of its locals (ifr, ifr6, subif_addr, ap, p) is not visible.
 * Restore this region from the upstream source.
 */
/**
 * @brief workaround for a known include file bug.
 * including @c causes multiple definitions if
 * @c have_normal_interface || tm->dev_tap_fd < 0) return; /* if the address is being applied to an interface that is not in
 * the same table/VRF as this tap, then ignore it.
 * If we don't do this overlapping address spaces in the different tables
 * breaks the linux host's routing tables */
  if (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP6,
                                          sw_if_index) !=
      fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP6,
                                          tm->sw_if_index))
    return;

  /* See if we already know about this subif */
  memset (&subif_addr, 0, sizeof (subif_addr));
  subif_addr.sw_if_index = sw_if_index;
  subif_addr.is_v6 = 1;
  clib_memcpy (&subif_addr.addr, address, sizeof (*address));

  p = mhash_get (&tm->subif_mhash, &subif_addr);

  if (p)
    ap = pool_elt_at_index (tm->subifs, p[0]);
  else
    {
      /* Unknown address: take a pool slot and record its pool index in the
         hash. */
      pool_get (tm->subifs, ap);
      *ap = subif_addr;
      mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
    }

  /* Use subif pool index to select alias device. */
  memset (&ifr, 0, sizeof (ifr));
  memset (&ifr6, 0, sizeof (ifr6));
  snprintf (ifr.ifr_name, sizeof(ifr.ifr_name),
            "%s:%d", tm->tun_name, (int)(ap - tm->subifs));

  /* the tuntap punt/inject is enabled for IPv6 RX so long as
   * any vpp interface has an IPv6 address.
   * this is also ref counted.
   */
  ip6_sw_interface_enable_disable (tm->sw_if_index, !is_delete);

  if (!
      is_delete)
    {
      /* Add: open a throw-away AF_INET6 socket, look up the ifindex for the
         alias name, then set the address with SIOCSIFADDR.
         NOTE(review): when socket() fails only a warning is emitted; the
         two ioctl() calls below still run with the negative descriptor. */
      int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
      if (sockfd < 0)
        clib_unix_warning ("get ifindex socket");

      if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
        clib_unix_warning ("get ifindex");

      ifr6.ifr6_ifindex = ifr.ifr_ifindex;
      ifr6.ifr6_prefixlen = address_length;
      clib_memcpy (&ifr6.ifr6_addr, address, 16);

      if (ioctl (sockfd, SIOCSIFADDR, &ifr6) < 0)
        clib_unix_warning ("set address");

      if (sockfd >= 0)
        close (sockfd);
    }
  else
    {
      /* Delete: same sequence as the add path, but with SIOCDIFADDR, then
         forget the address and return its pool slot.
         NOTE(review): as above, a failed socket() does not stop the
         ioctl() calls. */
      int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
      if (sockfd < 0)
        clib_unix_warning ("get ifindex socket");

      if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
        clib_unix_warning ("get ifindex");

      ifr6.ifr6_ifindex = ifr.ifr_ifindex;
      ifr6.ifr6_prefixlen = address_length;
      clib_memcpy (&ifr6.ifr6_addr, address, 16);

      if (ioctl (sockfd, SIOCDIFADDR, &ifr6) < 0)
        clib_unix_warning ("del address");

      if (sockfd >= 0)
        close (sockfd);

      mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */);
      pool_put (tm->subifs, ap);
    }
}

/**
 * @brief TX the tun/tap frame
 *
 * Passes the frame to tuntap_tx() and then frees the frame itself.
 *
 * @param *vm - vlib_main_t
 * @param *node - vlib_node_runtime_t
 * @param *frame - vlib_frame_t
 *
 */
static void
tuntap_punt_frame (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
                   vlib_frame_t * frame)
{
  tuntap_tx (vm, node, frame);
  vlib_frame_free (vm, node, frame);
}

/**
 * @brief Free the tun/tap frame
 *
 * Frees every buffer referenced by the frame and then the frame itself;
 * nothing is written to the tun/tap device.
 *
 * @param *vm - vlib_main_t
 * @param *node - vlib_node_runtime_t
 * @param *frame - vlib_frame_t
 *
 */
static void
tuntap_nopunt_frame (vlib_main_t * vm,
                     vlib_node_runtime_t * node,
                     vlib_frame_t * frame)
{
  u32 * buffers = vlib_frame_args (frame);
  uword n_packets = frame->n_vectors;
  vlib_buffer_free (vm, buffers, n_packets);
  vlib_frame_free (vm, node, frame);
}

/** Hardware interface class "tuntap", registered with the P2P class flag. */
VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = {
  .name = "tuntap",
  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};

/**
 * @brief Format tun/tap interface name
 *
 * Appends "tuntap-<i>" to the string, where <i> is the single u32 read
 * from the argument list.
 *
 * @param *s - u8 - formatter string
 * @param *args - va_list
 *
 * @return *s - u8 - formatted string
 *
 */
static u8 * format_tuntap_interface_name (u8 * s, va_list * args)
{
  u32 i
    = va_arg (*args, u32);

  s = format (s, "tuntap-%d", i);
  return s;
}

/**
 * @brief TX packet out tun/tap
 *
 * When a "normal" interface is configured the frame is handed to
 * tuntap_tx(); otherwise the buffers are freed without being written.
 *
 * @param *vm - vlib_main_t
 * @param *node - vlib_node_runtime_t
 * @param *frame - vlib_frame_t
 *
 * @return n_buffers - uword - Packets transmitted
 *
 */
static uword
tuntap_intfc_tx (vlib_main_t * vm,
                 vlib_node_runtime_t * node,
                 vlib_frame_t * frame)
{
  tuntap_main_t * tm = &tuntap_main;
  u32 * buffers = vlib_frame_args (frame);
  uword n_buffers = frame->n_vectors;

  /* Normal interface transmit happens only on the normal interface... */
  if (tm->have_normal_interface)
    return tuntap_tx (vm, node, frame);

  /* ...in punt/inject mode the buffers are dropped, but still reported
     through the return value. */
  vlib_buffer_free (vm, buffers, n_buffers);
  return n_buffers;
}

/** Device class "tuntap": transmit via tuntap_intfc_tx(), names formatted
    by format_tuntap_interface_name(). */
VNET_DEVICE_CLASS (tuntap_dev_class,static) = {
  .name = "tuntap",
  .tx_function = tuntap_intfc_tx,
  .format_device_name = format_tuntap_interface_name,
};

/**
 * @brief tun/tap node init
 *
 * Runs ip4_init first, initialises the sub-interface address hash and
 * appends the IPv4 and IPv6 address add/del callbacks to the respective
 * callback vectors.
 *
 * @param *vm - vlib_main_t
 *
 * @return error - clib_error_t - the error from ip4_init, otherwise 0
 *
 */
static clib_error_t *
tuntap_init (vlib_main_t * vm)
{
  clib_error_t * error;
  ip4_main_t * im4 = &ip4_main;
  ip6_main_t * im6 = &ip6_main;
  ip4_add_del_interface_address_callback_t cb4;
  ip6_add_del_interface_address_callback_t cb6;
  tuntap_main_t * tm = &tuntap_main;

  error = vlib_call_init_function (vm, ip4_init);
  if (error)
    return error;

  /* NOTE(review): arguments are presumably (value size, key size): a u32
     pool index keyed by a whole subif_address_t -- confirm against the
     mhash_init() prototype. */
  mhash_init (&tm->subif_mhash, sizeof (u32), sizeof(subif_address_t));

  cb4.function = tuntap_ip4_add_del_interface_address;
  cb4.function_opaque = 0;
  vec_add1 (im4->add_del_interface_address_callbacks, cb4);

  cb6.function = tuntap_ip6_add_del_interface_address;
  cb6.function_opaque = 0;
  vec_add1 (im6->add_del_interface_address_callbacks, cb6);

  return 0;
}

VLIB_INIT_FUNCTION (tuntap_init);