From 78c896b3b3127515478090c19447e27dc406427e Mon Sep 17 00:00:00 2001 From: Jianfeng Tan Date: Mon, 18 Nov 2019 06:59:50 +0000 Subject: TLDKv2 Signed-off-by: Jianfeng Tan Signed-off-by: Jielong Zhou Signed-off-by: Jian Zhang Signed-off-by: Chen Zhao Change-Id: I55c39de4c6cd30f991f35631eb507f770230f08e --- test/packetdrill/netdev.c | 502 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 502 insertions(+) create mode 100644 test/packetdrill/netdev.c (limited to 'test/packetdrill/netdev.c') diff --git a/test/packetdrill/netdev.c b/test/packetdrill/netdev.c new file mode 100644 index 0000000..7734709 --- /dev/null +++ b/test/packetdrill/netdev.c @@ -0,0 +1,502 @@ +/* + * Copyright 2013 Google Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ +/* + * Author: ncardwell@google.com (Neal Cardwell) + * + * Implementation for a "virtual network device" module to + * inject packets into the kernel and read packets leaving the kernel. + */ + +#include "netdev.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) +#include +#endif /* defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) */ + +#include "assert.h" +#include "ip.h" +#include "ipv6.h" +#include "logging.h" +#include "net_utils.h" +#include "packet.h" +#include "packet_parser.h" +#include "packet_socket.h" +#include "tcp.h" +#include "tun.h" +#include "wrap.h" + +/* Internal private state for the netdev for purely local tests. */ +struct local_netdev { + struct netdev netdev; /* "inherit" from netdev */ + + char *name; /* malloc-ed copy of interface name (owned) */ + int tun_fd; /* tun for sending/receiving packets */ + int control_fd; /* fd for configuration of tun interface */ + int index; /* interface index from if_nametoindex */ + struct packet_socket *psock; /* for sniffing packets (owned) */ +}; + +struct netdev_ops local_netdev_ops; + +/* "Downcast" an abstract netdev to our local flavor. */ +static inline struct local_netdev *to_local_netdev(struct netdev *netdev) +{ + return (struct local_netdev *)netdev; +} + +/* Clean up any old tun device state that might be lying around from + * previous tests. NetBSD the kernel does not automatically tear down + * unreferenced tun devices and routes referencing those routes. + */ +static void cleanup_old_device(struct config *config, + struct local_netdev *netdev) +{ +#if defined(__NetBSD__) + char *cleanup_command = NULL; + int result; + + asprintf(&cleanup_command, + "/sbin/ifconfig %s down delete > /dev/null 2>&1", + TUN_DEV); + DEBUGP("running: '%s'\n", cleanup_command); + result = system(cleanup_command); + DEBUGP("result: %d\n", result); + free(cleanup_command); +#endif /* defined(__NetBSD__) */ +} + +/* Check that the remote IP is actually remote. It must be to ensure + * that test packets will pass into our tun device. + */ +static void check_remote_address(struct config *config, + struct local_netdev *netdev) +{ + if (is_ip_local(&config->live_remote_ip)) { + die("error: live_remote_ip %s is not remote\n", + config->live_remote_ip_string); + } +} + +/* Make sure config->live_local_ip is not configured on any devices. + * This is only used for anyip tests. + */ +static void check_local_anyip(struct config *config) +{ + if (is_ip_local(&config->live_local_ip)) { + die("error: live_local_ip %s is not remote for anyip\n", + config->live_local_ip_string); + } +} + +/* Create a tun device for the lifetime of this test. */ +static void create_device(struct config *config, struct local_netdev *netdev) +{ + /* Open the tun device, which "clones" it for our purposes. */ + int tun_fd; +#ifdef linux + int nb = 0; + +loop: + if (++nb > 10) + die_perror("open tun device"); +#endif + tun_fd = open(TUN_PATH, O_RDWR); + if (tun_fd < 0) + die_perror("open tun device"); + + netdev->tun_fd = tun_fd; + +#ifdef linux + /* Create the device. Since we do not specify a device name, the + * kernel will try to allocate the "next" device of the specified + * type. This device will disappear when we are done. + */ + struct ifreq ifr; + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TUN | IFF_NO_PI | IFF_VNET_HDR; + int status = ioctl(netdev->tun_fd, TUNSETIFF, (void *)&ifr); + if (status < 0) + die_perror("TUNSETIFF"); + + /* Our tests rely on using tun0. + * We might change this in the future, by passing a variable filled + * with tunnel name. In the mean time, wait a bit that tun0 gets free. + */ + if (strcmp(ifr.ifr_name, "tun0")) { + close(tun_fd); + usleep(100000); + goto loop; + } + netdev->name = strdup(ifr.ifr_name); +#endif + +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) + const int mode = IFF_BROADCAST | IFF_MULTICAST; + if (ioctl(netdev->tun_fd, TUNSIFMODE, &mode, sizeof(mode)) < 0) + die_perror("TUNSIFMODE"); + + netdev->name = strdup(TUN_DEV); +#endif /* defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) */ + +#if defined(__FreeBSD__) || defined(__NetBSD__) + /* On FreeBSD and NetBSD we need to explicitly ask to be able + * to prepend the address family when injecting tun packets. + * OpenBSD presumes we are doing this, even without the ioctl. + */ + const int header = 1; + if (ioctl(netdev->tun_fd, TUNSIFHEAD, &header, sizeof(header)) < 0) + die_perror("TUNSIFHEAD"); +#endif /* defined(__FreeBSD__) || defined(__NetBSD__) */ + + DEBUGP("tun name: '%s'\n", netdev->name); + + netdev->index = if_nametoindex(netdev->name); + if (netdev->index == 0) + die_perror("if_nametoindex"); + + DEBUGP("tun index: '%d'\n", netdev->index); + + if (config->speed != TUN_DRIVER_SPEED_CUR) { + char *command; + asprintf(&command, "ethtool -s %s speed %u autoneg off", + netdev->name, config->speed); + if (system(command) < 0) + die("Error executing %s\n", command); + free(command); + + /* Need to bring interface down and up so the interface speed + * will be copied to the link_speed field. This field is + * used by TCP's cwnd bound. */ + asprintf(&command, "ifconfig %s down; sleep 1; ifconfig %s up; " + "sleep 1", netdev->name, netdev->name); + if (system(command) < 0) + die("Error executing %s\n", command); + free(command); + } + + if (config->mtu != TUN_DRIVER_DEFAULT_MTU) { + char *command; + asprintf(&command, "ifconfig %s mtu %d", + netdev->name, config->mtu); + if (system(command) < 0) + die("Error executing %s\n", command); + free(command); + } + + /* Open a socket we can use to configure the tun interface. */ + netdev->control_fd = wrap_socket(config->ip_version, SOCK_DGRAM); +} + +/* Set the offload flags to be like a typical ethernet device */ +static void set_device_offload_flags(struct local_netdev *netdev) +{ +#ifdef linux + const u32 offload = + TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN; + if (ioctl(netdev->tun_fd, TUNSETOFFLOAD, offload) != 0) + die_perror("TUNSETOFFLOAD"); +#endif +} + +/* Bring up the device */ +static void bring_up_device(struct local_netdev *netdev) +{ + struct ifreq ifr; + memset(&ifr, 0, sizeof(ifr)); + strncpy(ifr.ifr_name, netdev->name, IFNAMSIZ); + if (ioctl(netdev->control_fd, SIOCGIFFLAGS, &ifr) < 0) + die_perror("SIOCGIFFLAGS"); + ifr.ifr_flags |= IFF_UP | IFF_RUNNING; + if (ioctl(netdev->control_fd, SIOCSIFFLAGS, &ifr) < 0) + die_perror("SIOCSIFFLAGS"); +} + +/* Route traffic destined for our remote IP through this device. + * In anyip environment, we don't use the gateway IP. + */ +static void route_traffic_to_device(struct config *config, + struct local_netdev *netdev) +{ + char *route_command = NULL; +#ifdef linux + asprintf(&route_command, + "ip -%d route del %s > /dev/null 2>&1 ; " + "ip -%d route add %s dev %s %s%s > /dev/null 2>&1", + (config->wire_protocol == AF_INET) ? 4 : 6, + config->live_remote_prefix_string, + (config->wire_protocol == AF_INET) ? 4 : 6, + config->live_remote_prefix_string, + netdev->name, + config->is_anyip ? "" : "via ", + config->is_anyip ? "" : + config->live_gateway_ip_string); +#endif +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) + if (config->wire_protocol == AF_INET) { + asprintf(&route_command, + "route delete %s > /dev/null 2>&1 ; " + "route add %s %s > /dev/null", + config->live_remote_prefix_string, + config->live_remote_prefix_string, + config->live_gateway_ip_string); + } else if (config->wire_protocol == AF_INET6) { + asprintf(&route_command, + "route delete -inet6 %s > /dev/null 2>&1 ; " +#if defined(__FreeBSD__) + "route add -inet6 %s -interface tun0 %s > /dev/null", +#elif defined(__OpenBSD__) || defined(__NetBSD__) + "route add -inet6 %s %s > /dev/null", +#endif + config->live_remote_prefix_string, + config->live_remote_prefix_string, + config->live_gateway_ip_string); + } else { + assert(!"bad wire protocol"); + } +#endif /* defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) */ + int result = system(route_command); + if ((result == -1) || (WEXITSTATUS(result) != 0)) { + die("error executing route command '%s'\n", + route_command); + } + free(route_command); +} + +struct netdev *local_netdev_new(struct config *config) +{ + struct local_netdev *netdev = calloc(1, sizeof(struct local_netdev)); + + netdev->netdev.ops = &local_netdev_ops; + + cleanup_old_device(config, netdev); + + check_remote_address(config, netdev); + create_device(config, netdev); + set_device_offload_flags(netdev); + bring_up_device(netdev); + + if (config->is_anyip) + check_local_anyip(config); + else + net_setup_dev_address(netdev->name, + &config->live_local_ip, + config->live_prefix_len); + + route_traffic_to_device(config, netdev); + netdev->psock = packet_socket_new(netdev->name); + + return (struct netdev *)netdev; +} + +static void local_netdev_free(struct netdev *a_netdev) +{ + struct local_netdev *netdev = to_local_netdev(a_netdev); + + if (netdev->psock) + packet_socket_free(netdev->psock); + if (netdev->tun_fd >= 0) + close(netdev->tun_fd); + if (netdev->control_fd >= 0) + close(netdev->control_fd); + if (netdev->name != NULL) + free(netdev->name); + memset(netdev, 0, sizeof(*netdev)); /* paranoia to help catch bugs */ + free(netdev); +} + +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) +/* According to `man 4 tun` on OpenBSD: "Each packet read or written + * is prefixed with a tunnel header consisting of a 4-byte network + * byte order integer containing the address family in the case of + * layer 3 tunneling." Similarly, on FreeBSD and NetBSD one must use + * ioctl(TUNSIFHEAD) and prepend an address family, in order to be + * able to send IPv6 packets (otherwise FreeBSD and NetBSD assume the + * packets are IPv4). + */ +static void bsd_tun_write(struct local_netdev *netdev, + struct packet *packet) +{ + int address_family = htonl(packet_address_family(packet)); + struct iovec vector[2] = { + { &address_family, sizeof(address_family) }, + { packet_start(packet), packet->ip_bytes } + }; + + if (writev(netdev->tun_fd, vector, ARRAY_SIZE(vector)) < 0) + die_perror("BSD tun write()"); +} +#endif /* defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) */ + +#ifdef linux +#include + +static void linux_tun_write(struct local_netdev *netdev, + struct packet *packet) +{ + struct virtio_net_hdr gso = { 0 }; + struct iovec vector[2] = { + { &gso, sizeof(gso) }, + { packet_start(packet), packet->ip_bytes } + }; + + if (packet->tcp && packet->mss) { + if (packet->ipv4) + gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + else + gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + gso.gso_size = packet->mss; + } + if (writev(netdev->tun_fd, vector, ARRAY_SIZE(vector)) < 0) + die_perror("Linux tun write()"); +} +#endif /* linux */ + +static int local_netdev_send(struct netdev *a_netdev, + struct packet *packet) +{ + struct local_netdev *netdev = to_local_netdev(a_netdev); + + assert(packet->ip_bytes > 0); + /* We do IPv4 and IPv6 */ + assert(packet->ipv4 || packet->ipv6); + /* We only do TCP and ICMP */ + assert(packet->tcp || packet->udp || packet->icmpv4 || packet->icmpv6); + + DEBUGP("local_netdev_send\n"); + +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) + bsd_tun_write(netdev, packet); +#endif /* defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) */ + +#ifdef linux + linux_tun_write(netdev, packet); +#endif /* linux */ + + return STATUS_OK; +} + +/* Read the given number of packets out of the tun device. We read + * these packets so that the kernel can exercise its normal code paths + * for packet transmit completion, since this code path may feed back + * to TCP behavior; e.g., see the Linux patch "tcp: avoid retransmits + * of TCP packets hanging in host queues". We don't need to actually + * need the packet contents, but on Linux we need to read at least 1 + * byte of packet data to consume the packet. + * After we added IFF_VNET_HDR attribute to the linux tun device, + * we expect to receive a virtio_net_hdr at the beginning. + */ +static void local_netdev_read_queue(struct local_netdev *netdev, + int num_packets) +{ +#ifdef linux + char buf[sizeof(struct virtio_net_hdr) + 1]; +#else + char buf[1]; +#endif + int i = 0, in_bytes = 0; + + for (i = 0; i < num_packets; ++i) { + in_bytes = read(netdev->tun_fd, buf, sizeof(buf)); + assert(in_bytes <= (int)sizeof(buf)); + + if (in_bytes < 0) { + if (errno == EINTR) + continue; + else + die_perror("tun read()"); + } + } +} + +static int local_netdev_receive(struct netdev *a_netdev, + struct packet **packet, char **error) +{ + struct local_netdev *netdev = to_local_netdev(a_netdev); + int status = STATUS_ERR; + int num_packets = 0; + + DEBUGP("local_netdev_receive\n"); + + status = netdev_receive_loop(netdev->psock, PACKET_LAYER_3_IP, + DIRECTION_OUTBOUND, packet, &num_packets, + error); + local_netdev_read_queue(netdev, num_packets); + return status; +} + +int netdev_receive_loop(struct packet_socket *psock, + enum packet_layer_t layer, + enum direction_t direction, + struct packet **packet, + int *num_packets, + char **error) +{ + assert(*packet == NULL); /* should be no packet yet */ + + *num_packets = 0; + while (1) { + int in_bytes = 0; + enum packet_parse_result_t result; + + *packet = packet_new(PACKET_READ_BYTES); + + /* Sniff the next outbound packet from the kernel under test. */ + if (packet_socket_receive(psock, direction, *packet, &in_bytes)) + continue; + + ++*num_packets; + result = parse_packet(*packet, in_bytes, layer, error); + + if (result == PACKET_OK) + return STATUS_OK; + + packet_free(*packet); + *packet = NULL; + + if (result == PACKET_BAD) + return STATUS_ERR; + + DEBUGP("parse_result:%d; error parsing packet: %s\n", + result, *error); + } + + assert(!"should not be reached"); + return STATUS_ERR; /* not reached */ +} + +struct netdev_ops local_netdev_ops = { + .free = local_netdev_free, + .send = local_netdev_send, + .receive = local_netdev_receive, +}; -- cgit 1.2.3-korg