From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 19 Dec 2016 23:05:39 +0100 Subject: Reorganize source tree to use single autotools instance Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion --- src/vnet/devices/af_packet/af_packet.api | 71 + src/vnet/devices/af_packet/af_packet.c | 366 +++ src/vnet/devices/af_packet/af_packet.h | 69 + src/vnet/devices/af_packet/af_packet_api.c | 143 + src/vnet/devices/af_packet/cli.c | 144 + src/vnet/devices/af_packet/device.c | 250 ++ src/vnet/devices/af_packet/node.c | 288 ++ src/vnet/devices/devices.c | 91 + src/vnet/devices/devices.h | 53 + src/vnet/devices/dpdk/cli.c | 1296 ++++++++ src/vnet/devices/dpdk/device.c | 840 +++++ src/vnet/devices/dpdk/dpdk.h | 534 ++++ src/vnet/devices/dpdk/dpdk_priv.h | 132 + src/vnet/devices/dpdk/format.c | 763 +++++ src/vnet/devices/dpdk/hqos.c | 775 +++++ src/vnet/devices/dpdk/init.c | 1803 +++++++++++ src/vnet/devices/dpdk/ipsec/cli.c | 141 + src/vnet/devices/dpdk/ipsec/crypto_node.c | 210 ++ src/vnet/devices/dpdk/ipsec/dir.dox | 18 + .../devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md | 73 + src/vnet/devices/dpdk/ipsec/esp.h | 295 ++ src/vnet/devices/dpdk/ipsec/esp_decrypt.c | 583 ++++ src/vnet/devices/dpdk/ipsec/esp_encrypt.c | 598 ++++ src/vnet/devices/dpdk/ipsec/ipsec.c | 313 ++ src/vnet/devices/dpdk/ipsec/ipsec.h | 227 ++ src/vnet/devices/dpdk/node.c | 687 ++++ src/vnet/devices/dpdk/qos_doc.md | 404 +++ src/vnet/devices/netmap/cli.c | 146 + src/vnet/devices/netmap/device.c | 261 ++ src/vnet/devices/netmap/net_netmap.h | 650 ++++ src/vnet/devices/netmap/netmap.api | 74 + src/vnet/devices/netmap/netmap.c | 316 ++ src/vnet/devices/netmap/netmap.h | 164 + src/vnet/devices/netmap/netmap_api.c | 137 + src/vnet/devices/netmap/node.c | 300 ++ src/vnet/devices/nic/ixge.c | 2938 +++++++++++++++++ src/vnet/devices/nic/ixge.h | 1293 ++++++++ src/vnet/devices/nic/sfp.c | 117 + src/vnet/devices/nic/sfp.h | 117 + src/vnet/devices/ssvm/node.c | 343 ++ src/vnet/devices/ssvm/ssvm_eth.c | 491 +++ src/vnet/devices/ssvm/ssvm_eth.h | 141 + src/vnet/devices/virtio/dir.dox | 27 + src/vnet/devices/virtio/vhost-user.c | 3314 ++++++++++++++++++++ src/vnet/devices/virtio/vhost-user.h | 350 +++ src/vnet/devices/virtio/vhost_user.api | 125 + src/vnet/devices/virtio/vhost_user_api.c | 262 ++ 47 files changed, 22733 insertions(+) create mode 100644 src/vnet/devices/af_packet/af_packet.api create mode 100644 src/vnet/devices/af_packet/af_packet.c create mode 100644 src/vnet/devices/af_packet/af_packet.h create mode 100644 src/vnet/devices/af_packet/af_packet_api.c create mode 100644 src/vnet/devices/af_packet/cli.c create mode 100644 src/vnet/devices/af_packet/device.c create mode 100644 src/vnet/devices/af_packet/node.c create mode 100644 src/vnet/devices/devices.c create mode 100644 src/vnet/devices/devices.h create mode 100644 src/vnet/devices/dpdk/cli.c create mode 100644 src/vnet/devices/dpdk/device.c create mode 100644 src/vnet/devices/dpdk/dpdk.h create mode 100644 src/vnet/devices/dpdk/dpdk_priv.h create mode 100644 src/vnet/devices/dpdk/format.c create mode 100644 src/vnet/devices/dpdk/hqos.c create mode 100755 src/vnet/devices/dpdk/init.c create mode 100644 src/vnet/devices/dpdk/ipsec/cli.c create mode 100644 src/vnet/devices/dpdk/ipsec/crypto_node.c create mode 100644 src/vnet/devices/dpdk/ipsec/dir.dox create mode 100644 src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md create mode 100644 src/vnet/devices/dpdk/ipsec/esp.h create mode 100644 
src/vnet/devices/dpdk/ipsec/esp_decrypt.c create mode 100644 src/vnet/devices/dpdk/ipsec/esp_encrypt.c create mode 100644 src/vnet/devices/dpdk/ipsec/ipsec.c create mode 100644 src/vnet/devices/dpdk/ipsec/ipsec.h create mode 100644 src/vnet/devices/dpdk/node.c create mode 100644 src/vnet/devices/dpdk/qos_doc.md create mode 100644 src/vnet/devices/netmap/cli.c create mode 100644 src/vnet/devices/netmap/device.c create mode 100644 src/vnet/devices/netmap/net_netmap.h create mode 100644 src/vnet/devices/netmap/netmap.api create mode 100644 src/vnet/devices/netmap/netmap.c create mode 100644 src/vnet/devices/netmap/netmap.h create mode 100644 src/vnet/devices/netmap/netmap_api.c create mode 100644 src/vnet/devices/netmap/node.c create mode 100644 src/vnet/devices/nic/ixge.c create mode 100644 src/vnet/devices/nic/ixge.h create mode 100644 src/vnet/devices/nic/sfp.c create mode 100644 src/vnet/devices/nic/sfp.h create mode 100644 src/vnet/devices/ssvm/node.c create mode 100644 src/vnet/devices/ssvm/ssvm_eth.c create mode 100644 src/vnet/devices/ssvm/ssvm_eth.h create mode 100644 src/vnet/devices/virtio/dir.dox create mode 100644 src/vnet/devices/virtio/vhost-user.c create mode 100644 src/vnet/devices/virtio/vhost-user.h create mode 100644 src/vnet/devices/virtio/vhost_user.api create mode 100644 src/vnet/devices/virtio/vhost_user_api.c (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api new file mode 100644 index 00000000..9fb2a207 --- /dev/null +++ b/src/vnet/devices/af_packet/af_packet.api @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** \brief Create host-interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param host_if_name - interface name + @param hw_addr - interface MAC + @param use_random_hw_addr - use random generated MAC +*/ +define af_packet_create +{ + u32 client_index; + u32 context; + + u8 host_if_name[64]; + u8 hw_addr[6]; + u8 use_random_hw_addr; +}; + +/** \brief Create host-interface response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define af_packet_create_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Delete host-interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param host_if_name - interface name +*/ +define af_packet_delete +{ + u32 client_index; + u32 context; + + u8 host_if_name[64]; +}; + +/** \brief Delete host-interface response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define af_packet_delete_reply +{ + u32 context; + i32 retval; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c new file mode 100644 index 00000000..91c3988b --- /dev/null +++ b/src/vnet/devices/af_packet/af_packet.c @@ -0,0 +1,366 @@ +/* + *------------------------------------------------------------------ + * af_packet.c - linux kernel packet interface + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include +#include +#include + +#include + +#define AF_PACKET_DEBUG_SOCKET 0 + +#define AF_PACKET_TX_FRAMES_PER_BLOCK 1024 +#define AF_PACKET_TX_FRAME_SIZE (2048 * 5) +#define AF_PACKET_TX_BLOCK_NR 1 +#define AF_PACKET_TX_FRAME_NR (AF_PACKET_TX_BLOCK_NR * \ + AF_PACKET_TX_FRAMES_PER_BLOCK) +#define AF_PACKET_TX_BLOCK_SIZE (AF_PACKET_TX_FRAME_SIZE * \ + AF_PACKET_TX_FRAMES_PER_BLOCK) + +#define AF_PACKET_RX_FRAMES_PER_BLOCK 1024 +#define AF_PACKET_RX_FRAME_SIZE (2048 * 5) +#define AF_PACKET_RX_BLOCK_NR 1 +#define AF_PACKET_RX_FRAME_NR (AF_PACKET_RX_BLOCK_NR * \ + AF_PACKET_RX_FRAMES_PER_BLOCK) +#define AF_PACKET_RX_BLOCK_SIZE (AF_PACKET_RX_FRAME_SIZE * \ + AF_PACKET_RX_FRAMES_PER_BLOCK) + +#if AF_PACKET_DEBUG_SOCKET == 1 +#define DBG_SOCK(args...) clib_warning(args); +#else +#define DBG_SOCK(args...) 
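[Reviewer's note] The AF_PACKET_* macros above pick one block of 1024 frames of 10240 bytes for each of the rx and tx rings, and the two rings are later mmap()ed as a single region. A minimal standalone sketch of that arithmetic and the kernel's constraints (macro names shortened; not part of this patch):

    #include <stdio.h>

    #define FRAMES_PER_BLOCK 1024
    #define FRAME_SIZE (2048 * 5)	/* 10240 B; must be a multiple of TPACKET_ALIGNMENT (16) */
    #define BLOCK_NR 1

    int
    main (void)
    {
      /* tp_block_size must be a multiple of the page size */
      unsigned block_size = FRAME_SIZE * FRAMES_PER_BLOCK;
      unsigned frame_nr = BLOCK_NR * FRAMES_PER_BLOCK;
      /* rx and tx rings are mapped back to back in one mmap () call */
      unsigned ring_sz = 2 * block_size * BLOCK_NR;

      printf ("block %u B, %u frames, rx+tx mapping %u B\n",
	      block_size, frame_nr, ring_sz);
      return 0;
    }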
+#endif + +/* defined in net/if.h but clashes with dpdk headers */ +unsigned int if_nametoindex (const char *ifname); + +typedef struct tpacket_req tpacket_req_t; + +static u32 +af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, + u32 flags) +{ + /* nothing for now */ + return 0; +} + +static clib_error_t * +af_packet_fd_read_ready (unix_file_t * uf) +{ + vlib_main_t *vm = vlib_get_main (); + af_packet_main_t *apm = &af_packet_main; + u32 idx = uf->private_data; + + apm->pending_input_bitmap = + clib_bitmap_set (apm->pending_input_bitmap, idx, 1); + + /* Schedule the rx node */ + vlib_node_set_interrupt_pending (vm, af_packet_input_node.index); + + return 0; +} + +static int +create_packet_v2_sock (u8 * name, tpacket_req_t * rx_req, + tpacket_req_t * tx_req, int *fd, u8 ** ring) +{ + int ret, err; + struct sockaddr_ll sll; + uint host_if_index; + int ver = TPACKET_V2; + socklen_t req_sz = sizeof (struct tpacket_req); + u32 ring_sz = rx_req->tp_block_size * rx_req->tp_block_nr + + tx_req->tp_block_size * tx_req->tp_block_nr; + + host_if_index = if_nametoindex ((const char *) name); + + if (!host_if_index) + { + DBG_SOCK ("Wrong host interface name"); + ret = VNET_API_ERROR_INVALID_INTERFACE; + goto error; + } + + if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0) + { + DBG_SOCK ("Failed to create socket"); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + if ((err = + setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver))) < 0) + { + DBG_SOCK ("Failed to set rx packet interface version"); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + int opt = 1; + if ((err = + setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt))) < 0) + { + DBG_SOCK ("Failed to set packet tx ring error handling option"); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + if ((err = + setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz)) < 0) + { + DBG_SOCK ("Failed to set packet rx ring options"); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + if ((err = + setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz)) < 0) + { + DBG_SOCK ("Failed to set packet tx ring options"); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + *ring = + mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, *fd, + 0); + if (*ring == MAP_FAILED) + { + DBG_SOCK ("mmap failure"); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + memset (&sll, 0, sizeof (sll)); + sll.sll_family = PF_PACKET; + sll.sll_protocol = htons (ETH_P_ALL); + sll.sll_ifindex = host_if_index; + + if ((err = bind (*fd, (struct sockaddr *) &sll, sizeof (sll))) < 0) + { + DBG_SOCK ("Failed to bind rx packet socket (error %d)", err); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + return 0; +error: + if (*fd >= 0) + close (*fd); + *fd = -1; + return ret; +} + +int +af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, + u32 * sw_if_index) +{ + af_packet_main_t *apm = &af_packet_main; + int ret, fd = -1; + struct tpacket_req *rx_req = 0; + struct tpacket_req *tx_req = 0; + u8 *ring = 0; + af_packet_if_t *apif = 0; + u8 hw_addr[6]; + clib_error_t *error; + vnet_sw_interface_t *sw; + vnet_main_t *vnm = vnet_get_main (); + uword *p; + uword if_index; + u8 *host_if_name_dup = vec_dup (host_if_name); + + p = mhash_get (&apm->if_index_by_host_if_name, host_if_name); + if (p) + { + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + } + + vec_validate (rx_req, 0); + rx_req->tp_block_size =
AF_PACKET_RX_BLOCK_SIZE; + rx_req->tp_frame_size = AF_PACKET_RX_FRAME_SIZE; + rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR; + rx_req->tp_frame_nr = AF_PACKET_RX_FRAME_NR; + + vec_validate (tx_req, 0); + tx_req->tp_block_size = AF_PACKET_TX_BLOCK_SIZE; + tx_req->tp_frame_size = AF_PACKET_TX_FRAME_SIZE; + tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR; + tx_req->tp_frame_nr = AF_PACKET_TX_FRAME_NR; + + ret = create_packet_v2_sock (host_if_name, rx_req, tx_req, &fd, &ring); + + if (ret != 0) + goto error; + + /* So far everything looks good, let's create interface */ + pool_get (apm->interfaces, apif); + if_index = apif - apm->interfaces; + + apif->fd = fd; + apif->rx_ring = ring; + apif->tx_ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr; + apif->rx_req = rx_req; + apif->tx_req = tx_req; + apif->host_if_name = host_if_name_dup; + apif->per_interface_next_index = ~0; + apif->next_tx_frame = 0; + apif->next_rx_frame = 0; + + { + unix_file_t template = { 0 }; + template.read_function = af_packet_fd_read_ready; + template.file_descriptor = fd; + template.private_data = if_index; + template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED; + apif->unix_file_index = unix_file_add (&unix_main, &template); + } + + /*use configured or generate random MAC address */ + if (hw_addr_set) + clib_memcpy (hw_addr, hw_addr_set, 6); + else + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd)); + hw_addr[0] = 2; + hw_addr[1] = 0xfe; + } + + error = ethernet_register_interface (vnm, af_packet_device_class.index, + if_index, hw_addr, &apif->hw_if_index, + af_packet_eth_flag_change); + + if (error) + { + memset (apif, 0, sizeof (*apif)); + pool_put (apm->interfaces, apif); + clib_error_report (error); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index); + apif->sw_if_index = sw->sw_if_index; + + vnet_hw_interface_set_flags (vnm, apif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + + mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index, + 0); + if (sw_if_index) + *sw_if_index = apif->sw_if_index; + return 0; + +error: + vec_free (host_if_name_dup); + vec_free (rx_req); + vec_free (tx_req); + return ret; +} + +int +af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name) +{ + vnet_main_t *vnm = vnet_get_main (); + af_packet_main_t *apm = &af_packet_main; + af_packet_if_t *apif; + uword *p; + uword if_index; + u32 ring_sz; + + p = mhash_get (&apm->if_index_by_host_if_name, host_if_name); + if (p == NULL) + { + clib_warning ("Host interface %s does not exist", host_if_name); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + apif = pool_elt_at_index (apm->interfaces, p[0]); + if_index = apif - apm->interfaces; + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0); + + /* clean up */ + if (apif->unix_file_index != ~0) + { + unix_file_del (&unix_main, unix_main.file_pool + apif->unix_file_index); + apif->unix_file_index = ~0; + } + else + close (apif->fd); + + ring_sz = apif->rx_req->tp_block_size * apif->rx_req->tp_block_nr + + apif->tx_req->tp_block_size * apif->tx_req->tp_block_nr; + if (munmap (apif->rx_ring, ring_sz)) + clib_warning ("Host interface %s could not free rx/tx ring", + host_if_name); + apif->rx_ring = NULL; + apif->tx_ring = NULL; + apif->fd = -1; + + vec_free (apif->rx_req); + apif->rx_req = NULL; + vec_free (apif->tx_req); + apif->tx_req = NULL; + + vec_free (apif->host_if_name); + 
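[Reviewer's note] For reference, a minimal standalone equivalent of the TPACKET_V2 rx setup performed by create_packet_v2_sock() above, without the VPP wrappers. The interface name "eth0" and the small ring geometry are illustrative only, and the program needs root for MAP_LOCKED:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <sys/mman.h>
    #include <linux/if_ether.h>
    #include <linux/if_packet.h>
    #include <net/if.h>
    #include <arpa/inet.h>

    int
    main (void)
    {
      struct tpacket_req req = {
	.tp_block_size = 4096, .tp_block_nr = 64,
	.tp_frame_size = 2048, .tp_frame_nr = 128,	/* 2 frames/block * 64 blocks */
      };
      int ver = TPACKET_V2;
      struct sockaddr_ll sll = { .sll_family = PF_PACKET,
				 .sll_protocol = htons (ETH_P_ALL) };
      int fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL));
      if (fd < 0)
	{ perror ("socket"); return 1; }

      sll.sll_ifindex = if_nametoindex ("eth0");	/* example interface */

      if (setsockopt (fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver)) < 0
	  || setsockopt (fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof (req)) < 0)
	{ perror ("setsockopt"); return 1; }

      /* the kernel maps all blocks contiguously; VPP maps rx and tx together */
      void *ring = mmap (NULL, (size_t) req.tp_block_size * req.tp_block_nr,
			 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0);
      if (ring == MAP_FAILED)
	{ perror ("mmap"); return 1; }

      if (bind (fd, (struct sockaddr *) &sll, sizeof (sll)) < 0)
	{ perror ("bind"); return 1; }

      puts ("TPACKET_V2 rx ring ready");
      close (fd);
      return 0;
    }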
apif->host_if_name = NULL; + + mhash_unset (&apm->if_index_by_host_if_name, host_if_name, &if_index); + + ethernet_delete_interface (vnm, apif->hw_if_index); + + pool_put (apm->interfaces, apif); + + return 0; +} + +static clib_error_t * +af_packet_init (vlib_main_t * vm) +{ + af_packet_main_t *apm = &af_packet_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + memset (apm, 0, sizeof (af_packet_main_t)); + + mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword)); + + vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + return 0; +} + +VLIB_INIT_FUNCTION (af_packet_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h new file mode 100644 index 00000000..19e2523d --- /dev/null +++ b/src/vnet/devices/af_packet/af_packet.h @@ -0,0 +1,69 @@ +/* + *------------------------------------------------------------------ + * af_packet.h - linux kernel packet interface header file + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u8 *host_if_name; + int fd; + struct tpacket_req *rx_req; + struct tpacket_req *tx_req; + u8 *rx_ring; + u8 *tx_ring; + u32 hw_if_index; + u32 sw_if_index; + u32 unix_file_index; + + u32 next_rx_frame; + u32 next_tx_frame; + + u32 per_interface_next_index; + u8 is_admin_up; +} af_packet_if_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + af_packet_if_t *interfaces; + + /* bitmap of pending rx interfaces */ + uword *pending_input_bitmap; + + /* rx buffer cache */ + u32 **rx_buffers; + + /* hash of host interface names */ + mhash_t if_index_by_host_if_name; +} af_packet_main_t; + +af_packet_main_t af_packet_main; +extern vnet_device_class_t af_packet_device_class; +extern vlib_node_registration_t af_packet_input_node; + +int af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, + u8 * hw_addr_set, u32 * sw_if_index); +int af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/af_packet/af_packet_api.c b/src/vnet/devices/af_packet/af_packet_api.c new file mode 100644 index 00000000..414c838c --- /dev/null +++ b/src/vnet/devices/af_packet/af_packet_api.c @@ -0,0 +1,143 @@ +/* + *------------------------------------------------------------------ + * af_packet_api.c - af-packet api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(AF_PACKET_CREATE, af_packet_create) \ +_(AF_PACKET_DELETE, af_packet_delete) + +static void +vl_api_af_packet_create_t_handler (vl_api_af_packet_create_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_af_packet_create_reply_t *rmp; + int rv = 0; + u8 *host_if_name = NULL; + u32 sw_if_index; + + host_if_name = format (0, "%s", mp->host_if_name); + vec_add1 (host_if_name, 0); + + rv = af_packet_create_if (vm, host_if_name, + mp->use_random_hw_addr ? 0 : mp->hw_addr, + &sw_if_index); + + vec_free (host_if_name); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_AF_PACKET_CREATE_REPLY, + ({ + rmp->sw_if_index = clib_host_to_net_u32(sw_if_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_af_packet_delete_t_handler (vl_api_af_packet_delete_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_af_packet_delete_reply_t *rmp; + int rv = 0; + u8 *host_if_name = NULL; + + host_if_name = format (0, "%s", mp->host_if_name); + vec_add1 (host_if_name, 0); + + rv = af_packet_delete_if (vm, host_if_name); + + vec_free (host_if_name); + + REPLY_MACRO (VL_API_AF_PACKET_DELETE_REPLY); +} + +/* + * af_packet_api_hookup + * Add vpe's API message handlers to the table. + * vlib has already mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_af_packet; +#undef _ +} + +static clib_error_t * +af_packet_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (af_packet_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c new file mode 100644 index 00000000..2cbd4152 --- /dev/null +++ b/src/vnet/devices/af_packet/cli.c @@ -0,0 +1,144 @@ +/* + *------------------------------------------------------------------ + * af_packet.c - linux kernel packet interface + * + * Copyright (c) 2016 Cisco and/or its affiliates.
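[Reviewer's note] foreach_vpe_api_msg above is the usual vlib X-macro idiom: one list expanded twice, once into message ids and once into handler registrations. A standalone sketch of the pattern, with invented message names:

    #include <stdio.h>

    #define foreach_demo_msg \
    _(ITF_CREATE, itf_create)  \
    _(ITF_DELETE, itf_delete)

    /* expansion 1: message-id enum */
    typedef enum
    {
    #define _(N,n) MSG_##N,
      foreach_demo_msg
    #undef _
      MSG_N_IDS,
    } msg_id_t;

    static void itf_create_handler (void) { puts ("create"); }
    static void itf_delete_handler (void) { puts ("delete"); }

    /* expansion 2: handler table, indexed by message id */
    static void (*handlers[MSG_N_IDS]) (void) = {
    #define _(N,n) [MSG_##N] = n##_handler,
      foreach_demo_msg
    #undef _
    };

    int
    main (void)
    {
      handlers[MSG_ITF_DELETE] ();	/* dispatch by id, as the API layer does */
      return 0;
    }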
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include /* for open */ +#include +#include +#include +#include +#include /* for iovec */ +#include + +#include +#include +#include +#include + +#include + +static clib_error_t * +af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 *host_if_name = NULL; + u8 hwaddr[6]; + u8 *hw_addr_ptr = 0; + u32 sw_if_index; + int r; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &host_if_name)) + ; + else + if (unformat + (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr)) + hw_addr_ptr = hwaddr; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (host_if_name == NULL) + return clib_error_return (0, "missing host interface name"); + + r = af_packet_create_if (vm, host_if_name, hw_addr_ptr, &sw_if_index); + vec_free (host_if_name); + + if (r == VNET_API_ERROR_SYSCALL_ERROR_1) + return clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + + if (r == VNET_API_ERROR_INVALID_INTERFACE) + return clib_error_return (0, "Invalid interface name"); + + if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) + return clib_error_return (0, "Interface already exists"); + + vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), + sw_if_index); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (af_packet_create_command, static) = { + .path = "create host-interface", + .short_help = "create host-interface name [hw-addr ]", + .function = af_packet_create_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 *host_if_name = NULL; + + /* Get a line of input.
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &host_if_name)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (host_if_name == NULL) + return clib_error_return (0, "missing host interface name"); + + af_packet_delete_if (vm, host_if_name); + vec_free (host_if_name); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (af_packet_delete_command, static) = { + .path = "delete host-interface", + .short_help = "delete host-interface name ", + .function = af_packet_delete_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +af_packet_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (af_packet_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c new file mode 100644 index 00000000..1fb4000f --- /dev/null +++ b/src/vnet/devices/af_packet/device.c @@ -0,0 +1,250 @@ +/* + *------------------------------------------------------------------ + * af_packet.c - linux kernel packet interface + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include + +#include +#include +#include +#include + +#include + +#define foreach_af_packet_tx_func_error \ +_(FRAME_NOT_READY, "tx frame not ready") \ +_(TXRING_EAGAIN, "tx sendto temporary failure") \ +_(TXRING_FATAL, "tx sendto fatal failure") \ +_(TXRING_OVERRUN, "tx ring overrun") + +typedef enum +{ +#define _(f,s) AF_PACKET_TX_ERROR_##f, + foreach_af_packet_tx_func_error +#undef _ + AF_PACKET_TX_N_ERROR, +} af_packet_tx_func_error_t; + +static char *af_packet_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_af_packet_tx_func_error +#undef _ +}; + + +static u8 * +format_af_packet_device_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + af_packet_main_t *apm = &af_packet_main; + af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, i); + + s = format (s, "host-%s", apif->host_if_name); + return s; +} + +static u8 * +format_af_packet_device (u8 * s, va_list * args) +{ + s = format (s, "Linux PACKET socket interface"); + return s; +} + +static u8 * +format_af_packet_tx_trace (u8 * s, va_list * args) +{ + s = format (s, "Unimplemented..."); + return s; +} + +static uword +af_packet_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + af_packet_main_t *apm = &af_packet_main; + u32 *buffers = vlib_frame_args (frame); + u32 n_left = frame->n_vectors; + u32 n_sent = 0; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + af_packet_if_t *apif = + pool_elt_at_index (apm->interfaces, rd->dev_instance); + int block = 0; + u32 block_size = apif->tx_req->tp_block_size; + u32 frame_size = apif->tx_req->tp_frame_size; + u32 frame_num = apif->tx_req->tp_frame_nr; + u8 *block_start = apif->tx_ring + block * block_size; + u32 tx_frame = apif->next_tx_frame; + struct tpacket2_hdr *tph; + u32 frame_not_ready = 0; + + while (n_left > 0) + { + u32 len; + u32 offset = 0; + vlib_buffer_t *b0; + n_left--; + u32 bi = buffers[0]; + buffers++; + + tph = (struct tpacket2_hdr *) (block_start + tx_frame * frame_size); + + if (PREDICT_FALSE + (tph->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))) + { + frame_not_ready++; + goto next; + } + + do + { + b0 = vlib_get_buffer (vm, bi); + len = b0->current_length; + clib_memcpy ((u8 *) tph + + TPACKET_ALIGN (sizeof (struct tpacket2_hdr)) + offset, + vlib_buffer_get_current (b0), len); + offset += len; + } + while ((bi = b0->next_buffer)); + + tph->tp_len = tph->tp_snaplen = offset; + tph->tp_status = TP_STATUS_SEND_REQUEST; + n_sent++; + next: + /* check if we've exhausted the ring */ + if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num)) + break; + + tx_frame = (tx_frame + 1) % frame_num; + } + + CLIB_MEMORY_BARRIER (); + + if (PREDICT_TRUE (n_sent)) + { + apif->next_tx_frame = tx_frame; + + if (PREDICT_FALSE (sendto (apif->fd, NULL, 0, + MSG_DONTWAIT, NULL, 0) == -1)) + { + /* Uh-oh, drop & move on, but count whether it was fatal or not. + * Note that we have no reliable way to properly determine the + * disposition of the packets we just enqueued for delivery. + */ + vlib_error_count (vm, node->node_index, + unix_error_is_fatal (errno) ? 
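[Reviewer's note] The tx path above only marks frames TP_STATUS_SEND_REQUEST and then kicks the kernel once with a zero-length sendto(). A standalone sketch of that protocol, assuming fd, ring and geometry come from a PACKET_TX_RING setup like the one earlier in this patch; tx_one() is an invented helper, not VPP API:

    #include <errno.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <linux/if_packet.h>

    static int
    tx_one (int fd, unsigned char *ring, unsigned frame_size,
	    unsigned frame_nr, unsigned *next, const void *pkt, unsigned len)
    {
      struct tpacket2_hdr *tph =
	(struct tpacket2_hdr *) (ring + (size_t) *next * frame_size);

      /* frame still owned by the kernel? caller must retry later */
      if (tph->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))
	return -EAGAIN;

      /* packet data starts after the aligned frame header */
      memcpy ((unsigned char *) tph +
	      TPACKET_ALIGN (sizeof (struct tpacket2_hdr)), pkt, len);
      tph->tp_len = len;
      tph->tp_status = TP_STATUS_SEND_REQUEST;	/* hand frame to the kernel */
      *next = (*next + 1) % frame_nr;		/* ring wraps modulo frame count */

      /* zero-length send flushes every SEND_REQUEST frame; MSG_DONTWAIT
         keeps a graph node from blocking here */
      return sendto (fd, NULL, 0, MSG_DONTWAIT, NULL, 0) < 0 ? -errno : 0;
    }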
+ AF_PACKET_TX_ERROR_TXRING_FATAL : + AF_PACKET_TX_ERROR_TXRING_EAGAIN, n_sent); + } + } + + if (PREDICT_FALSE (frame_not_ready)) + vlib_error_count (vm, node->node_index, + AF_PACKET_TX_ERROR_FRAME_NOT_READY, frame_not_ready); + + if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num)) + vlib_error_count (vm, node->node_index, AF_PACKET_TX_ERROR_TXRING_OVERRUN, + n_left); + + vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); + return frame->n_vectors; +} + +static void +af_packet_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + af_packet_main_t *apm = &af_packet_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + af_packet_if_t *apif = + pool_elt_at_index (apm->interfaces, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + apif->per_interface_next_index = node_index; + return; + } + + apif->per_interface_next_index = + vlib_node_add_next (vlib_get_main (), af_packet_input_node.index, + node_index); +} + +static void +af_packet_clear_hw_interface_counters (u32 instance) +{ + /* Nothing for now */ +} + +static clib_error_t * +af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, + u32 flags) +{ + af_packet_main_t *apm = &af_packet_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + af_packet_if_t *apif = + pool_elt_at_index (apm->interfaces, hw->dev_instance); + u32 hw_flags; + + apif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + + if (apif->is_admin_up) + hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP; + else + hw_flags = 0; + + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + + return 0; +} + +static clib_error_t * +af_packet_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + /* Nothing for now */ + return 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (af_packet_device_class) = { + .name = "af-packet", + .tx_function = af_packet_interface_tx, + .format_device_name = format_af_packet_device_name, + .format_device = format_af_packet_device, + .format_tx_trace = format_af_packet_tx_trace, + .tx_function_n_errors = AF_PACKET_TX_N_ERROR, + .tx_function_error_strings = af_packet_tx_func_error_strings, + .rx_redirect_to_node = af_packet_set_interface_next_node, + .clear_counters = af_packet_clear_hw_interface_counters, + .admin_up_down_function = af_packet_interface_admin_up_down, + .subif_add_del_function = af_packet_subif_add_del_function, +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (af_packet_device_class, + af_packet_interface_tx) +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c new file mode 100644 index 00000000..72004320 --- /dev/null +++ b/src/vnet/devices/af_packet/node.c @@ -0,0 +1,288 @@ +/* + *------------------------------------------------------------------ + * af_packet.c - linux kernel packet interface + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#define foreach_af_packet_input_error + +typedef enum +{ +#define _(f,s) AF_PACKET_INPUT_ERROR_##f, + foreach_af_packet_input_error +#undef _ + AF_PACKET_INPUT_N_ERROR, +} af_packet_input_error_t; + +static char *af_packet_input_error_strings[] = { +#define _(n,s) s, + foreach_af_packet_input_error +#undef _ +}; + +typedef struct +{ + u32 next_index; + u32 hw_if_index; + int block; + struct tpacket2_hdr tph; +} af_packet_input_trace_t; + +static u8 * +format_af_packet_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *); + uword indent = format_get_indent (s); + + s = format (s, "af_packet: hw_if_index %d next-index %d", + t->hw_if_index, t->next_index); + + s = + format (s, + "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u" + "\n%Usec 0x%x nsec 0x%x vlan %U" +#ifdef TP_STATUS_VLAN_TPID_VALID + " vlan_tpid %u" +#endif + , + format_white_space, indent + 2, + format_white_space, indent + 4, + t->tph.tp_status, + t->tph.tp_len, + t->tph.tp_snaplen, + t->tph.tp_mac, + t->tph.tp_net, + format_white_space, indent + 4, + t->tph.tp_sec, + t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.tp_vlan_tci +#ifdef TP_STATUS_VLAN_TPID_VALID + , t->tph.tp_vlan_tpid +#endif + ); + return s; +} + +always_inline void +buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi); + vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi); + + /* update first buffer */ + first_b->total_length_not_including_first_buffer += b->current_length; + + /* update previous buffer */ + prev_b->next_buffer = bi; + prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT; + + /* update current buffer */ + b->next_buffer = 0; +} + +always_inline uword +af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, u32 device_idx) +{ + af_packet_main_t *apm = &af_packet_main; + af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, device_idx); + struct tpacket2_hdr *tph; + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + u32 block = 0; + u32 rx_frame; + u32 n_free_bufs; + u32 n_rx_packets = 0; + u32 n_rx_bytes = 0; + u32 *to_next = 0; + u32 block_size = apif->rx_req->tp_block_size; + u32 frame_size = apif->rx_req->tp_frame_size; + u32 frame_num = apif->rx_req->tp_frame_nr; + u8 *block_start = apif->rx_ring + block * block_size; + uword n_trace = vlib_get_trace_count (vm, node); + u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes; + int cpu_index = node->cpu_index; + + if (apif->per_interface_next_index != ~0) + next_index = apif->per_interface_next_index; + + n_free_bufs = vec_len 
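[Reviewer's note] buffer_add_to_chain() above scatters one rx frame across several fixed-size buffers, with the head buffer accumulating the length of the tail. A self-contained sketch of the same bookkeeping, with a plain struct standing in for vlib buffers:

    #include <stdio.h>
    #include <string.h>

    #define BUF_DATA 256

    typedef struct buf
    {
      unsigned len;		/* bytes in this buffer */
      unsigned total_tail_len;	/* head only: bytes in all later buffers */
      struct buf *next;
      unsigned char data[BUF_DATA];
    } buf_t;

    static buf_t pool[8];	/* stand-in for the vlib buffer pool */

    static buf_t *
    chain_copy (const unsigned char *src, unsigned len)
    {
      buf_t *head = NULL, *prev = NULL;
      unsigned n = 0;

      while (len)
	{
	  buf_t *b = &pool[n++];
	  b->len = len > BUF_DATA ? BUF_DATA : len;
	  b->next = NULL;
	  b->total_tail_len = 0;
	  memcpy (b->data, src, b->len);
	  src += b->len;
	  len -= b->len;

	  if (!head)
	    head = b;			/* first buffer carries the totals */
	  else
	    {
	      prev->next = b;		/* link previous to current */
	      head->total_tail_len += b->len;
	    }
	  prev = b;
	}
      return head;
    }

    int
    main (void)
    {
      unsigned char payload[600] = { 0 };
      buf_t *h = chain_copy (payload, sizeof (payload));
      printf ("head=%u tail=%u\n", h->len, h->total_tail_len);
      return 0;
    }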
(apm->rx_buffers[cpu_index]); + if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) + { + vec_validate (apm->rx_buffers[cpu_index], + VLIB_FRAME_SIZE + n_free_bufs - 1); + n_free_bufs += + vlib_buffer_alloc (vm, &apm->rx_buffers[cpu_index][n_free_bufs], + VLIB_FRAME_SIZE); + _vec_len (apm->rx_buffers[cpu_index]) = n_free_bufs; + } + + rx_frame = apif->next_rx_frame; + tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size); + while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs)) + { + vlib_buffer_t *b0 = 0, *first_b0 = 0; + u32 next0 = next_index; + + u32 n_left_to_next; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs) && + n_left_to_next) + { + u32 data_len = tph->tp_snaplen; + u32 offset = 0; + u32 bi0 = 0, first_bi0 = 0, prev_bi0; + + while (data_len) + { + /* grab free buffer */ + u32 last_empty_buffer = + vec_len (apm->rx_buffers[cpu_index]) - 1; + prev_bi0 = bi0; + bi0 = apm->rx_buffers[cpu_index][last_empty_buffer]; + b0 = vlib_get_buffer (vm, bi0); + _vec_len (apm->rx_buffers[cpu_index]) = last_empty_buffer; + n_free_bufs--; + + /* copy data */ + u32 bytes_to_copy = + data_len > n_buffer_bytes ? n_buffer_bytes : data_len; + b0->current_data = 0; + clib_memcpy (vlib_buffer_get_current (b0), + (u8 *) tph + tph->tp_mac + offset, bytes_to_copy); + + /* fill buffer header */ + b0->current_length = bytes_to_copy; + + if (offset == 0) + { + b0->total_length_not_including_first_buffer = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = apif->sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + first_bi0 = bi0; + first_b0 = vlib_get_buffer (vm, first_bi0); + } + else + buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0); + + offset += bytes_to_copy; + data_len -= bytes_to_copy; + } + n_rx_packets++; + n_rx_bytes += tph->tp_snaplen; + to_next[0] = first_bi0; + to_next += 1; + n_left_to_next--; + + /* trace */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0); + if (PREDICT_FALSE (n_trace > 0)) + { + af_packet_input_trace_t *tr; + vlib_trace_buffer (vm, node, next0, first_b0, /* follow_chain */ + 0); + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = apif->hw_if_index; + clib_memcpy (&tr->tph, tph, sizeof (struct tpacket2_hdr)); + } + + /* redirect if feature path enabled */ + vnet_feature_start_device_input_x1 (apif->sw_if_index, &next0, b0, + 0); + + /* enque and take next packet */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, first_bi0, next0); + + /* next packet */ + tph->tp_status = TP_STATUS_KERNEL; + rx_frame = (rx_frame + 1) % frame_num; + tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + apif->next_rx_frame = rx_frame; + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + os_get_cpu_number (), apif->hw_if_index, n_rx_packets, n_rx_bytes); + + return n_rx_packets; +} + +static uword +af_packet_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + int i; + u32 n_rx_packets = 0; + + af_packet_main_t *apm = &af_packet_main; + + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, apm->pending_input_bitmap, + ({ + clib_bitmap_set (apm->pending_input_bitmap, i, 0); + n_rx_packets += 
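[Reviewer's note] The rx loop above follows the standard TPACKET_V2 ownership protocol: a frame whose status has TP_STATUS_USER set belongs to user space, and writing TP_STATUS_KERNEL hands it back. A standalone sketch, with geometry assumed to come from the PACKET_RX_RING setup shown earlier (drain_rx_ring() is an invented name):

    #include <stdio.h>
    #include <linux/if_packet.h>

    static unsigned
    drain_rx_ring (unsigned char *ring, unsigned frame_size,
		   unsigned frame_nr, unsigned *next)
    {
      unsigned n = 0;

      for (;;)
	{
	  struct tpacket2_hdr *tph =
	    (struct tpacket2_hdr *) (ring + (size_t) *next * frame_size);

	  if (!(tph->tp_status & TP_STATUS_USER))
	    break;			/* kernel still owns this frame */

	  /* packet bytes start tp_mac bytes into the frame */
	  unsigned char *pkt = (unsigned char *) tph + tph->tp_mac;
	  printf ("frame %u: %u bytes at %p\n", *next, tph->tp_snaplen,
		  (void *) pkt);

	  tph->tp_status = TP_STATUS_KERNEL;	/* hand the frame back */
	  *next = (*next + 1) % frame_nr;	/* ring wraps */
	  n++;
	}
      return n;
    }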
af_packet_device_input_fn(vm, node, frame, i); + })); + /* *INDENT-ON* */ + + return n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (af_packet_input_node) = { + .function = af_packet_input_fn, + .name = "af-packet-input", + .sibling_of = "device-input", + .format_trace = format_af_packet_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .n_errors = AF_PACKET_INPUT_N_ERROR, + .error_strings = af_packet_input_error_strings, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (af_packet_input_node, af_packet_input_fn) +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c new file mode 100644 index 00000000..cd4386eb --- /dev/null +++ b/src/vnet/devices/devices.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +static uword +device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (device_input_node) = { + .function = device_input_fn, + .name = "device-input", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, + .n_next_nodes = VNET_DEVICE_INPUT_N_NEXT_NODES, + .next_nodes = VNET_DEVICE_INPUT_NEXT_NODES, +}; + +/* Table defines how much we need to advance current data pointer + in the buffer if we shortcut to l3 nodes */ + +const u32 __attribute__((aligned (CLIB_CACHE_LINE_BYTES))) +device_input_next_node_advance[((VNET_DEVICE_INPUT_N_NEXT_NODES / + CLIB_CACHE_LINE_BYTES) +1) * CLIB_CACHE_LINE_BYTES] = +{ + [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = sizeof (ethernet_header_t), + [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = sizeof (ethernet_header_t), + [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = sizeof (ethernet_header_t), + [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = sizeof (ethernet_header_t), +}; + +VNET_FEATURE_ARC_INIT (device_input, static) = +{ + .arc_name = "device-input", + .start_nodes = VNET_FEATURES ("device-input"), + .end_node = "ethernet-input", + .arc_index_ptr = &feature_main.device_input_feature_arc_index, +}; + +VNET_FEATURE_INIT (l2_patch, static) = { + .arc_name = "device-input", + .node_name = "l2-patch", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; + +VNET_FEATURE_INIT (worker_handoff, static) = { + .arc_name = "device-input", + .node_name = "worker-handoff", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; + +VNET_FEATURE_INIT (span_input, static) = { + .arc_name = "device-input", + .node_name = "span-input", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; + +VNET_FEATURE_INIT (ethernet_input, static) = { + .arc_name = "device-input", + .node_name = "ethernet-input", + .runs_before = 0, /* not before any other features */ +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style 
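[Reviewer's note] The interrupt-mode dispatch above (the fd-ready callback sets a bit per device, the input node walks and clears the bitmap) can be sketched standalone with a u64 standing in for clib_bitmap_t:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t pending;	/* bit i set == device i has rx work */

    static void
    mark_pending (unsigned dev)
    {
      pending |= 1ULL << dev;	/* done from the fd-read-ready callback */
    }

    static unsigned
    poll_pending (void)
    {
      unsigned n = 0;
      while (pending)
	{
	  unsigned dev = (unsigned) __builtin_ctzll (pending); /* lowest set bit */
	  pending &= pending - 1;			       /* clear it */
	  printf ("servicing device %u\n", dev);  /* stand-in for the per-device input fn */
	  n++;
	}
      return n;
    }

    int
    main (void)
    {
      mark_pending (3);
      mark_pending (0);
      return (int) poll_pending ();
    }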
"gnu") + * End: + */ diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h new file mode 100644 index 00000000..c46dab90 --- /dev/null +++ b/src/vnet/devices/devices.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_vnet_device_h +#define included_vnet_vnet_device_h + +#include +#include + +typedef enum +{ + VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT, + VNET_DEVICE_INPUT_NEXT_IP4_INPUT, + VNET_DEVICE_INPUT_NEXT_IP6_INPUT, + VNET_DEVICE_INPUT_NEXT_MPLS_INPUT, + VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT, + VNET_DEVICE_INPUT_NEXT_DROP, + VNET_DEVICE_INPUT_N_NEXT_NODES, +} vnet_device_input_next_t; + +#define VNET_DEVICE_INPUT_NEXT_NODES { \ + [VNET_DEVICE_INPUT_NEXT_DROP] = "error-drop", \ + [VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input", \ + [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = "ip4-input-no-checksum", \ + [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = "ip4-input", \ + [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = "ip6-input", \ + [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = "mpls-input", \ +} + +extern vlib_node_registration_t device_input_node; +extern const u32 device_input_next_node_advance[]; + +#endif /* included_vnet_vnet_device_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c new file mode 100644 index 00000000..538a00fd --- /dev/null +++ b/src/vnet/devices/dpdk/cli.c @@ -0,0 +1,1296 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "dpdk_priv.h" + +static clib_error_t * +pcap_trace_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + dpdk_main_t *dm = &dpdk_main; + u8 *filename; + u32 max; + int matched = 0; + clib_error_t *error = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on")) + { + if (dm->tx_pcap_enable == 0) + { + if (dm->pcap_filename == 0) + dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0); + + memset (&dm->pcap_main, 0, sizeof (dm->pcap_main)); + dm->pcap_main.file_name = (char *) dm->pcap_filename; + dm->pcap_main.n_packets_to_capture = 100; + if (dm->pcap_pkts_to_capture) + dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture; + + dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; + dm->tx_pcap_enable = 1; + matched = 1; + vlib_cli_output (vm, "pcap tx capture on..."); + } + else + { + vlib_cli_output (vm, "pcap tx capture already on..."); + } + matched = 1; + } + else if (unformat (input, "off")) + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, "captured %d pkts...", + dm->pcap_main.n_packets_captured + 1); + if (dm->pcap_main.n_packets_captured) + { + dm->pcap_main.n_packets_to_capture = + dm->pcap_main.n_packets_captured; + error = pcap_write (&dm->pcap_main); + if (error) + clib_error_report (error); + else + vlib_cli_output (vm, "saved to %s...", dm->pcap_filename); + } + } + else + { + vlib_cli_output (vm, "pcap tx capture already off..."); + } + + dm->tx_pcap_enable = 0; + matched = 1; + } + else if (unformat (input, "max %d", &max)) + { + dm->pcap_pkts_to_capture = max; + matched = 1; + } + + else if (unformat (input, "intfc %U", + unformat_vnet_sw_interface, dm->vnet_main, + &dm->pcap_sw_if_index)) + matched = 1; + else if (unformat (input, "intfc any")) + { + dm->pcap_sw_if_index = 0; + matched = 1; + } + else if (unformat (input, "file %s", &filename)) + { + u8 *chroot_filename; + /* Brain-police user path input */ + if (strstr ((char *) filename, "..") + || index ((char *) filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + continue; + } + + chroot_filename = format (0, "/tmp/%s%c", filename, 0); + vec_free (filename); + + if (dm->pcap_filename) + vec_free (dm->pcap_filename); + vec_add1 (filename, 0); + dm->pcap_filename = chroot_filename; + matched = 1; + } + else if (unformat (input, "status")) + { + if (dm->tx_pcap_enable == 0) + { + vlib_cli_output (vm, "pcap tx capture is off..."); + continue; + } + + vlib_cli_output (vm, "pcap tx capture: %d of %d pkts...", + dm->pcap_main.n_packets_captured, + dm->pcap_main.n_packets_to_capture); + matched = 1; + } + + else + break; + } + + if (matched == 0) + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (pcap_trace_command, static) = { + .path = "pcap tx trace", + .short_help = + "pcap tx trace on off max intfc file status", + .function = pcap_trace_command_fn, +}; +/* *INDENT-ON* */ + + +static clib_error_t * +show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + struct rte_mempool *rmp; + int i; + + for (i = 0; i < vec_len (vm->buffer_main->pktmbuf_pools); i++) + { + rmp = vm->buffer_main->pktmbuf_pools[i]; + if (rmp) + { + unsigned count = rte_mempool_avail_count (rmp); + unsigned free_count = rte_mempool_in_use_count (rmp); + + vlib_cli_output (vm, + 
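[Reviewer's note] The "brain-police" block above confines pcap output to /tmp by rejecting '/' and "..". A standalone sketch of that check (chroot_pcap_name() is an invented name):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static char *
    chroot_pcap_name (const char *user_name)
    {
      if (strstr (user_name, "..") || strchr (user_name, '/'))
	return NULL;			/* illegal characters in filename */

      size_t n = strlen (user_name) + sizeof ("/tmp/");
      char *out = malloc (n);
      if (out)
	snprintf (out, n, "/tmp/%s", user_name);
      return out;
    }

    int
    main (void)
    {
      char *ok = chroot_pcap_name ("vpe.pcap");
      char *bad = chroot_pcap_name ("../etc/passwd");
      printf ("%s / %s\n", ok ? ok : "(null)", bad ? bad : "(rejected)");
      free (ok);
      return 0;
    }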
"name=\"%s\" available = %7d allocated = %7d total = %7d\n", + rmp->name, (u32) count, (u32) free_count, + (u32) (count + free_count)); + } + else + { + vlib_cli_output (vm, "rte_mempool is NULL (!)\n"); + } + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_bufferr,static) = { + .path = "show dpdk buffer", + .short_help = "show dpdk buffer state", + .function = show_dpdk_buffer, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static clib_error_t * +test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + static u32 *allocated_buffers; + u32 n_alloc = 0; + u32 n_free = 0; + u32 first, actual_alloc; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "allocate %d", &n_alloc)) + ; + else if (unformat (input, "free %d", &n_free)) + ; + else + break; + } + + if (n_free) + { + if (vec_len (allocated_buffers) < n_free) + return clib_error_return (0, "Can't free %d, only %d allocated", + n_free, vec_len (allocated_buffers)); + + first = vec_len (allocated_buffers) - n_free; + vlib_buffer_free (vm, allocated_buffers + first, n_free); + _vec_len (allocated_buffers) = first; + } + if (n_alloc) + { + first = vec_len (allocated_buffers); + vec_validate (allocated_buffers, + vec_len (allocated_buffers) + n_alloc - 1); + + actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first, + n_alloc); + _vec_len (allocated_buffers) = first + actual_alloc; + + if (actual_alloc < n_alloc) + vlib_cli_output (vm, "WARNING: only allocated %d buffers", + actual_alloc); + } + + vlib_cli_output (vm, "Currently %d buffers allocated", + vec_len (allocated_buffers)); + + if (allocated_buffers && vec_len (allocated_buffers) == 0) + vec_free (allocated_buffers); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = { + .path = "test dpdk buffer", + .short_help = "test dpdk buffer [allocate ][free ]", + .function = test_dpdk_buffer, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 nb_rx_desc = (u32) ~ 0; + u32 nb_tx_desc = (u32) ~ 0; + clib_error_t *rv; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "tx %d", &nb_tx_desc)) + ; + else if (unformat (line_input, "rx %d", &nb_rx_desc)) + ; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (hw_if_index == (u32) ~ 0) + return clib_error_return (0, "please specify valid interface name"); + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + return clib_error_return (0, "number of descriptors can be set only for " + "physical devices"); + + if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) && + (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc)) + return clib_error_return (0, "nothing changed"); + + if (nb_rx_desc != (u32) ~ 0) + xd->nb_rx_desc = nb_rx_desc; + + if (nb_tx_desc != (u32) ~ 0) + xd->nb_tx_desc = 
nb_tx_desc; + + rv = dpdk_port_setup (dm, xd); + + return rv; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = { + .path = "set dpdk interface descriptors", + .short_help = "set dpdk interface descriptors [rx ] [tx ]", + .function = set_dpdk_if_desc, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + int cpu; + + if (tm->n_vlib_mains == 1) + vlib_cli_output (vm, "All interfaces are handled by main thread"); + + for (cpu = 0; cpu < vec_len (dm->devices_by_cpu); cpu++) + { + if (vec_len (dm->devices_by_cpu[cpu])) + vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, + vlib_worker_threads[cpu].name, + vlib_worker_threads[cpu].lcore_id); + + /* *INDENT-OFF* */ + vec_foreach(dq, dm->devices_by_cpu[cpu]) + { + u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; + vnet_hw_interface_t * hi = vnet_get_hw_interface(dm->vnet_main, hw_if_index); + vlib_cli_output(vm, " %v queue %u", hi->name, dq->queue_id); + } + /* *INDENT-ON* */ + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_if_placement,static) = { + .path = "show dpdk interface placement", + .short_help = "show dpdk interface placement", + .function = show_dpdk_if_placement, +}; +/* *INDENT-ON* */ + +static int +dpdk_device_queue_sort (void *a1, void *a2) +{ + dpdk_device_and_queue_t *dq1 = a1; + dpdk_device_and_queue_t *dq2 = a2; + + if (dq1->device > dq2->device) + return 1; + else if (dq1->device < dq2->device) + return -1; + else if (dq1->queue_id > dq2->queue_id) + return 1; + else if (dq1->queue_id < dq2->queue_id) + return -1; + else + return 0; +} + +static clib_error_t * +set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 queue = (u32) 0; + u32 cpu = (u32) ~ 0; + int i; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "queue %d", &queue)) + ; + else if (unformat (line_input, "thread %d", &cpu)) + ; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (hw_if_index == (u32) ~ 0) + return clib_error_return (0, "please specify valid interface name"); + + if (cpu < dm->input_cpu_first_index || + cpu >= (dm->input_cpu_first_index + dm->input_cpu_count)) + return clib_error_return (0, "please specify valid thread id"); + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + for (i = 0; i < vec_len (dm->devices_by_cpu); i++) + { + /* *INDENT-OFF* */ + vec_foreach(dq, dm->devices_by_cpu[i]) + { + if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index && + queue == dq->queue_id) + { + if (cpu == i) /* nothing to do */ + return 0; + + vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]); + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->queue_id = queue; + dq->device = xd->device_index; + xd->cpu_socket_id_by_queue[queue] = + 
+
+static clib_error_t *
+set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_and_queue_t *dq;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 queue = (u32) 0;
+ u32 cpu = (u32) ~ 0;
+ int i;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "queue %d", &queue))
+ ;
+ else if (unformat (line_input, "thread %d", &cpu))
+ ;
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~ 0)
+ return clib_error_return (0, "please specify valid interface name");
+
+ if (cpu < dm->input_cpu_first_index ||
+ cpu >= (dm->input_cpu_first_index + dm->input_cpu_count))
+ return clib_error_return (0, "please specify valid thread id");
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ for (i = 0; i < vec_len (dm->devices_by_cpu); i++)
+ {
+ /* *INDENT-OFF* */
+ vec_foreach(dq, dm->devices_by_cpu[i])
+ {
+ if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index &&
+ queue == dq->queue_id)
+ {
+ if (cpu == i) /* nothing to do */
+ return 0;
+
+ vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]);
+ vec_add2(dm->devices_by_cpu[cpu], dq, 1);
+ dq->queue_id = queue;
+ dq->device = xd->device_index;
+ xd->cpu_socket_id_by_queue[queue] =
+ rte_lcore_to_socket_id(vlib_worker_threads[cpu].lcore_id);
+
+ vec_sort_with_function(dm->devices_by_cpu[i],
+ dpdk_device_queue_sort);
+
+ vec_sort_with_function(dm->devices_by_cpu[cpu],
+ dpdk_device_queue_sort);
+
+ if (vec_len(dm->devices_by_cpu[i]) == 0)
+ vlib_node_set_state (vlib_mains[i], dpdk_input_node.index,
+ VLIB_NODE_STATE_DISABLED);
+
+ if (vec_len(dm->devices_by_cpu[cpu]) == 1)
+ vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index,
+ VLIB_NODE_STATE_POLLING);
+
+ return 0;
+ }
+ }
+ /* *INDENT-ON* */
+ }
+
+ return clib_error_return (0, "not found");
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = {
+ .path = "set dpdk interface placement",
+ .short_help = "set dpdk interface placement <if-name> [queue <n>] thread <n>",
+ .function = set_dpdk_if_placement,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_and_queue_t *dq;
+ int cpu;
+
+ if (tm->n_vlib_mains == 1)
+ vlib_cli_output (vm, "All interfaces are handled by main thread");
+
+ for (cpu = 0; cpu < vec_len (dm->devices_by_hqos_cpu); cpu++)
+ {
+ if (vec_len (dm->devices_by_hqos_cpu[cpu]))
+ vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu,
+ vlib_worker_threads[cpu].name,
+ vlib_worker_threads[cpu].lcore_id);
+
+ vec_foreach (dq, dm->devices_by_hqos_cpu[cpu])
+ {
+ u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index;
+ vnet_hw_interface_t *hi =
+ vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ vlib_cli_output (vm, " %v queue %u", hi->name, dq->queue_id);
+ }
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos_placement, static) = {
+ .path = "show dpdk interface hqos placement",
+ .short_help = "show dpdk interface hqos placement",
+ .function = show_dpdk_if_hqos_placement,
+};
+/* *INDENT-ON* */
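set_dpdk_if_placement above also flips the scheduling state of the dpdk-input node per worker: polling burns CPU, so a worker that loses its last queue stops polling and one that gains its first queue starts. The relevant pattern, as a standalone sketch (assuming a thread index i and the names from this file):

/* Sketch only: enable/disable polling of dpdk-input on worker i
 * depending on whether it still owns any (device, queue) pairs. */
if (vec_len (dm->devices_by_cpu[i]) == 0)
  vlib_node_set_state (vlib_mains[i], dpdk_input_node.index,
                       VLIB_NODE_STATE_DISABLED);
else
  vlib_node_set_state (vlib_mains[i], dpdk_input_node.index,
                       VLIB_NODE_STATE_POLLING);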
+
+static clib_error_t *
+set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_and_queue_t *dq;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 cpu = (u32) ~ 0;
+ int i;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "thread %d", &cpu))
+ ;
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~ 0)
+ return clib_error_return (0, "please specify valid interface name");
+
+ if (cpu < dm->hqos_cpu_first_index ||
+ cpu >= (dm->hqos_cpu_first_index + dm->hqos_cpu_count))
+ return clib_error_return (0, "please specify valid thread id");
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ for (i = 0; i < vec_len (dm->devices_by_hqos_cpu); i++)
+ {
+ vec_foreach (dq, dm->devices_by_hqos_cpu[i])
+ {
+ if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index)
+ {
+ if (cpu == i) /* nothing to do */
+ return 0;
+
+ vec_del1 (dm->devices_by_hqos_cpu[i],
+ dq - dm->devices_by_hqos_cpu[i]);
+ vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
+ dq->queue_id = 0;
+ dq->device = xd->device_index;
+
+ vec_sort_with_function (dm->devices_by_hqos_cpu[i],
+ dpdk_device_queue_sort);
+
+ vec_sort_with_function (dm->devices_by_hqos_cpu[cpu],
+ dpdk_device_queue_sort);
+
+ return 0;
+ }
+ }
+ }
+
+ return clib_error_return (0, "not found");
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_placement, static) = {
+ .path = "set dpdk interface hqos placement",
+ .short_help = "set dpdk interface hqos placement <if-name> thread <n>",
+ .function = set_dpdk_if_hqos_placement,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ dpdk_main_t *dm = &dpdk_main;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 subport_id = (u32) ~ 0;
+ u32 pipe_id = (u32) ~ 0;
+ u32 profile_id = (u32) ~ 0;
+ int rv;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "subport %d", &subport_id))
+ ;
+ else if (unformat (line_input, "pipe %d", &pipe_id))
+ ;
+ else if (unformat (line_input, "profile %d", &profile_id))
+ ;
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~ 0)
+ return clib_error_return (0, "please specify valid interface name");
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ rv =
+ rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id,
+ profile_id);
+ if (rv)
+ return clib_error_return (0, "pipe configuration failed");
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pipe, static) =
+{
+ .path = "set dpdk interface hqos pipe",
+ .short_help = "set dpdk interface hqos pipe <if-name> subport <subport-id> pipe <pipe-id> "
+ "profile <profile-id>",
+ .function = set_dpdk_if_hqos_pipe,
+};
+/* *INDENT-ON* */
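rte_sched_pipe_config can only point a pipe at a profile that was loaded into the port's pipe-profile table at setup time. A hedged sketch of the underlying DPDK call (port is an assumed, already-configured struct rte_sched_port pointer):

/* Sketch only: attach pipe 3 of subport 0 to pipe profile 1. The
 * profile must exist in rte_sched_port_params.pipe_profiles. */
int rv = rte_sched_pipe_config (port, 0 /* subport */ , 3 /* pipe */ ,
                                1 /* profile */ );
if (rv != 0)
  return clib_error_return (0, "pipe configuration failed");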
+
+static clib_error_t *
+set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ dpdk_main_t *dm = &dpdk_main;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 subport_id = (u32) ~ 0;
+ struct rte_sched_subport_params p = {
+ .tb_rate = 1250000000, /* 10GbE */
+ .tb_size = 1000000,
+ .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
+ .tc_period = 10,
+ };
+ int rv;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "subport %d", &subport_id))
+ ;
+ else if (unformat (line_input, "rate %d", &p.tb_rate))
+ {
+ p.tc_rate[0] = p.tb_rate;
+ p.tc_rate[1] = p.tb_rate;
+ p.tc_rate[2] = p.tb_rate;
+ p.tc_rate[3] = p.tb_rate;
+ }
+ else if (unformat (line_input, "bktsize %d", &p.tb_size))
+ ;
+ else if (unformat (line_input, "tc0 %d", &p.tc_rate[0]))
+ ;
+ else if (unformat (line_input, "tc1 %d", &p.tc_rate[1]))
+ ;
+ else if (unformat (line_input, "tc2 %d", &p.tc_rate[2]))
+ ;
+ else if (unformat (line_input, "tc3 %d", &p.tc_rate[3]))
+ ;
+ else if (unformat (line_input, "period %d", &p.tc_period))
+ ;
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~ 0)
+ return clib_error_return (0, "please specify valid interface name");
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p);
+ if (rv)
+ return clib_error_return (0, "subport configuration failed");
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_subport, static) = {
+ .path = "set dpdk interface hqos subport",
+ .short_help = "set dpdk interface hqos subport <if-name> subport <subport-id> "
+ "[rate <n>] [bktsize <n>] [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] "
+ "[period <n>]",
+ .function = set_dpdk_if_hqos_subport,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_main_t *dm = &dpdk_main;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 tc = (u32) ~ 0;
+ u32 queue = (u32) ~ 0;
+ u32 entry = (u32) ~ 0;
+ u32 val, i;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "entry %d", &entry))
+ ;
+ else if (unformat (line_input, "tc %d", &tc))
+ ;
+ else if (unformat (line_input, "queue %d", &queue))
+ ;
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~ 0)
+ return clib_error_return (0, "please specify valid interface name");
+ if (entry >= 64)
+ return clib_error_return (0, "invalid entry");
+ if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE)
+ return clib_error_return (0, "invalid traffic class");
+ if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
+ return clib_error_return (0, "invalid traffic class queue");
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ /* Detect the set of worker threads */
+ uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ /* Should never happen, shut up Coverity warning */
+ if (p == 0)
+ return clib_error_return (0, "no worker registrations?");
+
+ vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0];
+ int worker_thread_first = tr->first_index;
+ int worker_thread_count = tr->count;
+
+ val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
+ for (i = 0; i < worker_thread_count; i++)
+ xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val;
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_tctbl, static) = {
+ .path = "set dpdk interface hqos tctbl",
+ .short_help = "set dpdk interface hqos tctbl <if-name> entry <n> tc <n> queue <n>",
+ .function = set_dpdk_if_hqos_tctbl,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_main_t *dm = &dpdk_main;
+
+ /* Device specific data */
+ struct rte_eth_dev_info dev_info;
+ dpdk_device_config_t *devconf = 0;
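+
+ /* The three classifier fields (id 0 = subport, id 1 = pipe, id 2 =
+ * TC-table index) are each read as one 64-bit slab at byte offset
+ * `offset' into the packet; the value handed to HQoS is
+ * (slab & mask) >> __builtin_ctzll (mask). For example (sketch with
+ * assumed values): mask 0x0000000fff000000 has ctzll = 24, so a slab
+ * of 0x0000000abc000000 yields 0xabc. */
+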
vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + /* Should never happen, shut up Coverity warning */ + if (p == 0) + return clib_error_return (0, "no worker registrations?"); + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + /* Packet field configuration */ + u64 mask = (u64) ~ 0; + u32 id = (u32) ~ 0; + u32 offset = (u32) ~ 0; + + /* HQoS params */ + u32 n_subports_per_port, n_pipes_per_subport, tctbl_size; + + u32 i; + + /* Parse input arguments */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "id %d", &id)) + ; + else if (unformat (line_input, "offset %d", &offset)) + ; + else if (unformat (line_input, "mask %llx", &mask)) + ; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + /* Get interface */ + if (hw_if_index == (u32) ~ 0) + return clib_error_return (0, "please specify valid interface name"); + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get (xd->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + if (devconf->hqos_enabled == 0) + { + vlib_cli_output (vm, "HQoS disabled for this interface"); + return 0; + } + + n_subports_per_port = devconf->hqos.port.n_subports_per_port; + n_pipes_per_subport = devconf->hqos.port.n_pipes_per_subport; + tctbl_size = RTE_DIM (devconf->hqos.tc_table); + + /* Validate packet field configuration: id, offset and mask */ + if (id >= 3) + return clib_error_return (0, "invalid packet field id"); + + switch (id) + { + case 0: + if (dpdk_hqos_validate_mask (mask, n_subports_per_port) != 0) + return clib_error_return (0, "invalid subport ID mask " + "(n_subports_per_port = %u)", + n_subports_per_port); + break; + case 1: + if (dpdk_hqos_validate_mask (mask, n_pipes_per_subport) != 0) + return clib_error_return (0, "invalid pipe ID mask " + "(n_pipes_per_subport = %u)", + n_pipes_per_subport); + break; + case 2: + default: + if (dpdk_hqos_validate_mask (mask, tctbl_size) != 0) + return clib_error_return (0, "invalid TC table index mask " + "(TC table size = %u)", tctbl_size); + } + + /* Propagate packet field configuration to all workers */ + for (i = 0; i < worker_thread_count; i++) + switch (id) + { + case 0: + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabpos = offset; + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabmask = mask; + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabshr = + __builtin_ctzll (mask); + break; + case 1: + xd->hqos_wt[worker_thread_first + i].hqos_field1_slabpos = offset; + 
xd->hqos_wt[worker_thread_first + i].hqos_field1_slabmask = mask;
+ xd->hqos_wt[worker_thread_first + i].hqos_field1_slabshr =
+ __builtin_ctzll (mask);
+ break;
+ case 2:
+ default:
+ xd->hqos_wt[worker_thread_first + i].hqos_field2_slabpos = offset;
+ xd->hqos_wt[worker_thread_first + i].hqos_field2_slabmask = mask;
+ xd->hqos_wt[worker_thread_first + i].hqos_field2_slabshr =
+ __builtin_ctzll (mask);
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pktfield, static) = {
+ .path = "set dpdk interface hqos pktfield",
+ .short_help = "set dpdk interface hqos pktfield <if-name> id <n> offset <n> "
+ "mask <hex-mask>",
+ .function = set_dpdk_if_hqos_pktfield,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_main_t *dm = &dpdk_main;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ dpdk_device_config_hqos_t *cfg;
+ dpdk_device_hqos_per_hqos_thread_t *ht;
+ dpdk_device_hqos_per_worker_thread_t *wk;
+ u32 *tctbl;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 profile_id, i;
+ struct rte_eth_dev_info dev_info;
+ dpdk_device_config_t *devconf = 0;
+ vlib_thread_registration_t *tr;
+ uword *p = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~ 0)
+ return clib_error_return (0, "please specify valid interface name");
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ rte_eth_dev_info_get (xd->device_index, &dev_info);
+ if (dev_info.pci_dev)
+ { /* bonded interface has no pci info */
+ vlib_pci_addr_t pci_addr;
+
+ pci_addr.domain = dev_info.pci_dev->addr.domain;
+ pci_addr.bus = dev_info.pci_dev->addr.bus;
+ pci_addr.slot = dev_info.pci_dev->addr.devid;
+ pci_addr.function = dev_info.pci_dev->addr.function;
+
+ p =
+ hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+ }
+
+ if (p)
+ devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
+ else
+ devconf = &dm->conf->default_devconf;
+
+ if (devconf->hqos_enabled == 0)
+ {
+ vlib_cli_output (vm, "HQoS disabled for this interface");
+ return 0;
+ }
+
+ /* Detect the set of worker threads */
+ p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+
+ /* Should never happen, shut up Coverity warning */
+ if (p == 0)
+ return clib_error_return (0, "no worker registrations?");
+
+ tr = (vlib_thread_registration_t *) p[0];
+
+ cfg = &devconf->hqos;
+ ht = xd->hqos_ht;
+ wk = &xd->hqos_wt[tr->first_index];
+ tctbl = wk->hqos_tc_table;
+
+ vlib_cli_output (vm, " Thread:");
+ vlib_cli_output (vm, " Input SWQ size = %u packets", cfg->swq_size);
+ vlib_cli_output (vm, " Enqueue burst size = %u packets",
+ ht->hqos_burst_enq);
+ vlib_cli_output (vm, " Dequeue burst size = %u packets",
+ ht->hqos_burst_deq);
+
+ vlib_cli_output (vm,
+ " Packet field 0: slab position = %4u, slab bitmask = 0x%016llx",
+ wk->hqos_field0_slabpos, wk->hqos_field0_slabmask);
+ vlib_cli_output (vm,
+ " Packet field 1: slab position = %4u, slab bitmask = 0x%016llx",
+ wk->hqos_field1_slabpos, wk->hqos_field1_slabmask);
+
vlib_cli_output (vm, + " Packet field 2: slab position = %4u, slab bitmask = 0x%016llx", + wk->hqos_field2_slabpos, wk->hqos_field2_slabmask); + vlib_cli_output (vm, " Packet field 2 translation table:"); + vlib_cli_output (vm, " [ 0 .. 15]: " + "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u", + tctbl[0], tctbl[1], tctbl[2], tctbl[3], + tctbl[4], tctbl[5], tctbl[6], tctbl[7], + tctbl[8], tctbl[9], tctbl[10], tctbl[11], + tctbl[12], tctbl[13], tctbl[14], tctbl[15]); + vlib_cli_output (vm, " [16 .. 31]: " + "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u", + tctbl[16], tctbl[17], tctbl[18], tctbl[19], + tctbl[20], tctbl[21], tctbl[22], tctbl[23], + tctbl[24], tctbl[25], tctbl[26], tctbl[27], + tctbl[28], tctbl[29], tctbl[30], tctbl[31]); + vlib_cli_output (vm, " [32 .. 47]: " + "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u", + tctbl[32], tctbl[33], tctbl[34], tctbl[35], + tctbl[36], tctbl[37], tctbl[38], tctbl[39], + tctbl[40], tctbl[41], tctbl[42], tctbl[43], + tctbl[44], tctbl[45], tctbl[46], tctbl[47]); + vlib_cli_output (vm, " [48 .. 63]: " + "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u", + tctbl[48], tctbl[49], tctbl[50], tctbl[51], + tctbl[52], tctbl[53], tctbl[54], tctbl[55], + tctbl[56], tctbl[57], tctbl[58], tctbl[59], + tctbl[60], tctbl[61], tctbl[62], tctbl[63]); + + vlib_cli_output (vm, " Port:"); + vlib_cli_output (vm, " Rate = %u bytes/second", cfg->port.rate); + vlib_cli_output (vm, " MTU = %u bytes", cfg->port.mtu); + vlib_cli_output (vm, " Frame overhead = %u bytes", + cfg->port.frame_overhead); + vlib_cli_output (vm, " Number of subports = %u", + cfg->port.n_subports_per_port); + vlib_cli_output (vm, " Number of pipes per subport = %u", + cfg->port.n_pipes_per_subport); + vlib_cli_output (vm, + " Packet queue size: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u packets", + cfg->port.qsize[0], cfg->port.qsize[1], cfg->port.qsize[2], + cfg->port.qsize[3]); + vlib_cli_output (vm, " Number of pipe profiles = %u", + cfg->port.n_pipe_profiles); + + for (profile_id = 0; profile_id < vec_len (cfg->pipe); profile_id++) + { + vlib_cli_output (vm, " Pipe profile %u:", profile_id); + vlib_cli_output (vm, " Rate = %u bytes/second", + cfg->pipe[profile_id].tb_rate); + vlib_cli_output (vm, " Token bucket size = %u bytes", + cfg->pipe[profile_id].tb_size); + vlib_cli_output (vm, + " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", + cfg->pipe[profile_id].tc_rate[0], + cfg->pipe[profile_id].tc_rate[1], + cfg->pipe[profile_id].tc_rate[2], + cfg->pipe[profile_id].tc_rate[3]); + vlib_cli_output (vm, " TC period = %u milliseconds", + cfg->pipe[profile_id].tc_period); +#ifdef RTE_SCHED_SUBPORT_TC_OV + vlib_cli_output (vm, " TC3 oversubscription_weight = %u", + cfg->pipe[profile_id].tc_ov_weight); +#endif + + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + { + vlib_cli_output (vm, + " TC%u WRR weights: Q0 = %u, Q1 = %u, Q2 = %u, Q3 = %u", + i, cfg->pipe[profile_id].wrr_weights[i * 4], + cfg->pipe[profile_id].wrr_weights[i * 4 + 1], + cfg->pipe[profile_id].wrr_weights[i * 4 + 2], + cfg->pipe[profile_id].wrr_weights[i * 4 + 3]); + } + } + +#ifdef RTE_SCHED_RED + vlib_cli_output (vm, " Weighted Random Early Detection (WRED):"); + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + { + vlib_cli_output (vm, " TC%u min: G = %u, Y = %u, R = %u", i, + cfg->port.red_params[i][e_RTE_METER_GREEN].min_th, + cfg->port.red_params[i][e_RTE_METER_YELLOW].min_th, + 
cfg->port.red_params[i][e_RTE_METER_RED].min_th);
+
+ vlib_cli_output (vm, " TC%u max: G = %u, Y = %u, R = %u", i,
+ cfg->port.red_params[i][e_RTE_METER_GREEN].max_th,
+ cfg->port.red_params[i][e_RTE_METER_YELLOW].max_th,
+ cfg->port.red_params[i][e_RTE_METER_RED].max_th);
+
+ vlib_cli_output (vm,
+ " TC%u inverted probability: G = %u, Y = %u, R = %u",
+ i, cfg->port.red_params[i][e_RTE_METER_GREEN].maxp_inv,
+ cfg->port.red_params[i][e_RTE_METER_YELLOW].maxp_inv,
+ cfg->port.red_params[i][e_RTE_METER_RED].maxp_inv);
+
+ vlib_cli_output (vm, " TC%u weight: G = %u, Y = %u, R = %u", i,
+ cfg->port.red_params[i][e_RTE_METER_GREEN].wq_log2,
+ cfg->port.red_params[i][e_RTE_METER_YELLOW].wq_log2,
+ cfg->port.red_params[i][e_RTE_METER_RED].wq_log2);
+ }
+#endif
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos, static) = {
+ .path = "show dpdk interface hqos",
+ .short_help = "show dpdk interface hqos <if-name>",
+ .function = show_dpdk_if_hqos,
+};
+
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ dpdk_main_t *dm = &dpdk_main;
+ u32 hw_if_index = (u32) ~ 0;
+ u32 subport = (u32) ~ 0;
+ u32 pipe = (u32) ~ 0;
+ u32 tc = (u32) ~ 0;
+ u32 tc_q = (u32) ~ 0;
+ vnet_hw_interface_t *hw;
+ dpdk_device_t *xd;
+ uword *p = 0;
+ struct rte_eth_dev_info dev_info;
+ dpdk_device_config_t *devconf = 0;
+ u32 qindex;
+ struct rte_sched_queue_stats stats;
+ u16 qlen;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
+ &hw_if_index))
+ ;
+
+ else if (unformat (line_input, "subport %d", &subport))
+ ;
+
+ else if (unformat (line_input, "pipe %d", &pipe))
+ ;
+
+ else if (unformat (line_input, "tc %d", &tc))
+ ;
+
+ else if (unformat (line_input, "tc_q %d", &tc_q))
+ ;
+
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~ 0)
+ return clib_error_return (0, "please specify valid interface name");
+
+ hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+ rte_eth_dev_info_get (xd->device_index, &dev_info);
+ if (dev_info.pci_dev)
+ { /* bonded interface has no pci info */
+ vlib_pci_addr_t pci_addr;
+
+ pci_addr.domain = dev_info.pci_dev->addr.domain;
+ pci_addr.bus = dev_info.pci_dev->addr.bus;
+ pci_addr.slot = dev_info.pci_dev->addr.devid;
+ pci_addr.function = dev_info.pci_dev->addr.function;
+
+ p =
+ hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+ }
+
+ if (p)
+ devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
+ else
+ devconf = &dm->conf->default_devconf;
+
+ if (devconf->hqos_enabled == 0)
+ {
+ vlib_cli_output (vm, "HQoS disabled for this interface");
+ return 0;
+ }
+
+ /*
+ * Figure out which queue to query. cf rte_sched_port_qindex. (Not sure why
+ * that method isn't made public by DPDK - how _should_ we get the queue ID?)
+ */
+ qindex = subport * devconf->hqos.port.n_pipes_per_subport + pipe;
+ qindex = qindex * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc;
+ qindex = qindex * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q;
+
+ if (rte_sched_queue_read_stats (xd->hqos_ht->hqos, qindex, &stats, &qlen) !=
+ 0)
+ return clib_error_return (0, "failed to read stats");
+
+ vlib_cli_output (vm, "%=24s%=16s", "Stats Parameter", "Value");
+ vlib_cli_output (vm, "%=24s%=16d", "Packets", stats.n_pkts);
+ vlib_cli_output (vm, "%=24s%=16d", "Packets dropped", stats.n_pkts_dropped);
+#ifdef RTE_SCHED_RED
+ vlib_cli_output (vm, "%=24s%=16d", "Packets dropped (RED)",
+ stats.n_pkts_red_dropped);
+#endif
+ vlib_cli_output (vm, "%=24s%=16d", "Bytes", stats.n_bytes);
+ vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped);
+
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = {
+ .path = "show dpdk hqos queue",
+ .short_help = "show dpdk hqos queue <if-name> subport <subport-id> pipe <pipe-id> tc <tc-id> tc_q <queue-id>",
+ .function = show_dpdk_hqos_queue_stats,
+};
+/* *INDENT-ON* */
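The qindex arithmetic in show_dpdk_hqos_queue_stats flattens the (subport, pipe, tc, tc_q) coordinates into rte_sched's linear queue numbering. A worked example with assumed dimensions (n_pipes_per_subport = 4096, the rte_sched default; 4 traffic classes per pipe; 4 queues per class):

/* Sketch only: queue index for subport 0, pipe 3, tc 2, tc_q 1. */
u32 qindex;
qindex = 0 * 4096 + 3;    /* = 3,  pipe within the port       */
qindex = qindex * 4 + 2;  /* = 14, traffic class within pipe  */
qindex = qindex * 4 + 1;  /* = 57, queue within traffic class */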
+
+clib_error_t *
+dpdk_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (dpdk_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
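Every command in cli.c follows the same two-part pattern: an unformat-driven handler plus a VLIB_CLI_COMMAND registration. As a hedged, self-contained sketch of that pattern (all names hypothetical):

static clib_error_t *
example_command_fn (vlib_main_t * vm, unformat_input_t * input,
                    vlib_cli_command_t * cmd)
{
  u32 value = 0;

  /* Consume recognized tokens; anything else is a parse error. */
  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "value %d", &value))
        ;
      else
        return clib_error_return (0, "parse error: '%U'",
                                  format_unformat_error, input);
    }

  vlib_cli_output (vm, "value = %u", value);
  return 0;
}

VLIB_CLI_COMMAND (example_command, static) = {
  .path = "show example",
  .short_help = "show example [value <n>]",
  .function = example_command_fn,
};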
diff --git a/src/vnet/devices/dpdk/device.c b/src/vnet/devices/dpdk/device.c new file mode 100644 index 00000000..b22fbf2e --- /dev/null +++ b/src/vnet/devices/dpdk/device.c @@ -0,0 +1,840 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/format.h>
+#include <vlib/unix/cj.h>
+#include <assert.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/dpdk/dpdk.h>
+
+#include "dpdk_priv.h"
+#include <vppinfra/error.h>
+
+#define foreach_dpdk_tx_func_error \
+ _(BAD_RETVAL, "DPDK tx function returned an error") \
+ _(RING_FULL, "Tx packet drops (ring full)") \
+ _(PKT_DROP, "Tx packet drops (dpdk tx failure)") \
+ _(REPL_FAIL, "Tx packet drops (replication failure)")
+
+typedef enum
+{
+#define _(f,s) DPDK_TX_FUNC_ERROR_##f,
+ foreach_dpdk_tx_func_error
+#undef _
+ DPDK_TX_FUNC_N_ERROR,
+} dpdk_tx_func_error_t;
+
+static char *dpdk_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_dpdk_tx_func_error
+#undef _
+};
+
+clib_error_t *
+dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address)
+{
+ int error;
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
+
+ error = rte_eth_dev_default_mac_addr_set (xd->device_index,
+ (struct ether_addr *) address);
+
+ if (error)
+ {
+ return clib_error_return (0, "mac address set failed: %d", error);
+ }
+ else
+ {
+ return NULL;
+ }
+}
+
+clib_error_t *
+dpdk_set_mc_filter (vnet_hw_interface_t * hi,
+ struct ether_addr mc_addr_vec[], int naddr)
+{
+ int error;
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
+
+ error = rte_eth_dev_set_mc_addr_list (xd->device_index, mc_addr_vec, naddr);
+
+ if (error)
+ {
+ return clib_error_return (0, "mc addr list failed: %d", error);
+ }
+ else
+ {
+ return NULL;
+ }
+}
+
+struct rte_mbuf *
+dpdk_replicate_packet_mb (vlib_buffer_t * b)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ struct rte_mbuf **mbufs = 0, *s, *d;
+ u8 nb_segs;
+ unsigned socket_id = rte_socket_id ();
+ int i;
+
+ ASSERT (bm->pktmbuf_pools[socket_id]);
+ s = rte_mbuf_from_vlib_buffer (b);
+ nb_segs = s->nb_segs;
+ vec_validate (mbufs, nb_segs - 1);
+
+ if (rte_pktmbuf_alloc_bulk (bm->pktmbuf_pools[socket_id], mbufs, nb_segs))
+ {
+ vec_free (mbufs);
+ return 0;
+ }
+
+ d = mbufs[0];
+ d->nb_segs = s->nb_segs;
+ d->data_len = s->data_len;
+ d->pkt_len = s->pkt_len;
+ d->data_off = s->data_off;
+ clib_memcpy (d->buf_addr, s->buf_addr, RTE_PKTMBUF_HEADROOM + s->data_len);
+
+ for (i = 1; i < nb_segs; i++)
+ {
+ d->next = mbufs[i];
+ d = mbufs[i];
+ s = s->next;
+ d->data_len = s->data_len;
+ clib_memcpy (d->buf_addr, s->buf_addr,
+ RTE_PKTMBUF_HEADROOM + s->data_len);
+ }
+
+ d = mbufs[0];
+ vec_free (mbufs);
+ return d;
+}
+
+static void
+dpdk_tx_trace_buffer (dpdk_main_t * dm,
+ vlib_node_runtime_t * node,
+ dpdk_device_t * xd,
+ u16 queue_id, u32 buffer_index, vlib_buffer_t * buffer)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ dpdk_tx_dma_trace_t *t0;
+ struct rte_mbuf *mb;
+
+ mb = rte_mbuf_from_vlib_buffer (buffer);
+
+ t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0]));
+ t0->queue_index = queue_id;
+ t0->device_index = xd->device_index;
+ t0->buffer_index = buffer_index;
+ clib_memcpy (&t0->mb, mb, sizeof (t0->mb));
+ clib_memcpy (&t0->buffer, buffer,
+ sizeof (buffer[0]) - sizeof (buffer->pre_data));
+ clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data,
+ sizeof (t0->buffer.pre_data));
+}
+
+static_always_inline void
+dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b,
+ int maybe_multiseg)
+{
+ struct rte_mbuf *mb, *first_mb, *last_mb;
+
+ /* buffer is coming from non-dpdk source so we need to init
+ rte_mbuf header */
+ if (PREDICT_FALSE ((b->flags & VNET_BUFFER_RTE_MBUF_VALID) == 0))
+ {
+ vlib_buffer_t *b2 = b;
+ last_mb = mb =
rte_mbuf_from_vlib_buffer (b2); + rte_pktmbuf_reset (mb); + while (maybe_multiseg && (b2->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + b2 = vlib_get_buffer (vm, b2->next_buffer); + mb = rte_mbuf_from_vlib_buffer (b2); + last_mb->next = mb; + last_mb = mb; + rte_pktmbuf_reset (mb); + } + } + + first_mb = mb = rte_mbuf_from_vlib_buffer (b); + first_mb->nb_segs = 1; + mb->data_len = b->current_length; + mb->pkt_len = maybe_multiseg ? vlib_buffer_length_in_chain (vm, b) : + b->current_length; + mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; + + while (maybe_multiseg && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + b = vlib_get_buffer (vm, b->next_buffer); + mb = rte_mbuf_from_vlib_buffer (b); + mb->data_len = b->current_length; + mb->pkt_len = b->current_length; + mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; + first_mb->nb_segs++; + } +} + +/* + * This function calls the dpdk's tx_burst function to transmit the packets + * on the tx_vector. It manages a lock per-device if the device does not + * support multiple queues. It returns the number of packets untransmitted + * on the tx_vector. If all packets are transmitted (the normal case), the + * function returns 0. + * + * The function assumes there is at least one packet on the tx_vector. + */ +static_always_inline + u32 tx_burst_vector_internal (vlib_main_t * vm, + dpdk_device_t * xd, + struct rte_mbuf **tx_vector) +{ + dpdk_main_t *dm = &dpdk_main; + u32 n_packets; + u32 tx_head; + u32 tx_tail; + u32 n_retry; + int rv; + int queue_id; + tx_ring_hdr_t *ring; + + ring = vec_header (tx_vector, sizeof (*ring)); + + n_packets = ring->tx_head - ring->tx_tail; + + tx_head = ring->tx_head % xd->nb_tx_desc; + + /* + * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to + * unpredictable results. + */ + ASSERT (n_packets > 0); + + /* + * Check for tx_vector overflow. If this fails it is a system configuration + * error. The ring should be sized big enough to handle the largest un-flowed + * off burst from a traffic manager. A larger size also helps performance + * a bit because it decreases the probability of having to issue two tx_burst + * calls due to a ring wrap. + */ + ASSERT (n_packets < xd->nb_tx_desc); + ASSERT (ring->tx_tail == 0); + + n_retry = 16; + queue_id = vm->cpu_index; + + do + { + /* start the burst at the tail */ + tx_tail = ring->tx_tail % xd->nb_tx_desc; + + /* + * This device only supports one TX queue, + * and we're running multi-threaded... 
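+ * Serialize on a per-queue spinlock: take slot (queue_id % tx_q_used)
+ * with __sync_lock_test_and_set and, if that queue is already owned,
+ * advance to the next queue and retry rather than block.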
+ */ + if (PREDICT_FALSE (xd->lockp != 0)) + { + queue_id = queue_id % xd->tx_q_used; + while (__sync_lock_test_and_set (xd->lockp[queue_id], 1)) + /* zzzz */ + queue_id = (queue_id + 1) % xd->tx_q_used; + } + + if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */ + { + /* no wrap, transmit in one burst */ + dpdk_device_hqos_per_worker_thread_t *hqos = + &xd->hqos_wt[vm->cpu_index]; + + ASSERT (hqos->swq != NULL); + + dpdk_hqos_metadata_set (hqos, + &tx_vector[tx_tail], tx_head - tx_tail); + rv = rte_ring_sp_enqueue_burst (hqos->swq, + (void **) &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); + } + else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) + { + /* no wrap, transmit in one burst */ + rv = rte_eth_tx_burst (xd->device_index, + (uint16_t) queue_id, + &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); + } + else + { + ASSERT (0); + rv = 0; + } + + if (PREDICT_FALSE (xd->lockp != 0)) + *xd->lockp[queue_id] = 0; + + if (PREDICT_FALSE (rv < 0)) + { + // emit non-fatal message, bump counter + vnet_main_t *vnm = dm->vnet_main; + vnet_interface_main_t *im = &vnm->interface_main; + u32 node_index; + + node_index = vec_elt_at_index (im->hw_interfaces, + xd->vlib_hw_if_index)->tx_node_index; + + vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1); + clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index, + rv); + return n_packets; // untransmitted packets + } + ring->tx_tail += (u16) rv; + n_packets -= (uint16_t) rv; + } + while (rv && n_packets && (n_retry > 0)); + + return n_packets; +} + +static_always_inline void +dpdk_prefetch_buffer_by_index (vlib_main_t * vm, u32 bi) +{ + vlib_buffer_t *b; + struct rte_mbuf *mb; + b = vlib_get_buffer (vm, bi); + mb = rte_mbuf_from_vlib_buffer (b); + CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); +} + +static_always_inline void +dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp) +{ + dpdk_main_t *dm = &dpdk_main; + u32 my_cpu = vm->cpu_index; + struct rte_mbuf *mb_new; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0) + return; + + mb_new = dpdk_replicate_packet_mb (b); + if (PREDICT_FALSE (mb_new == 0)) + { + vlib_error_count (vm, node->node_index, + DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); + b->flags |= VLIB_BUFFER_REPL_FAIL; + } + else + *mbp = mb_new; + + vec_add1 (dm->recycle[my_cpu], bi); +} + +/* + * Transmits the packets on the frame to the interface associated with the + * node. It first copies packets on the frame to a tx_vector containing the + * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal + * which calls the dpdk tx_burst function. 
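+ *
+ * In outline: each vlib_buffer_t is validated as an rte_mbuf
+ * (dpdk_validate_rte_mbuf), buffers marked for recycling are
+ * replicated first, the resulting mbufs are staged at tx_head, and
+ * tx_burst_vector_internal drains from tx_tail; anything left
+ * untransmitted is counted as an error and freed.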
+ */ +static uword +dpdk_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * f) +{ + dpdk_main_t *dm = &dpdk_main; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance); + u32 n_packets = f->n_vectors; + u32 n_left; + u32 *from; + struct rte_mbuf **tx_vector; + u16 i; + u16 nb_tx_desc = xd->nb_tx_desc; + int queue_id; + u32 my_cpu; + u32 tx_pkts = 0; + tx_ring_hdr_t *ring; + u32 n_on_ring; + + my_cpu = vm->cpu_index; + + queue_id = my_cpu; + + tx_vector = xd->tx_vectors[queue_id]; + ring = vec_header (tx_vector, sizeof (*ring)); + + n_on_ring = ring->tx_head - ring->tx_tail; + from = vlib_frame_vector_args (f); + + ASSERT (n_packets <= VLIB_FRAME_SIZE); + + if (PREDICT_FALSE (n_on_ring + n_packets > nb_tx_desc)) + { + /* + * Overflowing the ring should never happen. + * If it does then drop the whole frame. + */ + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL, + n_packets); + + while (n_packets--) + { + u32 bi0 = from[n_packets]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer (b0); + rte_pktmbuf_free (mb0); + } + return n_on_ring; + } + + if (PREDICT_FALSE (dm->tx_pcap_enable)) + { + n_left = n_packets; + while (n_left > 0) + { + u32 bi0 = from[0]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + if (dm->pcap_sw_if_index == 0 || + dm->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_TX]) + pcap_add_buffer (&dm->pcap_main, vm, bi0, 512); + from++; + n_left--; + } + } + + from = vlib_frame_vector_args (f); + n_left = n_packets; + i = ring->tx_head % nb_tx_desc; + + while (n_left >= 8) + { + u32 bi0, bi1, bi2, bi3; + struct rte_mbuf *mb0, *mb1, *mb2, *mb3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 or_flags; + + dpdk_prefetch_buffer_by_index (vm, from[4]); + dpdk_prefetch_buffer_by_index (vm, from[5]); + dpdk_prefetch_buffer_by_index (vm, from[6]); + dpdk_prefetch_buffer_by_index (vm, from[7]); + + bi0 = from[0]; + bi1 = from[1]; + bi2 = from[2]; + bi3 = from[3]; + from += 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + or_flags = b0->flags | b1->flags | b2->flags | b3->flags; + + if (or_flags & VLIB_BUFFER_NEXT_PRESENT) + { + dpdk_validate_rte_mbuf (vm, b0, 1); + dpdk_validate_rte_mbuf (vm, b1, 1); + dpdk_validate_rte_mbuf (vm, b2, 1); + dpdk_validate_rte_mbuf (vm, b3, 1); + } + else + { + dpdk_validate_rte_mbuf (vm, b0, 0); + dpdk_validate_rte_mbuf (vm, b1, 0); + dpdk_validate_rte_mbuf (vm, b2, 0); + dpdk_validate_rte_mbuf (vm, b3, 0); + } + + mb0 = rte_mbuf_from_vlib_buffer (b0); + mb1 = rte_mbuf_from_vlib_buffer (b1); + mb2 = rte_mbuf_from_vlib_buffer (b2); + mb3 = rte_mbuf_from_vlib_buffer (b3); + + if (PREDICT_FALSE (or_flags & VLIB_BUFFER_RECYCLE)) + { + dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); + dpdk_buffer_recycle (vm, node, b1, bi1, &mb1); + dpdk_buffer_recycle (vm, node, b2, bi2, &mb2); + dpdk_buffer_recycle (vm, node, b3, bi3, &mb3); + + /* dont enqueue packets if replication failed as they must + be sent back to recycle */ + if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb0; + if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb1; + if (PREDICT_TRUE ((b2->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb2; + if (PREDICT_TRUE ((b3->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + 
tx_vector[i++ % nb_tx_desc] = mb3; + } + else + { + if (PREDICT_FALSE (i + 3 >= nb_tx_desc)) + { + tx_vector[i++ % nb_tx_desc] = mb0; + tx_vector[i++ % nb_tx_desc] = mb1; + tx_vector[i++ % nb_tx_desc] = mb2; + tx_vector[i++ % nb_tx_desc] = mb3; + i %= nb_tx_desc; + } + else + { + tx_vector[i++] = mb0; + tx_vector[i++] = mb1; + tx_vector[i++] = mb2; + tx_vector[i++] = mb3; + } + } + + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); + if (b1->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1); + if (b2->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi2, b2); + if (b3->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi3, b3); + } + + n_left -= 4; + } + while (n_left > 0) + { + u32 bi0; + struct rte_mbuf *mb0; + vlib_buffer_t *b0; + + bi0 = from[0]; + from++; + + b0 = vlib_get_buffer (vm, bi0); + + dpdk_validate_rte_mbuf (vm, b0, 1); + + mb0 = rte_mbuf_from_vlib_buffer (b0); + dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + if (b0->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); + + if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + { + tx_vector[i % nb_tx_desc] = mb0; + i++; + } + n_left--; + } + + /* account for additional packets in the ring */ + ring->tx_head += n_packets; + n_on_ring = ring->tx_head - ring->tx_tail; + + /* transmit as many packets as possible */ + n_packets = tx_burst_vector_internal (vm, xd, tx_vector); + + /* + * tx_pkts is the number of packets successfully transmitted + * This is the number originally on ring minus the number remaining on ring + */ + tx_pkts = n_on_ring - n_packets; + + { + /* If there is no callback then drop any non-transmitted packets */ + if (PREDICT_FALSE (n_packets)) + { + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + n_packets); + + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, + n_packets); + + while (n_packets--) + rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); + } + + /* Reset head/tail to avoid unnecessary wrap */ + ring->tx_head = 0; + ring->tx_tail = 0; + } + + /* Recycle replicated buffers */ + if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu]))) + { + vlib_buffer_free (vm, dm->recycle[my_cpu], + vec_len (dm->recycle[my_cpu])); + _vec_len (dm->recycle[my_cpu]) = 0; + } + + ASSERT (ring->tx_head >= ring->tx_tail); + + return tx_pkts; +} + +static void +dpdk_clear_hw_interface_counters (u32 instance) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, instance); + + /* + * Set the "last_cleared_stats" to the current stats, so that + * things appear to clear from a display perspective. 
+ */ + dpdk_update_counters (xd, vlib_time_now (dm->vlib_main)); + + clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof (xd->stats)); + clib_memcpy (xd->last_cleared_xstats, xd->xstats, + vec_len (xd->last_cleared_xstats) * + sizeof (xd->last_cleared_xstats[0])); + +} + +static clib_error_t * +dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hif->dev_instance); + int rv = 0; + + if (is_up) + { + f64 now = vlib_time_now (dm->vlib_main); + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) + rv = rte_eth_dev_start (xd->device_index); + + if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) + rte_eth_promiscuous_enable (xd->device_index); + else + rte_eth_promiscuous_disable (xd->device_index); + + rte_eth_allmulticast_enable (xd->device_index); + xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP; + dpdk_update_counters (xd, now); + dpdk_update_link_state (xd, now); + } + else + { + xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP; + + rte_eth_allmulticast_disable (xd->device_index); + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); + rte_eth_dev_stop (xd->device_index); + + /* For bonded interface, stop slave links */ + if (xd->pmd == VNET_DPDK_PMD_BOND) + { + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16); + while (nlink >= 1) + { + u8 dpdk_port = slink[--nlink]; + rte_eth_dev_stop (dpdk_port); + } + } + } + + if (rv < 0) + clib_warning ("rte_eth_dev_%s error: %d", is_up ? "start" : "stop", rv); + + return /* no error */ 0; +} + +/* + * Dynamically redirect all pkts from a specific interface + * to the specified node + */ +static void +dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + dpdk_main_t *xm = &dpdk_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + xd->per_interface_next_index = node_index; + return; + } + + xd->per_interface_next_index = + vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index); +} + + +static clib_error_t * +dpdk_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + dpdk_main_t *xm = &dpdk_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + vnet_sw_interface_t *t = (vnet_sw_interface_t *) st; + int r, vlan_offload; + u32 prev_subifs = xd->num_subifs; + clib_error_t *err = 0; + + if (is_add) + xd->num_subifs++; + else if (xd->num_subifs) + xd->num_subifs--; + + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + goto done; + + /* currently we program VLANS only for IXGBE VF and I40E VF */ + if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF)) + goto done; + + if (t->sub.eth.flags.no_tags == 1) + goto done; + + if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1)) + { + xd->num_subifs = prev_subifs; + err = clib_error_return (0, "unsupported VLAN setup"); + goto done; + } + + vlan_offload = rte_eth_dev_get_vlan_offload (xd->device_index); + vlan_offload |= ETH_VLAN_FILTER_OFFLOAD; + + if ((r = rte_eth_dev_set_vlan_offload (xd->device_index, vlan_offload))) + { + xd->num_subifs = prev_subifs; + 
err = clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d", + xd->device_index, r); + goto done; + } + + + if ((r = + rte_eth_dev_vlan_filter (xd->device_index, t->sub.eth.outer_vlan_id, + is_add))) + { + xd->num_subifs = prev_subifs; + err = clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d", + xd->device_index, r); + goto done; + } + +done: + if (xd->num_subifs) + xd->flags |= DPDK_DEVICE_FLAG_HAVE_SUBIF; + else + xd->flags &= ~DPDK_DEVICE_FLAG_HAVE_SUBIF; + + return err; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (dpdk_device_class) = { + .name = "dpdk", + .tx_function = dpdk_interface_tx, + .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR, + .tx_function_error_strings = dpdk_tx_func_error_strings, + .format_device_name = format_dpdk_device_name, + .format_device = format_dpdk_device, + .format_tx_trace = format_dpdk_tx_dma_trace, + .clear_counters = dpdk_clear_hw_interface_counters, + .admin_up_down_function = dpdk_interface_admin_up_down, + .subif_add_del_function = dpdk_subif_add_del_function, + .rx_redirect_to_node = dpdk_set_interface_next_node, + .mac_addr_change_function = dpdk_set_mac_address, +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx) +/* *INDENT-ON* */ + +#define UP_DOWN_FLAG_EVENT 1 + +uword +admin_up_down_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + clib_error_t *error = 0; + uword event_type; + uword *event_data = 0; + u32 sw_if_index; + u32 flags; + + while (1) + { + vlib_process_wait_for_event (vm); + + event_type = vlib_process_get_events (vm, &event_data); + + dpdk_main.admin_up_down_in_progress = 1; + + switch (event_type) + { + case UP_DOWN_FLAG_EVENT: + { + if (vec_len (event_data) == 2) + { + sw_if_index = event_data[0]; + flags = event_data[1]; + error = + vnet_sw_interface_set_flags (vnet_get_main (), sw_if_index, + flags); + clib_error_report (error); + } + } + break; + } + + vec_reset_length (event_data); + + dpdk_main.admin_up_down_in_progress = 0; + + } + return 0; /* or not */ +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (admin_up_down_process_node,static) = { + .function = admin_up_down_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "admin-up-down-process", + .process_log2_n_stack_bytes = 17, // 256KB +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h new file mode 100644 index 00000000..d8f378d2 --- /dev/null +++ b/src/vnet/devices/dpdk/dpdk.h @@ -0,0 +1,534 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_dpdk_h__ +#define __included_dpdk_h__ + +/* $$$$ We should rename always_inline -> clib_always_inline */ +#undef always_inline + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if CLIB_DEBUG > 0 +#define always_inline static inline +#else +#define always_inline static inline __attribute__ ((__always_inline__)) +#endif + +#include + +#define NB_MBUF (16<<10) + +extern vnet_device_class_t dpdk_device_class; +extern vlib_node_registration_t dpdk_input_node; +extern vlib_node_registration_t handoff_dispatch_node; + +#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) +#define foreach_dpdk_pmd \ + _ ("net_thunderx", THUNDERX) \ + _ ("net_e1000_em", E1000EM) \ + _ ("net_e1000_igb", IGB) \ + _ ("net_e1000_igb_vf", IGBVF) \ + _ ("net_ixgbe", IXGBE) \ + _ ("net_ixgbe_vf", IXGBEVF) \ + _ ("net_i40e", I40E) \ + _ ("net_i40e_vf", I40EVF) \ + _ ("net_virtio", VIRTIO) \ + _ ("net_enic", ENIC) \ + _ ("net_vmxnet3", VMXNET3) \ + _ ("net_af_packet", AF_PACKET) \ + _ ("rte_bond_pmd", BOND) \ + _ ("net_fm10k", FM10K) \ + _ ("net_cxgbe", CXGBE) \ + _ ("net_mlx5", MLX5) \ + _ ("net_dpaa2", DPAA2) +#else +#define foreach_dpdk_pmd \ + _ ("rte_nicvf_pmd", THUNDERX) \ + _ ("rte_em_pmd", E1000EM) \ + _ ("rte_igb_pmd", IGB) \ + _ ("rte_igbvf_pmd", IGBVF) \ + _ ("rte_ixgbe_pmd", IXGBE) \ + _ ("rte_ixgbevf_pmd", IXGBEVF) \ + _ ("rte_i40e_pmd", I40E) \ + _ ("rte_i40evf_pmd", I40EVF) \ + _ ("rte_virtio_pmd", VIRTIO) \ + _ ("rte_enic_pmd", ENIC) \ + _ ("rte_vmxnet3_pmd", VMXNET3) \ + _ ("AF_PACKET PMD", AF_PACKET) \ + _ ("rte_bond_pmd", BOND) \ + _ ("rte_pmd_fm10k", FM10K) \ + _ ("rte_cxgbe_pmd", CXGBE) \ + _ ("rte_dpaa2_dpni", DPAA2) +#endif + +typedef enum +{ + VNET_DPDK_PMD_NONE, +#define _(s,f) VNET_DPDK_PMD_##f, + foreach_dpdk_pmd +#undef _ + VNET_DPDK_PMD_UNKNOWN, /* must be last */ +} dpdk_pmd_t; + +typedef enum +{ + VNET_DPDK_PORT_TYPE_ETH_1G, + VNET_DPDK_PORT_TYPE_ETH_10G, + VNET_DPDK_PORT_TYPE_ETH_40G, + VNET_DPDK_PORT_TYPE_ETH_100G, + VNET_DPDK_PORT_TYPE_ETH_BOND, + VNET_DPDK_PORT_TYPE_ETH_SWITCH, + VNET_DPDK_PORT_TYPE_AF_PACKET, + VNET_DPDK_PORT_TYPE_UNKNOWN, +} dpdk_port_type_t; + +/* + * The header for the tx_vector in dpdk_device_t. + * Head and tail are indexes into the tx_vector and are of type + * u64 so they never overflow. 
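+ * Consumers reduce them modulo the ring size at the point of use,
+ * e.g. slot = ring->tx_head % xd->nb_tx_desc.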
+ */ +typedef struct +{ + u64 tx_head; + u64 tx_tail; +} tx_ring_hdr_t; + +typedef struct +{ + struct rte_ring *swq; + + u64 hqos_field0_slabmask; + u32 hqos_field0_slabpos; + u32 hqos_field0_slabshr; + u64 hqos_field1_slabmask; + u32 hqos_field1_slabpos; + u32 hqos_field1_slabshr; + u64 hqos_field2_slabmask; + u32 hqos_field2_slabpos; + u32 hqos_field2_slabshr; + u32 hqos_tc_table[64]; +} dpdk_device_hqos_per_worker_thread_t; + +typedef struct +{ + struct rte_ring **swq; + struct rte_mbuf **pkts_enq; + struct rte_mbuf **pkts_deq; + struct rte_sched_port *hqos; + u32 hqos_burst_enq; + u32 hqos_burst_deq; + u32 pkts_enq_len; + u32 swq_pos; + u32 flush_count; +} dpdk_device_hqos_per_hqos_thread_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + volatile u32 **lockp; + + /* Instance ID */ + u32 device_index; + + u32 vlib_hw_if_index; + u32 vlib_sw_if_index; + + /* next node index if we decide to steal the rx graph arc */ + u32 per_interface_next_index; + + /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */ + struct rte_mbuf ***tx_vectors; /* one per worker thread */ + struct rte_mbuf ***rx_vectors; + + /* vector of traced contexts, per device */ + u32 **d_trace_buffers; + + dpdk_pmd_t pmd:8; + i8 cpu_socket; + + u16 flags; +#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0) +#define DPDK_DEVICE_FLAG_PROMISC (1 << 1) +#define DPDK_DEVICE_FLAG_PMD (1 << 2) +#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3) +#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4) +#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5) +#define DPDK_DEVICE_FLAG_HQOS (1 << 6) + + u16 nb_tx_desc; + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); + + u8 *interface_name_suffix; + + /* number of sub-interfaces */ + u16 num_subifs; + + /* PMD related */ + u16 tx_q_used; + u16 rx_q_used; + u16 nb_rx_desc; + u16 *cpu_socket_id_by_queue; + struct rte_eth_conf port_conf; + struct rte_eth_txconf tx_conf; + + /* HQoS related */ + dpdk_device_hqos_per_worker_thread_t *hqos_wt; + dpdk_device_hqos_per_hqos_thread_t *hqos_ht; + + /* af_packet */ + u8 af_packet_port_id; + + struct rte_eth_link link; + f64 time_last_link_update; + + struct rte_eth_stats stats; + struct rte_eth_stats last_stats; + struct rte_eth_stats last_cleared_stats; + struct rte_eth_xstat *xstats; + struct rte_eth_xstat *last_cleared_xstats; + f64 time_last_stats_update; + dpdk_port_type_t port_type; +} dpdk_device_t; + +#define DPDK_STATS_POLL_INTERVAL (10.0) +#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */ + +#define DPDK_LINK_POLL_INTERVAL (3.0) +#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */ + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /* total input packet counter */ + u64 aggregate_rx_packets; +} dpdk_worker_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /* total input packet counter */ + u64 aggregate_rx_packets; +} dpdk_hqos_thread_t; + +typedef struct +{ + u32 device; + u16 queue_id; +} dpdk_device_and_queue_t; + +#ifndef DPDK_HQOS_DBG_BYPASS +#define DPDK_HQOS_DBG_BYPASS 0 +#endif + +#ifndef HQOS_FLUSH_COUNT_THRESHOLD +#define HQOS_FLUSH_COUNT_THRESHOLD 100000 +#endif + +typedef struct dpdk_device_config_hqos_t +{ + u32 hqos_thread; + u32 hqos_thread_valid; + + u32 swq_size; + u32 burst_enq; + u32 burst_deq; + + u32 pktfield0_slabpos; + u32 pktfield1_slabpos; + u32 pktfield2_slabpos; + u64 pktfield0_slabmask; + u64 pktfield1_slabmask; + u64 pktfield2_slabmask; + u32 tc_table[64]; + + struct rte_sched_port_params port; + struct rte_sched_subport_params *subport; + struct rte_sched_pipe_params 
*pipe; + uint32_t *pipe_map; +} dpdk_device_config_hqos_t; + +int dpdk_hqos_validate_mask (u64 mask, u32 n); +void dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * + hqos, u32 pipe_profile_id); +void dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos); +clib_error_t *dpdk_port_setup_hqos (dpdk_device_t * xd, + dpdk_device_config_hqos_t * hqos); +void dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, + struct rte_mbuf **pkts, u32 n_pkts); + +#define foreach_dpdk_device_config_item \ + _ (num_rx_queues) \ + _ (num_tx_queues) \ + _ (num_rx_desc) \ + _ (num_tx_desc) \ + _ (rss_fn) + +typedef struct +{ + vlib_pci_addr_t pci_addr; + u8 is_blacklisted; + u8 vlan_strip_offload; +#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0 +#define DPDK_DEVICE_VLAN_STRIP_OFF 1 +#define DPDK_DEVICE_VLAN_STRIP_ON 2 + +#define _(x) uword x; + foreach_dpdk_device_config_item +#undef _ + clib_bitmap_t * workers; + u32 hqos_enabled; + dpdk_device_config_hqos_t hqos; +} dpdk_device_config_t; + +typedef struct +{ + + /* Config stuff */ + u8 **eal_init_args; + u8 *eal_init_args_str; + u8 *uio_driver_name; + u8 no_multi_seg; + u8 enable_tcp_udp_checksum; + + /* Required config parameters */ + u8 coremask_set_manually; + u8 nchannels_set_manually; + u32 coremask; + u32 nchannels; + u32 num_mbufs; + u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */ + + /* + * format interface names ala xxxEthernet%d/%d/%d instead of + * xxxEthernet%x/%x/%x. + */ + u8 interface_name_format_decimal; + + /* per-device config */ + dpdk_device_config_t default_devconf; + dpdk_device_config_t *dev_confs; + uword *device_config_index_by_pci_addr; + +} dpdk_config_main_t; + +dpdk_config_main_t dpdk_config_main; + +typedef struct +{ + + /* Devices */ + dpdk_device_t *devices; + dpdk_device_and_queue_t **devices_by_cpu; + dpdk_device_and_queue_t **devices_by_hqos_cpu; + + /* per-thread recycle lists */ + u32 **recycle; + + /* buffer flags template, configurable to enable/disable tcp / udp cksum */ + u32 buffer_flags_template; + + /* vlib buffer free list, must be same size as an rte_mbuf */ + u32 vlib_buffer_free_list_index; + + /* dpdk worker "threads" */ + dpdk_worker_t *workers; + + /* dpdk HQoS "threads" */ + dpdk_hqos_thread_t *hqos_threads; + + /* Ethernet input node index */ + u32 ethernet_input_node_index; + + /* pcap tracing [only works if (CLIB_DEBUG > 0)] */ + int tx_pcap_enable; + pcap_main_t pcap_main; + u8 *pcap_filename; + u32 pcap_sw_if_index; + u32 pcap_pkts_to_capture; + + /* hashes */ + uword *dpdk_device_by_kni_port_id; + uword *vu_sw_if_index_by_listener_fd; + uword *vu_sw_if_index_by_sock_fd; + u32 *vu_inactive_interfaces_device_index; + + /* + * flag indicating that a posted admin up/down + * (via post_sw_interface_set_flags) is in progress + */ + u8 admin_up_down_in_progress; + + u8 use_rss; + + /* which cpus are running dpdk-input */ + int input_cpu_first_index; + int input_cpu_count; + + /* which cpus are running I/O TX */ + int hqos_cpu_first_index; + int hqos_cpu_count; + + /* control interval of dpdk link state and stat polling */ + f64 link_state_poll_interval; + f64 stat_poll_interval; + + /* Sleep for this many MS after each device poll */ + u32 poll_sleep; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + dpdk_config_main_t *conf; +} dpdk_main_t; + +dpdk_main_t dpdk_main; + +typedef struct +{ + u32 buffer_index; + u16 device_index; + u8 queue_index; + struct rte_mbuf mb; + /* Copy of VLIB buffer; packet data 
stored in pre_data. */ + vlib_buffer_t buffer; +} dpdk_tx_dma_trace_t; + +typedef struct +{ + u32 buffer_index; + u16 device_index; + u16 queue_index; + struct rte_mbuf mb; + vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */ + u8 data[256]; /* First 256 data bytes, used for hexdump */ +} dpdk_rx_dma_trace_t; + +void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b); + +clib_error_t *dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address); + +clib_error_t *dpdk_set_mc_filter (vnet_hw_interface_t * hi, + struct ether_addr mc_addr_vec[], int naddr); + +void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd); + +clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); + +u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance); + +struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b); +struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); + +#define foreach_dpdk_error \ + _(NONE, "no error") \ + _(RX_PACKET_ERROR, "Rx packet errors") \ + _(RX_BAD_FCS, "Rx bad fcs") \ + _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \ + _(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \ + _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \ + _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error") + +typedef enum +{ +#define _(f,s) DPDK_ERROR_##f, + foreach_dpdk_error +#undef _ + DPDK_N_ERROR, +} dpdk_error_t; + +int dpdk_set_stat_poll_interval (f64 interval); +int dpdk_set_link_state_poll_interval (f64 interval); +void dpdk_update_link_state (dpdk_device_t * xd, f64 now); +void dpdk_device_lock_init (dpdk_device_t * xd); +void dpdk_device_lock_free (dpdk_device_t * xd); + +static inline u64 +vnet_get_aggregate_rx_packets (void) +{ + dpdk_main_t *dm = &dpdk_main; + u64 sum = 0; + dpdk_worker_t *dw; + + vec_foreach (dw, dm->workers) sum += dw->aggregate_rx_packets; + + return sum; +} + +void dpdk_rx_trace (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 * buffers, uword n_buffers); + +#define EFD_OPERATION_LESS_THAN 0 +#define EFD_OPERATION_GREATER_OR_EQUAL 1 + +format_function_t format_dpdk_device_name; +format_function_t format_dpdk_device; +format_function_t format_dpdk_tx_dma_trace; +format_function_t format_dpdk_rx_dma_trace; +format_function_t format_dpdk_rte_mbuf; +format_function_t format_dpdk_rx_rte_mbuf; +unformat_function_t unformat_socket_mem; +clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn); +clib_error_t *unformat_hqos (unformat_input_t * input, + dpdk_device_config_hqos_t * hqos); + +uword +admin_up_down_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f); + +#endif /* __included_dpdk_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/dpdk_priv.h b/src/vnet/devices/dpdk/dpdk_priv.h new file mode 100644 index 00000000..0c81dbc3 --- /dev/null +++ b/src/vnet/devices/dpdk/dpdk_priv.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define DPDK_NB_RX_DESC_DEFAULT 1024
+#define DPDK_NB_TX_DESC_DEFAULT 1024
+#define DPDK_NB_RX_DESC_VIRTIO 256
+#define DPDK_NB_TX_DESC_VIRTIO 256
+
+#define I40E_DEV_ID_SFP_XL710 0x1572
+#define I40E_DEV_ID_QSFP_A 0x1583
+#define I40E_DEV_ID_QSFP_B 0x1584
+#define I40E_DEV_ID_QSFP_C 0x1585
+#define I40E_DEV_ID_10G_BASE_T 0x1586
+#define I40E_DEV_ID_VF 0x154C
+
+/* These args appear by themselves */
+#define foreach_eal_double_hyphen_predicate_arg \
+_(no-shconf) \
+_(no-hpet) \
+_(no-huge) \
+_(vmware-tsc-map)
+
+#define foreach_eal_single_hyphen_mandatory_arg \
+_(coremask, c) \
+_(nchannels, n) \
+
+#define foreach_eal_single_hyphen_arg \
+_(blacklist, b) \
+_(mem-alloc-request, m) \
+_(force-ranks, r)
+
+/* These args are preceded by "--" and followed by a single string */
+#define foreach_eal_double_hyphen_arg \
+_(huge-dir) \
+_(proc-type) \
+_(file-prefix) \
+_(vdev)
+
+static inline void
+dpdk_get_xstats (dpdk_device_t * xd)
+{
+  int len;
+  if ((len = rte_eth_xstats_get (xd->device_index, NULL, 0)) > 0)
+    {
+      vec_validate (xd->xstats, len - 1);
+      vec_validate (xd->last_cleared_xstats, len - 1);
+
+      len =
+        rte_eth_xstats_get (xd->device_index, xd->xstats,
+                            vec_len (xd->xstats));
+
+      ASSERT (vec_len (xd->xstats) == len);
+      ASSERT (vec_len (xd->last_cleared_xstats) == len);
+
+      _vec_len (xd->xstats) = len;
+      _vec_len (xd->last_cleared_xstats) = len;
+
+    }
+}
+
+
+static inline void
+dpdk_update_counters (dpdk_device_t * xd, f64 now)
+{
+  vlib_simple_counter_main_t *cm;
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 my_cpu = os_get_cpu_number ();
+  u64 rxerrors, last_rxerrors;
+
+  /* only update counters for PMD interfaces */
+  if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
+    return;
+
+  xd->time_last_stats_update = now ? now : xd->time_last_stats_update;
+  clib_memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats));
+  rte_eth_stats_get (xd->device_index, &xd->stats);
+
+  /* maybe bump interface rx no buffer counter */
+  if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf))
+    {
+      cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+                             VNET_INTERFACE_COUNTER_RX_NO_BUF);
+
+      vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+                                     xd->stats.rx_nombuf -
+                                     xd->last_stats.rx_nombuf);
+    }
+
+  /* missed pkt counter */
+  if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed))
+    {
+      cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+                             VNET_INTERFACE_COUNTER_RX_MISS);
+
+      vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+                                     xd->stats.imissed -
+                                     xd->last_stats.imissed);
+    }
+  rxerrors = xd->stats.ierrors;
+  last_rxerrors = xd->last_stats.ierrors;
+
+  if (PREDICT_FALSE (rxerrors != last_rxerrors))
+    {
+      cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+                             VNET_INTERFACE_COUNTER_RX_ERROR);
+
+      vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+                                     rxerrors - last_rxerrors);
+    }
+
+  dpdk_get_xstats (xd);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/dpdk/format.c b/src/vnet/devices/dpdk/format.c
new file mode 100644
index 00000000..ff7c7a5a
--- /dev/null
+++ b/src/vnet/devices/dpdk/format.c
@@ -0,0 +1,763 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "dpdk_priv.h"
+#include
+
+#define foreach_dpdk_counter \
+  _ (tx_frames_ok, opackets) \
+  _ (tx_bytes_ok, obytes) \
+  _ (tx_errors, oerrors) \
+  _ (rx_frames_ok, ipackets) \
+  _ (rx_bytes_ok, ibytes) \
+  _ (rx_errors, ierrors) \
+  _ (rx_missed, imissed) \
+  _ (rx_no_bufs, rx_nombuf)
+
+#define foreach_dpdk_q_counter \
+  _ (rx_frames_ok, q_ipackets) \
+  _ (tx_frames_ok, q_opackets) \
+  _ (rx_bytes_ok, q_ibytes) \
+  _ (tx_bytes_ok, q_obytes) \
+  _ (rx_errors, q_errors)
+
+#define foreach_dpdk_rss_hf \
+  _(ETH_RSS_FRAG_IPV4, "ipv4-frag") \
+  _(ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
+  _(ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
+  _(ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
+  _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
+  _(ETH_RSS_IPV4, "ipv4") \
+  _(ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
+  _(ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \
+  _(ETH_RSS_FRAG_IPV6, "ipv6-frag") \
+  _(ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
+  _(ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
+  _(ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
+  _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
+  _(ETH_RSS_L2_PAYLOAD, "l2-payload") \
+  _(ETH_RSS_IPV6_EX, "ipv6-ex") \
+  _(ETH_RSS_IPV6, "ipv6")
+
+
+#define foreach_dpdk_rx_offload_caps \
+  _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip") \
+  _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \
+  _(DEV_RX_OFFLOAD_UDP_CKSUM , "udp-cksum") \
+  _(DEV_RX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \
+  _(DEV_RX_OFFLOAD_TCP_LRO , "tcp-lro") \
+  _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip")
+
+#define foreach_dpdk_tx_offload_caps \
+  _(DEV_TX_OFFLOAD_VLAN_INSERT, "vlan-insert") \
+  _(DEV_TX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \
+  _(DEV_TX_OFFLOAD_UDP_CKSUM , "udp-cksum") \
+  _(DEV_TX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \
+  _(DEV_TX_OFFLOAD_SCTP_CKSUM , "sctp-cksum") \
+  _(DEV_TX_OFFLOAD_TCP_TSO , "tcp-tso") \
+  _(DEV_TX_OFFLOAD_UDP_TSO , "udp-tso") \
+  _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \
+  _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert")
+
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+/* New ol_flags bits added in DPDK-16.11 */
+#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7)
+#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8)
+#endif
+
+#define foreach_dpdk_pkt_rx_offload_flag \
+  _ (PKT_RX_VLAN_PKT, "RX packet is an 802.1q VLAN packet") \
+  _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \
+  _ (PKT_RX_FDIR, "RX packet with FDIR infos") \
+  _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
+  _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
+  _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \
+  _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \
+  _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid") \
+  _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
+  _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \
+  _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped")
+
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+/* PTYPE added in DPDK-16.11 */
+#define RTE_PTYPE_L2_ETHER_VLAN 0x00000006
+#define RTE_PTYPE_L2_ETHER_QINQ 0x00000007
+#endif
+
+#define foreach_dpdk_pkt_type \
+  _ (L2, ETHER, "Ethernet packet") \
+  _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \
+  _ (L2, ETHER_ARP, "ARP packet") \
+  _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet") \
+  _ (L2, ETHER_NSH, "NSH (Network Service Header) packet") \
+  _ (L2, ETHER_VLAN, "VLAN packet") \
+  _ (L2, ETHER_QINQ, "QinQ packet") \
+  _ (L3, IPV4, "IPv4 packet without extension headers") \
+  _ (L3, IPV4_EXT, "IPv4 packet with extension headers") \
+  _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \
+  _ (L3, IPV6, "IPv6 packet without extension headers") \
+  _ (L3, IPV6_EXT, "IPv6 packet with extension headers") \
+  _ (L3, IPV6_EXT_UNKNOWN, "IPv6 packet with or without extension headers") \
+  _ (L4, TCP, "TCP packet") \
+  _ (L4, UDP, "UDP packet") \
+  _ (L4, FRAG, "Fragmented IP packet") \
+  _ (L4, SCTP, "SCTP (Stream Control Transmission Protocol) packet") \
+  _ (L4, ICMP, "ICMP packet") \
+  _ (L4, NONFRAG, "Non-fragmented IP packet") \
+  _ (TUNNEL, GRE, "GRE tunneling packet") \
+  _ (TUNNEL, VXLAN, "VXLAN tunneling packet") \
+  _ (TUNNEL, NVGRE, "NVGRE Tunneling packet") \
+  _ (TUNNEL, GENEVE, "GENEVE Tunneling packet") \
+  _ (TUNNEL, GRENAT, "Teredo, VXLAN or GRE Tunneling packet") \
+  _ (INNER_L2, ETHER, "Inner Ethernet packet") \
+  _ (INNER_L2, ETHER_VLAN, "Inner Ethernet packet with VLAN") \
+  _ (INNER_L3, IPV4, "Inner IPv4 packet without extension headers") \
+  _ (INNER_L3, IPV4_EXT, "Inner IPv4 packet with extension headers") \
+  _ (INNER_L3, IPV4_EXT_UNKNOWN, "Inner IPv4 packet with or without extension headers") \
+  _ (INNER_L3, IPV6, "Inner IPv6 packet without extension headers") \
+  _ (INNER_L3, IPV6_EXT, "Inner IPv6 packet with extension headers") \
+  _ (INNER_L3, IPV6_EXT_UNKNOWN, "Inner IPv6 packet with or without extension headers") \
+  _ (INNER_L4, TCP, "Inner TCP packet") \
+  _ (INNER_L4, UDP, "Inner UDP packet") \
+  _ (INNER_L4, FRAG, "Inner fragmented IP packet") \
+  _ (INNER_L4, SCTP, "Inner SCTP (Stream Control Transmission Protocol) packet") \
+  _ (INNER_L4, ICMP, "Inner ICMP packet") \
+  _ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet")
+
+#define foreach_dpdk_pkt_tx_offload_flag \
+  _ (PKT_TX_VLAN_PKT, "TX packet is an 802.1q VLAN packet") \
+  _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \
+  _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \
+  _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt.
computed by NIC") \ + _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp") + +#define foreach_dpdk_pkt_offload_flag \ + foreach_dpdk_pkt_rx_offload_flag \ + foreach_dpdk_pkt_tx_offload_flag + +u8 * +format_dpdk_device_name (u8 * s, va_list * args) +{ + dpdk_main_t *dm = &dpdk_main; + char *devname_format; + char *device_name; + u32 i = va_arg (*args, u32); + struct rte_eth_dev_info dev_info; + u8 *ret; + + if (dm->conf->interface_name_format_decimal) + devname_format = "%s%d/%d/%d"; + else + devname_format = "%s%x/%x/%x"; + + switch (dm->devices[i].port_type) + { + case VNET_DPDK_PORT_TYPE_ETH_1G: + device_name = "GigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_10G: + device_name = "TenGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_40G: + device_name = "FortyGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_100G: + device_name = "HundredGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_BOND: + return format (s, "BondEthernet%d", dm->devices[i].device_index); + + case VNET_DPDK_PORT_TYPE_ETH_SWITCH: + device_name = "EthernetSwitch"; + break; + + case VNET_DPDK_PORT_TYPE_AF_PACKET: + rte_eth_dev_info_get (i, &dev_info); + return format (s, "af_packet%d", dm->devices[i].af_packet_port_id); + + default: + case VNET_DPDK_PORT_TYPE_UNKNOWN: + device_name = "UnknownEthernet"; + break; + } + + rte_eth_dev_info_get (i, &dev_info); + + if (dev_info.pci_dev) + ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus, + dev_info.pci_dev->addr.devid, + dev_info.pci_dev->addr.function); + else + ret = format (s, "%s%d", device_name, dm->devices[i].device_index); + + if (dm->devices[i].interface_name_suffix) + return format (ret, "/%s", dm->devices[i].interface_name_suffix); + return ret; +} + +static u8 * +format_dpdk_device_type (u8 * s, va_list * args) +{ + dpdk_main_t *dm = &dpdk_main; + char *dev_type; + u32 i = va_arg (*args, u32); + + switch (dm->devices[i].pmd) + { + case VNET_DPDK_PMD_E1000EM: + dev_type = "Intel 82540EM (e1000)"; + break; + + case VNET_DPDK_PMD_IGB: + dev_type = "Intel e1000"; + break; + + case VNET_DPDK_PMD_I40E: + dev_type = "Intel X710/XL710 Family"; + break; + + case VNET_DPDK_PMD_I40EVF: + dev_type = "Intel X710/XL710 Family VF"; + break; + + case VNET_DPDK_PMD_FM10K: + dev_type = "Intel FM10000 Family Ethernet Switch"; + break; + + case VNET_DPDK_PMD_IGBVF: + dev_type = "Intel e1000 VF"; + break; + + case VNET_DPDK_PMD_VIRTIO: + dev_type = "Red Hat Virtio"; + break; + + case VNET_DPDK_PMD_IXGBEVF: + dev_type = "Intel 82599 VF"; + break; + + case VNET_DPDK_PMD_IXGBE: + dev_type = "Intel 82599"; + break; + + case VNET_DPDK_PMD_ENIC: + dev_type = "Cisco VIC"; + break; + + case VNET_DPDK_PMD_CXGBE: + dev_type = "Chelsio T4/T5"; + break; + + case VNET_DPDK_PMD_MLX5: + dev_type = "Mellanox ConnectX-4 Family"; + break; + + case VNET_DPDK_PMD_VMXNET3: + dev_type = "VMware VMXNET3"; + break; + + case VNET_DPDK_PMD_AF_PACKET: + dev_type = "af_packet"; + break; + + case VNET_DPDK_PMD_BOND: + dev_type = "Ethernet Bonding"; + break; + + case VNET_DPDK_PMD_DPAA2: + dev_type = "NXP DPAA2 Mac"; + break; + + default: + case VNET_DPDK_PMD_UNKNOWN: + dev_type = "### UNKNOWN ###"; + break; + } + + return format (s, dev_type); +} + +static u8 * +format_dpdk_link_status (u8 * s, va_list * args) +{ + dpdk_device_t *xd = va_arg (*args, dpdk_device_t *); + struct rte_eth_link *l = &xd->link; + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); + + s = format (s, "%s 
", l->link_status ? "up" : "down"); + if (l->link_status) + { + u32 promisc = rte_eth_promiscuous_get (xd->device_index); + + s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ? + "full" : "half"); + s = format (s, "speed %u mtu %d %s\n", l->link_speed, + hi->max_packet_bytes, promisc ? " promisc" : ""); + } + else + s = format (s, "\n"); + + return s; +} + +#define _line_len 72 +#define _(v, str) \ +if (bitmap & v) { \ + if (format_get_indent (s) > next_split ) { \ + next_split += _line_len; \ + s = format(s,"\n%U", format_white_space, indent); \ + } \ + s = format(s, "%s ", str); \ +} + +static u8 * +format_dpdk_rss_hf_name (u8 * s, va_list * args) +{ + u64 bitmap = va_arg (*args, u64); + int next_split = _line_len; + int indent = format_get_indent (s); + + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_rss_hf return s; +} + +static u8 * +format_dpdk_rx_offload_caps (u8 * s, va_list * args) +{ + u32 bitmap = va_arg (*args, u32); + int next_split = _line_len; + int indent = format_get_indent (s); + + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_rx_offload_caps return s; +} + +static u8 * +format_dpdk_tx_offload_caps (u8 * s, va_list * args) +{ + u32 bitmap = va_arg (*args, u32); + int next_split = _line_len; + int indent = format_get_indent (s); + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_tx_offload_caps return s; +} + +#undef _line_len +#undef _ + +u8 * +format_dpdk_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance); + uword indent = format_get_indent (s); + f64 now = vlib_time_now (dm->vlib_main); + struct rte_eth_dev_info di; + + dpdk_update_counters (xd, now); + dpdk_update_link_state (xd, now); + + s = format (s, "%U\n%Ucarrier %U", + format_dpdk_device_type, xd->device_index, + format_white_space, indent + 2, format_dpdk_link_status, xd); + + rte_eth_dev_info_get (xd->device_index, &di); + + if (verbose > 1 && xd->flags & DPDK_DEVICE_FLAG_PMD) + { + struct rte_pci_device *pci; + struct rte_eth_rss_conf rss_conf; + int vlan_off; + int retval; + + rss_conf.rss_key = 0; + retval = rte_eth_dev_rss_hash_conf_get (xd->device_index, &rss_conf); + if (retval < 0) + clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval); + pci = di.pci_dev; + + if (pci) + s = + format (s, + "%Upci id: device %04x:%04x subsystem %04x:%04x\n" + "%Upci address: %04x:%02x:%02x.%02x\n", + format_white_space, indent + 2, pci->id.vendor_id, + pci->id.device_id, pci->id.subsystem_vendor_id, + pci->id.subsystem_device_id, format_white_space, indent + 2, + pci->addr.domain, pci->addr.bus, pci->addr.devid, + pci->addr.function); + s = + format (s, "%Umax rx packet len: %d\n", format_white_space, + indent + 2, di.max_rx_pktlen); + s = + format (s, "%Umax num of queues: rx %d tx %d\n", format_white_space, + indent + 2, di.max_rx_queues, di.max_tx_queues); + s = + format (s, "%Upromiscuous: unicast %s all-multicast %s\n", + format_white_space, indent + 2, + rte_eth_promiscuous_get (xd->device_index) ? "on" : "off", + rte_eth_promiscuous_get (xd->device_index) ? "on" : "off"); + vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); + s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n", + format_white_space, indent + 2, + vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? 
"on" : "off"); + s = format (s, "%Urx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_rx_offload_caps, di.rx_offload_capa); + s = format (s, "%Utx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_tx_offload_caps, di.tx_offload_capa); + s = format (s, "%Urss active: %U\n" + "%Urss supported: %U\n", + format_white_space, indent + 2, + format_dpdk_rss_hf_name, rss_conf.rss_hf, + format_white_space, indent + 2, + format_dpdk_rss_hf_name, di.flow_type_rss_offloads); + } + + s = format (s, "%Urx queues %d, rx desc %d, tx queues %d, tx desc %d\n", + format_white_space, indent + 2, + xd->rx_q_used, xd->nb_rx_desc, xd->tx_q_used, xd->nb_tx_desc); + + if (xd->cpu_socket > -1) + s = format (s, "%Ucpu socket %d\n", + format_white_space, indent + 2, xd->cpu_socket); + + /* $$$ MIB counters */ + { +#define _(N, V) \ + if ((xd->stats.V - xd->last_cleared_stats.V) != 0) { \ + s = format (s, "\n%U%-40U%16Ld", \ + format_white_space, indent + 2, \ + format_c_identifier, #N, \ + xd->stats.V - xd->last_cleared_stats.V); \ + } \ + + foreach_dpdk_counter +#undef _ + } + + u8 *xs = 0; + u32 i = 0; + struct rte_eth_xstat *xstat, *last_xstat; + struct rte_eth_xstat_name *xstat_names = 0; + int len = rte_eth_xstats_get_names (xd->device_index, NULL, 0); + vec_validate (xstat_names, len - 1); + rte_eth_xstats_get_names (xd->device_index, xstat_names, len); + + ASSERT (vec_len (xd->xstats) == vec_len (xd->last_cleared_xstats)); + + /* *INDENT-OFF* */ + vec_foreach_index(i, xd->xstats) + { + u64 delta = 0; + xstat = vec_elt_at_index(xd->xstats, i); + last_xstat = vec_elt_at_index(xd->last_cleared_xstats, i); + + delta = xstat->value - last_xstat->value; + if (verbose == 2 || (verbose && delta)) + { + /* format_c_identifier doesn't like c strings inside vector */ + u8 * name = format(0,"%s", xstat_names[i].name); + xs = format(xs, "\n%U%-38U%16Ld", + format_white_space, indent + 4, + format_c_identifier, name, delta); + vec_free(name); + } + } + /* *INDENT-ON* */ + + vec_free (xstat_names); + + if (xs) + { + s = format (s, "\n%Uextended stats:%v", + format_white_space, indent + 2, xs); + vec_free (xs); + } + + return s; +} + +u8 * +format_dpdk_tx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); + dpdk_tx_dma_trace_t *t = va_arg (*va, dpdk_tx_dma_trace_t *); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); + uword indent = format_get_indent (s); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + + s = format (s, "%U tx queue %d", + format_vnet_sw_interface_name, vnm, sw, t->queue_index); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U%U", format_white_space, indent, + format_ethernet_header_with_length, t->buffer.pre_data, + sizeof (t->buffer.pre_data)); + + return s; +} + +u8 * +format_dpdk_rx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); + dpdk_rx_dma_trace_t *t = va_arg (*va, dpdk_rx_dma_trace_t *); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); + format_function_t *f; + uword indent = 
format_get_indent (s); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + + s = format (s, "%U rx queue %d", + format_vnet_sw_interface_name, vnm, sw, t->queue_index); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U%U", + format_white_space, indent, + format_dpdk_rte_mbuf, &t->mb, &t->data); + + if (vm->trace_main.verbose) + { + s = format (s, "\n%UPacket Dump%s", format_white_space, indent + 2, + t->mb.data_len > sizeof (t->data) ? " (truncated)" : ""); + s = format (s, "\n%U%U", format_white_space, indent + 4, + format_hexdump, &t->data, + t->mb.data_len > + sizeof (t->data) ? sizeof (t->data) : t->mb.data_len); + } + f = node->format_buffer; + if (!f) + f = format_hex_bytes; + s = format (s, "\n%U%U", format_white_space, indent, + f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); + + return s; +} + + +static inline u8 * +format_dpdk_pkt_types (u8 * s, va_list * va) +{ + u32 *pkt_types = va_arg (*va, u32 *); + uword indent __attribute__ ((unused)) = format_get_indent (s) + 2; + + if (!*pkt_types) + return s; + + s = format (s, "Packet Types"); + +#define _(L, F, S) \ + if ((*pkt_types & RTE_PTYPE_##L##_MASK) == RTE_PTYPE_##L##_##F) \ + { \ + s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \ + "RTE_PTYPE_" #L "_" #F, RTE_PTYPE_##L##_##F, S); \ + } + + foreach_dpdk_pkt_type +#undef _ + return s; +} + +static inline u8 * +format_dpdk_pkt_offload_flags (u8 * s, va_list * va) +{ + u64 *ol_flags = va_arg (*va, u64 *); + uword indent = format_get_indent (s) + 2; + + if (!*ol_flags) + return s; + + s = format (s, "Packet Offload Flags"); + +#define _(F, S) \ + if (*ol_flags & F) \ + { \ + s = format (s, "\n%U%s (0x%04x) %s", \ + format_white_space, indent, #F, F, S); \ + } + + foreach_dpdk_pkt_offload_flag +#undef _ + return s; +} + +u8 * +format_dpdk_rte_mbuf_vlan (u8 * s, va_list * va) +{ + ethernet_vlan_header_tv_t *vlan_hdr = + va_arg (*va, ethernet_vlan_header_tv_t *); + + if (clib_net_to_host_u16 (vlan_hdr->type) == ETHERNET_TYPE_DOT1AD) + { + s = format (s, "%U 802.1q vlan ", + format_ethernet_vlan_tci, + clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); + vlan_hdr++; + } + + s = format (s, "%U", + format_ethernet_vlan_tci, + clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); + + return s; +} + +u8 * +format_dpdk_rte_mbuf (u8 * s, va_list * va) +{ + struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *); + ethernet_header_t *eth_hdr = va_arg (*va, ethernet_header_t *); + uword indent = format_get_indent (s) + 2; + + s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d" + "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x, data_off %d, phys_addr 0x%x" + "\n%Upacket_type 0x%x", + mb->port, mb->nb_segs, mb->pkt_len, + format_white_space, indent, + mb->buf_len, mb->data_len, mb->ol_flags, mb->data_off, + mb->buf_physaddr, format_white_space, indent, mb->packet_type); + + if (mb->ol_flags) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_offload_flags, &mb->ol_flags); + + if ((mb->ol_flags & PKT_RX_VLAN_PKT) && + ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0)) + { + ethernet_vlan_header_tv_t *vlan_hdr = + ((ethernet_vlan_header_tv_t *) & (eth_hdr->type)); + s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr); + } + + if (mb->packet_type) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_types, &mb->packet_type); + + return s; +} + +uword +unformat_socket_mem 
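+/*
+ * Editor's note: illustrative sketch only, not part of this commit.
+ * unformat_socket_mem (continuing below) parses DPDK-style --socket-mem
+ * lists such as "1024,,512", where an empty slot means the 1024 MB
+ * default for that NUMA socket.  An equivalent plain-C parse
+ * (hypothetical helper):
+ */
+#if 0
+#include <stdlib.h>
+#include <string.h>
+
+static void
+parse_socket_mem (const char *arg, unsigned *mem_mb, int max_sockets)
+{
+  int socket = 0;
+  const char *p = arg;
+  while (socket < max_sockets)
+    {
+      /* empty field ("," or end of string) keeps the 1024 MB default */
+      mem_mb[socket++] = (*p == ',' || *p == 0) ? 1024 : (unsigned) atoi (p);
+      p = strchr (p, ',');
+      if (!p)
+        break;
+      p++;
+    }
+}
+#endif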
(unformat_input_t * input, va_list * va) +{ + uword **r = va_arg (*va, uword **); + int i = 0; + u32 mem; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, ",")) + hash_set (*r, i, 1024); + else if (unformat (input, "%u,", &mem)) + hash_set (*r, i, mem); + else if (unformat (input, "%u", &mem)) + hash_set (*r, i, mem); + else + { + unformat_put_input (input); + goto done; + } + i++; + } + +done: + return 1; +} + +clib_error_t * +unformat_rss_fn (unformat_input_t * input, uword * rss_fn) +{ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (0) + ; +#undef _ +#define _(f, s) \ + else if (unformat (input, s)) \ + *rss_fn |= f; + + foreach_dpdk_rss_hf +#undef _ + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + } + return 0; +} + +clib_error_t * +unformat_hqos (unformat_input_t * input, dpdk_device_config_hqos_t * hqos) +{ + clib_error_t *error = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "hqos-thread %u", &hqos->hqos_thread)) + hqos->hqos_thread_valid = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + break; + } + } + + return error; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/hqos.c b/src/vnet/devices/dpdk/hqos.c new file mode 100644 index 00000000..d68bc48f --- /dev/null +++ b/src/vnet/devices/dpdk/hqos.c @@ -0,0 +1,775 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include /* enumerate all vlib messages */ + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include "dpdk_priv.h" + +dpdk_main_t dpdk_main; + +/*** + * + * HQoS default configuration values + * + ***/ + +static dpdk_device_config_hqos_t hqos_params_default = { + .hqos_thread_valid = 0, + + .swq_size = 4096, + .burst_enq = 256, + .burst_deq = 220, + + /* + * Packet field to identify the subport. + * + * Default value: Since only one subport is defined by default (see below: + * n_subports_per_port = 1), the subport ID is hardcoded to 0. + */ + .pktfield0_slabpos = 0, + .pktfield0_slabmask = 0, + + /* + * Packet field to identify the pipe. + * + * Default value: Assuming Ethernet/IPv4/UDP packets, UDP payload bits 12 .. 23 + */ + .pktfield1_slabpos = 40, + .pktfield1_slabmask = 0x0000000FFF000000LLU, + + /* Packet field used as index into TC translation table to identify the traffic + * class and queue. 
+ * + * Default value: Assuming Ethernet/IPv4 packets, IPv4 DSCP field + */ + .pktfield2_slabpos = 8, + .pktfield2_slabmask = 0x00000000000000FCLLU, + .tc_table = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + }, + + /* port */ + .port = { + .name = NULL, /* Set at init */ + .socket = 0, /* Set at init */ + .rate = 1250000000, /* Assuming 10GbE port */ + .mtu = 14 + 1500, /* Assuming Ethernet/IPv4 pkt (Ethernet FCS not included) */ + .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT, + .n_subports_per_port = 1, + .n_pipes_per_subport = 4096, + .qsize = {64, 64, 64, 64}, + .pipe_profiles = NULL, /* Set at config */ + .n_pipe_profiles = 1, + +#ifdef RTE_SCHED_RED + .red_params = { + /* Traffic Class 0 Colors Green / Yellow / Red */ + [0][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [0][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [0][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 1 - Colors Green / Yellow / Red */ + [1][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [1][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [1][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 2 - Colors Green / Yellow / Red */ + [2][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [2][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [2][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 3 - Colors Green / Yellow / Red */ + [3][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [3][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [3][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9} + }, +#endif /* RTE_SCHED_RED */ + }, +}; + +static struct rte_sched_subport_params hqos_subport_params_default = { + .tb_rate = 1250000000, /* 10GbE line rate (measured in bytes/second) */ + .tb_size = 1000000, + .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000}, + .tc_period = 10, +}; + +static struct rte_sched_pipe_params hqos_pipe_params_default = { + .tb_rate = 305175, /* 10GbE line rate divided by 4K pipes */ + .tb_size = 1000000, + .tc_rate = {305175, 305175, 305175, 305175}, + .tc_period = 40, +#ifdef RTE_SCHED_SUBPORT_TC_OV + .tc_ov_weight = 1, +#endif + .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, +}; + +/*** + * + * HQoS configuration + * + ***/ + +int +dpdk_hqos_validate_mask (u64 mask, u32 n) +{ + int count = __builtin_popcountll (mask); + int pos_lead = sizeof (u64) * 8 - __builtin_clzll (mask); + int pos_trail = __builtin_ctzll (mask); + int count_expected = __builtin_popcount (n - 1); + + /* Handle the exceptions */ + if (n == 0) + return -1; /* Error */ + + if ((mask == 0) && (n == 1)) + return 0; /* OK */ + + if (((mask == 0) && (n != 1)) || ((mask != 0) && (n == 1))) + return -2; /* Error */ + + /* Check that mask is contiguous */ + if ((pos_lead - pos_trail) != count) + return -3; /* Error */ + + /* Check that mask contains the expected number of bits set */ + if (count != count_expected) + return -4; /* Error */ + + return 0; /* OK */ +} + +void +dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * + hqos, u32 pipe_profile_id) +{ + memcpy (&hqos->pipe[pipe_profile_id], &hqos_pipe_params_default, + sizeof 
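+/*
+ * Editor's note: illustrative usage only, not part of this commit.
+ * dpdk_hqos_validate_mask() above accepts only a contiguous run of set
+ * bits whose population count is exactly log2(n), i.e. just wide
+ * enough to index n values (or an empty mask when n == 1).  For
+ * example, with the defaults above:
+ */
+#if 0
+#include <assert.h>
+
+static void
+hqos_mask_examples (void)
+{
+  /* 4096 pipes need 12 contiguous bits */
+  assert (dpdk_hqos_validate_mask (0x0000000FFF000000ULL, 4096) == 0);
+  /* non-contiguous masks are rejected */
+  assert (dpdk_hqos_validate_mask (0x0000000F0F000000ULL, 4096) != 0);
+  /* a single subport needs no mask bits at all */
+  assert (dpdk_hqos_validate_mask (0, 1) == 0);
+}
+#endif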
(hqos_pipe_params_default)); +} + +void +dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos) +{ + struct rte_sched_subport_params *subport_params; + struct rte_sched_pipe_params *pipe_params; + u32 *pipe_map; + u32 i; + + memcpy (hqos, &hqos_params_default, sizeof (hqos_params_default)); + + /* pipe */ + vec_add2 (hqos->pipe, pipe_params, hqos->port.n_pipe_profiles); + + for (i = 0; i < vec_len (hqos->pipe); i++) + memcpy (&pipe_params[i], + &hqos_pipe_params_default, sizeof (hqos_pipe_params_default)); + + hqos->port.pipe_profiles = hqos->pipe; + + /* subport */ + vec_add2 (hqos->subport, subport_params, hqos->port.n_subports_per_port); + + for (i = 0; i < vec_len (hqos->subport); i++) + memcpy (&subport_params[i], + &hqos_subport_params_default, + sizeof (hqos_subport_params_default)); + + /* pipe profile */ + vec_add2 (hqos->pipe_map, + pipe_map, + hqos->port.n_subports_per_port * hqos->port.n_pipes_per_subport); + + for (i = 0; i < vec_len (hqos->pipe_map); i++) + pipe_map[i] = 0; +} + +/*** + * + * HQoS init + * + ***/ + +clib_error_t * +dpdk_port_setup_hqos (dpdk_device_t * xd, dpdk_device_config_hqos_t * hqos) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + char name[32]; + u32 subport_id, i; + int rv; + + /* Detect the set of worker threads */ + int worker_thread_first = 0; + int worker_thread_count = 0; + + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + vlib_thread_registration_t *tr = + p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + worker_thread_first = tr->first_index; + worker_thread_count = tr->count; + } + + /* Allocate the per-thread device data array */ + vec_validate_aligned (xd->hqos_wt, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + memset (xd->hqos_wt, 0, tm->n_vlib_mains * sizeof (xd->hqos_wt[0])); + + vec_validate_aligned (xd->hqos_ht, 0, CLIB_CACHE_LINE_BYTES); + memset (xd->hqos_ht, 0, sizeof (xd->hqos_ht[0])); + + /* Allocate space for one SWQ per worker thread in the I/O TX thread data structure */ + vec_validate (xd->hqos_ht->swq, worker_thread_count); + + /* SWQ */ + for (i = 0; i < worker_thread_count + 1; i++) + { + u32 swq_flags = RING_F_SP_ENQ | RING_F_SC_DEQ; + + snprintf (name, sizeof (name), "SWQ-worker%u-to-device%u", i, + xd->device_index); + xd->hqos_ht->swq[i] = + rte_ring_create (name, hqos->swq_size, xd->cpu_socket, swq_flags); + if (xd->hqos_ht->swq[i] == NULL) + return clib_error_return (0, + "SWQ-worker%u-to-device%u: rte_ring_create err", + i, xd->device_index); + } + + /* + * HQoS + */ + + /* HQoS port */ + snprintf (name, sizeof (name), "HQoS%u", xd->device_index); + hqos->port.name = strdup (name); + if (hqos->port.name == NULL) + return clib_error_return (0, "HQoS%u: strdup err", xd->device_index); + + hqos->port.socket = rte_eth_dev_socket_id (xd->device_index); + if (hqos->port.socket == SOCKET_ID_ANY) + hqos->port.socket = 0; + + xd->hqos_ht->hqos = rte_sched_port_config (&hqos->port); + if (xd->hqos_ht->hqos == NULL) + return clib_error_return (0, "HQoS%u: rte_sched_port_config err", + xd->device_index); + + /* HQoS subport */ + for (subport_id = 0; subport_id < hqos->port.n_subports_per_port; + subport_id++) + { + u32 pipe_id; + + rv = + rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, + &hqos->subport[subport_id]); + if (rv) + return clib_error_return (0, + "HQoS%u subport %u: rte_sched_subport_config err (%d)", + xd->device_index, subport_id, rv); + + /* HQoS pipe */ + for (pipe_id = 0; pipe_id < hqos->port.n_pipes_per_subport; pipe_id++) + 
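+/*
+ * Editor's note: illustrative sketch only, not part of this commit.
+ * Each SWQ created above carries packets from exactly one enqueueing
+ * thread to the single HQoS dequeue thread, so the rings are created
+ * single-producer/single-consumer and need no locking.  Reduced to a
+ * standalone helper (hypothetical name):
+ */
+#if 0
+#include <stdio.h>
+#include <rte_ring.h>
+
+static struct rte_ring *
+make_worker_swq (unsigned worker, unsigned device, unsigned size, int socket)
+{
+  char name[32];
+  snprintf (name, sizeof (name), "SWQ-worker%u-to-device%u", worker, device);
+  /* size must be a power of two; SP_ENQ/SC_DEQ skip the multi-producer/
+     multi-consumer synchronization paths */
+  return rte_ring_create (name, size, socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
+}
+#endif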
{ + u32 pos = subport_id * hqos->port.n_pipes_per_subport + pipe_id; + u32 profile_id = hqos->pipe_map[pos]; + + rv = + rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, + profile_id); + if (rv) + return clib_error_return (0, + "HQoS%u subport %u pipe %u: rte_sched_pipe_config err (%d)", + xd->device_index, subport_id, pipe_id, + rv); + } + } + + /* Set up per-thread device data for the I/O TX thread */ + xd->hqos_ht->hqos_burst_enq = hqos->burst_enq; + xd->hqos_ht->hqos_burst_deq = hqos->burst_deq; + vec_validate (xd->hqos_ht->pkts_enq, 2 * hqos->burst_enq - 1); + vec_validate (xd->hqos_ht->pkts_deq, hqos->burst_deq - 1); + xd->hqos_ht->pkts_enq_len = 0; + xd->hqos_ht->swq_pos = 0; + xd->hqos_ht->flush_count = 0; + + /* Set up per-thread device data for each worker thread */ + for (i = 0; i < worker_thread_count + 1; i++) + { + u32 tid; + if (i) + tid = worker_thread_first + (i - 1); + else + tid = i; + + xd->hqos_wt[tid].swq = xd->hqos_ht->swq[i]; + xd->hqos_wt[tid].hqos_field0_slabpos = hqos->pktfield0_slabpos; + xd->hqos_wt[tid].hqos_field0_slabmask = hqos->pktfield0_slabmask; + xd->hqos_wt[tid].hqos_field0_slabshr = + __builtin_ctzll (hqos->pktfield0_slabmask); + xd->hqos_wt[tid].hqos_field1_slabpos = hqos->pktfield1_slabpos; + xd->hqos_wt[tid].hqos_field1_slabmask = hqos->pktfield1_slabmask; + xd->hqos_wt[tid].hqos_field1_slabshr = + __builtin_ctzll (hqos->pktfield1_slabmask); + xd->hqos_wt[tid].hqos_field2_slabpos = hqos->pktfield2_slabpos; + xd->hqos_wt[tid].hqos_field2_slabmask = hqos->pktfield2_slabmask; + xd->hqos_wt[tid].hqos_field2_slabshr = + __builtin_ctzll (hqos->pktfield2_slabmask); + memcpy (xd->hqos_wt[tid].hqos_tc_table, hqos->tc_table, + sizeof (hqos->tc_table)); + } + + return 0; +} + +/*** + * + * HQoS run-time + * + ***/ +/* + * dpdk_hqos_thread - Contains the main loop of an HQoS thread. 
+ * + * w + * Information for the current thread + */ +static_always_inline void +dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + u32 cpu_index = vm->cpu_index; + u32 dev_pos; + + dev_pos = 0; + while (1) + { + vlib_worker_thread_barrier_check (); + + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + if (dev_pos >= n_devs) + dev_pos = 0; + + dpdk_device_and_queue_t *dq = + vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); + + dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; + u32 device_index = xd->device_index; + u16 queue_id = dq->queue_id; + + struct rte_mbuf **pkts_enq = hqos->pkts_enq; + u32 pkts_enq_len = hqos->pkts_enq_len; + u32 swq_pos = hqos->swq_pos; + u32 n_swq = vec_len (hqos->swq), i; + u32 flush_count = hqos->flush_count; + + for (i = 0; i < n_swq; i++) + { + /* Get current SWQ for this device */ + struct rte_ring *swq = hqos->swq[swq_pos]; + + /* Read SWQ burst to packet buffer of this device */ + pkts_enq_len += rte_ring_sc_dequeue_burst (swq, + (void **) + &pkts_enq[pkts_enq_len], + hqos->hqos_burst_enq); + + /* Get next SWQ for this device */ + swq_pos++; + if (swq_pos >= n_swq) + swq_pos = 0; + hqos->swq_pos = swq_pos; + + /* HWQ TX enqueue when burst available */ + if (pkts_enq_len >= hqos->hqos_burst_enq) + { + u32 n_pkts = rte_eth_tx_burst (device_index, + (uint16_t) queue_id, + pkts_enq, + (uint16_t) pkts_enq_len); + + for (; n_pkts < pkts_enq_len; n_pkts++) + rte_pktmbuf_free (pkts_enq[n_pkts]); + + pkts_enq_len = 0; + flush_count = 0; + break; + } + } + if (pkts_enq_len) + { + flush_count++; + if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + } + } + hqos->pkts_enq_len = pkts_enq_len; + hqos->flush_count = flush_count; + + /* Advance to next device */ + dev_pos++; + } +} + +static_always_inline void +dpdk_hqos_thread_internal (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + u32 cpu_index = vm->cpu_index; + u32 dev_pos; + + dev_pos = 0; + while (1) + { + vlib_worker_thread_barrier_check (); + + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + if (PREDICT_FALSE (n_devs == 0)) + { + dev_pos = 0; + continue; + } + if (dev_pos >= n_devs) + dev_pos = 0; + + dpdk_device_and_queue_t *dq = + vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); + + dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; + u32 device_index = xd->device_index; + u16 queue_id = dq->queue_id; + + struct rte_mbuf **pkts_enq = hqos->pkts_enq; + struct rte_mbuf **pkts_deq = hqos->pkts_deq; + u32 pkts_enq_len = hqos->pkts_enq_len; + u32 swq_pos = hqos->swq_pos; + u32 n_swq = vec_len (hqos->swq), i; + u32 flush_count = hqos->flush_count; + + /* + * SWQ dequeue and HQoS enqueue for current device + */ + for (i = 0; i < n_swq; i++) + { + /* Get current SWQ for this device */ + struct rte_ring *swq = hqos->swq[swq_pos]; + + /* Read SWQ burst to packet buffer of this device */ + pkts_enq_len += rte_ring_sc_dequeue_burst (swq, + (void **) + &pkts_enq[pkts_enq_len], + hqos->hqos_burst_enq); + + /* Get next SWQ for this device */ + swq_pos++; + if (swq_pos >= n_swq) + swq_pos = 0; + hqos->swq_pos = swq_pos; + + /* HQoS enqueue when burst available */ + if (pkts_enq_len >= hqos->hqos_burst_enq) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, 
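+/*
+ * Editor's note: illustrative sketch only, not part of this commit.
+ * The loop above batches SWQ dequeues until hqos_burst_enq packets are
+ * buffered; flush_count prevents a partial batch from sitting forever
+ * under light load.  The decision, reduced to a hypothetical helper:
+ */
+#if 0
+#include <stdint.h>
+
+static inline int
+should_flush (uint32_t buffered, uint32_t burst, uint32_t * flush_count)
+{
+  if (buffered >= burst)
+    {
+      *flush_count = 0;
+      return 1;                 /* full burst ready */
+    }
+  if (buffered && ++(*flush_count) >= HQOS_FLUSH_COUNT_THRESHOLD)
+    {
+      *flush_count = 0;
+      return 1;                 /* aged-out partial burst */
+    }
+  return 0;
+}
+#endif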
pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + break; + } + } + if (pkts_enq_len) + { + flush_count++; + if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + } + } + hqos->pkts_enq_len = pkts_enq_len; + hqos->flush_count = flush_count; + + /* + * HQoS dequeue and HWQ TX enqueue for current device + */ + { + u32 pkts_deq_len, n_pkts; + + pkts_deq_len = rte_sched_port_dequeue (hqos->hqos, + pkts_deq, + hqos->hqos_burst_deq); + + for (n_pkts = 0; n_pkts < pkts_deq_len;) + n_pkts += rte_eth_tx_burst (device_index, + (uint16_t) queue_id, + &pkts_deq[n_pkts], + (uint16_t) (pkts_deq_len - n_pkts)); + } + + /* Advance to next device */ + dev_pos++; + } +} + +void +dpdk_hqos_thread (vlib_worker_thread_t * w) +{ + vlib_main_t *vm; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + + vm = vlib_get_main (); + + ASSERT (vm->cpu_index == os_get_cpu_number ()); + + clib_time_init (&vm->clib_time); + clib_mem_set_heap (w->thread_mheap); + + /* Wait until the dpdk init sequence is complete */ + while (tm->worker_thread_release == 0) + vlib_worker_thread_barrier_check (); + + if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0) + return + clib_error + ("current I/O TX thread does not have any devices assigned to it"); + + if (DPDK_HQOS_DBG_BYPASS) + dpdk_hqos_thread_internal_hqos_dbg_bypass (vm); + else + dpdk_hqos_thread_internal (vm); +} + +void +dpdk_hqos_thread_fn (void *arg) +{ + vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; + vlib_worker_thread_init (w); + dpdk_hqos_thread (w); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_THREAD (hqos_thread_reg, static) = +{ + .name = "hqos-threads", + .short_name = "hqos-threads", + .function = dpdk_hqos_thread_fn, +}; +/* *INDENT-ON* */ + +/* + * HQoS run-time code to be called by the worker threads + */ +#define BITFIELD(byte_array, slab_pos, slab_mask, slab_shr) \ +({ \ + u64 slab = *((u64 *) &byte_array[slab_pos]); \ + u64 val = (rte_be_to_cpu_64(slab) & slab_mask) >> slab_shr; \ + val; \ +}) + +#define RTE_SCHED_PORT_HIERARCHY(subport, pipe, traffic_class, queue, color) \ + ((((u64) (queue)) & 0x3) | \ + ((((u64) (traffic_class)) & 0x3) << 2) | \ + ((((u64) (color)) & 0x3) << 4) | \ + ((((u64) (subport)) & 0xFFFF) << 16) | \ + ((((u64) (pipe)) & 0xFFFFFFFF) << 32)) + +void +dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, + struct rte_mbuf **pkts, u32 n_pkts) +{ + u32 i; + + for (i = 0; i < (n_pkts & (~0x3)); i += 4) + { + struct rte_mbuf *pkt0 = pkts[i]; + struct rte_mbuf *pkt1 = pkts[i + 1]; + struct rte_mbuf *pkt2 = pkts[i + 2]; + struct rte_mbuf *pkt3 = pkts[i + 3]; + + u8 *pkt0_data = rte_pktmbuf_mtod (pkt0, u8 *); + u8 *pkt1_data = rte_pktmbuf_mtod (pkt1, u8 *); + u8 *pkt2_data = rte_pktmbuf_mtod (pkt2, u8 *); + u8 *pkt3_data = rte_pktmbuf_mtod (pkt3, u8 *); + + u64 pkt0_subport = BITFIELD (pkt0_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt0_pipe = BITFIELD (pkt0_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt0_dscp = BITFIELD (pkt0_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt0_tc = hqos->hqos_tc_table[pkt0_dscp & 0x3F] >> 2; + u32 pkt0_tc_q = hqos->hqos_tc_table[pkt0_dscp & 0x3F] & 0x3; + + u64 pkt1_subport = BITFIELD (pkt1_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + 
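+/*
+ * Editor's note: illustrative sketch only, not part of this commit.
+ * RTE_SCHED_PORT_HIERARCHY above packs queue (2 bits), traffic class
+ * (2 bits at bit 2), color (2 bits at bit 4), subport (16 bits at bit
+ * 16) and pipe (32 bits at bit 32) into the mbuf's 64-bit hash.sched
+ * word.  The inverse, for reference (hypothetical helper):
+ */
+#if 0
+#include <stdint.h>
+
+typedef struct
+{
+  uint32_t subport, pipe, tc, queue, color;
+} sched_fields_t;
+
+static inline sched_fields_t
+sched_unpack (uint64_t w)
+{
+  sched_fields_t f;
+  f.queue = w & 0x3;
+  f.tc = (w >> 2) & 0x3;
+  f.color = (w >> 4) & 0x3;
+  f.subport = (w >> 16) & 0xFFFF;
+  f.pipe = (w >> 32) & 0xFFFFFFFF;
+  return f;
+}
+#endif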
hqos->hqos_field0_slabshr); + u64 pkt1_pipe = BITFIELD (pkt1_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt1_dscp = BITFIELD (pkt1_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt1_tc = hqos->hqos_tc_table[pkt1_dscp & 0x3F] >> 2; + u32 pkt1_tc_q = hqos->hqos_tc_table[pkt1_dscp & 0x3F] & 0x3; + + u64 pkt2_subport = BITFIELD (pkt2_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt2_pipe = BITFIELD (pkt2_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt2_dscp = BITFIELD (pkt2_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt2_tc = hqos->hqos_tc_table[pkt2_dscp & 0x3F] >> 2; + u32 pkt2_tc_q = hqos->hqos_tc_table[pkt2_dscp & 0x3F] & 0x3; + + u64 pkt3_subport = BITFIELD (pkt3_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt3_pipe = BITFIELD (pkt3_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt3_dscp = BITFIELD (pkt3_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt3_tc = hqos->hqos_tc_table[pkt3_dscp & 0x3F] >> 2; + u32 pkt3_tc_q = hqos->hqos_tc_table[pkt3_dscp & 0x3F] & 0x3; + + u64 pkt0_sched = RTE_SCHED_PORT_HIERARCHY (pkt0_subport, + pkt0_pipe, + pkt0_tc, + pkt0_tc_q, + 0); + u64 pkt1_sched = RTE_SCHED_PORT_HIERARCHY (pkt1_subport, + pkt1_pipe, + pkt1_tc, + pkt1_tc_q, + 0); + u64 pkt2_sched = RTE_SCHED_PORT_HIERARCHY (pkt2_subport, + pkt2_pipe, + pkt2_tc, + pkt2_tc_q, + 0); + u64 pkt3_sched = RTE_SCHED_PORT_HIERARCHY (pkt3_subport, + pkt3_pipe, + pkt3_tc, + pkt3_tc_q, + 0); + + pkt0->hash.sched.lo = pkt0_sched & 0xFFFFFFFF; + pkt0->hash.sched.hi = pkt0_sched >> 32; + pkt1->hash.sched.lo = pkt1_sched & 0xFFFFFFFF; + pkt1->hash.sched.hi = pkt1_sched >> 32; + pkt2->hash.sched.lo = pkt2_sched & 0xFFFFFFFF; + pkt2->hash.sched.hi = pkt2_sched >> 32; + pkt3->hash.sched.lo = pkt3_sched & 0xFFFFFFFF; + pkt3->hash.sched.hi = pkt3_sched >> 32; + } + + for (; i < n_pkts; i++) + { + struct rte_mbuf *pkt = pkts[i]; + + u8 *pkt_data = rte_pktmbuf_mtod (pkt, u8 *); + + u64 pkt_subport = BITFIELD (pkt_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt_pipe = BITFIELD (pkt_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt_dscp = BITFIELD (pkt_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt_tc = hqos->hqos_tc_table[pkt_dscp & 0x3F] >> 2; + u32 pkt_tc_q = hqos->hqos_tc_table[pkt_dscp & 0x3F] & 0x3; + + u64 pkt_sched = RTE_SCHED_PORT_HIERARCHY (pkt_subport, + pkt_pipe, + pkt_tc, + pkt_tc_q, + 0); + + pkt->hash.sched.lo = pkt_sched & 0xFFFFFFFF; + pkt->hash.sched.hi = pkt_sched >> 32; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c new file mode 100755 index 00000000..693ca985 --- /dev/null +++ b/src/vnet/devices/dpdk/init.c @@ -0,0 +1,1803 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "dpdk_priv.h" + +dpdk_main_t dpdk_main; + +/* force linker to link functions used by vlib and declared weak */ +void *vlib_weakly_linked_functions[] = { + &rte_pktmbuf_init, + &rte_pktmbuf_pool_init, +}; + +#define LINK_STATE_ELOGS 0 + +#define DEFAULT_HUGE_DIR "/run/vpp/hugepages" +#define VPP_RUN_DIR "/run/vpp" + +/* Port configuration, mildly modified Intel app values */ + +static struct rte_eth_conf port_conf_template = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +clib_error_t * +dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_buffer_main_t *bm = vm->buffer_main; + int rv; + int j; + + ASSERT (os_get_cpu_number () == 0); + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); + rte_eth_dev_stop (xd->device_index); + } + + rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, + xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + /* Set up one TX-queue per worker thread */ + for (j = 0; j < xd->tx_q_used; j++) + { + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + xd->cpu_socket, &xd->tx_conf); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + SOCKET_ID_ANY, &xd->tx_conf); + if (rv < 0) + break; + } + + if (rv < 0) + return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", + xd->device_index, rv); + + for (j = 0; j < xd->rx_q_used; j++) + { + + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + xd->cpu_socket, 0, + bm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + SOCKET_ID_ANY, 0, + bm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); + if (rv < 0) + return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", + xd->device_index, rv); + } + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + int rv; + rv = rte_eth_dev_start (xd->device_index); + if (rv < 0) + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } + return 0; +} + +static u32 +dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + u32 old = 0; + + if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags)) + { + old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0; + + if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) + xd->flags |= 
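+/*
+ * Editor's note: illustrative sketch only, not part of this commit.
+ * dpdk_port_setup() above first asks for descriptor memory on the
+ * device's own NUMA node and retries with SOCKET_ID_ANY when that node
+ * has no suitable memory.  The pattern, reduced to a hypothetical
+ * helper:
+ */
+#if 0
+#include <rte_ethdev.h>
+
+static int
+tx_queue_setup_numa_fallback (uint8_t port, uint16_t q, uint16_t n_desc,
+                              int socket, const struct rte_eth_txconf *conf)
+{
+  int rv = rte_eth_tx_queue_setup (port, q, n_desc, socket, conf);
+  if (rv < 0)                   /* no memory on the local node */
+    rv = rte_eth_tx_queue_setup (port, q, n_desc, SOCKET_ID_ANY, conf);
+  return rv;
+}
+#endif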
DPDK_DEVICE_FLAG_PROMISC; + else + xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC; + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) + rte_eth_promiscuous_enable (xd->device_index); + else + rte_eth_promiscuous_disable (xd->device_index); + } + } + else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) + { + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_ENIC) + { + struct rte_eth_dev_info dev_info; + + /* + * Restore mtu to what has been set by CIMC in the firmware cfg. + */ + rte_eth_dev_info_get (xd->device_index, &dev_info); + hi->max_packet_bytes = dev_info.max_rx_pktlen; + + vlib_cli_output (vlib_get_main (), + "Cisco VIC mtu can only be changed " + "using CIMC then rebooting the server!"); + } + else + { + int rv; + + xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + rte_eth_dev_stop (xd->device_index); + + rv = rte_eth_dev_configure + (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + vlib_cli_output (vlib_get_main (), + "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + int rv = rte_eth_dev_start (xd->device_index); + if (rv < 0) + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } + } + } + return old; +} + +void +dpdk_device_lock_init (dpdk_device_t * xd) +{ + int q; + vec_validate (xd->lockp, xd->tx_q_used - 1); + for (q = 0; q < xd->tx_q_used; q++) + { + xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); + } +} + +void +dpdk_device_lock_free (dpdk_device_t * xd) +{ + int q; + + for (q = 0; q < vec_len (xd->lockp); q++) + clib_mem_free ((void *) xd->lockp[q]); + vec_free (xd->lockp); + xd->lockp = 0; +} + +static clib_error_t * +dpdk_lib_init (dpdk_main_t * dm) +{ + u32 nports; + u32 nb_desc = 0; + int i; + clib_error_t *error; + vlib_main_t *vm = vlib_get_main (); + vlib_thread_main_t *tm = vlib_get_thread_main (); + vnet_sw_interface_t *sw; + vnet_hw_interface_t *hi; + dpdk_device_t *xd; + vlib_pci_addr_t last_pci_addr; + u32 last_pci_addr_port = 0; + vlib_thread_registration_t *tr, *tr_hqos; + uword *p, *p_hqos; + + u32 next_cpu = 0, next_hqos_cpu = 0; + u8 af_packet_port_id = 0; + last_pci_addr.as_u32 = ~0; + + dm->input_cpu_first_index = 0; + dm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + dm->input_cpu_first_index = tr->first_index; + dm->input_cpu_count = tr->count; + } + + vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + vec_validate_aligned (dm->workers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + dm->hqos_cpu_first_index = 0; + dm->hqos_cpu_count = 0; + + /* find out which cpus will be used for I/O TX */ + p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads"); + tr_hqos = p_hqos ? 
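+/*
+ * Editor's note: illustrative sketch only, not part of this commit.
+ * The "workers" lookup earlier in this function and the "hqos-threads"
+ * lookup in progress here share one pattern: resolve a named thread
+ * class to its contiguous range of vlib main indices.  Factored out as
+ * a hypothetical helper:
+ */
+#if 0
+static void
+thread_range_by_name (vlib_thread_main_t * tm, char *name,
+                      int *first, int *count)
+{
+  uword *p = hash_get_mem (tm->thread_registrations_by_name, name);
+  vlib_thread_registration_t *tr =
+    p ? (vlib_thread_registration_t *) p[0] : 0;
+
+  *first = 0;
+  *count = 0;
+  if (tr && tr->count > 0)
+    {
+      *first = tr->first_index;
+      *count = tr->count;
+    }
+}
+#endif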
(vlib_thread_registration_t *) p_hqos[0] : 0; + + if (tr_hqos && tr_hqos->count > 0) + { + dm->hqos_cpu_first_index = tr_hqos->first_index; + dm->hqos_cpu_count = tr_hqos->count; + } + + vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + vec_validate_aligned (dm->hqos_threads, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + nports = rte_eth_dev_count (); + if (nports < 1) + { + clib_warning ("DPDK drivers found no ports..."); + } + + if (CLIB_DEBUG > 0) + clib_warning ("DPDK drivers found %d ports...", nports); + + /* + * All buffers are all allocated from the same rte_mempool. + * Thus they all have the same number of data bytes. + */ + dm->vlib_buffer_free_list_index = + vlib_buffer_get_or_create_free_list (vm, + VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, + "dpdk rx"); + + if (dm->conf->enable_tcp_udp_checksum) + dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT + | IP_BUFFER_L4_CHECKSUM_COMPUTED); + + for (i = 0; i < nports; i++) + { + u8 addr[6]; + u8 vlan_strip = 0; + int j; + struct rte_eth_dev_info dev_info; + clib_error_t *rv; + struct rte_eth_link l; + dpdk_device_config_t *devconf = 0; + vlib_pci_addr_t pci_addr; + uword *p = 0; + + rte_eth_dev_info_get (i, &dev_info); + if (dev_info.pci_dev) /* bonded interface has no pci info */ + { + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + p = + hash_get (dm->conf->device_config_index_by_pci_addr, + pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + /* Create vnet interface */ + vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); + xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; + xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; + xd->cpu_socket = (i8) rte_eth_dev_socket_id (i); + + /* Handle interface naming for devices with multiple ports sharing same PCI ID */ + if (dev_info.pci_dev) + { + struct rte_eth_dev_info di = { 0 }; + rte_eth_dev_info_get (i + 1, &di); + if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 && + memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr, + sizeof (struct rte_pci_addr)) == 0) + { + xd->interface_name_suffix = format (0, "0"); + last_pci_addr.as_u32 = pci_addr.as_u32; + last_pci_addr_port = i; + } + else if (pci_addr.as_u32 == last_pci_addr.as_u32) + { + xd->interface_name_suffix = + format (0, "%u", i - last_pci_addr_port); + } + else + { + last_pci_addr.as_u32 = ~0; + } + } + else + last_pci_addr.as_u32 = ~0; + + clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, + sizeof (struct rte_eth_txconf)); + if (dm->conf->no_multi_seg) + { + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 0; + } + else + { + xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 1; + xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG; + } + + clib_memcpy (&xd->port_conf, &port_conf_template, + sizeof (struct rte_eth_conf)); + + xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains); + + if (devconf->num_tx_queues > 0 + && devconf->num_tx_queues < xd->tx_q_used) + xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues); + + if (devconf->num_rx_queues > 1 && dm->use_rss == 0) + { + dm->use_rss = 1; + } + + if (devconf->num_rx_queues > 1 + && dev_info.max_rx_queues >= devconf->num_rx_queues) + { + xd->rx_q_used = 
devconf->num_rx_queues; + xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + if (devconf->rss_fn == 0) + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = + ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; + else + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; + } + else + xd->rx_q_used = 1; + + xd->flags |= DPDK_DEVICE_FLAG_PMD; + + /* workaround for drivers not setting driver_name */ + if ((!dev_info.driver_name) && (dev_info.pci_dev)) +#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0) + dev_info.driver_name = dev_info.pci_dev->driver->name; +#else + dev_info.driver_name = dev_info.pci_dev->driver->driver.name; +#endif + ASSERT (dev_info.driver_name); + + if (!xd->pmd) + { + + +#define _(s,f) else if (dev_info.driver_name && \ + !strcmp(dev_info.driver_name, s)) \ + xd->pmd = VNET_DPDK_PMD_##f; + if (0) + ; + foreach_dpdk_pmd +#undef _ + else + xd->pmd = VNET_DPDK_PMD_UNKNOWN; + + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; + xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; + + switch (xd->pmd) + { + /* 1G adapters */ + case VNET_DPDK_PMD_E1000EM: + case VNET_DPDK_PMD_IGB: + case VNET_DPDK_PMD_IGBVF: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + break; + + /* 10G adapters */ + case VNET_DPDK_PMD_IXGBE: + case VNET_DPDK_PMD_IXGBEVF: + case VNET_DPDK_PMD_THUNDERX: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case VNET_DPDK_PMD_DPAA2: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + + /* Cisco VIC */ + case VNET_DPDK_PMD_ENIC: + rte_eth_link_get_nowait (i, &l); + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + if (l.link_speed == 40000) + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + else + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + + /* Intel Fortville */ + case VNET_DPDK_PMD_I40E: + case VNET_DPDK_PMD_I40EVF: + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + + switch (dev_info.pci_dev->id.device_id) + { + case I40E_DEV_ID_10G_BASE_T: + case I40E_DEV_ID_SFP_XL710: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case I40E_DEV_ID_QSFP_A: + case I40E_DEV_ID_QSFP_B: + case I40E_DEV_ID_QSFP_C: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case I40E_DEV_ID_VF: + rte_eth_link_get_nowait (i, &l); + xd->port_type = l.link_speed == 10000 ? 
+ VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G;
+ break;
+ default:
+ xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
+ }
+ break;
+
+ case VNET_DPDK_PMD_CXGBE:
+ switch (dev_info.pci_dev->id.device_id)
+ {
+ case 0x540d: /* T580-CR */
+ case 0x5410: /* T580-LP-cr */
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
+ break;
+ case 0x5403: /* T540-CR */
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
+ break;
+ default:
+ xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
+ }
+ break;
+
+ case VNET_DPDK_PMD_MLX5:
+ {
+ char *pn_100g[] = { "MCX415A-CCAT", "MCX416A-CCAT", 0 };
+ char *pn_40g[] = { "MCX413A-BCAT", "MCX414A-BCAT",
+ "MCX415A-BCAT", "MCX416A-BCAT", "MCX4131A-BCAT", 0
+ };
+ char *pn_10g[] = { "MCX4111A-XCAT", "MCX4121A-XCAT", 0 };
+
+ vlib_pci_device_t *pd = vlib_get_pci_device (&pci_addr);
+ u8 *pn = 0;
+ char **c;
+ int found = 0;
+ pn = format (0, "%U%c",
+ format_vlib_pci_vpd, pd->vpd_r, "PN", 0);
+
+ if (!pn)
+ break;
+
+ c = pn_100g;
+ while (!found && c[0])
+ {
+ if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0)
+ {
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_100G;
+ break;
+ }
+ c++;
+ }
+
+ c = pn_40g;
+ while (!found && c[0])
+ {
+ if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0)
+ {
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
+ break;
+ }
+ c++;
+ }
+
+ c = pn_10g;
+ while (!found && c[0])
+ {
+ if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0)
+ {
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
+ break;
+ }
+ c++;
+ }
+
+ vec_free (pn);
+ }
+
+ break;
+ /* Intel Red Rock Canyon */
+ case VNET_DPDK_PMD_FM10K:
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH;
+ break;
+
+ /* virtio */
+ case VNET_DPDK_PMD_VIRTIO:
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
+ xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO;
+ xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO;
+ break;
+
+ /* vmxnet3 */
+ case VNET_DPDK_PMD_VMXNET3:
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
+ xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
+ break;
+
+ case VNET_DPDK_PMD_AF_PACKET:
+ xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET;
+ xd->af_packet_port_id = af_packet_port_id++;
+ break;
+
+ case VNET_DPDK_PMD_BOND:
+ xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND;
+ break;
+
+ default:
+ xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
+ }
+
+ if (devconf->num_rx_desc)
+ xd->nb_rx_desc = devconf->num_rx_desc;
+
+ if (devconf->num_tx_desc)
+ xd->nb_tx_desc = devconf->num_tx_desc;
+ }
+
+ /*
+ * Ensure default mtu is not > the mtu read from the hardware.
+ * Otherwise rte_eth_dev_configure() will fail and the port will
+ * not be available.
+ */
+ if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
+ {
+ /*
+ * This device does not support the platform's max frame
+ * size. Use its advertised mru instead.
+ */
+ xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
+ }
+ else
+ {
+ xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES;
+
+ /*
+ * Some platforms do not account for Ethernet FCS (4 bytes) in
+ * MTU calculations. To interop with them increase mru but only
+ * if the device's settings can support it.
+ */
+ if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) &&
+ xd->port_conf.rxmode.hw_strip_crc)
+ {
+ /*
+ * Allow additional 4 bytes (for Ethernet FCS). These bytes are
+ * stripped by h/w and so will not consume any buffer memory. 
+ */ + xd->port_conf.rxmode.max_rx_pkt_len += 4; + } + } + + if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + clib_memcpy (addr + 2, &rnd, sizeof (rnd)); + addr[0] = 2; + addr[1] = 0xfe; + } + else + rte_eth_macaddr_get (i, (struct ether_addr *) addr); + + if (xd->tx_q_used < tm->n_vlib_mains) + dpdk_device_lock_init (xd); + + xd->device_index = xd - dm->devices; + ASSERT (i == xd->device_index); + xd->per_interface_next_index = ~0; + + /* assign interface to input thread */ + dpdk_device_and_queue_t *dq; + int q; + + if (devconf->workers) + { + int i; + q = 0; + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, devconf->workers, ({ + int cpu = dm->input_cpu_first_index + i; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; + vec_validate(xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q++; + })); + /* *INDENT-ON* */ + } + else + for (q = 0; q < xd->rx_q_used; q++) + { + int cpu = dm->input_cpu_first_index + next_cpu; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; + + /* + * numa node for worker thread handling this queue + * needed for taking buffers from the right mempool + */ + vec_validate (xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore); + + /* + * construct vector of (device,queue) pairs for each worker thread + */ + vec_add2 (dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q; + + next_cpu++; + if (next_cpu == dm->input_cpu_count) + next_cpu = 0; + } + + + if (devconf->hqos_enabled) + { + xd->flags |= DPDK_DEVICE_FLAG_HQOS; + + if (devconf->hqos.hqos_thread_valid) + { + int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; + + if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count) + return clib_error_return (0, "invalid HQoS thread index"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + } + else + { + int cpu = dm->hqos_cpu_first_index + next_hqos_cpu; + + if (dm->hqos_cpu_count == 0) + return clib_error_return (0, "no HQoS threads available"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + + next_hqos_cpu++; + if (next_hqos_cpu == dm->hqos_cpu_count) + next_hqos_cpu = 0; + + devconf->hqos.hqos_thread_valid = 1; + devconf->hqos.hqos_thread = cpu; + } + } + + vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < tm->n_vlib_mains; j++) + { + vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc, + sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } + + vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < xd->rx_q_used; j++) + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } + + vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + + rv = dpdk_port_setup (dm, xd); + + if (rv) + return rv; + + if (devconf->hqos_enabled) + { + rv = dpdk_port_setup_hqos (xd, &devconf->hqos); + if (rv) + return rv; + } + + /* count the number of descriptors used for this device */ + nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; + + error = ethernet_register_interface + (dm->vnet_main, 
dpdk_device_class.index, xd->device_index, + /* ethernet address */ addr, + &xd->vlib_hw_if_index, dpdk_flag_change); + if (error) + return error; + + sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); + xd->vlib_sw_if_index = sw->sw_if_index; + hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); + + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_ENIC) + { + /* + * Initialize mtu to what has been set by CIMC in the firmware cfg. + */ + hi->max_packet_bytes = dev_info.max_rx_pktlen; + if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF) + vlan_strip = 1; /* remove vlan tag from VIC port by default */ + else + clib_warning ("VLAN strip disabled for interface\n"); + } + else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON) + vlan_strip = 1; + + if (vlan_strip) + { + int vlan_off; + vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); + vlan_off |= ETH_VLAN_STRIP_OFFLOAD; + xd->port_conf.rxmode.hw_vlan_strip = vlan_off; + if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0) + clib_warning ("VLAN strip enabled for interface\n"); + else + clib_warning ("VLAN strip cannot be supported by interface\n"); + } + + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = + xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); + + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); + } + + if (nb_desc > dm->conf->num_mbufs) + clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", + dm->conf->num_mbufs, nb_desc); + + return 0; +} + +static void +dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) +{ + vlib_pci_main_t *pm = &pci_main; + clib_error_t *error; + vlib_pci_device_t *d; + u8 *pci_addr = 0; + int num_whitelisted = vec_len (conf->dev_confs); + + /* *INDENT-OFF* */ + pool_foreach (d, pm->pci_devs, ({ + dpdk_device_config_t * devconf = 0; + vec_reset_length (pci_addr); + pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); + + if (d->device_class != PCI_CLASS_NETWORK_ETHERNET) + continue; + + if (num_whitelisted) + { + uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32); + + if (!p) + continue; + + devconf = pool_elt_at_index (conf->dev_confs, p[0]); + } + + /* virtio */ + if (d->vendor_id == 0x1af4 && d->device_id == 0x1000) + ; + /* vmxnet3 */ + else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0) + ; + /* all Intel devices */ + else if (d->vendor_id == 0x8086) + ; + /* Cisco VIC */ + else if (d->vendor_id == 0x1137 && d->device_id == 0x0043) + ; + /* Chelsio T4/T5 */ + else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000) + ; + else + { + clib_warning ("Unsupported Ethernet PCI device 0x%04x:0x%04x found " + "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id, + pci_addr); + continue; + } + + error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name); + + if (error) + { + if (devconf == 0) + { + pool_get (conf->dev_confs, devconf); + hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32, + devconf - conf->dev_confs); + devconf->pci_addr.as_u32 = d->bus_address.as_u32; + } + devconf->is_blacklisted = 1; + clib_error_report (error); + } + })); + /* *INDENT-ON* */ + vec_free (pci_addr); +} + +static clib_error_t * +dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, + 
unformat_input_t * input, u8 is_default)
+{
+ clib_error_t *error = 0;
+ uword *p;
+ dpdk_device_config_t *devconf;
+ unformat_input_t sub_input;
+
+ if (is_default)
+ {
+ devconf = &conf->default_devconf;
+ }
+ else
+ {
+ p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+
+ if (!p)
+ {
+ pool_get (conf->dev_confs, devconf);
+ hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32,
+ devconf - conf->dev_confs);
+ }
+ else
+ return clib_error_return (0,
+ "duplicate configuration for PCI address %U",
+ format_vlib_pci_addr, &pci_addr);
+ }
+
+ devconf->pci_addr.as_u32 = pci_addr.as_u32;
+ devconf->hqos_enabled = 0;
+ dpdk_device_config_hqos_default (&devconf->hqos);
+
+ if (!input)
+ return 0;
+
+ unformat_skip_white_space (input);
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues))
+ ;
+ else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues))
+ ;
+ else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc))
+ ;
+ else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
+ ;
+ else if (unformat (input, "workers %U", unformat_bitmap_list,
+ &devconf->workers))
+ ;
+ else
+ if (unformat
+ (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input))
+ {
+ error = unformat_rss_fn (&sub_input, &devconf->rss_fn);
+ if (error)
+ break;
+ }
+ else if (unformat (input, "vlan-strip-offload off"))
+ devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF;
+ else if (unformat (input, "vlan-strip-offload on"))
+ devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON;
+ else
+ if (unformat
+ (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input))
+ {
+ devconf->hqos_enabled = 1;
+ error = unformat_hqos (&sub_input, &devconf->hqos);
+ if (error)
+ break;
+ }
+ else if (unformat (input, "hqos"))
+ {
+ devconf->hqos_enabled = 1;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+
+ if (error)
+ return error;
+
+ if (devconf->workers && devconf->num_rx_queues == 0)
+ devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers);
+ else if (devconf->workers &&
+ clib_bitmap_count_set_bits (devconf->workers) !=
+ devconf->num_rx_queues)
+ error =
+ clib_error_return (0,
+ "%U: number of worker threads must be "
+ "equal to number of rx queues", format_vlib_pci_addr,
+ &pci_addr);
+
+ return error;
+}
+
+static clib_error_t *
+dpdk_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ clib_error_t *error = 0;
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_config_main_t *conf = &dpdk_config_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ dpdk_device_config_t *devconf;
+ vlib_pci_addr_t pci_addr;
+ unformat_input_t sub_input;
+ u8 *s, *tmp = 0;
+ u8 *rte_cmd = 0, *ethname = 0;
+ u32 log_level;
+ int ret, i;
+ int num_whitelisted = 0;
+ u8 no_pci = 0;
+ u8 no_huge = 0;
+ u8 huge_dir = 0;
+ u8 file_prefix = 0;
+ u8 *socket_mem = 0;
+
+ conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ /* Prime the pump */
+ if (unformat (input, "no-hugetlb"))
+ {
+ vec_add1 (conf->eal_init_args, (u8 *) "no-huge");
+ no_huge = 1;
+ }
+
+ else if (unformat (input, "enable-tcp-udp-checksum"))
+ conf->enable_tcp_udp_checksum = 1;
+
+ else if (unformat (input, "decimal-interface-names"))
+ conf->interface_name_format_decimal = 1;
+
+ else if (unformat (input, "no-multi-seg"))
+ conf->no_multi_seg = 1;
+
+ 
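+ /* Illustrative startup.conf fragment for the "dev" clauses parsed
+ * below (the per-device sub-options are handled by
+ * dpdk_device_config() above); addresses and values here are
+ * examples only:
+ *
+ * dpdk {
+ * dev default { num-rx-desc 512 num-tx-desc 512 }
+ * dev 0000:02:00.0 { num-rx-queues 2 workers 0-1 }
+ * }
+ */
+ 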
else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input, + &sub_input)) + { + error = + dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input, + 1); + + if (error) + return error; + } + else + if (unformat + (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, + unformat_vlib_cli_sub_input, &sub_input)) + { + error = dpdk_device_config (conf, pci_addr, &sub_input, 0); + + if (error) + return error; + + num_whitelisted++; + } + else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr)) + { + error = dpdk_device_config (conf, pci_addr, 0, 0); + + if (error) + return error; + + num_whitelisted++; + } + else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) + ; + else if (unformat (input, "kni %d", &conf->num_kni)) + ; + else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) + ; + else if (unformat (input, "socket-mem %s", &socket_mem)) + ; + else if (unformat (input, "no-pci")) + { + no_pci = 1; + tmp = format (0, "--no-pci%c", 0); + vec_add1 (conf->eal_init_args, tmp); + } + else if (unformat (input, "poll-sleep %d", &dm->poll_sleep)) + ; + +#define _(a) \ + else if (unformat(input, #a)) \ + { \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + } + foreach_eal_double_hyphen_predicate_arg +#undef _ +#define _(a) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + if (!strncmp(#a, "huge-dir", 8)) \ + huge_dir = 1; \ + else if (!strncmp(#a, "file-prefix", 11)) \ + file_prefix = 1; \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (conf->eal_init_args, s); \ + } + foreach_eal_double_hyphen_arg +#undef _ +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (conf->eal_init_args, s); \ + } + foreach_eal_single_hyphen_arg +#undef _ +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (conf->eal_init_args, s); \ + conf->a##_set_manually = 1; \ + } + foreach_eal_single_hyphen_mandatory_arg +#undef _ + else if (unformat (input, "default")) + ; + + else if (unformat_skip_white_space (input)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (!conf->uio_driver_name) + conf->uio_driver_name = format (0, "igb_uio%c", 0); + + /* + * Use 1G huge pages if available. + */ + if (!no_huge && !huge_dir) + { + u32 x, *mem_by_socket = 0; + uword c = 0; + u8 use_1g = 1; + u8 use_2m = 1; + u8 less_than_1g = 1; + int rv; + + umount (DEFAULT_HUGE_DIR); + + /* Process "socket-mem" parameter value */ + if (vec_len (socket_mem)) + { + unformat_input_t in; + unformat_init_vector (&in, socket_mem); + while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT) + { + if (unformat (&in, "%u,", &x)) + ; + else if (unformat (&in, "%u", &x)) + ; + else if (unformat (&in, ",")) + x = 0; + else + break; + + vec_add1 (mem_by_socket, x); + + if (x > 1023) + less_than_1g = 0; + } + /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... 
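+ (hence socket_mem is reset to 0 right after the call and the string
+ is regenerated from mem_by_socket further down)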
*/
+ unformat_free (&in);
+ socket_mem = 0;
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
+ {
+ vec_validate(mem_by_socket, c);
+ mem_by_socket[c] = 256; /* default per-socket mem */
+ }
+ ));
+ /* *INDENT-ON* */
+ }
+
+ /* check whether enough 1GB pages are available for each socket */
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
+ {
+ int pages_avail, page_size, mem;
+
+ vec_validate(mem_by_socket, c);
+ mem = mem_by_socket[c];
+
+ page_size = 1024;
+ pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024);
+
+ if (pages_avail < 0 || page_size * pages_avail < mem)
+ use_1g = 0;
+
+ page_size = 2;
+ pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024);
+
+ if (pages_avail < 0 || page_size * pages_avail < mem)
+ use_2m = 0;
+ }));
+ /* *INDENT-ON* */
+
+ if (mem_by_socket == 0)
+ {
+ error = clib_error_return (0, "mem_by_socket NULL");
+ goto done;
+ }
+ _vec_len (mem_by_socket) = c + 1;
+
+ /* regenerate socket_mem string */
+ vec_foreach_index (x, mem_by_socket)
+ socket_mem = format (socket_mem, "%s%u",
+ socket_mem ? "," : "", mem_by_socket[x]);
+ socket_mem = format (socket_mem, "%c", 0);
+
+ vec_free (mem_by_socket);
+
+ rv = mkdir (VPP_RUN_DIR, 0755);
+ if (rv && errno != EEXIST)
+ {
+ error = clib_error_return (0, "mkdir '%s' failed errno %d",
+ VPP_RUN_DIR, errno);
+ goto done;
+ }
+
+ rv = mkdir (DEFAULT_HUGE_DIR, 0755);
+ if (rv && errno != EEXIST)
+ {
+ error = clib_error_return (0, "mkdir '%s' failed errno %d",
+ DEFAULT_HUGE_DIR, errno);
+ goto done;
+ }
+
+ if (use_1g && !(less_than_1g && use_2m))
+ {
+ rv =
+ mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G");
+ }
+ else if (use_2m)
+ {
+ rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL);
+ }
+ else
+ {
+ return clib_error_return (0, "not enough free huge pages");
+ }
+
+ if (rv)
+ {
+ error = clib_error_return (0, "mount failed %d", errno);
+ goto done;
+ }
+
+ tmp = format (0, "--huge-dir%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ if (!file_prefix)
+ {
+ tmp = format (0, "--file-prefix%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ tmp = format (0, "vpp%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ }
+ }
+
+ vec_free (rte_cmd);
+ vec_free (ethname);
+
+ if (error)
+ return error;
+
+ /* I'll bet that -c and -n must be the first and second args... 
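+ (below, "-c <coremask>" is inserted at argv[1]/argv[2] and
+ "-n <nchannels>" at argv[3]/argv[4], unless either was set manually
+ in the config)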
*/ + if (!conf->coremask_set_manually) + { + vlib_thread_registration_t *tr; + uword *coremask = 0; + int i; + + /* main thread core */ + coremask = clib_bitmap_set (coremask, tm->main_lcore, 1); + + for (i = 0; i < vec_len (tm->registrations); i++) + { + tr = tm->registrations[i]; + coremask = clib_bitmap_or (coremask, tr->coremask); + } + + vec_insert (conf->eal_init_args, 2, 1); + conf->eal_init_args[1] = (u8 *) "-c"; + tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); + conf->eal_init_args[2] = tmp; + clib_bitmap_free (coremask); + } + + if (!conf->nchannels_set_manually) + { + vec_insert (conf->eal_init_args, 2, 3); + conf->eal_init_args[3] = (u8 *) "-n"; + tmp = format (0, "%d", conf->nchannels); + conf->eal_init_args[4] = tmp; + } + + if (no_pci == 0 && geteuid () == 0) + dpdk_bind_devices_to_uio (conf); + +#define _(x) \ + if (devconf->x == 0 && conf->default_devconf.x > 0) \ + devconf->x = conf->default_devconf.x ; + + /* *INDENT-OFF* */ + pool_foreach (devconf, conf->dev_confs, ({ + + /* default per-device config items */ + foreach_dpdk_device_config_item + + /* add DPDK EAL whitelist/blacklist entry */ + if (num_whitelisted > 0 && devconf->is_blacklisted == 0) + { + tmp = format (0, "-w%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + else if (num_whitelisted == 0 && devconf->is_blacklisted != 0) + { + tmp = format (0, "-b%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + })); + /* *INDENT-ON* */ + +#undef _ + + /* set master-lcore */ + tmp = format (0, "--master-lcore%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%u%c", tm->main_lcore, 0); + vec_add1 (conf->eal_init_args, tmp); + + /* set socket-mem */ + tmp = format (0, "--socket-mem%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%s%c", socket_mem, 0); + vec_add1 (conf->eal_init_args, tmp); + + /* NULL terminate the "argv" vector, in case of stupidity */ + vec_add1 (conf->eal_init_args, 0); + _vec_len (conf->eal_init_args) -= 1; + + /* Set up DPDK eal and packet mbuf pool early. */ + + log_level = (CLIB_DEBUG > 0) ? 
RTE_LOG_DEBUG : RTE_LOG_NOTICE; + + rte_set_log_level (log_level); + + vm = vlib_get_main (); + + /* make copy of args as rte_eal_init tends to mess up with arg array */ + for (i = 1; i < vec_len (conf->eal_init_args); i++) + conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", + conf->eal_init_args[i]); + + ret = + rte_eal_init (vec_len (conf->eal_init_args), + (char **) conf->eal_init_args); + + /* lazy umount hugepages */ + umount2 (DEFAULT_HUGE_DIR, MNT_DETACH); + + if (ret < 0) + return clib_error_return (0, "rte_eal_init returned %d", ret); + + /* Dump the physical memory layout prior to creating the mbuf_pool */ + fprintf (stdout, "DPDK physical memory layout:\n"); + rte_dump_physmem_layout (stdout); + + /* main thread 1st */ + error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); + if (error) + return error; + + for (i = 0; i < RTE_MAX_LCORE; i++) + { + error = vlib_buffer_pool_create (vm, conf->num_mbufs, + rte_lcore_to_socket_id (i)); + if (error) + return error; + } + +done: + return error; +} + +VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk"); + +void +dpdk_update_link_state (dpdk_device_t * xd, f64 now) +{ + vnet_main_t *vnm = vnet_get_main (); + struct rte_eth_link prev_link = xd->link; + u32 hw_flags = 0; + u8 hw_flags_chg = 0; + + /* only update link state for PMD interfaces */ + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + return; + + xd->time_last_link_update = now ? now : xd->time_last_link_update; + memset (&xd->link, 0, sizeof (xd->link)); + rte_eth_link_get_nowait (xd->device_index, &xd->link); + + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + ELOG_TYPE_DECLARE (e) = + { + .format = + "update-link-state: sw_if_index %d, admin_up %d," + "old link_state %d new link_state %d",.format_args = "i4i1i1i1",}; + + struct + { + u32 sw_if_index; + u8 admin_up; + u8 old_link_state; + u8 new_link_state; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0; + ed->old_link_state = (u8) + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); + ed->new_link_state = (u8) xd->link.link_status; + } + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) && + ((xd->link.link_status != 0) ^ + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) + { + hw_flags_chg = 1; + hw_flags |= (xd->link.link_status ? 
VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + } + + if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) + { + hw_flags_chg = 1; + switch (xd->link.link_duplex) + { + case ETH_LINK_HALF_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; + break; + case ETH_LINK_FULL_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; + break; + default: + break; + } + } + if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) + { + hw_flags_chg = 1; + switch (xd->link.link_speed) + { + case ETH_SPEED_NUM_10M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; + break; + case ETH_SPEED_NUM_100M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; + break; + case ETH_SPEED_NUM_1G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; + break; + case ETH_SPEED_NUM_10G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; + break; + case ETH_SPEED_NUM_40G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; + break; + case 0: + break; + default: + clib_warning ("unknown link speed %d", xd->link.link_speed); + break; + } + } + if (hw_flags_chg) + { + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + + ELOG_TYPE_DECLARE (e) = + { + .format = + "update-link-state: sw_if_index %d, new flags %d",.format_args + = "i4i4",}; + + struct + { + u32 sw_if_index; + u32 flags; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->flags = hw_flags; + } + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); + } +} + +static uword +dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + clib_error_t *error; + vnet_main_t *vnm = vnet_get_main (); + dpdk_main_t *dm = &dpdk_main; + ethernet_main_t *em = ðernet_main; + dpdk_device_t *xd; + vlib_thread_main_t *tm = vlib_get_thread_main (); + int i; + + error = dpdk_lib_init (dm); + + /* + * Turn on the input node if we found some devices to drive + * and we're not running worker threads or i/o threads + */ + + if (error == 0 && vec_len (dm->devices) > 0) + { + if (tm->n_vlib_mains == 1) + vlib_node_set_state (vm, dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + else + for (i = 0; i < tm->n_vlib_mains; i++) + if (vec_len (dm->devices_by_cpu[i]) > 0) + vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + } + + if (error) + clib_error_report (error); + + tm->worker_thread_release = 1; + + f64 now = vlib_time_now (vm); + vec_foreach (xd, dm->devices) + { + dpdk_update_link_state (xd, now); + } + + { + /* + * Extra set up for bond interfaces: + * 1. Setup MACs for bond interfaces and their slave links which was set + * in dpdk_port_setup() but needs to be done again here to take effect. + * 2. Set up info for bond interface related CLI support. 
+ */
+ int nports = rte_eth_dev_count ();
+ if (nports > 0)
+ {
+ for (i = 0; i < nports; i++)
+ {
+ struct rte_eth_dev_info dev_info;
+ rte_eth_dev_info_get (i, &dev_info);
+ if (!dev_info.driver_name)
+#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
+ dev_info.driver_name = dev_info.pci_dev->driver->name;
+#else
+ dev_info.driver_name = dev_info.pci_dev->driver->driver.name;
+#endif
+ ASSERT (dev_info.driver_name);
+ if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0)
+ {
+ u8 addr[6];
+ u8 slink[16];
+ int nlink = rte_eth_bond_slaves_get (i, slink, 16);
+ if (nlink > 0)
+ {
+ vnet_hw_interface_t *bhi;
+ ethernet_interface_t *bei;
+ int rv;
+
+ /* Get MAC of 1st slave link */
+ rte_eth_macaddr_get (slink[0],
+ (struct ether_addr *) addr);
+ /* Set MAC of bonded interface to that of 1st slave link */
+ rv =
+ rte_eth_bond_mac_address_set (i,
+ (struct ether_addr *)
+ addr);
+ if (rv < 0)
+ clib_warning ("Failed to set MAC address");
+
+ /* Populate MAC of bonded interface in VPP hw tables */
+ bhi =
+ vnet_get_hw_interface (vnm,
+ dm->devices[i].vlib_hw_if_index);
+ bei =
+ pool_elt_at_index (em->interfaces, bhi->hw_instance);
+ clib_memcpy (bhi->hw_address, addr, 6);
+ clib_memcpy (bei->address, addr, 6);
+ /* Init l3 packet size allowed on bonded interface */
+ bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES;
+ bhi->max_l3_packet_bytes[VLIB_RX] =
+ bhi->max_l3_packet_bytes[VLIB_TX] =
+ ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t);
+ while (nlink >= 1)
+ { /* for all slave links */
+ int slave = slink[--nlink];
+ dpdk_device_t *sdev = &dm->devices[slave];
+ vnet_hw_interface_t *shi;
+ vnet_sw_interface_t *ssi;
+ /* Add MAC to all slave links except the first one */
+ if (nlink)
+ rte_eth_dev_mac_addr_add (slave,
+ (struct ether_addr *)
+ addr, 0);
+ /* Set slaves bitmap for bonded interface */
+ bhi->bond_info =
+ clib_bitmap_set (bhi->bond_info,
+ sdev->vlib_hw_if_index, 1);
+ /* Set slave link flags on slave interface */
+ shi =
+ vnet_get_hw_interface (vnm, sdev->vlib_hw_if_index);
+ ssi =
+ vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index);
+ shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE;
+ ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE;
+
+ /* Set l3 packet size allowed as the lowest of the slaves */
+ if (bhi->max_l3_packet_bytes[VLIB_RX] >
+ shi->max_l3_packet_bytes[VLIB_RX])
+ bhi->max_l3_packet_bytes[VLIB_RX] =
+ bhi->max_l3_packet_bytes[VLIB_TX] =
+ shi->max_l3_packet_bytes[VLIB_RX];
+
+ /* Set max packet size allowed as the lowest of the slaves */
+ if (bhi->max_packet_bytes > shi->max_packet_bytes)
+ bhi->max_packet_bytes = shi->max_packet_bytes;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ while (1)
+ {
+ /*
+ * check each time through the loop in case intervals are changed
+ */
+ f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ? 
+ dm->link_state_poll_interval : dm->stat_poll_interval; + + vlib_process_wait_for_event_or_clock (vm, min_wait); + + if (dm->admin_up_down_in_progress) + /* skip the poll if an admin up down is in progress (on any interface) */ + continue; + + vec_foreach (xd, dm->devices) + { + f64 now = vlib_time_now (vm); + if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) + dpdk_update_counters (xd, now); + if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) + dpdk_update_link_state (xd, now); + + } + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_process_node,static) = { + .function = dpdk_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "dpdk-process", + .process_log2_n_stack_bytes = 17, +}; +/* *INDENT-ON* */ + +int +dpdk_set_stat_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_STATS_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.stat_poll_interval = interval; + + return 0; +} + +int +dpdk_set_link_state_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_LINK_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.link_state_poll_interval = interval; + + return 0; +} + +clib_error_t * +dpdk_init (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_node_t *ei; + clib_error_t *error = 0; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + /* verify that structs are cacheline aligned */ + STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0, + "Cache line marker must be 1st element in dpdk_device_t"); + STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) == + CLIB_CACHE_LINE_BYTES, + "Data in cache line 0 is bigger than cache line size"); + STATIC_ASSERT (offsetof (dpdk_worker_t, cacheline0) == 0, + "Cache line marker must be 1st element in dpdk_worker_t"); + STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, + "Cache line marker must be 1st element in frame_queue_trace_t"); + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main (); + dm->conf = &dpdk_config_main; + + ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); + if (ei == 0) + return clib_error_return (0, "ethernet-input node AWOL"); + + dm->ethernet_input_node_index = ei->index; + + dm->conf->nchannels = 4; + dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; + vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); + + dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword)); + + /* $$$ use n_thread_stacks since it's known-good at this point */ + vec_validate (dm->recycle, tm->n_thread_stacks - 1); + + /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... 
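+ (when "enable-tcp-udp-checksum" is set in the dpdk config stanza,
+ dpdk_lib_init() clears the two L4 checksum flags from this template
+ again, so that checksums are then verified in software)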
*/ + dm->buffer_flags_template = + (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_RTE_MBUF_VALID + | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); + + dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; + dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; + + /* init CLI */ + if ((error = vlib_call_init_function (vm, dpdk_cli_init))) + return error; + + return error; +} + +VLIB_INIT_FUNCTION (dpdk_init); + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/ipsec/cli.c b/src/vnet/devices/dpdk/ipsec/cli.c new file mode 100644 index 00000000..3b634e03 --- /dev/null +++ b/src/vnet/devices/dpdk/ipsec/cli.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +static void +dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) +{ + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 i, skip_master; + + if (detail_display) + vlib_cli_output (vm, "worker\t%10s\t%15s\tdir\tdev\tqp\n", + "cipher", "auth"); + else + vlib_cli_output (vm, "worker\tcrypto device id(type)\n"); + + skip_master = vlib_num_workers () > 0; + + for (i = 0; i < tm->n_vlib_mains; i++) + { + uword key, data; + u32 cpu_index = vlib_mains[i]->cpu_index; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + u8 *s = 0; + + if (skip_master) + { + skip_master = 0; + continue; + } + + if (!detail_display) + { + i32 last_cdev = -1; + crypto_qp_data_t *qpd; + + s = format (s, "%u\t", cpu_index); + + /* *INDENT-OFF* */ + vec_foreach (qpd, cwm->qp_data) + { + u32 dev_id = qpd->dev_id; + + if ((u16) last_cdev != dev_id) + { + struct rte_cryptodev_info cdev_info; + + rte_cryptodev_info_get (dev_id, &cdev_info); + + s = format(s, "%u(%s)\t", dev_id, cdev_info.feature_flags & + RTE_CRYPTODEV_FF_HW_ACCELERATED ? "HW" : "SW"); + } + last_cdev = dev_id; + } + /* *INDENT-ON* */ + vlib_cli_output (vm, "%s", s); + } + else + { + char cipher_str[15], auth_str[15]; + struct rte_cryptodev_capabilities cap; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; + /* *INDENT-OFF* */ + hash_foreach (key, data, cwm->algo_qp_map, + ({ + cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER; + cap.sym.cipher.algo = p_key->cipher_algo; + check_algo_is_supported (&cap, cipher_str); + cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AUTH; + cap.sym.auth.algo = p_key->auth_algo; + check_algo_is_supported (&cap, auth_str); + vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n", + vlib_mains[i]->cpu_index, cipher_str, auth_str, + p_key->is_outbound ? 
"out" : "in", + cwm->qp_data[data].dev_id, + cwm->qp_data[data].qp_id); + })); + /* *INDENT-ON* */ + } + } +} + +static clib_error_t * +lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u16 detail = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "verbose")) + detail = 1; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + dpdk_ipsec_show_mapping (vm, detail); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (lcore_cryptodev_map, static) = { + .path = "show crypto device mapping", + .short_help = + "show cryptodev device mapping ", + .function = lcore_cryptodev_map_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/ipsec/crypto_node.c b/src/vnet/devices/dpdk/ipsec/crypto_node.c new file mode 100644 index 00000000..7b32704e --- /dev/null +++ b/src/vnet/devices/dpdk/ipsec/crypto_node.c @@ -0,0 +1,210 @@ +/* + *------------------------------------------------------------------ + * crypto_node.c - DPDK Cryptodev input node + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include +#include +#include + +#include + +#define foreach_dpdk_crypto_input_next \ + _(DROP, "error-drop") \ + _(ENCRYPT_POST, "dpdk-esp-encrypt-post") \ + _(DECRYPT_POST, "dpdk-esp-decrypt-post") + +typedef enum +{ +#define _(f,s) DPDK_CRYPTO_INPUT_NEXT_##f, + foreach_dpdk_crypto_input_next +#undef _ + DPDK_CRYPTO_INPUT_N_NEXT, +} dpdk_crypto_input_next_t; + +#define foreach_dpdk_crypto_input_error \ + _(DQ_COPS, "Crypto ops dequeued") \ + _(COP_FAILED, "Crypto op failed") + +typedef enum +{ +#define _(f,s) DPDK_CRYPTO_INPUT_ERROR_##f, + foreach_dpdk_crypto_input_error +#undef _ + DPDK_CRYPTO_INPUT_N_ERROR, +} dpdk_crypto_input_error_t; + +static char *dpdk_crypto_input_error_strings[] = { +#define _(n, s) s, + foreach_dpdk_crypto_input_error +#undef _ +}; + +vlib_node_registration_t dpdk_crypto_input_node; + +typedef struct +{ + u32 cdev; + u32 qp; + u32 status; + u32 sa_idx; + u32 next_index; +} dpdk_crypto_input_trace_t; + +static u8 * +format_dpdk_crypto_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + dpdk_crypto_input_trace_t *t = va_arg (*args, dpdk_crypto_input_trace_t *); + + s = format (s, "dpdk_crypto: cryptodev-id %u queue-pair %u next-index %d", + t->cdev, t->qp, t->next_index); + + s = format (s, " status %u sa-idx %u\n", t->status, t->sa_idx); + + return s; +} + +static_always_inline u32 +dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, + crypto_qp_data_t * qpd) +{ + u32 n_deq, *to_next = 0, next_index, n_cops, def_next_index; + struct rte_crypto_op **cops = qpd->cops; + + if (qpd->inflights == 0) + return 0; + + if (qpd->is_outbound) + def_next_index = DPDK_CRYPTO_INPUT_NEXT_ENCRYPT_POST; + else + def_next_index = DPDK_CRYPTO_INPUT_NEXT_DECRYPT_POST; + + n_cops = rte_cryptodev_dequeue_burst (qpd->dev_id, qpd->qp_id, + cops, VLIB_FRAME_SIZE); + n_deq = n_cops; + next_index = def_next_index; + + qpd->inflights -= n_cops; + ASSERT (qpd->inflights >= 0); + + while (n_cops > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_cops > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + struct rte_crypto_op *cop; + struct rte_crypto_sym_op *sym_cop; + + cop = cops[0]; + cops += 1; + n_cops -= 1; + n_left_to_next -= 1; + + next0 = def_next_index; + + if (PREDICT_FALSE (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) + { + next0 = DPDK_CRYPTO_INPUT_NEXT_DROP; + vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, + DPDK_CRYPTO_INPUT_ERROR_COP_FAILED, + 1); + } + cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; + + sym_cop = (struct rte_crypto_sym_op *) (cop + 1); + b0 = vlib_buffer_from_rte_mbuf (sym_cop->m_src); + bi0 = vlib_get_buffer_index (vm, b0); + + to_next[0] = bi0; + to_next += 1; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_next_frame (vm, node, next0); + dpdk_crypto_input_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->cdev = qpd->dev_id; + tr->qp = qpd->qp_id; + tr->status = cop->status; + tr->next_index = next0; + tr->sa_idx = vnet_buffer (b0)->ipsec.sad_index; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + crypto_free_cop (qpd, qpd->cops, n_deq); + + 
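+ /* n_deq was latched before the loop above consumed the cops array,
+ * so it still counts every op dequeued in this pass */
+ 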
vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, + DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq); + return n_deq; +} + +static uword +dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + crypto_qp_data_t *qpd; + u32 n_deq = 0; + + /* *INDENT-OFF* */ + vec_foreach (qpd, cwm->qp_data) + n_deq += dpdk_crypto_dequeue(vm, node, qpd); + /* *INDENT-ON* */ + + return n_deq; +} + +VLIB_REGISTER_NODE (dpdk_crypto_input_node) = +{ + .function = dpdk_crypto_input_fn,.name = "dpdk-crypto-input",.format_trace = + format_dpdk_crypto_input_trace,.type = VLIB_NODE_TYPE_INPUT,.state = + VLIB_NODE_STATE_DISABLED,.n_errors = + DPDK_CRYPTO_INPUT_N_ERROR,.error_strings = + dpdk_crypto_input_error_strings,.n_next_nodes = + DPDK_CRYPTO_INPUT_N_NEXT,.next_nodes = + { +#define _(s,n) [DPDK_CRYPTO_INPUT_NEXT_##s] = n, + foreach_dpdk_crypto_input_next +#undef _ + } +,}; + +#if DPDK_CRYPTO==1 +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_crypto_input_node, dpdk_crypto_input_fn) +#endif +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/ipsec/dir.dox b/src/vnet/devices/dpdk/ipsec/dir.dox new file mode 100644 index 00000000..ffebfc4d --- /dev/null +++ b/src/vnet/devices/dpdk/ipsec/dir.dox @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + @dir vnet/vnet/devices/dpdk/ipsec + @brief IPSec ESP encrypt/decrypt using DPDK Cryptodev API +*/ diff --git a/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md b/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md new file mode 100644 index 00000000..8089696f --- /dev/null +++ b/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md @@ -0,0 +1,73 @@ +# VPP IPSec implementation using DPDK Cryptodev API {#dpdk_crypto_ipsec_doc} + +This document is meant to contain all related information about implementation and usability. + + +## VPP IPsec with DPDK Cryptodev + +DPDK Cryptodev is an asynchronous crypto API that supports both Hardware and Software implementations (for more details refer to [DPDK Cryptography Device Library documentation](http://dpdk.org/doc/guides/prog_guide/cryptodev_lib.html)). + +When DPDK Cryptodev support is enabled, the node graph is modified by adding and replacing some of the nodes. + +The following nodes are replaced: +* esp-encrypt -> dpdk-esp-encrypt +* esp-decrypt -> dpdk-esp-decrypt + +The following nodes are added: +* dpdk-crypto-input : polling input node, basically dequeuing from crypto devices. +* dpdk-esp-encrypt-post : internal node. +* dpdk-esp-decrypt-post : internal node. 
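+
+In short: dpdk-esp-encrypt/dpdk-esp-decrypt enqueue crypto operations to the
+cryptodev queue pairs, dpdk-crypto-input polls those queue pairs and hands
+completed operations to the matching -post node, which finishes ESP
+processing; failed operations are sent to error-drop.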
+
+
+### How to enable VPP IPSec with DPDK Cryptodev support
+
+To enable DPDK Cryptodev support (disabled by default), we need the following env option:
+
+    vpp_uses_dpdk_cryptodev=yes
+
+A couple of ways to achieve this:
+* uncomment/add it in the platforms config (i.e. build-data/platforms/vpp.mk)
+* set the option when building vpp (i.e. make vpp_uses_dpdk_cryptodev=yes build-release)
+
+
+### Crypto Resources allocation
+
+VPP allocates crypto resources based on a best-effort approach:
+* first allocate Hardware crypto resources, then Software.
+* if there are not enough crypto resources for all workers, all packets will be dropped if they reach ESP encrypt/decrypt nodes, displaying the warning:
+
+      0: dpdk_ipsec_init: not enough cryptodevs for ipsec
+
+
+### Configuration example
+
+No special IPsec configuration is required.
+
+Once DPDK Cryptodev is enabled, the user just needs to provide cryptodevs in the startup.conf.
+
+Example startup.conf:
+
+```
+dpdk {
+  socket-mem 1024,1024
+  num-mbufs 131072
+  dev 0000:81:00.0
+  dev 0000:81:00.1
+  dev 0000:85:01.0
+  dev 0000:85:01.1
+  vdev cryptodev_aesni_mb_pmd,socket_id=1
+  vdev cryptodev_aesni_mb_pmd,socket_id=1
+}
+```
+
+In the above configuration:
+* 0000:85:01.0 and 0000:85:01.1 are crypto BDFs and they require the same driver binding as DPDK Ethernet devices but they do not support any extra configuration options.
+* Two AESNI-MB Software Cryptodev PMDs are created in NUMA node 1.
+
+For further details refer to [DPDK Crypto Device Driver documentation](http://dpdk.org/doc/guides/cryptodevs/index.html)
+
+### Operational data
+
+The following CLI command displays the Cryptodev/Worker mapping:
+
+    show crypto device mapping [verbose]
diff --git a/src/vnet/devices/dpdk/ipsec/esp.h b/src/vnet/devices/dpdk/ipsec/esp.h
new file mode 100644
index 00000000..7ef90c49
--- /dev/null
+++ b/src/vnet/devices/dpdk/ipsec/esp.h
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ +#ifndef __DPDK_ESP_H__ +#define __DPDK_ESP_H__ + +#include +#include +#include + +typedef struct +{ + enum rte_crypto_cipher_algorithm algo; + u8 key_len; + u8 iv_len; +} dpdk_esp_crypto_alg_t; + +typedef struct +{ + enum rte_crypto_auth_algorithm algo; + u8 trunc_size; +} dpdk_esp_integ_alg_t; + +typedef struct +{ + dpdk_esp_crypto_alg_t *esp_crypto_algs; + dpdk_esp_integ_alg_t *esp_integ_algs; +} dpdk_esp_main_t; + +dpdk_esp_main_t dpdk_esp_main; + +static_always_inline void +dpdk_esp_init () +{ + dpdk_esp_main_t *em = &dpdk_esp_main; + dpdk_esp_integ_alg_t *i; + dpdk_esp_crypto_alg_t *c; + + vec_validate (em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1); + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 16; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 24; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 32; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_GCM_128]; + c->algo = RTE_CRYPTO_CIPHER_AES_GCM; + c->key_len = 16; + c->iv_len = 8; + + vec_validate (em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1); + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96]; + i->algo = RTE_CRYPTO_AUTH_SHA1_HMAC; + i->trunc_size = 12; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96]; + i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + i->trunc_size = 12; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128]; + i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + i->trunc_size = 16; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192]; + i->algo = RTE_CRYPTO_AUTH_SHA384_HMAC; + i->trunc_size = 24; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256]; + i->algo = RTE_CRYPTO_AUTH_SHA512_HMAC; + i->trunc_size = 32; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_AES_GCM_128]; + i->algo = RTE_CRYPTO_AUTH_AES_GCM; + i->trunc_size = 16; +} + +static_always_inline int +add_del_sa_sess (u32 sa_index, u8 is_add) +{ + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm; + u8 skip_master = vlib_num_workers () > 0; + + /* *INDENT-OFF* */ + vec_foreach (cwm, dcm->workers_main) + { + crypto_sa_session_t *sa_sess; + u8 is_outbound; + + if (skip_master) + { + skip_master = 0; + continue; + } + + for (is_outbound = 0; is_outbound < 2; is_outbound++) + { + if (is_add) + { + pool_get (cwm->sa_sess_d[is_outbound], sa_sess); + } + else + { + u8 dev_id; + + sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index); + dev_id = cwm->qp_data[sa_sess->qp_index].dev_id; + + if (!sa_sess->sess) + continue; + + if (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess)) + { + clib_warning("failed to free session"); + return -1; + } + memset(sa_sess, 0, sizeof(sa_sess[0])); + } + } + } + /* *INDENT-OFF* */ + + return 0; +} + +static_always_inline int +translate_crypto_algo(ipsec_crypto_alg_t crypto_algo, + struct rte_crypto_sym_xform *cipher_xform) +{ + switch (crypto_algo) + { + case IPSEC_CRYPTO_ALG_NONE: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL; + break; + case IPSEC_CRYPTO_ALG_AES_CBC_128: + case IPSEC_CRYPTO_ALG_AES_CBC_192: + case IPSEC_CRYPTO_ALG_AES_CBC_256: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; + break; + case IPSEC_CRYPTO_ALG_AES_GCM_128: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM; + break; + default: + return -1; + } + + cipher_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + + return 0; +} + +static_always_inline 
int +translate_integ_algo(ipsec_integ_alg_t integ_alg, + struct rte_crypto_sym_xform *auth_xform, int use_esn) +{ + switch (integ_alg) { + case IPSEC_INTEG_ALG_NONE: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_NULL; + auth_xform->auth.digest_length = 0; + break; + case IPSEC_INTEG_ALG_SHA1_96: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC; + auth_xform->auth.digest_length = 12; + break; + case IPSEC_INTEG_ALG_SHA_256_96: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + auth_xform->auth.digest_length = 12; + break; + case IPSEC_INTEG_ALG_SHA_256_128: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + auth_xform->auth.digest_length = 16; + break; + case IPSEC_INTEG_ALG_SHA_384_192: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA384_HMAC; + auth_xform->auth.digest_length = 24; + break; + case IPSEC_INTEG_ALG_SHA_512_256: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA512_HMAC; + auth_xform->auth.digest_length = 32; + break; + case IPSEC_INTEG_ALG_AES_GCM_128: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM; + auth_xform->auth.digest_length = 16; + auth_xform->auth.add_auth_data_length = use_esn? 12 : 8; + break; + default: + return -1; + } + + auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; + + return 0; +} + +static_always_inline int +create_sym_sess(ipsec_sa_t *sa, crypto_sa_session_t *sa_sess, u8 is_outbound) +{ + u32 cpu_index = os_get_cpu_number(); + dpdk_crypto_main_t * dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + struct rte_crypto_sym_xform cipher_xform = {0}; + struct rte_crypto_sym_xform auth_xform = {0}; + struct rte_crypto_sym_xform *xfs; + uword key = 0, *data; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *)&key; + + if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + sa->crypto_key_len -= 4; + clib_memcpy(&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4); + } + else + { + sa->salt = (u32) rand(); + } + + cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; + cipher_xform.cipher.key.data = sa->crypto_key; + cipher_xform.cipher.key.length = sa->crypto_key_len; + + auth_xform.type = RTE_CRYPTO_SYM_XFORM_AUTH; + auth_xform.auth.key.data = sa->integ_key; + auth_xform.auth.key.length = sa->integ_key_len; + + if (translate_crypto_algo(sa->crypto_alg, &cipher_xform) < 0) + return -1; + p_key->cipher_algo = cipher_xform.cipher.algo; + + if (translate_integ_algo(sa->integ_alg, &auth_xform, sa->use_esn) < 0) + return -1; + p_key->auth_algo = auth_xform.auth.algo; + + if (is_outbound) + { + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; + cipher_xform.next = &auth_xform; + xfs = &cipher_xform; + } + else + { + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; + auth_xform.next = &cipher_xform; + xfs = &auth_xform; + } + + p_key->is_outbound = is_outbound; + + data = hash_get(cwm->algo_qp_map, key); + if (!data) + return -1; + + sa_sess->sess = + rte_cryptodev_sym_session_create(cwm->qp_data[*data].dev_id, xfs); + + if (!sa_sess->sess) + return -1; + + sa_sess->qp_index = (u8)*data; + + return 0; +} + +#endif /* __DPDK_ESP_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/ipsec/esp_decrypt.c b/src/vnet/devices/dpdk/ipsec/esp_decrypt.c new file mode 100644 index 00000000..89ab9f9b --- /dev/null +++ b/src/vnet/devices/dpdk/ipsec/esp_decrypt.c @@ -0,0 +1,583 @@ +/* + * esp_decrypt.c : IPSec ESP 
Decrypt node using DPDK Cryptodev + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include + +#define foreach_esp_decrypt_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(IP6_INPUT, "ip6-input") + +#define _(v, s) ESP_DECRYPT_NEXT_##v, +typedef enum { + foreach_esp_decrypt_next +#undef _ + ESP_DECRYPT_N_NEXT, +} esp_decrypt_next_t; + +#define foreach_esp_decrypt_error \ + _(RX_PKTS, "ESP pkts received") \ + _(DECRYPTION_FAILED, "ESP decryption failed") \ + _(REPLAY, "SA replayed packet") \ + _(NOT_IP, "Not IP packet (dropped)") \ + _(ENQ_FAIL, "Enqueue failed (buffer full)") \ + _(NO_CRYPTODEV, "Cryptodev not configured") \ + _(BAD_LEN, "Invalid ciphertext length") \ + _(UNSUPPORTED, "Cipher/Auth not supported") + + +typedef enum { +#define _(sym,str) ESP_DECRYPT_ERROR_##sym, + foreach_esp_decrypt_error +#undef _ + ESP_DECRYPT_N_ERROR, +} esp_decrypt_error_t; + +static char * esp_decrypt_error_strings[] = { +#define _(sym,string) string, + foreach_esp_decrypt_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_decrypt_node; + +typedef struct { + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; +} esp_decrypt_trace_t; + +/* packet trace format function */ +static u8 * format_esp_decrypt_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *); + + s = format (s, "esp: crypto %U integrity %U", + format_ipsec_crypto_alg, t->crypto_alg, + format_ipsec_integ_alg, t->integ_alg); + return s; +} + +static uword +dpdk_esp_decrypt_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next, next_index; + ipsec_main_t *im = &ipsec_main; + u32 cpu_index = os_get_cpu_number(); + dpdk_crypto_main_t * dcm = &dpdk_crypto_main; + dpdk_esp_main_t * em = &dpdk_esp_main; + u32 i; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (PREDICT_FALSE(!dcm->workers_main)) + { + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_NO_CRYPTODEV, n_left_from); + vlib_buffer_free(vm, from, n_left_from); + return n_left_from; + } + + crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index); + u32 n_qps = vec_len(cwm->qp_data); + struct rte_crypto_op ** cops_to_enq[n_qps]; + u32 n_cop_qp[n_qps], * bi_to_enq[n_qps]; + + for (i = 0; i < n_qps; i++) + { + bi_to_enq[i] = cwm->qp_data[i].bi; + cops_to_enq[i] = cwm->qp_data[i].cops; + } + + memset(n_cop_qp, 0, n_qps * sizeof(u32)); + + crypto_alloc_cops(); + + next_index = ESP_DECRYPT_NEXT_DROP; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, sa_index0 = ~0, 
seq, icv_size, iv_size; + vlib_buffer_t * b0; + esp_header_t * esp0; + ipsec_sa_t * sa0; + struct rte_mbuf * mb0 = 0; + const int BLOCK_SIZE = 16; + crypto_sa_session_t * sa_sess; + void * sess; + u16 qp_index; + struct rte_crypto_op * cop = 0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + esp0 = vlib_buffer_get_current (b0); + + sa_index0 = vnet_buffer(b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, sa_index0); + + seq = clib_host_to_net_u32(esp0->seq); + + /* anti-replay check */ + if (sa0->use_anti_replay) + { + int rv = 0; + + if (PREDICT_TRUE(sa0->use_esn)) + rv = esp_replay_check_esn(sa0, seq); + else + rv = esp_replay_check(sa0, seq); + + if (PREDICT_FALSE(rv)) + { + clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_REPLAY, 1); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + } + + if (PREDICT_FALSE(sa0->integ_alg == IPSEC_INTEG_ALG_NONE) || + PREDICT_FALSE(sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE)) + { + clib_warning ("SPI %u : only cipher + auth supported", sa0->spi); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_UNSUPPORTED, 1); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0); + + if (PREDICT_FALSE(!sa_sess->sess)) + { + int ret = create_sym_sess(sa0, sa_sess, 0); + ASSERT(ret == 0); + } + + sess = sa_sess->sess; + qp_index = sa_sess->qp_index; + + ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); + cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); + ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); + + cops_to_enq[qp_index][0] = cop; + cops_to_enq[qp_index] += 1; + n_cop_qp[qp_index] += 1; + bi_to_enq[qp_index][0] = bi0; + bi_to_enq[qp_index] += 1; + + rte_crypto_op_attach_sym_session(cop, sess); + + icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + + /* Convert vlib buffer to mbuf */ + mb0 = rte_mbuf_from_vlib_buffer(b0); + mb0->data_len = b0->current_length; + mb0->pkt_len = b0->current_length; + mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; + + /* Outer IP header has already been stripped */ + u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) - + iv_size - icv_size; + + if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0)) + { + clib_warning ("payload %u not multiple of %d\n", + payload_len, BLOCK_SIZE); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_BAD_LEN, 1); + vec_add (vec_elt (cwm->qp_data, qp_index).free_cops, &cop, 1); + bi_to_enq[qp_index] -= 1; + cops_to_enq[qp_index] -= 1; + n_cop_qp[qp_index] -= 1; + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1); + + sym_cop->m_src = mb0; + sym_cop->cipher.data.offset = sizeof (esp_header_t) + iv_size; + sym_cop->cipher.data.length = payload_len; + + u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t)); + dpdk_cop_priv_t * priv = (dpdk_cop_priv_t *)(sym_cop + 1); + + if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + dpdk_gcm_cnt_blk *icb = &priv->cb; + icb->salt = sa0->salt; + clib_memcpy(icb->iv, iv, 8); + icb->cnt = clib_host_to_net_u32(1); + sym_cop->cipher.iv.data = (u8 *)icb; + sym_cop->cipher.iv.phys_addr = cop->phys_addr + 
+ (uintptr_t)icb - (uintptr_t)cop; + sym_cop->cipher.iv.length = 16; + + u8 *aad = priv->aad; + clib_memcpy(aad, iv - sizeof(esp_header_t), 8); + sym_cop->auth.aad.data = aad; + sym_cop->auth.aad.phys_addr = cop->phys_addr + + (uintptr_t)aad - (uintptr_t)cop; + if (sa0->use_esn) + { + *((u32*)&aad[8]) = sa0->seq_hi; + sym_cop->auth.aad.length = 12; + } + else + { + sym_cop->auth.aad.length = 8; + } + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.length = icv_size; + + } + else + { + sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(mb0, void*, + sizeof (esp_header_t)); + sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + sizeof (esp_header_t)); + sym_cop->cipher.iv.length = iv_size; + + if (sa0->use_esn) + { + dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1); + u8* payload_end = rte_pktmbuf_mtod_offset( + mb0, u8*, sizeof(esp_header_t) + iv_size + payload_len); + + clib_memcpy (priv->icv, payload_end, icv_size); + *((u32*) payload_end) = sa0->seq_hi; + sym_cop->auth.data.offset = 0; + sym_cop->auth.data.length = sizeof(esp_header_t) + iv_size + + payload_len + sizeof(sa0->seq_hi); + sym_cop->auth.digest.data = priv->icv; + sym_cop->auth.digest.phys_addr = cop->phys_addr + + (uintptr_t) priv->icv - (uintptr_t) cop; + sym_cop->auth.digest.length = icv_size; + } + else + { + sym_cop->auth.data.offset = 0; + sym_cop->auth.data.length = sizeof(esp_header_t) + + iv_size + payload_len; + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.length = icv_size; + } + } + +trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_RX_PKTS, + from_frame->n_vectors); + crypto_qp_data_t *qpd; + /* *INDENT-OFF* */ + vec_foreach_index (i, cwm->qp_data) + { + u32 enq; + + qpd = vec_elt_at_index(cwm->qp_data, i); + enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, + qpd->cops, n_cop_qp[i]); + qpd->inflights += enq; + + if (PREDICT_FALSE(enq < n_cop_qp[i])) + { + crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); + vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); + + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_ENQ_FAIL, + n_cop_qp[i] - enq); + } + } + /* *INDENT-ON* */ + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_decrypt_node) = { + .function = dpdk_esp_decrypt_node_fn, + .name = "dpdk-esp-decrypt", + .vector_size = sizeof (u32), + .format_trace = format_esp_decrypt_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(esp_decrypt_error_strings), + .error_strings = esp_decrypt_error_strings, + + .n_next_nodes = ESP_DECRYPT_N_NEXT, + .next_nodes = { +#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, + foreach_esp_decrypt_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_node, dpdk_esp_decrypt_node_fn) + +/* + * Decrypt Post Node + */ + +#define foreach_esp_decrypt_post_error \ + _(PKTS, "ESP post pkts") 
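The AES-GCM branch above assembles three per-packet inputs: a 16-byte counter block (salt from the tail of the key, the 8-byte IV carried in the ESP payload, and a 32-bit counter starting at 1) and an 8- or 12-byte AAD taken from the ESP header, with seq_hi appended when extended sequence numbers are in use. A standalone sketch of those layouts, assuming SPI/sequence values arrive already in network byte order; the names are illustrative, not VPP's:

```c
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* 16-byte AES-GCM counter block: salt | 8-byte per-packet IV | counter. */
typedef struct
{
  uint32_t salt;  /* taken from the tail of the configured key */
  uint8_t iv[8];  /* carried in clear in the ESP payload */
  uint32_t cnt;   /* big-endian 1 */
} gcm_cnt_blk_sketch;

void
build_cnt_blk (gcm_cnt_blk_sketch *icb, uint32_t salt, const uint8_t iv[8])
{
  icb->salt = salt;
  memcpy (icb->iv, iv, 8);
  icb->cnt = htonl (1);
}

/* AAD is the 8-byte ESP header (SPI + low sequence number); with
 * extended sequence numbers, seq_hi is appended for 12 bytes total. */
size_t
build_aad (uint8_t aad[12], uint32_t spi_net, uint32_t seq_net,
           uint32_t seq_hi, int use_esn)
{
  memcpy (aad, &spi_net, 4);
  memcpy (aad + 4, &seq_net, 4);
  if (!use_esn)
    return 8;
  memcpy (aad + 8, &seq_hi, 4);
  return 12;
}
```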
+ +typedef enum { +#define _(sym,str) ESP_DECRYPT_POST_ERROR_##sym, + foreach_esp_decrypt_post_error +#undef _ + ESP_DECRYPT_POST_N_ERROR, +} esp_decrypt_post_error_t; + +static char * esp_decrypt_post_error_strings[] = { +#define _(sym,string) string, + foreach_esp_decrypt_post_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_decrypt_post_node; + +static u8 * format_esp_decrypt_post_trace (u8 * s, va_list * args) +{ + return s; +} + +static uword +dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next = 0, next_index; + ipsec_sa_t * sa0; + u32 sa_index0 = ~0; + ipsec_main_t *im = &ipsec_main; + dpdk_esp_main_t *em = &dpdk_esp_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + esp_footer_t * f0; + u32 bi0, next0, icv_size, iv_size; + vlib_buffer_t * b0 = 0; + ip4_header_t *ih4 = 0, *oh4 = 0; + ip6_header_t *ih6 = 0, *oh6 = 0; + u8 tunnel_mode = 1; + u8 transport_ip6 = 0; + + next0 = ESP_DECRYPT_NEXT_DROP; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sa_index0 = vnet_buffer(b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, sa_index0); + + to_next[0] = bi0; + to_next += 1; + + icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + + if (sa0->use_anti_replay) + { + esp_header_t * esp0 = vlib_buffer_get_current (b0); + u32 seq; + seq = clib_host_to_net_u32(esp0->seq); + if (PREDICT_TRUE(sa0->use_esn)) + esp_replay_advance_esn(sa0, seq); + else + esp_replay_advance(sa0, seq); + } + + ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t)); + vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size); + + b0->current_length -= (icv_size + 2); + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) + + b0->current_length); + b0->current_length -= f0->pad_length; + + /* transport mode */ + if (PREDICT_FALSE(!sa0->is_tunnel && !sa0->is_tunnel_ip6)) + { + tunnel_mode = 0; + + if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) != 0x40)) + { + if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) == 0x60)) + transport_ip6 = 1; + else + { + clib_warning("next header: 0x%x", f0->next_header); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_NOT_IP, 1); + goto trace; + } + } + } + + if (PREDICT_TRUE (tunnel_mode)) + { + if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP)) + next0 = ESP_DECRYPT_NEXT_IP4_INPUT; + else if (f0->next_header == IP_PROTOCOL_IPV6) + next0 = ESP_DECRYPT_NEXT_IP6_INPUT; + else + { + clib_warning("next header: 0x%x", f0->next_header); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_DECRYPTION_FAILED, + 1); + goto trace; + } + } + /* transport mode */ + else + { + if (PREDICT_FALSE(transport_ip6)) + { + ih6 = (ip6_header_t *) (b0->data + sizeof(ethernet_header_t)); + vlib_buffer_advance (b0, -sizeof(ip6_header_t)); + oh6 = vlib_buffer_get_current (b0); + memmove(oh6, ih6, sizeof(ip6_header_t)); + + next0 = ESP_DECRYPT_NEXT_IP6_INPUT; + oh6->protocol = f0->next_header; + oh6->payload_length = + clib_host_to_net_u16 ( + 
vlib_buffer_length_in_chain(vm, b0) - + sizeof (ip6_header_t)); + } + else + { + vlib_buffer_advance (b0, -sizeof(ip4_header_t)); + oh4 = vlib_buffer_get_current (b0); + memmove(oh4, ih4, sizeof(ip4_header_t)); + + next0 = ESP_DECRYPT_NEXT_IP4_INPUT; + oh4->ip_version_and_header_length = 0x45; + oh4->fragment_id = 0; + oh4->flags_and_fragment_offset = 0; + oh4->protocol = f0->next_header; + oh4->length = clib_host_to_net_u16 ( + vlib_buffer_length_in_chain (vm, b0)); + oh4->checksum = ip4_header_checksum (oh4); + } + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0; + +trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_decrypt_post_node.index, + ESP_DECRYPT_POST_ERROR_PKTS, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_decrypt_post_node) = { + .function = dpdk_esp_decrypt_post_node_fn, + .name = "dpdk-esp-decrypt-post", + .vector_size = sizeof (u32), + .format_trace = format_esp_decrypt_post_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(esp_decrypt_post_error_strings), + .error_strings = esp_decrypt_post_error_strings, + + .n_next_nodes = ESP_DECRYPT_N_NEXT, + .next_nodes = { +#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, + foreach_esp_decrypt_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_post_node, dpdk_esp_decrypt_post_node_fn) diff --git a/src/vnet/devices/dpdk/ipsec/esp_encrypt.c b/src/vnet/devices/dpdk/ipsec/esp_encrypt.c new file mode 100644 index 00000000..10bb4616 --- /dev/null +++ b/src/vnet/devices/dpdk/ipsec/esp_encrypt.c @@ -0,0 +1,598 @@ +/* + * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
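In the transport-mode branch above, the decrypt-post node rebuilds the outer IPv4 header and must recompute its checksum. For reference, a self-contained sketch of the standard one's-complement header checksum that ip4_header_checksum() computes; this helper is a stand-in, not the VPP implementation:

```c
#include <stdint.h>
#include <stddef.h>

/* One's-complement sum over the header with carries folded back in;
 * the checksum field itself must be zero on input. */
uint16_t
ip4_checksum_sketch (const void *hdr, size_t len) /* len even, e.g. 20 */
{
  const uint16_t *p = hdr;
  uint32_t sum = 0;
  size_t i;

  for (i = 0; i < len / 2; i++)
    sum += p[i];

  while (sum >> 16)
    sum = (sum & 0xffff) + (sum >> 16);

  return (uint16_t) ~sum;
}
```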
+ */ + +#include +#include +#include + +#include +#include +#include + +#define foreach_esp_encrypt_next \ +_(DROP, "error-drop") \ +_(IP4_LOOKUP, "ip4-lookup") \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(INTERFACE_OUTPUT, "interface-output") + +#define _(v, s) ESP_ENCRYPT_NEXT_##v, +typedef enum +{ + foreach_esp_encrypt_next +#undef _ + ESP_ENCRYPT_N_NEXT, +} esp_encrypt_next_t; + +#define foreach_esp_encrypt_error \ + _(RX_PKTS, "ESP pkts received") \ + _(SEQ_CYCLED, "sequence number cycled") \ + _(ENQ_FAIL, "Enqueue failed (buffer full)") \ + _(NO_CRYPTODEV, "Cryptodev not configured") \ + _(UNSUPPORTED, "Cipher/Auth not supported") + + +typedef enum +{ +#define _(sym,str) ESP_ENCRYPT_ERROR_##sym, + foreach_esp_encrypt_error +#undef _ + ESP_ENCRYPT_N_ERROR, +} esp_encrypt_error_t; + +static char *esp_encrypt_error_strings[] = { +#define _(sym,string) string, + foreach_esp_encrypt_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_encrypt_node; + +typedef struct +{ + u32 spi; + u32 seq; + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; +} esp_encrypt_trace_t; + +/* packet trace format function */ +static u8 * +format_esp_encrypt_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *); + + s = format (s, "esp: spi %u seq %u crypto %U integrity %U", + t->spi, t->seq, + format_ipsec_crypto_alg, t->crypto_alg, + format_ipsec_integ_alg, t->integ_alg); + return s; +} + +static uword +dpdk_esp_encrypt_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next, next_index; + ipsec_main_t *im = &ipsec_main; + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + dpdk_esp_main_t *em = &dpdk_esp_main; + u32 i; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (PREDICT_FALSE (!dcm->workers_main)) + { + /* Likely there are not enough cryptodevs, so drop frame */ + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_NO_CRYPTODEV, + n_left_from); + vlib_buffer_free (vm, from, n_left_from); + return n_left_from; + } + + crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index); + u32 n_qps = vec_len (cwm->qp_data); + struct rte_crypto_op **cops_to_enq[n_qps]; + u32 n_cop_qp[n_qps], *bi_to_enq[n_qps]; + + for (i = 0; i < n_qps; i++) + { + bi_to_enq[i] = cwm->qp_data[i].bi; + cops_to_enq[i] = cwm->qp_data[i].cops; + } + + memset (n_cop_qp, 0, n_qps * sizeof (u32)); + + crypto_alloc_cops (); + + next_index = ESP_ENCRYPT_NEXT_DROP; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + u32 sa_index0; + ipsec_sa_t *sa0; + ip4_and_esp_header_t *ih0, *oh0 = 0; + ip6_and_esp_header_t *ih6_0, *oh6_0 = 0; + struct rte_mbuf *mb0 = 0; + esp_footer_t *f0; + u8 is_ipv6; + u8 ip_hdr_size; + u8 next_hdr_type; + u8 transport_mode = 0; + const int BLOCK_SIZE = 16; + u32 iv_size; + u16 orig_sz; + crypto_sa_session_t *sa_sess; + void *sess; + struct rte_crypto_op *cop = 0; + u16 qp_index; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sa_index0 = vnet_buffer (b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, sa_index0); + + if 
(PREDICT_FALSE (esp_seq_advance (sa0))) + { + clib_warning ("sequence number counter has cycled SPI %u", + sa0->spi); + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1); + //TODO: rekey SA + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + sa_sess = pool_elt_at_index (cwm->sa_sess_d[1], sa_index0); + if (PREDICT_FALSE (!sa_sess->sess)) + { + int ret = create_sym_sess (sa0, sa_sess, 1); + ASSERT (ret == 0); + } + + qp_index = sa_sess->qp_index; + sess = sa_sess->sess; + + ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); + cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); + ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); + + cops_to_enq[qp_index][0] = cop; + cops_to_enq[qp_index] += 1; + n_cop_qp[qp_index] += 1; + bi_to_enq[qp_index][0] = bi0; + bi_to_enq[qp_index] += 1; + + ssize_t adv; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + ih0 = vlib_buffer_get_current (b0); + orig_sz = b0->current_length; + is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60; + /* is ipv6 */ + if (PREDICT_TRUE (sa0->is_tunnel)) + { + if (PREDICT_TRUE (!is_ipv6)) + adv = -sizeof (ip4_and_esp_header_t); + else + adv = -sizeof (ip6_and_esp_header_t); + } + else + { + adv = -sizeof (esp_header_t); + if (PREDICT_TRUE (!is_ipv6)) + orig_sz -= sizeof (ip4_header_t); + else + orig_sz -= sizeof (ip6_header_t); + } + + /*transport mode save the eth header before it is overwritten */ + if (PREDICT_FALSE (!sa0->is_tunnel)) + { + ethernet_header_t *ieh0 = (ethernet_header_t *) + ((u8 *) vlib_buffer_get_current (b0) - + sizeof (ethernet_header_t)); + ethernet_header_t *oeh0 = + (ethernet_header_t *) ((u8 *) ieh0 + (adv - iv_size)); + clib_memcpy (oeh0, ieh0, sizeof (ethernet_header_t)); + } + + vlib_buffer_advance (b0, adv - iv_size); + + /* XXX IP6/ip4 and IP4/IP6 not supported, only IP4/IP4 and IP6/IP6 */ + + /* is ipv6 */ + if (PREDICT_FALSE (is_ipv6)) + { + ih6_0 = (ip6_and_esp_header_t *) ih0; + ip_hdr_size = sizeof (ip6_header_t); + oh6_0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (sa0->is_tunnel)) + { + next_hdr_type = IP_PROTOCOL_IPV6; + oh6_0->ip6.ip_version_traffic_class_and_flow_label = + ih6_0->ip6.ip_version_traffic_class_and_flow_label; + } + else + { + next_hdr_type = ih6_0->ip6.protocol; + memmove (oh6_0, ih6_0, sizeof (ip6_header_t)); + } + + oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP; + oh6_0->ip6.hop_limit = 254; + oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi); + oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq); + } + else + { + ip_hdr_size = sizeof (ip4_header_t); + oh0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (sa0->is_tunnel)) + { + next_hdr_type = IP_PROTOCOL_IP_IN_IP; + oh0->ip4.tos = ih0->ip4.tos; + } + else + { + next_hdr_type = ih0->ip4.protocol; + memmove (oh0, ih0, sizeof (ip4_header_t)); + } + + oh0->ip4.ip_version_and_header_length = 0x45; + oh0->ip4.fragment_id = 0; + oh0->ip4.flags_and_fragment_offset = 0; + oh0->ip4.ttl = 254; + oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP; + oh0->esp.spi = clib_net_to_host_u32 (sa0->spi); + oh0->esp.seq = clib_net_to_host_u32 (sa0->seq); + } + + if (PREDICT_TRUE (sa0->is_tunnel && !sa0->is_tunnel_ip6)) + { + oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32; + oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32; + + /* in tunnel mode send it back to FIB */ + next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + } + else if 
(sa0->is_tunnel && sa0->is_tunnel_ip6) + { + oh6_0->ip6.src_address.as_u64[0] = + sa0->tunnel_src_addr.ip6.as_u64[0]; + oh6_0->ip6.src_address.as_u64[1] = + sa0->tunnel_src_addr.ip6.as_u64[1]; + oh6_0->ip6.dst_address.as_u64[0] = + sa0->tunnel_dst_addr.ip6.as_u64[0]; + oh6_0->ip6.dst_address.as_u64[1] = + sa0->tunnel_dst_addr.ip6.as_u64[1]; + + /* in tunnel mode send it back to FIB */ + next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + } + else + { + next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT; + transport_mode = 1; + } + + ASSERT (sa0->crypto_alg < IPSEC_CRYPTO_N_ALG); + ASSERT (sa0->crypto_alg != IPSEC_CRYPTO_ALG_NONE); + + int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE; + + /* pad packet in input buffer */ + u8 pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz; + u8 i; + u8 *padding = vlib_buffer_get_current (b0) + b0->current_length; + + for (i = 0; i < pad_bytes; ++i) + padding[i] = i + 1; + + f0 = vlib_buffer_get_current (b0) + b0->current_length + pad_bytes; + f0->pad_length = pad_bytes; + f0->next_header = next_hdr_type; + b0->current_length += pad_bytes + 2 + + em->esp_integ_algs[sa0->integ_alg].trunc_size; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + vnet_buffer (b0)->sw_if_index[VLIB_RX]; + b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + struct rte_crypto_sym_op *sym_cop; + sym_cop = (struct rte_crypto_sym_op *) (cop + 1); + + dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *) (sym_cop + 1); + + vnet_buffer (b0)->unused[0] = next0; + + mb0 = rte_mbuf_from_vlib_buffer (b0); + mb0->data_len = b0->current_length; + mb0->pkt_len = b0->current_length; + mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; + + rte_crypto_op_attach_sym_session (cop, sess); + + sym_cop->m_src = mb0; + + dpdk_gcm_cnt_blk *icb = &priv->cb; + icb->salt = sa0->salt; + icb->iv[0] = sa0->seq; + icb->iv[1] = sa0->seq_hi; + + if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + icb->cnt = clib_host_to_net_u32 (1); + clib_memcpy (vlib_buffer_get_current (b0) + ip_hdr_size + + sizeof (esp_header_t), icb->iv, 8); + sym_cop->cipher.data.offset = + ip_hdr_size + sizeof (esp_header_t) + iv_size; + sym_cop->cipher.data.length = BLOCK_SIZE * blocks; + sym_cop->cipher.iv.length = 16; + } + else + { + sym_cop->cipher.data.offset = + ip_hdr_size + sizeof (esp_header_t); + sym_cop->cipher.data.length = BLOCK_SIZE * blocks + iv_size; + sym_cop->cipher.iv.length = iv_size; + } + + sym_cop->cipher.iv.data = (u8 *) icb; + sym_cop->cipher.iv.phys_addr = cop->phys_addr + (uintptr_t) icb + - (uintptr_t) cop; + + + ASSERT (sa0->integ_alg < IPSEC_INTEG_N_ALG); + ASSERT (sa0->integ_alg != IPSEC_INTEG_ALG_NONE); + + if (PREDICT_FALSE (sa0->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)) + { + u8 *aad = priv->aad; + clib_memcpy (aad, vlib_buffer_get_current (b0) + ip_hdr_size, + 8); + sym_cop->auth.aad.data = aad; + sym_cop->auth.aad.phys_addr = cop->phys_addr + + (uintptr_t) aad - (uintptr_t) cop; + + if (PREDICT_FALSE (sa0->use_esn)) + { + *((u32 *) & aad[8]) = sa0->seq_hi; + sym_cop->auth.aad.length = 12; + } + else + { + sym_cop->auth.aad.length = 8; + } + } + else + { + sym_cop->auth.data.offset = ip_hdr_size; + sym_cop->auth.data.length = b0->current_length - ip_hdr_size + - em->esp_integ_algs[sa0->integ_alg].trunc_size; + + if (PREDICT_FALSE (sa0->use_esn)) + { + u8 *payload_end = + vlib_buffer_get_current (b0) + b0->current_length; + *((u32 *) payload_end) = sa0->seq_hi; + sym_cop->auth.data.length += sizeof (sa0->seq_hi); + } + } + sym_cop->auth.digest.data = vlib_buffer_get_current (b0) + + 
b0->current_length - + em->esp_integ_algs[sa0->integ_alg].trunc_size; + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset (mb0, + b0->current_length + - + em->esp_integ_algs + [sa0->integ_alg].trunc_size); + sym_cop->auth.digest.length = + em->esp_integ_algs[sa0->integ_alg].trunc_size; + + + if (PREDICT_FALSE (is_ipv6)) + { + oh6_0->ip6.payload_length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - + sizeof (ip6_header_t)); + } + else + { + oh0->ip4.length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4); + } + + if (transport_mode) + vlib_buffer_advance (b0, -sizeof (ethernet_header_t)); + + trace: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_encrypt_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->spi = sa0->spi; + tr->seq = sa0->seq - 1; + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_RX_PKTS, + from_frame->n_vectors); + crypto_qp_data_t *qpd; + /* *INDENT-OFF* */ + vec_foreach_index (i, cwm->qp_data) + { + u32 enq; + + qpd = vec_elt_at_index(cwm->qp_data, i); + enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, + qpd->cops, n_cop_qp[i]); + qpd->inflights += enq; + + if (PREDICT_FALSE(enq < n_cop_qp[i])) + { + crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); + vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); + + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_ENQ_FAIL, + n_cop_qp[i] - enq); + } + } + /* *INDENT-ON* */ + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_encrypt_node) = +{ + .function = dpdk_esp_encrypt_node_fn,.name = "dpdk-esp-encrypt",.flags = + VLIB_NODE_FLAG_IS_OUTPUT,.vector_size = sizeof (u32),.format_trace = + format_esp_encrypt_trace,.n_errors = + ARRAY_LEN (esp_encrypt_error_strings),.error_strings = + esp_encrypt_error_strings,.n_next_nodes = 1,.next_nodes = + { + [ESP_ENCRYPT_NEXT_DROP] = "error-drop",} +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_node, dpdk_esp_encrypt_node_fn) +/* + * ESP Encrypt Post Node + */ +#define foreach_esp_encrypt_post_error \ + _(PKTS, "ESP post pkts") + typedef enum + { +#define _(sym,str) ESP_ENCRYPT_POST_ERROR_##sym, + foreach_esp_encrypt_post_error +#undef _ + ESP_ENCRYPT_POST_N_ERROR, + } esp_encrypt_post_error_t; + + static char *esp_encrypt_post_error_strings[] = { +#define _(sym,string) string, + foreach_esp_encrypt_post_error +#undef _ + }; + +vlib_node_registration_t dpdk_esp_encrypt_post_node; + +static u8 * +format_esp_encrypt_post_trace (u8 * s, va_list * args) +{ + return s; +} + +static uword +dpdk_esp_encrypt_post_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next = 0, next_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + to_next[0] = bi0; + to_next += 1; + + next0 = vnet_buffer (b0)->unused[0]; + + vlib_validate_buffer_enqueue_x1 
(vm, node, next_index, + to_next, n_left_to_next, bi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, dpdk_esp_encrypt_post_node.index, + ESP_ENCRYPT_POST_ERROR_PKTS, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_encrypt_post_node) = +{ + .function = dpdk_esp_encrypt_post_node_fn,.name = + "dpdk-esp-encrypt-post",.vector_size = sizeof (u32),.format_trace = + format_esp_encrypt_post_trace,.type = VLIB_NODE_TYPE_INTERNAL,.n_errors = + ARRAY_LEN (esp_encrypt_post_error_strings),.error_strings = + esp_encrypt_post_error_strings,.n_next_nodes = + ESP_ENCRYPT_N_NEXT,.next_nodes = + { +#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n, + foreach_esp_encrypt_next +#undef _ + } +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_post_node, + dpdk_esp_encrypt_post_node_fn) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/ipsec/ipsec.c b/src/vnet/devices/dpdk/ipsec/ipsec.c new file mode 100644 index 00000000..de253f02 --- /dev/null +++ b/src/vnet/devices/dpdk/ipsec/ipsec.c @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
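Both encrypt and decrypt are split into two graph nodes around the asynchronous cryptodev: the first node stashes the packet's eventual next index in buffer metadata (vnet_buffer (b0)->unused[0] above) before enqueueing the crypto operation, and the post node restores it once the operation completes. A minimal single-threaded sketch of that hand-off pattern; the queue and buffer types here are invented for illustration:

```c
#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t next_index; } buf_sketch_t;

#define QLEN 8
static buf_sketch_t *queue[QLEN];
static unsigned head, tail;

/* First-stage node: remember where the packet goes, then hand it off. */
void
encrypt_enqueue (buf_sketch_t *b, uint32_t next_index)
{
  b->next_index = next_index;
  queue[tail++ % QLEN] = b;
}

/* Post node: drain completed operations and resume forwarding. */
void
encrypt_post_drain (void)
{
  while (head != tail)
    {
      buf_sketch_t *b = queue[head++ % QLEN];
      printf ("resume at next index %u\n", b->next_index);
    }
}

int
main (void)
{
  buf_sketch_t a, b;
  encrypt_enqueue (&a, 3); /* e.g. ip4-lookup */
  encrypt_enqueue (&b, 4); /* e.g. interface-output */
  encrypt_post_drain ();
  return 0;
}
```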
+ */ +#include +#include +#include +#include +#include +#include +#include + +#define DPDK_CRYPTO_NB_OBJS 2048 +#define DPDK_CRYPTO_CACHE_SIZE 512 +#define DPDK_CRYPTO_PRIV_SIZE 128 +#define DPDK_CRYPTO_N_QUEUE_DESC 512 +#define DPDK_CRYPTO_NB_COPS (1024 * 4) + +/* + * return: + * -1: update failed + * 0: already exist + * 1: mapped + */ +static int +update_qp_data (crypto_worker_main_t * cwm, + u8 cdev_id, u16 qp_id, u8 is_outbound, u16 * idx) +{ + crypto_qp_data_t *qpd; + + /* *INDENT-OFF* */ + vec_foreach_index (*idx, cwm->qp_data) + { + qpd = vec_elt_at_index(cwm->qp_data, *idx); + + if (qpd->dev_id == cdev_id && qpd->qp_id == qp_id && + qpd->is_outbound == is_outbound) + return 0; + } + /* *INDENT-ON* */ + + vec_add2 (cwm->qp_data, qpd, 1); + + qpd->dev_id = cdev_id; + qpd->qp_id = qp_id; + qpd->is_outbound = is_outbound; + + return 1; +} + +/* + * return: + * -1: error + * 0: already exist + * 1: mapped + */ +static int +add_mapping (crypto_worker_main_t * cwm, + u8 cdev_id, u16 qp, u8 is_outbound, + const struct rte_cryptodev_capabilities *cipher_cap, + const struct rte_cryptodev_capabilities *auth_cap) +{ + int mapped; + u16 qp_index; + uword key = 0, data, *ret; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; + + p_key->cipher_algo = (u8) cipher_cap->sym.cipher.algo; + p_key->auth_algo = (u8) auth_cap->sym.auth.algo; + p_key->is_outbound = is_outbound; + + ret = hash_get (cwm->algo_qp_map, key); + if (ret) + return 0; + + mapped = update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index); + if (mapped < 0) + return -1; + + data = (uword) qp_index; + + ret = hash_set (cwm->algo_qp_map, key, data); + if (!ret) + rte_panic ("Failed to insert hash table\n"); + + return mapped; +} + +/* + * return: + * 0: already exist + * 1: mapped + */ +static int +add_cdev_mapping (crypto_worker_main_t * cwm, + struct rte_cryptodev_info *dev_info, u8 cdev_id, + u16 qp, u8 is_outbound) +{ + const struct rte_cryptodev_capabilities *i, *j; + u32 mapped = 0; + + for (i = dev_info->capabilities; i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++) + { + if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER) + continue; + + if (check_algo_is_supported (i, NULL) != 0) + continue; + + for (j = dev_info->capabilities; j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; + j++) + { + int status = 0; + + if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH) + continue; + + if (check_algo_is_supported (j, NULL) != 0) + continue; + + status = add_mapping (cwm, cdev_id, qp, is_outbound, i, j); + if (status == 1) + mapped += 1; + if (status < 0) + return status; + } + } + + return mapped; +} + +static int +check_cryptodev_queues () +{ + u32 n_qs = 0; + u8 cdev_id; + u32 n_req_qs = 2; + + if (vlib_num_workers () > 0) + n_req_qs = vlib_num_workers () * 2; + + for (cdev_id = 0; cdev_id < rte_cryptodev_count (); cdev_id++) + { + struct rte_cryptodev_info cdev_info; + + rte_cryptodev_info_get (cdev_id, &cdev_info); + + if (! 
+ (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) + continue; + + n_qs += cdev_info.max_nb_queue_pairs; + } + + if (n_qs >= n_req_qs) + return 0; + else + return -1; +} + +static clib_error_t * +dpdk_ipsec_init (vlib_main_t * vm) +{ + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + struct rte_cryptodev_config dev_conf; + struct rte_cryptodev_qp_conf qp_conf; + struct rte_cryptodev_info cdev_info; + struct rte_mempool *rmp; + i32 dev_id, ret; + u32 i, skip_master; + + if (check_cryptodev_queues () < 0) + return clib_error_return (0, "not enough cryptodevs for ipsec"); + + vec_alloc (dcm->workers_main, tm->n_vlib_mains); + _vec_len (dcm->workers_main) = tm->n_vlib_mains; + + fprintf (stdout, "DPDK Cryptodevs info:\n"); + fprintf (stdout, "dev_id\tn_qp\tnb_obj\tcache_size\n"); + /* HW cryptodevs have higher dev_id, use HW first */ + for (dev_id = rte_cryptodev_count () - 1; dev_id >= 0; dev_id--) + { + u16 max_nb_qp, qp = 0; + skip_master = vlib_num_workers () > 0; + + rte_cryptodev_info_get (dev_id, &cdev_info); + + if (! + (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) + continue; + + max_nb_qp = cdev_info.max_nb_queue_pairs; + + for (i = 0; i < tm->n_vlib_mains; i++) + { + u8 is_outbound; + crypto_worker_main_t *cwm; + uword *map; + + if (skip_master) + { + skip_master = 0; + continue; + } + + cwm = vec_elt_at_index (dcm->workers_main, i); + map = cwm->algo_qp_map; + + if (!map) + { + map = hash_create (0, sizeof (crypto_worker_qp_key_t)); + if (!map) + return clib_error_return (0, "unable to create hash table " + "for worker %u", + vlib_mains[i]->cpu_index); + cwm->algo_qp_map = map; + } + + for (is_outbound = 0; is_outbound < 2 && qp < max_nb_qp; + is_outbound++) + { + int mapped = add_cdev_mapping (cwm, &cdev_info, + dev_id, qp, is_outbound); + if (mapped > 0) + qp++; + + if (mapped < 0) + return clib_error_return (0, + "too many queues for one worker"); + } + } + + if (qp == 0) + continue; + + dev_conf.socket_id = rte_cryptodev_socket_id (dev_id); + dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs; + dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_OBJS; + dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE; + + ret = rte_cryptodev_configure (dev_id, &dev_conf); + if (ret < 0) + return clib_error_return (0, "cryptodev %u config error", dev_id); + + qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC; + for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++) + { + ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf, + dev_conf.socket_id); + if (ret < 0) + return clib_error_return (0, "cryptodev %u qp %u setup error", + dev_id, qp); + } + fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs, + DPDK_CRYPTO_NB_OBJS, DPDK_CRYPTO_CACHE_SIZE); + } + + u32 socket_id = rte_socket_id (); + + vec_validate_aligned (dcm->cop_pools, socket_id, CLIB_CACHE_LINE_BYTES); + + /* pool already exists, nothing to do */ + if (dcm->cop_pools[socket_id]) + return 0; + + u8 *pool_name = format (0, "crypto_op_pool_socket%u%c", socket_id, 0); + + rmp = rte_crypto_op_pool_create ((char *) pool_name, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + DPDK_CRYPTO_NB_COPS * + (1 + vlib_num_workers ()), + DPDK_CRYPTO_CACHE_SIZE, + DPDK_CRYPTO_PRIV_SIZE, socket_id); + vec_free (pool_name); + + if (!rmp) + return clib_error_return (0, "failed to allocate mempool on socket %u", + socket_id); + dcm->cop_pools[socket_id] = rmp; + + dpdk_esp_init (); + + if (vec_len (vlib_mains) == 0) + vlib_node_set_state (&vlib_global_main, 
dpdk_crypto_input_node.index, + VLIB_NODE_STATE_POLLING); + else + for (i = 1; i < tm->n_vlib_mains; i++) + vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, + VLIB_NODE_STATE_POLLING); + + return 0; +} + +VLIB_MAIN_LOOP_ENTER_FUNCTION (dpdk_ipsec_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/ipsec/ipsec.h b/src/vnet/devices/dpdk/ipsec/ipsec.h new file mode 100644 index 00000000..e6c7498c --- /dev/null +++ b/src/vnet/devices/dpdk/ipsec/ipsec.h @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __DPDK_IPSEC_H__ +#define __DPDK_IPSEC_H__ + +#include + +#undef always_inline +#include +#include + +#if CLIB_DEBUG > 0 +#define always_inline static inline +#else +#define always_inline static inline __attribute__ ((__always_inline__)) +#endif + + +#define MAX_QP_PER_LCORE 16 + +typedef struct +{ + u32 salt; + u32 iv[2]; + u32 cnt; +} dpdk_gcm_cnt_blk; + +typedef struct +{ + dpdk_gcm_cnt_blk cb; + union + { + u8 aad[12]; + u8 icv[64]; + }; +} dpdk_cop_priv_t; + +typedef struct +{ + u8 cipher_algo; + u8 auth_algo; + u8 is_outbound; +} crypto_worker_qp_key_t; + +typedef struct +{ + u16 dev_id; + u16 qp_id; + u16 is_outbound; + i16 inflights; + u32 bi[VLIB_FRAME_SIZE]; + struct rte_crypto_op *cops[VLIB_FRAME_SIZE]; + struct rte_crypto_op **free_cops; +} crypto_qp_data_t; + +typedef struct +{ + u8 qp_index; + void *sess; +} crypto_sa_session_t; + +typedef struct +{ + crypto_sa_session_t *sa_sess_d[2]; + crypto_qp_data_t *qp_data; + uword *algo_qp_map; +} crypto_worker_main_t; + +typedef struct +{ + struct rte_mempool **cop_pools; + crypto_worker_main_t *workers_main; +} dpdk_crypto_main_t; + +dpdk_crypto_main_t dpdk_crypto_main; + +extern vlib_node_registration_t dpdk_crypto_input_node; + +#define CRYPTO_N_FREE_COPS (VLIB_FRAME_SIZE * 3) + +static_always_inline void +crypto_alloc_cops () +{ + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + u32 cpu_index = os_get_cpu_number (); + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + unsigned socket_id = rte_socket_id (); + crypto_qp_data_t *qpd; + + /* *INDENT-OFF* */ + vec_foreach (qpd, cwm->qp_data) + { + u32 l = vec_len (qpd->free_cops); + + if (PREDICT_FALSE (l < VLIB_FRAME_SIZE)) + { + u32 n_alloc; + + if (PREDICT_FALSE (!qpd->free_cops)) + vec_alloc (qpd->free_cops, CRYPTO_N_FREE_COPS); + + n_alloc = rte_crypto_op_bulk_alloc (dcm->cop_pools[socket_id], + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + &qpd->free_cops[l], + CRYPTO_N_FREE_COPS - l - 1); + + _vec_len (qpd->free_cops) = l + n_alloc; + } + } + /* *INDENT-ON* */ +} + +static_always_inline void +crypto_free_cop (crypto_qp_data_t * qpd, struct rte_crypto_op **cops, u32 n) +{ + u32 l = vec_len (qpd->free_cops); + + if (l + n >= CRYPTO_N_FREE_COPS) + { + l -= VLIB_FRAME_SIZE; + rte_mempool_put_bulk (cops[0]->mempool, + (void **) &qpd->free_cops[l], VLIB_FRAME_SIZE); + } + 
clib_memcpy (&qpd->free_cops[l], cops, sizeof (*cops) * n); + + _vec_len (qpd->free_cops) = l + n; +} + +static_always_inline int +check_algo_is_supported (const struct rte_cryptodev_capabilities *cap, + char *name) +{ + struct + { + uint8_t cipher_algo; + enum rte_crypto_sym_xform_type type; + union + { + enum rte_crypto_auth_algorithm auth; + enum rte_crypto_cipher_algorithm cipher; + }; + char *name; + } supported_algo[] = + { + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_NULL,.name = "NULL"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_AES_CBC,.name = "AES_CBC"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_AES_CTR,.name = "AES_CTR"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_3DES_CBC,.name = "3DES-CBC"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.auth = + RTE_CRYPTO_CIPHER_AES_GCM,.name = "AES-GCM"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA1_HMAC,.name = "HMAC-SHA1"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA256_HMAC,.name = "HMAC-SHA256"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA384_HMAC,.name = "HMAC-SHA384"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA512_HMAC,.name = "HMAC-SHA512"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_AES_XCBC_MAC,.name = "AES-XCBC-MAC"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_AES_GCM,.name = "AES-GCM"}, + { + /* tail */ + .type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED},}; + uint32_t i = 0; + + if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC) + return -1; + + while (supported_algo[i].type != RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED) + { + if (cap->sym.xform_type == supported_algo[i].type) + { + if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER && + cap->sym.cipher.algo == supported_algo[i].cipher) || + (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH && + cap->sym.auth.algo == supported_algo[i].auth)) + { + if (name) + strcpy (name, supported_algo[i].name); + return 0; + } + } + + i++; + } + + return -1; +} + +#endif /* __DPDK_IPSEC_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/node.c b/src/vnet/devices/dpdk/node.c new file mode 100644 index 00000000..e541cdbc --- /dev/null +++ b/src/vnet/devices/dpdk/node.c @@ -0,0 +1,687 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
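crypto_free_cop() above keeps a per-queue-pair cache of free crypto operations and only spills one frame's worth back to the shared mempool when the cache would overflow, keeping the common case allocation-free. A sketch of the same cache discipline with malloc/free standing in for rte_mempool; sizes are illustrative and n is assumed to be at most one frame:

```c
#include <stdlib.h>
#include <string.h>

#define FRAME_SZ 256
#define CACHE_MAX (FRAME_SZ * 3)

typedef struct
{
  void *slot[CACHE_MAX];
  unsigned len;
} op_cache_sketch;

/* Return n ops (n <= FRAME_SZ assumed) to the cache; if that would
 * overflow, first spill one frame's worth back to the global pool. */
void
cache_return_ops (op_cache_sketch *c, void **ops, unsigned n)
{
  if (c->len + n >= CACHE_MAX)
    {
      unsigned i;
      c->len -= FRAME_SZ;
      for (i = 0; i < FRAME_SZ; i++)
        free (c->slot[c->len + i]); /* stand-in for rte_mempool_put_bulk */
    }
  memcpy (&c->slot[c->len], ops, n * sizeof (void *));
  c->len += n;
}
```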
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "dpdk_priv.h" + +static char *dpdk_error_strings[] = { +#define _(n,s) s, + foreach_dpdk_error +#undef _ +}; + +always_inline int +vlib_buffer_is_ip4 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); +} + +always_inline int +vlib_buffer_is_ip6 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); +} + +always_inline int +vlib_buffer_is_mpls (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); +} + +#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0) +/* New ol_flags bits added in DPDK-16.11 */ +#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) +#endif + +always_inline u32 +dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0) +{ + if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0))) + if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0)) + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + else + return VNET_DEVICE_INPUT_NEXT_IP4_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0))) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; + else + return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; +} + +always_inline int +dpdk_mbuf_is_vlan (struct rte_mbuf *mb) +{ +#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) + return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) == + RTE_PTYPE_L2_ETHER_VLAN; +#else + return + (mb->ol_flags & + (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == + PKT_RX_VLAN_PKT; +#endif +} + +always_inline int +dpdk_mbuf_is_ip4 (struct rte_mbuf *mb) +{ + return RTE_ETH_IS_IPV4_HDR (mb->packet_type) != 0; +} + +always_inline int +dpdk_mbuf_is_ip6 (struct rte_mbuf *mb) +{ + return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0; +} + +always_inline u32 +dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0) +{ + if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb))) + return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb))) + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb))) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; + else + return dpdk_rx_next_from_etype (mb, b0); +} + +always_inline void +dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error) +{ + if (mb->ol_flags & PKT_RX_IP_CKSUM_BAD) + { + *error = DPDK_ERROR_IP_CHECKSUM_ERROR; + *next = VNET_DEVICE_INPUT_NEXT_DROP; + } + else + *error = DPDK_ERROR_NONE; +} + +void +dpdk_rx_trace (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 * buffers, uword n_buffers) +{ + vlib_main_t *vm = vlib_get_main (); + u32 *b, n_left; + u32 next0; + + n_left = n_buffers; + b = buffers; + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t *b0; + dpdk_rx_dma_trace_t *t0; + struct rte_mbuf *mb; + u8 error0; + + bi0 = b[0]; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + mb = rte_mbuf_from_vlib_buffer (b0); + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + next0 = xd->per_interface_next_index; + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + next0 = 
dpdk_rx_next_from_mb (mb, b0); + else + next0 = dpdk_rx_next_from_etype (mb, b0); + + dpdk_rx_error_from_mb (mb, &next0, &error0); + + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->queue_index = queue_id; + t0->device_index = xd->device_index; + t0->buffer_index = bi0; + + clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); + clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + clib_memcpy (t0->buffer.pre_data, b0->data, + sizeof (t0->buffer.pre_data)); + clib_memcpy (&t0->data, mb->buf_addr + mb->data_off, sizeof (t0->data)); + + b += 1; + } +} + +static inline u32 +dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id) +{ + u32 n_buffers; + u32 n_left; + u32 n_this_chunk; + + n_left = VLIB_FRAME_SIZE; + n_buffers = 0; + + if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) + { + while (n_left) + { + n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id, + xd->rx_vectors[queue_id] + + n_buffers, n_left); + n_buffers += n_this_chunk; + n_left -= n_this_chunk; + + /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */ + if (n_this_chunk < 32) + break; + } + } + else + { + ASSERT (0); + } + + return n_buffers; +} + + +static_always_inline void +dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, + struct rte_mbuf *mb, vlib_buffer_free_list_t * fl) +{ + u8 nb_seg = 1; + struct rte_mbuf *mb_seg = 0; + vlib_buffer_t *b_seg, *b_chain = 0; + mb_seg = mb->next; + b_chain = b; + + while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) + { + ASSERT (mb_seg != 0); + + b_seg = vlib_buffer_from_rte_mbuf (mb_seg); + vlib_buffer_init_for_free_list (b_seg, fl); + + ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + ASSERT (b_seg->current_data == 0); + + /* + * The driver (e.g. virtio) may not put the packet data at the start + * of the segment, so don't assume b_seg->current_data == 0 is correct. + */ + b_seg->current_data = + (mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data; + + b_seg->current_length = mb_seg->data_len; + b->total_length_not_including_first_buffer += mb_seg->data_len; + + b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT; + b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg); + + b_chain = b_seg; + mb_seg = mb_seg->next; + nb_seg++; + } +} + +static_always_inline void +dpdk_prefetch_buffer (struct rte_mbuf *mb) +{ + vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); + CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE); +} + +/* + * This function is used when there are no worker threads. + * The main thread performs IO and forwards the packets. 
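dpdk_rx_burst() above fills at most one VLIB frame by calling rte_eth_rx_burst() in a loop and stops early when the PMD returns a short chunk (fewer than 32 mbufs), which empirically means the queue is drained. The accumulation logic in isolation, with a hypothetical stub_rx_burst standing in for the PMD call:

```c
#define FRAME_SZ 256

/* Hypothetical stand-in for rte_eth_rx_burst(): returns up to n packets. */
unsigned
stub_rx_burst (void **pkts, unsigned n)
{
  (void) pkts;
  (void) n;
  return 0;
}

unsigned
rx_fill_frame (void **pkts)
{
  unsigned n = 0, left = FRAME_SZ, chunk;

  while (left)
    {
      chunk = stub_rx_burst (pkts + n, left);
      n += chunk;
      left -= chunk;
      if (chunk < 32)  /* short chunk: the queue is drained for now */
        break;
    }
  return n;
}
```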
+ */ +static_always_inline u32 +dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, + vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id) +{ + u32 n_buffers; + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + u32 n_left_to_next, *to_next; + u32 mb_index; + vlib_main_t *vm = vlib_get_main (); + uword n_rx_bytes = 0; + u32 n_trace, trace_cnt __attribute__ ((unused)); + vlib_buffer_free_list_t *fl; + u32 buffer_flags_template; + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) + return 0; + + n_buffers = dpdk_rx_burst (dm, xd, queue_id); + + if (n_buffers == 0) + { + return 0; + } + + buffer_flags_template = dm->buffer_flags_template; + + vec_reset_length (xd->d_trace_buffers[cpu_index]); + trace_cnt = n_trace = vlib_get_trace_count (vm, node); + + if (n_trace > 0) + { + u32 n = clib_min (n_trace, n_buffers); + mb_index = 0; + + while (n--) + { + struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++]; + vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); + vec_add1 (xd->d_trace_buffers[cpu_index], + vlib_get_buffer_index (vm, b)); + } + } + + fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + mb_index = 0; + + while (n_buffers > 0) + { + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 bi0, next0, l3_offset0; + u32 bi1, next1, l3_offset1; + u32 bi2, next2, l3_offset2; + u32 bi3, next3, l3_offset3; + u8 error0, error1, error2, error3; + u64 or_ol_flags; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_buffers > 8 && n_left_to_next > 4) + { + struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; + struct rte_mbuf *mb1 = xd->rx_vectors[queue_id][mb_index + 1]; + struct rte_mbuf *mb2 = xd->rx_vectors[queue_id][mb_index + 2]; + struct rte_mbuf *mb3 = xd->rx_vectors[queue_id][mb_index + 3]; + + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 4]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 5]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 6]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 7]); + + if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) + { + if (PREDICT_FALSE (mb0->nb_segs > 1)) + dpdk_prefetch_buffer (mb0->next); + if (PREDICT_FALSE (mb1->nb_segs > 1)) + dpdk_prefetch_buffer (mb1->next); + if (PREDICT_FALSE (mb2->nb_segs > 1)) + dpdk_prefetch_buffer (mb2->next); + if (PREDICT_FALSE (mb3->nb_segs > 1)) + dpdk_prefetch_buffer (mb3->next); + } + + ASSERT (mb0); + ASSERT (mb1); + ASSERT (mb2); + ASSERT (mb3); + + or_ol_flags = (mb0->ol_flags | mb1->ol_flags | + mb2->ol_flags | mb3->ol_flags); + b0 = vlib_buffer_from_rte_mbuf (mb0); + b1 = vlib_buffer_from_rte_mbuf (mb1); + b2 = vlib_buffer_from_rte_mbuf (mb2); + b3 = vlib_buffer_from_rte_mbuf (mb3); + + vlib_buffer_init_for_free_list (b0, fl); + vlib_buffer_init_for_free_list (b1, fl); + vlib_buffer_init_for_free_list (b2, fl); + vlib_buffer_init_for_free_list (b3, fl); + + bi0 = vlib_get_buffer_index (vm, b0); + bi1 = vlib_get_buffer_index (vm, b1); + bi2 = vlib_get_buffer_index (vm, b2); + bi3 = vlib_get_buffer_index (vm, b3); + + to_next[0] = bi0; + to_next[1] = bi1; + to_next[2] = bi2; + to_next[3] = bi3; + to_next += 4; + n_left_to_next -= 4; + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + { + next0 = next1 = next2 = next3 = xd->per_interface_next_index; + } + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + { + next0 = dpdk_rx_next_from_mb (mb0, b0); + next1 = dpdk_rx_next_from_mb (mb1, b1); + next2 = dpdk_rx_next_from_mb (mb2, b2); + next3 = 
dpdk_rx_next_from_mb (mb3, b3); + } + else + { + next0 = dpdk_rx_next_from_etype (mb0, b0); + next1 = dpdk_rx_next_from_etype (mb1, b1); + next2 = dpdk_rx_next_from_etype (mb2, b2); + next3 = dpdk_rx_next_from_etype (mb3, b3); + } + + if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD)) + { + dpdk_rx_error_from_mb (mb0, &next0, &error0); + dpdk_rx_error_from_mb (mb1, &next1, &error1); + dpdk_rx_error_from_mb (mb2, &next2, &error2); + dpdk_rx_error_from_mb (mb3, &next3, &error3); + b0->error = node->errors[error0]; + b1->error = node->errors[error1]; + b2->error = node->errors[error2]; + b3->error = node->errors[error3]; + } + else + { + b0->error = b1->error = node->errors[DPDK_ERROR_NONE]; + b2->error = b3->error = node->errors[DPDK_ERROR_NONE]; + } + + l3_offset0 = device_input_next_node_advance[next0]; + l3_offset1 = device_input_next_node_advance[next1]; + l3_offset2 = device_input_next_node_advance[next2]; + l3_offset3 = device_input_next_node_advance[next3]; + + b0->current_data = l3_offset0 + mb0->data_off; + b1->current_data = l3_offset1 + mb1->data_off; + b2->current_data = l3_offset2 + mb2->data_off; + b3->current_data = l3_offset3 + mb3->data_off; + + b0->current_data -= RTE_PKTMBUF_HEADROOM; + b1->current_data -= RTE_PKTMBUF_HEADROOM; + b2->current_data -= RTE_PKTMBUF_HEADROOM; + b3->current_data -= RTE_PKTMBUF_HEADROOM; + + b0->current_length = mb0->data_len - l3_offset0; + b1->current_length = mb1->data_len - l3_offset1; + b2->current_length = mb2->data_len - l3_offset2; + b3->current_length = mb3->data_len - l3_offset3; + + b0->flags = buffer_flags_template; + b1->flags = buffer_flags_template; + b2->flags = buffer_flags_template; + b3->flags = buffer_flags_template; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b2)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b3)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b2)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b3)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + n_rx_bytes += mb0->pkt_len; + n_rx_bytes += mb1->pkt_len; + n_rx_bytes += mb2->pkt_len; + n_rx_bytes += mb3->pkt_len; + + /* Process subsequent segments of multi-segment packets */ + if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) + { + dpdk_process_subseq_segs (vm, b0, mb0, fl); + dpdk_process_subseq_segs (vm, b1, mb1, fl); + dpdk_process_subseq_segs (vm, b2, mb2, fl); + dpdk_process_subseq_segs (vm, b3, mb3, fl); + } + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); + + /* Do we have any driver RX features configured on the interface? 
*/ + vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index, + &next0, &next1, &next2, &next3, + b0, b1, b2, b3, + l3_offset0, l3_offset1, + l3_offset2, l3_offset3); + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + n_buffers -= 4; + mb_index += 4; + } + while (n_buffers > 0 && n_left_to_next > 0) + { + struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; + + ASSERT (mb0); + + b0 = vlib_buffer_from_rte_mbuf (mb0); + + /* Prefetch one next segment if it exists. */ + if (PREDICT_FALSE (mb0->nb_segs > 1)) + dpdk_prefetch_buffer (mb0->next); + + vlib_buffer_init_for_free_list (b0, fl); + + bi0 = vlib_get_buffer_index (vm, b0); + + to_next[0] = bi0; + to_next++; + n_left_to_next--; + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + next0 = xd->per_interface_next_index; + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + next0 = dpdk_rx_next_from_mb (mb0, b0); + else + next0 = dpdk_rx_next_from_etype (mb0, b0); + + dpdk_rx_error_from_mb (mb0, &next0, &error0); + b0->error = node->errors[error0]; + + l3_offset0 = device_input_next_node_advance[next0]; + + b0->current_data = l3_offset0; + b0->current_data += mb0->data_off - RTE_PKTMBUF_HEADROOM; + b0->current_length = mb0->data_len - l3_offset0; + + b0->flags = buffer_flags_template; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + n_rx_bytes += mb0->pkt_len; + + /* Process subsequent segments of multi-segment packets */ + dpdk_process_subseq_segs (vm, b0, mb0, fl); + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + /* Do we have any driver RX features configured on the interface? */ + vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0, + b0, l3_offset0); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + n_buffers--; + mb_index++; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0)) + { + dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index], + vec_len (xd->d_trace_buffers[cpu_index])); + vlib_set_trace_count (vm, node, n_trace - + vec_len (xd->d_trace_buffers[cpu_index])); + } + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); + + dpdk_worker_t *dw = vec_elt_at_index (dm->workers, cpu_index); + dw->aggregate_rx_packets += mb_index; + + return mb_index; +} + +static inline void +poll_rate_limit (dpdk_main_t * dm) +{ + /* Limit the poll rate by sleeping for N msec between polls */ + if (PREDICT_FALSE (dm->poll_sleep != 0)) + { + struct timespec ts, tsrem; + + ts.tv_sec = 0; + ts.tv_nsec = 1000 * 1000 * dm->poll_sleep; /* 1ms */ + + while (nanosleep (&ts, &tsrem) < 0) + { + ts = tsrem; + } + } +} + +/** \brief Main DPDK input node + @node dpdk-input + + This is the main DPDK input node: across each assigned interface, + call rte_eth_rx_burst(...) or similar to obtain a vector of + packets to process. Handle early packet discard. 
Derive @c
+    vlib_buffer_t metadata from struct rte_mbuf metadata.
+    Depending on the resulting metadata, adjust b->current_data and
+    b->current_length, and dispatch directly to
+    ip4-input-no-checksum or ip6-input. Trace the packet if required.
+
+    @param vm   vlib_main_t corresponding to the current thread
+    @param node vlib_node_runtime_t
+    @param f    vlib_frame_t input-node, not used.
+
+    @par Graph mechanics: buffer metadata, next index usage
+
+    @em Uses:
+    - struct rte_mbuf mb->ol_flags
+        - PKT_RX_IP_CKSUM_BAD
+    - RTE_ETH_IS_xxx_HDR(mb->packet_type)
+        - packet classification result
+
+    @em Sets:
+    - b->error if the packet is to be dropped immediately
+    - b->current_data, b->current_length
+        - adjusted as needed to skip the L2 header in direct-dispatch cases
+    - vnet_buffer(b)->sw_if_index[VLIB_RX]
+        - rx interface sw_if_index
+    - vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0
+        - required by ipX-lookup
+    - b->flags
+        - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.
+
+    Next Nodes:
+    - Static arcs to: error-drop, ethernet-input,
+      ip4-input-no-checksum, ip6-input, mpls-input
+    - per-interface redirection, controlled by
+      xd->per_interface_next_index
+*/
+
+static uword
+dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  dpdk_main_t *dm = &dpdk_main;
+  dpdk_device_t *xd;
+  uword n_rx_packets = 0;
+  dpdk_device_and_queue_t *dq;
+  u32 cpu_index = os_get_cpu_number ();
+
+  /*
+   * Poll all devices on this cpu for input/interrupts.
+   */
+  /* *INDENT-OFF* */
+  vec_foreach (dq, dm->devices_by_cpu[cpu_index])
+    {
+      xd = vec_elt_at_index(dm->devices, dq->device);
+      n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id);
+    }
+  /* *INDENT-ON* */
+
+  poll_rate_limit (dm);
+
+  return n_rx_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_input_node) = {
+  .function = dpdk_input,
+  .type = VLIB_NODE_TYPE_INPUT,
+  .name = "dpdk-input",
+  .sibling_of = "device-input",
+
+  /* Will be enabled if/when hardware is detected. */
+  .state = VLIB_NODE_STATE_DISABLED,
+
+  .format_buffer = format_ethernet_header_with_length,
+  .format_trace = format_dpdk_rx_dma_trace,
+
+  .n_errors = DPDK_N_ERROR,
+  .error_strings = dpdk_error_strings,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input);
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/dpdk/qos_doc.md b/src/vnet/devices/dpdk/qos_doc.md
new file mode 100644
index 00000000..9bd0659d
--- /dev/null
+++ b/src/vnet/devices/dpdk/qos_doc.md
@@ -0,0 +1,404 @@
+# QoS Hierarchical Scheduler {#qos_doc}
+
+The Quality-of-Service (QoS) scheduler performs egress-traffic management by
+prioritizing the transmission of the packets of different service types and
+subscribers based on the Service Level Agreements (SLAs). The QoS scheduler
+can be enabled on one or more NIC output interfaces depending upon the
+requirement.
+
+
+## Overview
+
+The QoS scheduler supports a number of scheduling and shaping levels which
+construct a hierarchical tree. The first level in the hierarchy is the port
+(i.e. the physical interface), which constitutes the root node of the tree.
+The next level is the subport, which represents a group of users/subscribers.
+The individual user/subscriber is represented by the pipe at the next level.
+Each user can have different traffic types based on specific loss-rate,
+jitter, and latency criteria.
These traffic types are
+represented at the traffic-class level in the form of different traffic
+classes. The last level contains a number of queues, which are grouped
+together to host the packets of a specific traffic class.
+
+The QoS scheduler implementation requires flow classification, enqueue and
+dequeue operations. Flow classification is a mandatory stage for HQoS:
+incoming packets are classified by mapping the packet-field information to a
+5-tuple (HQoS subport, pipe, traffic class, queue within traffic class, and
+color) and storing that information in the mbuf sched field. The enqueue
+operation uses this information to determine the queue for storing the
+packet; at this stage, if the specific queue is full, QoS drops the packet.
+The dequeue operation consists of scheduling the packet based on its length
+and available credits, and handing over the scheduled packet to the output
+interface.
+
+For more information on the QoS scheduler, please refer to the DPDK
+Programmer's Guide:
+http://dpdk.org/doc/guides/prog_guide/qos_framework.html
+
+
+### QoS Scheduler Parameters
+
+The following illustrates the default HQoS configuration for each 10GbE
+output port:
+
+Single subport (subport 0):
+  - Subport rate set to 100% of port rate
+  - Each of the 4 traffic classes has rate set to 100% of port rate
+
+4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration:
+  - Pipe rate set to 1/4K of port rate
+  - Each of the 4 traffic classes has rate set to 100% of pipe rate
+  - Within each traffic class, the byte-level WRR weights for the 4 queues are set to 1:1:1:1
+
+
+#### Port configuration
+
+```
+port {
+  rate 1250000000           /* Assuming 10GbE port */
+  frame_overhead 24         /* Overhead fields per Ethernet frame:
+                             * 7B (Preamble) +
+                             * 1B (Start of Frame Delimiter (SFD)) +
+                             * 4B (Frame Check Sequence (FCS)) +
+                             * 12B (Inter Frame Gap (IFG))
+                             */
+  mtu 1522                  /* Assuming Ethernet/IPv4 pkt (FCS not included) */
+  n_subports_per_port 1     /* Number of subports per output interface */
+  n_pipes_per_subport 4096  /* Number of pipes (users/subscribers) */
+  queue_sizes 64 64 64 64   /* Packet queue size for each traffic class.
+                             * All queues within the same pipe traffic class
+                             * have the same size. Queues from different
+                             * pipes serving the same traffic class have
+                             * the same size.
*/ +} +``` + + +#### Subport configuration + +``` +subport 0 { + tb_rate 1250000000 /* Subport level token bucket rate (bytes per second) */ + tb_size 1000000 /* Subport level token bucket size (bytes) */ + tc0_rate 1250000000 /* Subport level token bucket rate for traffic class 0 (bytes per second) */ + tc1_rate 1250000000 /* Subport level token bucket rate for traffic class 1 (bytes per second) */ + tc2_rate 1250000000 /* Subport level token bucket rate for traffic class 2 (bytes per second) */ + tc3_rate 1250000000 /* Subport level token bucket rate for traffic class 3 (bytes per second) */ + tc_period 10 /* Time interval for refilling the token bucket associated with traffic class (Milliseconds) */ + pipe 0 4095 profile 0 /* pipes (users/subscribers) configured with pipe profile 0 */ +} +``` + + +#### Pipe configuration + +``` +pipe_profile 0 { + tb_rate 305175 /* Pipe level token bucket rate (bytes per second) */ + tb_size 1000000 /* Pipe level token bucket size (bytes) */ + tc0_rate 305175 /* Pipe level token bucket rate for traffic class 0 (bytes per second) */ + tc1_rate 305175 /* Pipe level token bucket rate for traffic class 1 (bytes per second) */ + tc2_rate 305175 /* Pipe level token bucket rate for traffic class 2 (bytes per second) */ + tc3_rate 305175 /* Pipe level token bucket rate for traffic class 3 (bytes per second) */ + tc_period 40 /* Time interval for refilling the token bucket associated with traffic class at pipe level (Milliseconds) */ + tc3_oversubscription_weight 1 /* Weight traffic class 3 oversubscription */ + tc0_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 0 */ + tc1_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 1 */ + tc2_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 2 */ + tc3_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 3 */ +} +``` + + +#### Random Early Detection (RED) parameters per traffic class and color (Green / Yellow / Red) + +``` +red { + tc0_wred_min 48 40 32 /* Minimum threshold for traffic class 0 queue (min_th) in number of packets */ + tc0_wred_max 64 64 64 /* Maximum threshold for traffic class 0 queue (max_th) in number of packets */ + tc0_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 0 queue (maxp = 1 / maxp_inv) */ + tc0_wred_weight 9 9 9 /* Traffic Class 0 queue weight */ + tc1_wred_min 48 40 32 /* Minimum threshold for traffic class 1 queue (min_th) in number of packets */ + tc1_wred_max 64 64 64 /* Maximum threshold for traffic class 1 queue (max_th) in number of packets */ + tc1_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 1 queue (maxp = 1 / maxp_inv) */ + tc1_wred_weight 9 9 9 /* Traffic Class 1 queue weight */ + tc2_wred_min 48 40 32 /* Minimum threshold for traffic class 2 queue (min_th) in number of packets */ + tc2_wred_max 64 64 64 /* Maximum threshold for traffic class 2 queue (max_th) in number of packets */ + tc2_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 2 queue (maxp = 1 / maxp_inv) */ + tc2_wred_weight 9 9 9 /* Traffic Class 2 queue weight */ + tc3_wred_min 48 40 32 /* Minimum threshold for traffic class 3 queue (min_th) in number of packets */ + tc3_wred_max 64 64 64 /* Maximum threshold for traffic class 3 queue (max_th) in number of packets */ + tc3_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 3 queue (maxp = 1 / maxp_inv) */ + tc3_wred_weight 9 9 9 /* Traffic Class 3 queue weight */ +} +``` + + 
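+For reference, the default pipe rate shown above follows directly from the
+port rate: with 4096 pipes per subport, each pipe is allotted 1/4096 of the
+10GbE port bandwidth. A sketch of the arithmetic, using the byte rates from
+the configurations above:
+
+```
+port rate    = 10 Gbps / 8 bits-per-byte = 1250000000 bytes/second
+pipe tb_rate = 1250000000 / 4096 pipes   = 305175 bytes/second (rounded down)
+```
+
+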
+### DPDK QoS Scheduler Integration in VPP
+
+The Hierarchical Quality-of-Service (HQoS) scheduler object can be seen as
+part of the logical NIC output interface. To enable HQoS on a specific output
+interface, the VPP startup.conf file has to be configured accordingly. The
+output interface that requires HQoS should have the "hqos" parameter
+specified in the dpdk section. Another optional parameter, "hqos-thread", can
+be used to associate the output interface with a specific HQoS thread. In the
+cpu section of the config file, "corelist-hqos-threads" is introduced to
+assign logical cpu cores to run the HQoS threads. An HQoS thread can run
+multiple HQoS objects, each associated with a different output interface.
+Instead of writing packets to the NIC TX queue directly, all worker threads
+write the packets to software queues. The HQoS threads read the software
+queues and enqueue the packets to HQoS objects, as well as dequeue packets
+from HQoS objects and write them to NIC output interfaces. The worker threads
+need to be able to send the packets to any output interface; therefore, each
+HQoS object associated with a NIC output interface should have as many
+software queues as there are worker threads.
+
+The following illustrates a sample startup configuration file with 4 worker
+threads feeding 2 HQoS threads, each handling the QoS scheduler for 1 output
+interface.
+
+```
+dpdk {
+  socket-mem 16384,16384
+
+  dev 0000:02:00.0 {
+    num-rx-queues 2
+    hqos
+  }
+  dev 0000:06:00.0 {
+    num-rx-queues 2
+    hqos
+  }
+
+  num-mbufs 1000000
+}
+
+cpu {
+  main-core 0
+  corelist-workers 1, 2, 3, 4
+  corelist-hqos-threads 5, 6
+}
+```
+
+
+### QoS scheduler CLI Commands
+
+Each QoS scheduler instance is initialised with the default parameters
+required to configure the hqos port, subport, pipe and queues. Some of the
+parameters can be reconfigured at run time through CLI commands.
+
+
+#### Configuration
+
+The following commands can be used to configure QoS scheduler parameters.
+Angle-bracketed tokens denote user-supplied values.
+
+The command below can be used to set the subport level parameters such as
+token bucket rate (bytes per second), token bucket size (bytes), traffic
+class rates (bytes per second) and token update period (milliseconds).
+
+```
+set dpdk interface hqos subport <interface> subport <subport-id> [rate <n>]
+  [bktsize <n>] [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]
+```
+
+For setting the pipe profile, the following command can be used.
+
+```
+set dpdk interface hqos pipe <interface> subport <subport-id> pipe <pipe-id>
+  profile <profile-id>
+```
+
+To assign a QoS scheduler instance to a specific thread, the following
+command can be used.
+
+```
+set dpdk interface hqos placement <interface> thread <n>
+```
+
+The command below is used to set the packet fields required for classifying
+the incoming packet. As a result of the classification process, the packet
+field information will be mapped to a 5-tuple (subport, pipe, traffic class,
+queue within traffic class, color) and stored in the packet mbuf.
+
+```
+set dpdk interface hqos pktfield <interface> id <n> offset <n> mask <n>
+```
+
+The DSCP table entries used for identifying the traffic class and queue can
+be set using the command below:
+
+```
+set dpdk interface hqos tctbl <interface> entry <n> tc <n> queue <n>
+```
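+
+As an illustration, the hypothetical session below (interface name and values
+are examples only, not defaults from this patch) maps DSCP value 16 to
+traffic class 1, queue 0, and then moves the scheduler instance for the same
+interface to thread 5:
+
+```
+vpp# set dpdk interface hqos tctbl TenGigabitEthernet2/0/0 entry 16 tc 1 queue 0
+vpp# set dpdk interface hqos placement TenGigabitEthernet2/0/0 thread 5
+```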
+
+
+#### Show Command
+
+The QoS Scheduler configuration can be displayed using the command below.
+
+```
+   vpp# show dpdk interface hqos TenGigabitEthernet2/0/0
+   Thread:
+     Input SWQ size = 4096 packets
+     Enqueue burst size = 256 packets
+     Dequeue burst size = 220 packets
+     Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000
+     Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000
+     Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc
+     Packet field 2 translation table:
+     [ 0 .. 15]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+     [16 .. 31]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+     [32 .. 47]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+     [48 .. 63]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+   Port:
+     Rate = 1250000000 bytes/second
+     MTU = 1514 bytes
+     Frame overhead = 24 bytes
+     Number of subports = 1
+     Number of pipes per subport = 4096
+     Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets
+     Number of pipe profiles = 1
+   Pipe profile 0:
+     Rate = 305175 bytes/second
+     Token bucket size = 1000000 bytes
+     Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second
+     TC period = 40 milliseconds
+     TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+     TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+     TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+     TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+```
+
+The QoS Scheduler placement over the logical cpu cores can be displayed using
+the command below.
+
+```
+    vpp# show dpdk interface hqos placement
+    Thread 5 (vpp_hqos-threads_0 at lcore 5):
+      TenGigabitEthernet2/0/0 queue 0
+    Thread 6 (vpp_hqos-threads_1 at lcore 6):
+      TenGigabitEthernet4/0/1 queue 0
+```
+
+
+### QoS Scheduler Binary APIs
+
+This section explains the available binary APIs for configuring QoS scheduler
+parameters at run time.
+
+The following API can be used to set the pipe profile of a pipe that belongs
+to a given subport:
+
+```
+sw_interface_set_dpdk_hqos_pipe rx <intfc> | sw_if_index <id>
+  subport <subport-id> pipe <pipe-id> profile <profile-id>
+```
+
+The data structures used to set the pipe profile parameter are as follows:
+
+```
+    /** \\brief DPDK interface HQoS pipe profile set request
+        @param client_index - opaque cookie to identify the sender
+        @param context - sender context, to match reply w/ request
+        @param sw_if_index - the interface
+        @param subport - subport ID
+        @param pipe - pipe ID within its subport
+        @param profile - pipe profile ID
+    */
+    define sw_interface_set_dpdk_hqos_pipe {
+        u32 client_index;
+        u32 context;
+        u32 sw_if_index;
+        u32 subport;
+        u32 pipe;
+        u32 profile;
+    };
+
+    /** \\brief DPDK interface HQoS pipe profile set reply
+        @param context - sender context, to match reply w/ request
+        @param retval - request return code
+    */
+    define sw_interface_set_dpdk_hqos_pipe_reply {
+        u32 context;
+        i32 retval;
+    };
+```
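+
+To make the request layout concrete, here is a minimal C sketch that fills in
+the request exactly as defined above. It is illustrative only: it mirrors the
+message fields in a local struct, deliberately omits the VPP binary-API
+transport (shared-memory queue setup, message-ID lookup, byte-order
+conversion), and all field values are hypothetical examples.
+
+```
+#include <stdint.h>
+#include <stdio.h>
+
+/* Local mirror of the sw_interface_set_dpdk_hqos_pipe request above. */
+typedef struct
+{
+  uint32_t client_index;	/* opaque cookie to identify the sender */
+  uint32_t context;		/* sender context, to match reply w/ request */
+  uint32_t sw_if_index;		/* the interface */
+  uint32_t subport;		/* subport ID */
+  uint32_t pipe;		/* pipe ID within its subport */
+  uint32_t profile;		/* pipe profile ID */
+} hqos_pipe_request_t;
+
+int
+main (void)
+{
+  /* Hypothetical values: pipe 1024 of subport 0 on sw_if_index 1 is
+     switched to pipe profile 0. */
+  hqos_pipe_request_t req = {
+    .client_index = 0,		/* normally filled in by the API client */
+    .context = 42,		/* echoed back in the reply */
+    .sw_if_index = 1,
+    .subport = 0,
+    .pipe = 1024,
+    .profile = 0,
+  };
+
+  /* A real client would convert the fields to network byte order and post
+     the message to the VPP API queue; here we only print the request. */
+  printf ("pipe %u of subport %u -> profile %u\n",
+	  (unsigned) req.pipe, (unsigned) req.subport,
+	  (unsigned) req.profile);
+  return 0;
+}
+```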
+
+The following API can be used to set the subport level parameters, for
+example: token bucket rate (bytes per second), token bucket size (bytes),
+traffic class rates (bytes per second) and token update period.
+
+```
+sw_interface_set_dpdk_hqos_subport rx <intfc> | sw_if_index <id>
+  subport <subport-id> [rate <n>] [bktsize <n>]
+  [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]
+```
+
+The data structures used to set the subport level parameters are as follows:
+
+```
+    /** \\brief DPDK interface HQoS subport parameters set request
+        @param client_index - opaque cookie to identify the sender
+        @param context - sender context, to match reply w/ request
+        @param sw_if_index - the interface
+        @param subport - subport ID
+        @param tb_rate - subport token bucket rate (measured in bytes/second)
+        @param tb_size - subport token bucket size (measured in credits)
+        @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second)
+        @param tc_period - enforcement period for rates (measured in milliseconds)
+    */
+    define sw_interface_set_dpdk_hqos_subport {
+        u32 client_index;
+        u32 context;
+        u32 sw_if_index;
+        u32 subport;
+        u32 tb_rate;
+        u32 tb_size;
+        u32 tc_rate[4];
+        u32 tc_period;
+    };
+
+    /** \\brief DPDK interface HQoS subport parameters set reply
+        @param context - sender context, to match reply w/ request
+        @param retval - request return code
+    */
+    define sw_interface_set_dpdk_hqos_subport_reply {
+        u32 context;
+        i32 retval;
+    };
+```
+
+The following API can be used to set a DSCP table entry. The DSCP table has
+64 entries that map the packet DSCP field onto a traffic class and HQoS
+input queue.
+
+```
+sw_interface_set_dpdk_hqos_tctbl rx <intfc> | sw_if_index <id>
+  entry <n> tc <n> queue <n>
+```
+
+The data structures used for setting DSCP table entries are given below.
+
+```
+    /** \\brief DPDK interface HQoS tctbl entry set request
+        @param client_index - opaque cookie to identify the sender
+        @param context - sender context, to match reply w/ request
+        @param sw_if_index - the interface
+        @param entry - entry index ID
+        @param tc - traffic class (0 .. 3)
+        @param queue - traffic class queue (0 .. 3)
+    */
+    define sw_interface_set_dpdk_hqos_tctbl {
+        u32 client_index;
+        u32 context;
+        u32 sw_if_index;
+        u32 entry;
+        u32 tc;
+        u32 queue;
+    };
+
+    /** \\brief DPDK interface HQoS tctbl entry set reply
+        @param context - sender context, to match reply w/ request
+        @param retval - request return code
+    */
+    define sw_interface_set_dpdk_hqos_tctbl_reply {
+        u32 context;
+        i32 retval;
+    };
+```
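+
+For instance, to program the conventional mapping where DSCP values 0 .. 3
+select queues 0 .. 3 of traffic class 0 (interface name and values are purely
+illustrative, not defaults), a client would issue four such requests, shown
+here in their debug-CLI form:
+
+```
+sw_interface_set_dpdk_hqos_tctbl rx TenGigabitEthernet2/0/0 entry 0 tc 0 queue 0
+sw_interface_set_dpdk_hqos_tctbl rx TenGigabitEthernet2/0/0 entry 1 tc 0 queue 1
+sw_interface_set_dpdk_hqos_tctbl rx TenGigabitEthernet2/0/0 entry 2 tc 0 queue 2
+sw_interface_set_dpdk_hqos_tctbl rx TenGigabitEthernet2/0/0 entry 3 tc 0 queue 3
+```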
diff --git a/src/vnet/devices/netmap/cli.c b/src/vnet/devices/netmap/cli.c
new file mode 100644
index 00000000..68695250
--- /dev/null
+++ b/src/vnet/devices/netmap/cli.c
@@ -0,0 +1,146 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+
+static clib_error_t *
+netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+			  vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  u8 *host_if_name = NULL;
+  u8 hwaddr[6];
+  u8 *hw_addr_ptr = 0;
+  int r;
+  u8 is_pipe = 0;
+  u8 is_master = 0;
+  u32 sw_if_index = ~0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "name %s", &host_if_name))
+	;
+      else
+	if (unformat
+	    (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr))
+	hw_addr_ptr = hwaddr;
+      else if (unformat (line_input, "pipe"))
+	is_pipe = 1;
+      else if (unformat (line_input, "master"))
+	is_master = 1;
+      else if (unformat (line_input, "slave"))
+	is_master = 0;
+      else
+	return clib_error_return (0, "unknown input `%U'",
+				  format_unformat_error, input);
+    }
+  unformat_free (line_input);
+
+  if (host_if_name == NULL)
+    return clib_error_return (0, "missing host interface name");
+
+  r =
+    netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master,
+		      &sw_if_index);
+
+  if (r == VNET_API_ERROR_SYSCALL_ERROR_1)
+    return clib_error_return (0, "%s (errno %d)", strerror (errno), errno);
+
+  if (r == VNET_API_ERROR_INVALID_INTERFACE)
+    return clib_error_return (0, "Invalid interface name");
+
+  if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS)
+    return clib_error_return (0, "Interface already exists");
+
+  vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
+		   sw_if_index);
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (netmap_create_command, static) = {
+  .path = "create netmap",
+  .short_help = "create netmap name [<intf name>|valeXXX:YYY] "
+    "[hw-addr <mac>] [pipe] [master|slave]",
+  .function = netmap_create_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
+			  vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  u8 *host_if_name = NULL;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "name %s", &host_if_name))
+	;
+      else
+	return clib_error_return (0, "unknown input `%U'",
+				  format_unformat_error, input);
+    }
+  unformat_free (line_input);
+
+  if (host_if_name == NULL)
+    return clib_error_return (0, "missing host interface name");
+
+  netmap_delete_if (vm, host_if_name);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (netmap_delete_command, static) = {
+  .path = "delete netmap",
+  .short_help = "delete netmap name <intf name>",
+  .function = netmap_delete_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+netmap_cli_init (vlib_main_t * vm)
+{
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (netmap_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/netmap/device.c b/src/vnet/devices/netmap/device.c
new file mode 100644
index 00000000..2152824f
--- /dev/null
+++ b/src/vnet/devices/netmap/device.c
@@ -0,0 +1,261 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#define foreach_netmap_tx_func_error \ +_(NO_FREE_SLOTS, "no free tx slots") \ +_(PENDING_MSGS, "pending msgs in tx ring") + +typedef enum +{ +#define _(f,s) NETMAP_TX_ERROR_##f, + foreach_netmap_tx_func_error +#undef _ + NETMAP_TX_N_ERROR, +} netmap_tx_func_error_t; + +static char *netmap_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_netmap_tx_func_error +#undef _ +}; + + +static u8 * +format_netmap_device_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + netmap_main_t *apm = &netmap_main; + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, i); + + s = format (s, "netmap-%s", nif->host_if_name); + return s; +} + +static u8 * +format_netmap_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + netmap_main_t *nm = &netmap_main; + netmap_if_t *nif = vec_elt_at_index (nm->interfaces, dev_instance); + uword indent = format_get_indent (s); + + s = format (s, "NETMAP interface"); + if (verbose) + { + s = format (s, "\n%U version %d flags 0x%x" + "\n%U region %u memsize 0x%x offset 0x%x" + "\n%U tx_slots %u rx_slots %u tx_rings %u rx_rings %u", + format_white_space, indent + 2, + nif->req->nr_version, + nif->req->nr_flags, + format_white_space, indent + 2, + nif->mem_region, + nif->req->nr_memsize, + nif->req->nr_offset, + format_white_space, indent + 2, + nif->req->nr_tx_slots, + nif->req->nr_rx_slots, + nif->req->nr_tx_rings, nif->req->nr_rx_rings); + } + return s; +} + +static u8 * +format_netmap_tx_trace (u8 * s, va_list * args) +{ + s = format (s, "Unimplemented..."); + return s; +} + +static uword +netmap_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + netmap_main_t *nm = &netmap_main; + u32 *buffers = vlib_frame_args (frame); + u32 n_left = frame->n_vectors; + f64 const time_constant = 1e3; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance); + int cur_ring; + + if (PREDICT_FALSE (nif->lockp != 0)) + { + while (__sync_lock_test_and_set (nif->lockp, 1)) + ; + } + + cur_ring = nif->first_tx_ring; + + while (n_left && cur_ring <= nif->last_tx_ring) + { + struct netmap_ring *ring = NETMAP_TXRING (nif->nifp, cur_ring); + int n_free_slots = nm_ring_space (ring); + uint cur = ring->cur; + + if (nm_tx_pending (ring)) + { + if (ioctl (nif->fd, NIOCTXSYNC, NULL) < 0) + clib_unix_warning ("NIOCTXSYNC"); + clib_cpu_time_wait (time_constant); + + if (nm_tx_pending (ring) && !n_free_slots) + { + cur_ring++; + continue; + } + } + + while (n_left && n_free_slots) + { + vlib_buffer_t *b0 = 0; + u32 bi = buffers[0]; + u32 len; + u32 offset = 0; + buffers++; + + struct netmap_slot *slot = &ring->slot[cur]; + + do + { + b0 = vlib_get_buffer (vm, bi); + len = 
b0->current_length; + /* memcpy */ + clib_memcpy ((u8 *) NETMAP_BUF (ring, slot->buf_idx) + offset, + vlib_buffer_get_current (b0), len); + offset += len; + } + while ((bi = b0->next_buffer)); + + slot->len = offset; + cur = (cur + 1) % ring->num_slots; + n_free_slots--; + n_left--; + } + CLIB_MEMORY_BARRIER (); + ring->head = ring->cur = cur; + } + + if (n_left < frame->n_vectors) + ioctl (nif->fd, NIOCTXSYNC, NULL); + + if (PREDICT_FALSE (nif->lockp != 0)) + *nif->lockp = 0; + + if (n_left) + vlib_error_count (vm, node->node_index, + (n_left == + frame->n_vectors ? NETMAP_TX_ERROR_PENDING_MSGS : + NETMAP_TX_ERROR_NO_FREE_SLOTS), n_left); + + vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); + return frame->n_vectors; +} + +static void +netmap_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + netmap_main_t *apm = &netmap_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + nif->per_interface_next_index = node_index; + return; + } + + nif->per_interface_next_index = + vlib_node_add_next (vlib_get_main (), netmap_input_node.index, + node_index); +} + +static void +netmap_clear_hw_interface_counters (u32 instance) +{ + /* Nothing for now */ +} + +static clib_error_t * +netmap_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + netmap_main_t *apm = &netmap_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + u32 hw_flags; + + nif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + + if (nif->is_admin_up) + hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP; + else + hw_flags = 0; + + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + + return 0; +} + +static clib_error_t * +netmap_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + /* Nothing for now */ + return 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (netmap_device_class) = { + .name = "netmap", + .tx_function = netmap_interface_tx, + .format_device_name = format_netmap_device_name, + .format_device = format_netmap_device, + .format_tx_trace = format_netmap_tx_trace, + .tx_function_n_errors = NETMAP_TX_N_ERROR, + .tx_function_error_strings = netmap_tx_func_error_strings, + .rx_redirect_to_node = netmap_set_interface_next_node, + .clear_counters = netmap_clear_hw_interface_counters, + .admin_up_down_function = netmap_interface_admin_up_down, + .subif_add_del_function = netmap_subif_add_del_function, +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH(netmap_device_class, + netmap_interface_tx) +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/netmap/net_netmap.h b/src/vnet/devices/netmap/net_netmap.h new file mode 100644 index 00000000..fd4253b7 --- /dev/null +++ b/src/vnet/devices/netmap/net_netmap.h @@ -0,0 +1,650 @@ +/* + * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``S IS''AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $ + * + * Definitions of constants and the structures used by the netmap + * framework, for the part visible to both kernel and userspace. + * Detailed info on netmap is available with "man netmap" or at + * + * http://info.iet.unipi.it/~luigi/netmap/ + * + * This API is also used to communicate with the VALE software switch + */ + +#ifndef _NET_NETMAP_H_ +#define _NET_NETMAP_H_ + +#define NETMAP_API 11 /* current API version */ + +#define NETMAP_MIN_API 11 /* min and max versions accepted */ +#define NETMAP_MAX_API 15 +/* + * Some fields should be cache-aligned to reduce contention. + * The alignment is architecture and OS dependent, but rather than + * digging into OS headers to find the exact value we use an estimate + * that should cover most architectures. + */ +#define NM_CACHE_ALIGN 128 + +/* + * --- Netmap data structures --- + * + * The userspace data structures used by netmap are shown below. + * They are allocated by the kernel and mmap()ed by userspace threads. + * Pointers are implemented as memory offsets or indexes, + * so that they can be easily dereferenced in kernel and userspace. + + KERNEL (opaque, obviously) + + ==================================================================== + | + USERSPACE | struct netmap_ring + +---->+---------------+ + / | head,cur,tail | + struct netmap_if (nifp, 1 per fd) / | buf_ofs | + +---------------+ / | other fields | + | ni_tx_rings | / +===============+ + | ni_rx_rings | / | buf_idx, len | slot[0] + | | / | flags, ptr | + | | / +---------------+ + +===============+ / | buf_idx, len | slot[1] + | txring_ofs[0] | (rel.to nifp)--' | flags, ptr | + | txring_ofs[1] | +---------------+ + (tx+1 entries) (num_slots entries) + | txring_ofs[t] | | buf_idx, len | slot[n-1] + +---------------+ | flags, ptr | + | rxring_ofs[0] | +---------------+ + | rxring_ofs[1] | + (rx+1 entries) + | rxring_ofs[r] | + +---------------+ + + * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to + * a file descriptor, the mmap()ed region contains a (logically readonly) + * struct netmap_if pointing to struct netmap_ring's. + * + * There is one netmap_ring per physical NIC ring, plus one tx/rx ring + * pair attached to the host stack (this pair is unused for non-NIC ports). + * + * All physical/host stack ports share the same memory region, + * so that zero-copy can be implemented between them. + * VALE switch ports instead have separate memory regions. 
+ * + * The netmap_ring is the userspace-visible replica of the NIC ring. + * Each slot has the index of a buffer (MTU-sized and residing in the + * mmapped region), its length and some flags. An extra 64-bit pointer + * is provided for user-supplied buffers in the tx path. + * + * In user space, the buffer address is computed as + * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE + * + * Added in NETMAP_API 11: + * + * + NIOCREGIF can request the allocation of extra spare buffers from + * the same memory pool. The desired number of buffers must be in + * nr_arg3. The ioctl may return fewer buffers, depending on memory + * availability. nr_arg3 will return the actual value, and, once + * mapped, nifp->ni_bufs_head will be the index of the first buffer. + * + * The buffers are linked to each other using the first uint32_t + * as the index. On close, ni_bufs_head must point to the list of + * buffers to be released. + * + * + NIOCREGIF can request space for extra rings (and buffers) + * allocated in the same memory space. The number of extra rings + * is in nr_arg1, and is advisory. This is a no-op on NICs where + * the size of the memory space is fixed. + * + * + NIOCREGIF can attach to PIPE rings sharing the same memory + * space with a parent device. The ifname indicates the parent device, + * which must already exist. Flags in nr_flags indicate if we want to + * bind the master or slave side, the index (from nr_ringid) + * is just a cookie and does not need to be sequential. + * + * + NIOCREGIF can also attach to 'monitor' rings that replicate + * the content of specific rings, also from the same memory space. + * + * Extra flags in nr_flags support the above functions. + * Application libraries may use the following naming scheme: + * netmap:foo all NIC ring pairs + * netmap:foo^ only host ring pair + * netmap:foo+ all NIC ring + host ring pairs + * netmap:foo-k the k-th NIC ring pair + * netmap:foo{k PIPE ring pair k, master side + * netmap:foo}k PIPE ring pair k, slave side + */ + +/* + * struct netmap_slot is a buffer descriptor + */ +struct netmap_slot { + uint32_t buf_idx; /* buffer index */ + uint16_t len; /* length for this slot */ + uint16_t flags; /* buf changed, etc. */ + uint64_t ptr; /* pointer for indirect buffers */ +}; + +/* + * The following flags control how the slot is used + */ + +#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */ + /* + * must be set whenever buf_idx is changed (as it might be + * necessary to recompute the physical address and mapping) + * + * It is also set by the kernel whenever the buf_idx is + * changed internally (e.g., by pipes). Applications may + * use this information to know when they can reuse the + * contents of previously prepared buffers. + */ + +#define NS_REPORT 0x0002 /* ask the hardware to report results */ + /* + * Request notification when slot is used by the hardware. + * Normally transmit completions are handled lazily and + * may be unreported. This flag lets us know when a slot + * has been sent (e.g. to terminate the sender). + */ + +#define NS_FORWARD 0x0004 /* pass packet 'forward' */ + /* + * (Only for physical ports, rx rings with NR_FORWARD set). + * Slot released to the kernel (i.e. before ring->head) with + * this flag set are passed to the peer ring (host/NIC), + * thus restoring the host-NIC connection for these slots. + * This supports efficient traffic monitoring or firewalling. 
+ */ + +#define NS_NO_LEARN 0x0008 /* disable bridge learning */ + /* + * On a VALE switch, do not 'learn' the source port for + * this buffer. + */ + +#define NS_INDIRECT 0x0010 /* userspace buffer */ + /* + * (VALE tx rings only) data is in a userspace buffer, + * whose address is in the 'ptr' field in the slot. + */ + +#define NS_MOREFRAG 0x0020 /* packet has more fragments */ + /* + * (VALE ports only) + * Set on all but the last slot of a multi-segment packet. + * The 'len' field refers to the individual fragment. + */ + +#define NS_PORT_SHIFT 8 +#define NS_PORT_MASK (0xff << NS_PORT_SHIFT) + /* + * The high 8 bits of the flag, if not zero, indicate the + * destination port for the VALE switch, overriding + * the lookup table. + */ + +#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff) + /* + * (VALE rx rings only) the high 8 bits + * are the number of fragments. + */ + + +/* + * struct netmap_ring + * + * Netmap representation of a TX or RX ring (also known as "queue"). + * This is a queue implemented as a fixed-size circular array. + * At the software level the important fields are: head, cur, tail. + * + * In TX rings: + * + * head first slot available for transmission. + * cur wakeup point. select() and poll() will unblock + * when 'tail' moves past 'cur' + * tail (readonly) first slot reserved to the kernel + * + * [head .. tail-1] can be used for new packets to send; + * 'head' and 'cur' must be incremented as slots are filled + * with new packets to be sent; + * 'cur' can be moved further ahead if we need more space + * for new transmissions. XXX todo (2014-03-12) + * + * In RX rings: + * + * head first valid received packet + * cur wakeup point. select() and poll() will unblock + * when 'tail' moves past 'cur' + * tail (readonly) first slot reserved to the kernel + * + * [head .. tail-1] contain received packets; + * 'head' and 'cur' must be incremented as slots are consumed + * and can be returned to the kernel; + * 'cur' can be moved further ahead if we want to wait for + * new packets without returning the previous ones. + * + * DATA OWNERSHIP/LOCKING: + * The netmap_ring, and all slots and buffers in the range + * [head .. tail-1] are owned by the user program; + * the kernel only accesses them during a netmap system call + * and in the user thread context. + * + * Other slots and buffers are reserved for use by the kernel + */ +struct netmap_ring { + /* + * buf_ofs is meant to be used through macros. + * It contains the offset of the buffer region from this + * descriptor. + */ + const int64_t buf_ofs; + const uint32_t num_slots; /* number of slots in the ring. */ + const uint32_t nr_buf_size; + const uint16_t ringid; + const uint16_t dir; /* 0: tx, 1: rx */ + + uint32_t head; /* (u) first user slot */ + uint32_t cur; /* (u) wakeup point */ + uint32_t tail; /* (k) first kernel slot */ + + uint32_t flags; + + struct timeval ts; /* (k) time of last *sync() */ + + /* opaque room for a mutex or similar object */ +#if !defined(_WIN32) || defined(__CYGWIN__) + uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128]; +#else + uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128]; +#endif + + /* the slots follow. This struct has variable size */ + struct netmap_slot slot[0]; /* array of slots. */ +}; + + +/* + * RING FLAGS + */ +#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ + /* + * updates the 'ts' field on each netmap syscall. 
This saves
+	 * a separate gettimeofday(), and is not much worse than
+	 * software timestamps generated in the interrupt handler.
+	 */
+
+#define NR_FORWARD	0x0004	/* enable NS_FORWARD for ring */
+	/*
+	 * Enables the NS_FORWARD slot flag for the ring.
+	 */
+
+
+/*
+ * Netmap representation of an interface and its queue(s).
+ * This is initialized by the kernel when binding a file
+ * descriptor to a port, and should be considered as readonly
+ * by user programs. The kernel never uses it.
+ *
+ * There is one netmap_if for each file descriptor on which we want
+ * to select/poll.
+ * select/poll operates on one or all pairs depending on the value of
+ * nmr_queueid passed on the ioctl.
+ */
+struct netmap_if {
+	char		ni_name[IFNAMSIZ];	/* name of the interface. */
+	const uint32_t	ni_version;	/* API version, currently unused */
+	const uint32_t	ni_flags;	/* properties */
+#define NI_PRIV_MEM	0x1		/* private memory region */
+
+	/*
+	 * The number of packet rings available in netmap mode.
+	 * Physical NICs can have different numbers of tx and rx rings.
+	 * Physical NICs also have a 'host' ring pair.
+	 * Additionally, clients can request additional ring pairs to
+	 * be used for internal communication.
+	 */
+	const uint32_t	ni_tx_rings;	/* number of HW tx rings */
+	const uint32_t	ni_rx_rings;	/* number of HW rx rings */
+
+	uint32_t	ni_bufs_head;	/* head index for extra bufs */
+	uint32_t	ni_spare1[5];
+	/*
+	 * The following array contains the offset of each netmap ring
+	 * from this structure, in the following order:
+	 * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings;
+	 * NIC rx rings (ni_rx_rings); host rx ring (1); extra rx rings.
+	 *
+	 * The area is filled up by the kernel on NIOCREGIF,
+	 * and then only read by userspace code.
+	 */
+	const ssize_t	ring_ofs[0];
+};
+
+
+#ifndef NIOCREGIF
+/*
+ * ioctl names and related fields
+ *
+ * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
+ *	whose identity is set in NIOCREGIF through nr_ringid.
+ *	These are non blocking and take no argument.
+ *
+ * NIOCGINFO takes a struct ifreq, the interface name is the input,
+ *	the outputs are number of queues and number of descriptor
+ *	for each queue (useful to set number of threads etc.).
+ *	The info returned is only advisory and may change before
+ *	the interface is bound to a file descriptor.
+ *
+ * NIOCREGIF takes an interface name within a struct nmreq,
+ *	and activates netmap mode on the interface (if possible).
+ *
+ * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we
+ * can pass it down to other NIC-related ioctls.
+ *
+ * The actual argument (struct nmreq) has a number of options to request
+ * different functions.
+ * The following are used in NIOCREGIF when nr_cmd == 0:
+ *
+ * nr_name	(in)
+ *	The name of the port (em0, valeXXX:YYY, etc.)
+ *	limited to IFNAMSIZ for backward compatibility.
+ *
+ * nr_version	(in/out)
+ *	Must match NETMAP_API as used in the kernel, error otherwise.
+ *	Always returns the desired value on output.
+ *
+ * nr_tx_slots, nr_rx_slots, nr_tx_rings, nr_rx_rings (in/out)
+ *	On input, non-zero values may be used to reconfigure the port
+ *	according to the requested values, but this is not guaranteed.
+ *	On output the actual values in use are reported.
+ *
+ * nr_ringid (in)
+ *	Indicates how rings should be bound to the file descriptors.
+ *	If nr_flags != 0, then the low bits (in NETMAP_RING_MASK)
+ *	are used to indicate the ring number, and nr_flags specifies
+ *	the actual rings to bind. NETMAP_NO_TX_POLL is unaffected.
+ * + * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED: + * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control + * the binding as follows: + * 0 (default) binds all physical rings + * NETMAP_HW_RING | ring number binds a single ring pair + * NETMAP_SW_RING binds only the host tx/rx rings + * + * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push + * packets on tx rings only if POLLOUT is set. + * The default is to push any pending packet. + * + * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release + * packets on rx rings also when POLLIN is NOT set. + * The default is to touch the rx ring only with POLLIN. + * Note that this is the opposite of TX because it + * reflects the common usage. + * + * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead. + * NETMAP_PRIV_MEM is set on return for ports that do not use + * the global memory allocator. + * This information is not significant and applications + * should look at the region id in nr_arg2 + * + * nr_flags is the recommended mode to indicate which rings should + * be bound to a file descriptor. Values are NR_REG_* + * + * nr_arg1 (in) The number of extra rings to be reserved. + * Especially when allocating a VALE port the system only + * allocates the amount of memory needed for the port. + * If more shared memory rings are desired (e.g. for pipes), + * the first invocation for the same basename/allocator + * should specify a suitable number. Memory cannot be + * extended after the first allocation without closing + * all ports on the same region. + * + * nr_arg2 (in/out) The identity of the memory region used. + * On input, 0 means the system decides autonomously, + * other values may try to select a specific region. + * On return the actual value is reported. + * Region '1' is the global allocator, normally shared + * by all interfaces. Other values are private regions. + * If two ports the same region zero-copy is possible. + * + * nr_arg3 (in/out) number of extra buffers to be allocated. + * + * + * + * nr_cmd (in) if non-zero indicates a special command: + * NETMAP_BDG_ATTACH and nr_name = vale*:ifname + * attaches the NIC to the switch; nr_ringid specifies + * which rings to use. Used by vale-ctl -a ... + * nr_arg1 = NETMAP_BDG_HOST also attaches the host port + * as in vale-ctl -h ... + * + * NETMAP_BDG_DETACH and nr_name = vale*:ifname + * disconnects a previously attached NIC. + * Used by vale-ctl -d ... + * + * NETMAP_BDG_LIST + * list the configuration of VALE switches. + * + * NETMAP_BDG_VNET_HDR + * Set the virtio-net header length used by the client + * of a VALE switch port. + * + * NETMAP_BDG_NEWIF + * create a persistent VALE port with name nr_name. + * Used by vale-ctl -n ... + * + * NETMAP_BDG_DELIF + * delete a persistent VALE port. Used by vale-ctl -d ... 
+ * + * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific + * + * + * + */ + + +/* + * struct nmreq overlays a struct ifreq (just the name) + */ +struct nmreq { + char nr_name[IFNAMSIZ]; + uint32_t nr_version; /* API version */ + uint32_t nr_offset; /* nifp offset in the shared region */ + uint32_t nr_memsize; /* size of the shared region */ + uint32_t nr_tx_slots; /* slots in tx rings */ + uint32_t nr_rx_slots; /* slots in rx rings */ + uint16_t nr_tx_rings; /* number of tx rings */ + uint16_t nr_rx_rings; /* number of rx rings */ + + uint16_t nr_ringid; /* ring(s) we care about */ +#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */ +#define NETMAP_SW_RING 0x2000 /* only host ring pair */ + +#define NETMAP_RING_MASK 0x0fff /* the ring number */ + +#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */ + +#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */ + + uint16_t nr_cmd; +#define NETMAP_BDG_ATTACH 1 /* attach the NIC */ +#define NETMAP_BDG_DETACH 2 /* detach the NIC */ +#define NETMAP_BDG_REGOPS 3 /* register bridge callbacks */ +#define NETMAP_BDG_LIST 4 /* get bridge's info */ +#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */ +#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */ +#define NETMAP_BDG_NEWIF 6 /* create a virtual port */ +#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */ +#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */ +#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */ +#define NETMAP_BDG_POLLING_ON 10 /* delete polling kthread */ +#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */ +#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */ + uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ +#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ + + uint16_t nr_arg2; + uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */ + uint32_t nr_flags; + /* various modes, extends nr_ringid */ + uint32_t spare2[1]; +}; + +#define NR_REG_MASK 0xf /* values for nr_flags */ +enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */ + NR_REG_ALL_NIC = 1, + NR_REG_SW = 2, + NR_REG_NIC_SW = 3, + NR_REG_ONE_NIC = 4, + NR_REG_PIPE_MASTER = 5, + NR_REG_PIPE_SLAVE = 6, +}; +/* monitor uses the NR_REG to select the rings to monitor */ +#define NR_MONITOR_TX 0x100 +#define NR_MONITOR_RX 0x200 +#define NR_ZCOPY_MON 0x400 +/* request exclusive access to the selected rings */ +#define NR_EXCLUSIVE 0x800 +/* request ptnetmap host support */ +#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */ +#define NR_PTNETMAP_HOST 0x1000 +#define NR_RX_RINGS_ONLY 0x2000 +#define NR_TX_RINGS_ONLY 0x4000 +/* Applications set this flag if they are able to deal with virtio-net headers, + * that is send/receive frames that start with a virtio-net header. + * If not set, NIOCREGIF will fail with netmap ports that require applications + * to use those headers. If the flag is set, the application can use the + * NETMAP_VNET_HDR_GET command to figure out the header length. */ +#define NR_ACCEPT_VNET_HDR 0x8000 + + +/* + * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined + * in ws2def.h but not sure if they are in the form we need. 
+ * XXX so we redefine them + * in a convenient way to use for DeviceIoControl signatures + */ +#ifdef _WIN32 +#undef _IO // ws2def.h +#define _WIN_NM_IOCTL_TYPE 40000 +#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_BUFFERED, FILE_ANY_ACCESS ) +#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_OUT_DIRECT, FILE_ANY_ACCESS ) + +#define _IOWR(_c, _n, _s) _IO(_c, _n) + +/* We havesome internal sysctl in addition to the externally visible ones */ +#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT +#define NETMAP_POLL _IO('i', 162) + +/* and also two setsockopt for sysctl emulation */ +#define NETMAP_SETSOCKOPT _IO('i', 140) +#define NETMAP_GETSOCKOPT _IO('i', 141) + + +//These linknames are for the Netmap Core Driver +#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP" +#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap" + +//Definition of a structure used to pass a virtual address within an IOCTL +typedef struct _MEMORY_ENTRY { + PVOID pUsermodeVirtualAddress; +} MEMORY_ENTRY, *PMEMORY_ENTRY; + +typedef struct _POLL_REQUEST_DATA { + int events; + int timeout; + int revents; +} POLL_REQUEST_DATA; + +#endif /* _WIN32 */ + +/* + * FreeBSD uses the size value embedded in the _IOWR to determine + * how much to copy in/out. So we need it to match the actual + * data structure we pass. We put some spares in the structure + * to ease compatibility with other versions + */ +#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */ +#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */ +#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */ +#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */ +#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext. modules */ +#endif /* !NIOCREGIF */ + + +/* + * Helper functions for kernel and userspace + */ + +/* + * check if space is available in the ring. + */ +static inline int +nm_ring_empty(struct netmap_ring *ring) +{ + return (ring->cur == ring->tail); +} + +/* + * Opaque structure that is passed to an external kernel + * module via ioctl(fd, NIOCCONFIG, req) for a user-owned + * bridge port (at this point ephemeral VALE interface). + */ +#define NM_IFRDATA_LEN 256 +struct nm_ifreq { + char nifr_name[IFNAMSIZ]; + char data[NM_IFRDATA_LEN]; +}; + +/* + * netmap kernel thread configuration + */ +/* bhyve/vmm.ko MSIX parameters for IOCTL */ +struct ptn_vmm_ioctl_msix { + uint64_t msg; + uint64_t addr; +}; + +/* IOCTL parameters */ +struct nm_kth_ioctl { + u_long com; + /* TODO: use union */ + union { + struct ptn_vmm_ioctl_msix msix; + } data; +}; + +/* Configuration of a ptnetmap ring */ +struct ptnet_ring_cfg { + uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */ + uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */ + struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */ +}; +#endif /* _NET_NETMAP_H_ */ diff --git a/src/vnet/devices/netmap/netmap.api b/src/vnet/devices/netmap/netmap.api new file mode 100644 index 00000000..377ccffd --- /dev/null +++ b/src/vnet/devices/netmap/netmap.api @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief Create netmap + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param netmap_if_name - interface name + @param hw_addr - interface MAC + @param use_random_hw_addr - use random generated MAC + @param is_pipe - is pipe + @param is_master - 0=slave, 1=master +*/ +define netmap_create +{ + u32 client_index; + u32 context; + + u8 netmap_if_name[64]; + u8 hw_addr[6]; + u8 use_random_hw_addr; + u8 is_pipe; + u8 is_master; +}; + +/** \brief Create netmap response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define netmap_create_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Delete netmap + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param netmap_if_name - interface name +*/ +define netmap_delete +{ + u32 client_index; + u32 context; + + u8 netmap_if_name[64]; +}; + +/** \brief Delete netmap response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define netmap_delete_reply +{ + u32 context; + i32 retval; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/netmap/netmap.c b/src/vnet/devices/netmap/netmap.c new file mode 100644 index 00000000..3bdb442d --- /dev/null +++ b/src/vnet/devices/netmap/netmap.c @@ -0,0 +1,316 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static u32 +netmap_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, + u32 flags) +{ + /* nothing for now */ + return 0; +} + +static clib_error_t * +netmap_fd_read_ready (unix_file_t * uf) +{ + vlib_main_t *vm = vlib_get_main (); + netmap_main_t *nm = &netmap_main; + u32 idx = uf->private_data; + + nm->pending_input_bitmap = + clib_bitmap_set (nm->pending_input_bitmap, idx, 1); + + /* Schedule the rx node */ + vlib_node_set_interrupt_pending (vm, netmap_input_node.index); + + return 0; +} + +static void +close_netmap_if (netmap_main_t * nm, netmap_if_t * nif) +{ + if (nif->unix_file_index != ~0) + { + unix_file_del (&unix_main, unix_main.file_pool + nif->unix_file_index); + nif->unix_file_index = ~0; + } + else if (nif->fd > -1) + close (nif->fd); + + if (nif->mem_region) + { + netmap_mem_region_t *reg = &nm->mem_regions[nif->mem_region]; + if (--reg->refcnt == 0) + { + munmap (reg->mem, reg->region_size); + reg->region_size = 0; + } + } + + + mhash_unset (&nm->if_index_by_host_if_name, nif->host_if_name, + &nif->if_index); + vec_free (nif->host_if_name); + vec_free (nif->req); + + memset (nif, 0, sizeof (*nif)); + pool_put (nm->interfaces, nif); +} + +int +netmap_worker_thread_enable () +{ + /* if worker threads are enabled, switch to polling mode */ + foreach_vlib_main (( + { + vlib_node_set_state (this_vlib_main, + netmap_input_node.index, + VLIB_NODE_STATE_POLLING); + })); + + return 0; +} + +int +netmap_worker_thread_disable () +{ + foreach_vlib_main (( + { + vlib_node_set_state (this_vlib_main, + netmap_input_node.index, + VLIB_NODE_STATE_INTERRUPT); + })); + + return 0; +} + +int +netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set, + u8 is_pipe, u8 is_master, u32 * sw_if_index) +{ + netmap_main_t *nm = &netmap_main; + int ret = 0; + netmap_if_t *nif = 0; + u8 hw_addr[6]; + clib_error_t *error = 0; + vnet_sw_interface_t *sw; + vnet_main_t *vnm = vnet_get_main (); + uword *p; + struct nmreq *req = 0; + netmap_mem_region_t *reg; + vlib_thread_main_t *tm = vlib_get_thread_main (); + int fd; + + p = mhash_get (&nm->if_index_by_host_if_name, if_name); + if (p) + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + + fd = open ("/dev/netmap", O_RDWR); + if (fd < 0) + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + + pool_get (nm->interfaces, nif); + nif->if_index = nif - nm->interfaces; + nif->fd = fd; + nif->unix_file_index = ~0; + + vec_validate (req, 0); + nif->req = req; + req->nr_version = NETMAP_API; + req->nr_flags = NR_REG_ALL_NIC; + + if (is_pipe) + req->nr_flags = is_master ? 
NR_REG_PIPE_MASTER : NR_REG_PIPE_SLAVE; + else + req->nr_flags = NR_REG_ALL_NIC; + + req->nr_flags |= NR_ACCEPT_VNET_HDR; + snprintf (req->nr_name, IFNAMSIZ, "%s", if_name); + req->nr_name[IFNAMSIZ - 1] = 0; + + if (ioctl (nif->fd, NIOCREGIF, req)) + { + ret = VNET_API_ERROR_NOT_CONNECTED; + goto error; + } + + nif->mem_region = req->nr_arg2; + vec_validate (nm->mem_regions, nif->mem_region); + reg = &nm->mem_regions[nif->mem_region]; + if (reg->region_size == 0) + { + reg->mem = mmap (NULL, req->nr_memsize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + clib_warning ("mem %p", reg->mem); + if (reg->mem == MAP_FAILED) + { + ret = VNET_API_ERROR_NOT_CONNECTED; + goto error; + } + reg->region_size = req->nr_memsize; + } + reg->refcnt++; + + nif->nifp = NETMAP_IF (reg->mem, req->nr_offset); + nif->first_rx_ring = 0; + nif->last_rx_ring = 0; + nif->first_tx_ring = 0; + nif->last_tx_ring = 0; + nif->host_if_name = if_name; + nif->per_interface_next_index = ~0; + + if (tm->n_vlib_mains > 1) + { + nif->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) nif->lockp, 0, CLIB_CACHE_LINE_BYTES); + } + + { + unix_file_t template = { 0 }; + template.read_function = netmap_fd_read_ready; + template.file_descriptor = nif->fd; + template.private_data = nif->if_index; + nif->unix_file_index = unix_file_add (&unix_main, &template); + } + + /*use configured or generate random MAC address */ + if (hw_addr_set) + memcpy (hw_addr, hw_addr_set, 6); + else + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (hw_addr + 2, &rnd, sizeof (rnd)); + hw_addr[0] = 2; + hw_addr[1] = 0xfe; + } + + error = ethernet_register_interface (vnm, netmap_device_class.index, + nif->if_index, hw_addr, + &nif->hw_if_index, + netmap_eth_flag_change); + + if (error) + { + clib_error_report (error); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + sw = vnet_get_hw_sw_interface (vnm, nif->hw_if_index); + nif->sw_if_index = sw->sw_if_index; + + mhash_set_mem (&nm->if_index_by_host_if_name, if_name, &nif->if_index, 0); + + if (sw_if_index) + *sw_if_index = nif->sw_if_index; + + if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 1) + netmap_worker_thread_enable (); + + return 0; + +error: + close_netmap_if (nm, nif); + return ret; +} + +int +netmap_delete_if (vlib_main_t * vm, u8 * host_if_name) +{ + vnet_main_t *vnm = vnet_get_main (); + netmap_main_t *nm = &netmap_main; + netmap_if_t *nif; + uword *p; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + p = mhash_get (&nm->if_index_by_host_if_name, host_if_name); + if (p == NULL) + { + clib_warning ("Host interface %s does not exist", host_if_name); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + nif = pool_elt_at_index (nm->interfaces, p[0]); + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, nif->hw_if_index, 0); + + ethernet_delete_interface (vnm, nif->hw_if_index); + + close_netmap_if (nm, nif); + + if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 0) + netmap_worker_thread_disable (); + + return 0; +} + +static clib_error_t * +netmap_init (vlib_main_t * vm) +{ + netmap_main_t *nm = &netmap_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + uword *p; + + memset (nm, 0, sizeof (netmap_main_t)); + + nm->input_cpu_first_index = 0; + nm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? 
(vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + nm->input_cpu_first_index = tr->first_index; + nm->input_cpu_count = tr->count; + } + + mhash_init_vec_string (&nm->if_index_by_host_if_name, sizeof (uword)); + + vec_validate_aligned (nm->rx_buffers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + return 0; +} + +VLIB_INIT_FUNCTION (netmap_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/netmap/netmap.h b/src/vnet/devices/netmap/netmap.h new file mode 100644 index 00000000..39a94043 --- /dev/null +++ b/src/vnet/devices/netmap/netmap.h @@ -0,0 +1,164 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +/* + * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + volatile u32 *lockp; + u8 *host_if_name; + uword if_index; + u32 hw_if_index; + u32 sw_if_index; + u32 unix_file_index; + + u32 per_interface_next_index; + u8 is_admin_up; + + /* netmap */ + struct nmreq *req; + u16 mem_region; + int fd; + struct netmap_if *nifp; + u16 first_tx_ring; + u16 last_tx_ring; + u16 first_rx_ring; + u16 last_rx_ring; + +} netmap_if_t; + +typedef struct +{ + char *mem; + u32 region_size; + int refcnt; +} netmap_mem_region_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + netmap_if_t *interfaces; + + /* bitmap of pending rx interfaces */ + uword *pending_input_bitmap; + + /* rx buffer cache */ + u32 **rx_buffers; + + /* hash of host interface names */ + mhash_t if_index_by_host_if_name; + + /* vector of memory regions */ + netmap_mem_region_t *mem_regions; + + /* first cpu index */ + u32 input_cpu_first_index; + + /* total cpu count */ + u32 input_cpu_count; +} netmap_main_t; + +netmap_main_t netmap_main; +extern vnet_device_class_t netmap_device_class; +extern vlib_node_registration_t netmap_input_node; + +int netmap_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, + u8 is_pipe, u8 is_master, u32 * sw_if_index); +int netmap_delete_if (vlib_main_t * vm, u8 * host_if_name); + + +/* Macros and helper functions from sys/net/netmap_user.h */ + +#ifdef _NET_NETMAP_H_ + +#define _NETMAP_OFFSET(type, ptr, offset) \ + ((type)(void *)((char *)(ptr) + (offset))) + +#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) + +#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ + nifp, (nifp)->ring_ofs[index] ) + +#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ + nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) + +#define NETMAP_BUF(ring, index) \ + ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) + +#define NETMAP_BUF_IDX(ring, buf) \ + ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ + (ring)->nr_buf_size ) + +static inline uint32_t +nm_ring_next (struct netmap_ring *ring, uint32_t i) +{ + return (PREDICT_FALSE (i + 1 == ring->num_slots) ? 0 : i + 1); +} + + +/* + * Return 1 if we have pending transmissions in the tx ring. + * When everything is complete ring->head = ring->tail + 1 (modulo ring size) + */ +static inline int +nm_tx_pending (struct netmap_ring *ring) +{ + return nm_ring_next (ring, ring->tail) != ring->head; +} + +static inline uint32_t +nm_ring_space (struct netmap_ring *ring) +{ + int ret = ring->tail - ring->cur; + if (ret < 0) + ret += ring->num_slots; + return ret; +} +#endif + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/netmap/netmap_api.c b/src/vnet/devices/netmap/netmap_api.c new file mode 100644 index 00000000..9a393b1f --- /dev/null +++ b/src/vnet/devices/netmap/netmap_api.c @@ -0,0 +1,137 @@ +/* + *------------------------------------------------------------------ + * netmap_api.c - netmap api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+#define vl_typedefs /* define message structures */
+#include
+#undef vl_typedefs
+
+#define vl_endianfun /* define endian conversion functions */
+#include
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include
+#undef vl_printfun
+
+#include
+
+#define foreach_vpe_api_msg \
+_(NETMAP_CREATE, netmap_create) \
+_(NETMAP_DELETE, netmap_delete) \
+
+static void
+vl_api_netmap_create_t_handler (vl_api_netmap_create_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_netmap_create_reply_t *rmp;
+ int rv = 0;
+ u8 *if_name = NULL;
+
+ if_name = format (0, "%s", mp->netmap_if_name);
+ vec_add1 (if_name, 0);
+
+ rv =
+ netmap_create_if (vm, if_name, mp->use_random_hw_addr ? 0 : mp->hw_addr,
+ mp->is_pipe, mp->is_master, 0);
+
+ vec_free (if_name);
+
+ REPLY_MACRO (VL_API_NETMAP_CREATE_REPLY);
+}
+
+static void
+vl_api_netmap_delete_t_handler (vl_api_netmap_delete_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_netmap_delete_reply_t *rmp;
+ int rv = 0;
+ u8 *if_name = NULL;
+
+ if_name = format (0, "%s", mp->netmap_if_name);
+ vec_add1 (if_name, 0);
+
+ rv = netmap_delete_if (vm, if_name);
+
+ vec_free (if_name);
+
+ REPLY_MACRO (VL_API_NETMAP_DELETE_REPLY);
+}
+
+/*
+ * netmap_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_netmap;
+#undef _
+}
+
+static clib_error_t *
+netmap_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (netmap_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/netmap/node.c b/src/vnet/devices/netmap/node.c
new file mode 100644
index 00000000..19895e47
--- /dev/null
+++ b/src/vnet/devices/netmap/node.c
@@ -0,0 +1,300 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#define foreach_netmap_input_error + +typedef enum +{ +#define _(f,s) NETMAP_INPUT_ERROR_##f, + foreach_netmap_input_error +#undef _ + NETMAP_INPUT_N_ERROR, +} netmap_input_error_t; + +static char *netmap_input_error_strings[] = { +#define _(n,s) s, + foreach_netmap_input_error +#undef _ +}; + +typedef struct +{ + u32 next_index; + u32 hw_if_index; + struct netmap_slot slot; +} netmap_input_trace_t; + +static u8 * +format_netmap_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + netmap_input_trace_t *t = va_arg (*args, netmap_input_trace_t *); + uword indent = format_get_indent (s); + + s = format (s, "netmap: hw_if_index %d next-index %d", + t->hw_if_index, t->next_index); + s = format (s, "\n%Uslot: flags 0x%x len %u buf_idx %u", + format_white_space, indent + 2, + t->slot.flags, t->slot.len, t->slot.buf_idx); + return s; +} + +always_inline void +buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi); + vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi); + + /* update first buffer */ + first_b->total_length_not_including_first_buffer += b->current_length; + + /* update previous buffer */ + prev_b->next_buffer = bi; + prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT; + + /* update current buffer */ + b->next_buffer = 0; +} + +always_inline uword +netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, netmap_if_t * nif) +{ + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + uword n_trace = vlib_get_trace_count (vm, node); + netmap_main_t *nm = &netmap_main; + u32 n_rx_packets = 0; + u32 n_rx_bytes = 0; + u32 *to_next = 0; + u32 n_free_bufs; + struct netmap_ring *ring; + int cur_ring; + u32 cpu_index = os_get_cpu_number (); + u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + if (nif->per_interface_next_index != ~0) + next_index = nif->per_interface_next_index; + + n_free_bufs = vec_len (nm->rx_buffers[cpu_index]); + if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) + { + vec_validate (nm->rx_buffers[cpu_index], + VLIB_FRAME_SIZE + n_free_bufs - 1); + n_free_bufs += + vlib_buffer_alloc (vm, &nm->rx_buffers[cpu_index][n_free_bufs], + VLIB_FRAME_SIZE); + _vec_len (nm->rx_buffers[cpu_index]) = n_free_bufs; + } + + cur_ring = nif->first_rx_ring; + while (cur_ring <= nif->last_rx_ring && n_free_bufs) + { + int r = 0; + u32 cur_slot_index; + ring = NETMAP_RXRING (nif->nifp, cur_ring); + r = nm_ring_space (ring); + + if (!r) + { + cur_ring++; + continue; + } + + if (r > n_free_bufs) + r = n_free_bufs; + + cur_slot_index = ring->cur; + while (r) + { + u32 n_left_to_next; + u32 next0 = next_index; + vlib_get_next_frame (vm, node, next_index, to_next, 
n_left_to_next); + + while (r && n_left_to_next) + { + vlib_buffer_t *first_b0 = 0; + u32 offset = 0; + u32 bi0 = 0, first_bi0 = 0, prev_bi0; + u32 next_slot_index = (cur_slot_index + 1) % ring->num_slots; + u32 next2_slot_index = (cur_slot_index + 2) % ring->num_slots; + struct netmap_slot *slot = &ring->slot[cur_slot_index]; + u32 data_len = slot->len; + + /* prefetch 2 slots in advance */ + CLIB_PREFETCH (&ring->slot[next2_slot_index], + CLIB_CACHE_LINE_BYTES, LOAD); + /* prefetch start of next packet */ + CLIB_PREFETCH (NETMAP_BUF + (ring, ring->slot[next_slot_index].buf_idx), + CLIB_CACHE_LINE_BYTES, LOAD); + + while (data_len && n_free_bufs) + { + vlib_buffer_t *b0; + /* grab free buffer */ + u32 last_empty_buffer = + vec_len (nm->rx_buffers[cpu_index]) - 1; + prev_bi0 = bi0; + bi0 = nm->rx_buffers[cpu_index][last_empty_buffer]; + b0 = vlib_get_buffer (vm, bi0); + _vec_len (nm->rx_buffers[cpu_index]) = last_empty_buffer; + n_free_bufs--; + + /* copy data */ + u32 bytes_to_copy = + data_len > n_buffer_bytes ? n_buffer_bytes : data_len; + b0->current_data = 0; + clib_memcpy (vlib_buffer_get_current (b0), + (u8 *) NETMAP_BUF (ring, + slot->buf_idx) + offset, + bytes_to_copy); + + /* fill buffer header */ + b0->current_length = bytes_to_copy; + + if (offset == 0) + { + b0->total_length_not_including_first_buffer = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + nif->sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + first_bi0 = bi0; + first_b0 = vlib_get_buffer (vm, first_bi0); + } + else + buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0); + + offset += bytes_to_copy; + data_len -= bytes_to_copy; + } + + /* trace */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0); + if (PREDICT_FALSE (n_trace > 0)) + { + if (PREDICT_TRUE (first_b0 != 0)) + { + netmap_input_trace_t *tr; + vlib_trace_buffer (vm, node, next0, first_b0, + /* follow_chain */ 0); + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = nif->hw_if_index; + memcpy (&tr->slot, slot, sizeof (struct netmap_slot)); + } + } + + /* redirect if feature path enabled */ + vnet_feature_start_device_input_x1 (nif->sw_if_index, &next0, + first_b0, 0); + + /* enque and take next packet */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, first_bi0, + next0); + + /* next packet */ + n_rx_packets++; + n_rx_bytes += slot->len; + to_next[0] = first_bi0; + to_next += 1; + n_left_to_next--; + cur_slot_index = next_slot_index; + + r--; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + ring->head = ring->cur = cur_slot_index; + cur_ring++; + } + + if (n_rx_packets) + ioctl (nif->fd, NIOCRXSYNC, NULL); + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + os_get_cpu_number (), nif->hw_if_index, n_rx_packets, n_rx_bytes); + + return n_rx_packets; +} + +static uword +netmap_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + int i; + u32 n_rx_packets = 0; + u32 cpu_index = os_get_cpu_number (); + netmap_main_t *nm = &netmap_main; + netmap_if_t *nmi; + + for (i = 0; i < vec_len (nm->interfaces); i++) + { + nmi = vec_elt_at_index (nm->interfaces, i); + if (nmi->is_admin_up && + (i % nm->input_cpu_count) == + (cpu_index - nm->input_cpu_first_index)) + n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi); + } + + return 
n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (netmap_input_node) = { + .function = netmap_input_fn, + .name = "netmap-input", + .sibling_of = "device-input", + .format_trace = format_netmap_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + /* default state is INTERRUPT mode, switch to POLLING if worker threads are enabled */ + .state = VLIB_NODE_STATE_INTERRUPT, + .n_errors = NETMAP_INPUT_N_ERROR, + .error_strings = netmap_input_error_strings, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (netmap_input_node, netmap_input_fn) +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/nic/ixge.c b/src/vnet/devices/nic/ixge.c new file mode 100644 index 00000000..d4c4c6b7 --- /dev/null +++ b/src/vnet/devices/nic/ixge.c @@ -0,0 +1,2938 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * WARNING! + * This driver is not intended for production use and it is unsupported. + * It is provided for educational use only. + * Please use supported DPDK driver instead. + */ + +#if __x86_64__ +#include + +#ifndef CLIB_HAVE_VEC128 +#warning HACK: ixge driver wont really work, missing u32x4 +typedef unsigned long long u32x4; +#endif + +#include +#include +#include +#include +#include +#include + +#define IXGE_ALWAYS_POLL 0 + +#define EVENT_SET_FLAGS 0 +#define IXGE_HWBP_RACE_ELOG 0 + +#define PCI_VENDOR_ID_INTEL 0x8086 + +/* 10 GIG E (XGE) PHY IEEE 802.3 clause 45 definitions. 
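+ Clause 45 MDIO addresses a register as a (device type, register) pair;
+ the defines below name device types 1 (PMA/PMD) and 4 (PHY XS), and
+ ixge_phy_init combines the ID1/ID2 registers into a single 32-bit PHY id.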
*/ +#define XGE_PHY_DEV_TYPE_PMA_PMD 1 +#define XGE_PHY_DEV_TYPE_PHY_XS 4 +#define XGE_PHY_ID1 0x2 +#define XGE_PHY_ID2 0x3 +#define XGE_PHY_CONTROL 0x0 +#define XGE_PHY_CONTROL_RESET (1 << 15) + +ixge_main_t ixge_main; +static vlib_node_registration_t ixge_input_node; +static vlib_node_registration_t ixge_process_node; + +static void +ixge_semaphore_get (ixge_device_t * xd) +{ + ixge_main_t *xm = &ixge_main; + vlib_main_t *vm = xm->vlib_main; + ixge_regs_t *r = xd->regs; + u32 i; + + i = 0; + while (!(r->software_semaphore & (1 << 0))) + { + if (i > 0) + vlib_process_suspend (vm, 100e-6); + i++; + } + do + { + r->software_semaphore |= 1 << 1; + } + while (!(r->software_semaphore & (1 << 1))); +} + +static void +ixge_semaphore_release (ixge_device_t * xd) +{ + ixge_regs_t *r = xd->regs; + r->software_semaphore &= ~3; +} + +static void +ixge_software_firmware_sync (ixge_device_t * xd, u32 sw_mask) +{ + ixge_main_t *xm = &ixge_main; + vlib_main_t *vm = xm->vlib_main; + ixge_regs_t *r = xd->regs; + u32 fw_mask = sw_mask << 5; + u32 m, done = 0; + + while (!done) + { + ixge_semaphore_get (xd); + m = r->software_firmware_sync; + done = (m & fw_mask) == 0; + if (done) + r->software_firmware_sync = m | sw_mask; + ixge_semaphore_release (xd); + if (!done) + vlib_process_suspend (vm, 10e-3); + } +} + +static void +ixge_software_firmware_sync_release (ixge_device_t * xd, u32 sw_mask) +{ + ixge_regs_t *r = xd->regs; + ixge_semaphore_get (xd); + r->software_firmware_sync &= ~sw_mask; + ixge_semaphore_release (xd); +} + +u32 +ixge_read_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, + u32 v, u32 is_read) +{ + ixge_regs_t *r = xd->regs; + const u32 busy_bit = 1 << 30; + u32 x; + + ASSERT (xd->phy_index < 2); + ixge_software_firmware_sync (xd, 1 << (1 + xd->phy_index)); + + ASSERT (reg_index < (1 << 16)); + ASSERT (dev_type < (1 << 5)); + if (!is_read) + r->xge_mac.phy_data = v; + + /* Address cycle. */ + x = + reg_index | (dev_type << 16) | (xd-> + phys[xd->phy_index].mdio_address << 21); + r->xge_mac.phy_command = x | busy_bit; + /* Busy wait timed to take 28e-6 secs. No suspend. */ + while (r->xge_mac.phy_command & busy_bit) + ; + + r->xge_mac.phy_command = x | ((is_read ? 2 : 1) << 26) | busy_bit; + while (r->xge_mac.phy_command & busy_bit) + ; + + if (is_read) + v = r->xge_mac.phy_data >> 16; + + ixge_software_firmware_sync_release (xd, 1 << (1 + xd->phy_index)); + + return v; +} + +static u32 +ixge_read_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index) +{ + return ixge_read_write_phy_reg (xd, dev_type, reg_index, 0, /* is_read */ + 1); +} + +static void +ixge_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, u32 v) +{ + (void) ixge_read_write_phy_reg (xd, dev_type, reg_index, v, /* is_read */ + 0); +} + +static void +ixge_i2c_put_bits (i2c_bus_t * b, int scl, int sda) +{ + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data); + u32 v; + + v = 0; + v |= (sda != 0) << 3; + v |= (scl != 0) << 1; + xd->regs->i2c_control = v; +} + +static void +ixge_i2c_get_bits (i2c_bus_t * b, int *scl, int *sda) +{ + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data); + u32 v; + + v = xd->regs->i2c_control; + *sda = (v & (1 << 2)) != 0; + *scl = (v & (1 << 0)) != 0; +} + +static u16 +ixge_read_eeprom (ixge_device_t * xd, u32 address) +{ + ixge_regs_t *r = xd->regs; + u32 v; + r->eeprom_read = (( /* start bit */ (1 << 0)) | (address << 2)); + /* Wait for done bit. 
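+ Hardware sets bit 1 of eeprom_read when the read completes; the
+ 16-bit word that was read is then returned from bits 31:16, hence
+ the v >> 16 below.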
*/ + while (!((v = r->eeprom_read) & (1 << 1))) + ; + return v >> 16; +} + +static void +ixge_sfp_enable_disable_laser (ixge_device_t * xd, uword enable) +{ + u32 tx_disable_bit = 1 << 3; + if (enable) + xd->regs->sdp_control &= ~tx_disable_bit; + else + xd->regs->sdp_control |= tx_disable_bit; +} + +static void +ixge_sfp_enable_disable_10g (ixge_device_t * xd, uword enable) +{ + u32 is_10g_bit = 1 << 5; + if (enable) + xd->regs->sdp_control |= is_10g_bit; + else + xd->regs->sdp_control &= ~is_10g_bit; +} + +static clib_error_t * +ixge_sfp_phy_init_from_eeprom (ixge_device_t * xd, u16 sfp_type) +{ + u16 a, id, reg_values_addr = 0; + + a = ixge_read_eeprom (xd, 0x2b); + if (a == 0 || a == 0xffff) + return clib_error_create ("no init sequence in eeprom"); + + while (1) + { + id = ixge_read_eeprom (xd, ++a); + if (id == 0xffff) + break; + reg_values_addr = ixge_read_eeprom (xd, ++a); + if (id == sfp_type) + break; + } + if (id != sfp_type) + return clib_error_create ("failed to find id 0x%x", sfp_type); + + ixge_software_firmware_sync (xd, 1 << 3); + while (1) + { + u16 v = ixge_read_eeprom (xd, ++reg_values_addr); + if (v == 0xffff) + break; + xd->regs->core_analog_config = v; + } + ixge_software_firmware_sync_release (xd, 1 << 3); + + /* Make sure laser is off. We'll turn on the laser when + the interface is brought up. */ + ixge_sfp_enable_disable_laser (xd, /* enable */ 0); + ixge_sfp_enable_disable_10g (xd, /* is_10g */ 1); + + return 0; +} + +static void +ixge_sfp_device_up_down (ixge_device_t * xd, uword is_up) +{ + u32 v; + + if (is_up) + { + /* pma/pmd 10g serial SFI. */ + xd->regs->xge_mac.auto_negotiation_control2 &= ~(3 << 16); + xd->regs->xge_mac.auto_negotiation_control2 |= 2 << 16; + + v = xd->regs->xge_mac.auto_negotiation_control; + v &= ~(7 << 13); + v |= (0 << 13); + /* Restart autoneg. */ + v |= (1 << 12); + xd->regs->xge_mac.auto_negotiation_control = v; + + while (!(xd->regs->xge_mac.link_partner_ability[0] & 0xf0000)) + ; + + v = xd->regs->xge_mac.auto_negotiation_control; + + /* link mode 10g sfi serdes */ + v &= ~(7 << 13); + v |= (3 << 13); + + /* Restart autoneg. */ + v |= (1 << 12); + xd->regs->xge_mac.auto_negotiation_control = v; + + xd->regs->xge_mac.link_status; + } + + ixge_sfp_enable_disable_laser (xd, /* enable */ is_up); + + /* Give time for link partner to notice that we're up. */ + if (is_up && vlib_in_process_context (vlib_get_main ())) + { + vlib_process_suspend (vlib_get_main (), 300e-3); + } +} + +always_inline ixge_dma_regs_t * +get_dma_regs (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 qi) +{ + ixge_regs_t *r = xd->regs; + ASSERT (qi < 128); + if (rt == VLIB_RX) + return qi < 64 ? 
&r->rx_dma0[qi] : &r->rx_dma1[qi - 64]; + else + return &r->tx_dma[qi]; +} + +static clib_error_t * +ixge_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, hif->dev_instance); + ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0); + + if (is_up) + { + xd->regs->rx_enable |= 1; + xd->regs->tx_dma_control |= 1; + dr->control |= 1 << 25; + while (!(dr->control & (1 << 25))) + ; + } + else + { + xd->regs->rx_enable &= ~1; + xd->regs->tx_dma_control &= ~1; + } + + ixge_sfp_device_up_down (xd, is_up); + + return /* no error */ 0; +} + +static void +ixge_sfp_phy_init (ixge_device_t * xd) +{ + ixge_phy_t *phy = xd->phys + xd->phy_index; + i2c_bus_t *ib = &xd->i2c_bus; + + ib->private_data = xd->device_index; + ib->put_bits = ixge_i2c_put_bits; + ib->get_bits = ixge_i2c_get_bits; + vlib_i2c_init (ib); + + vlib_i2c_read_eeprom (ib, 0x50, 0, 128, (u8 *) & xd->sfp_eeprom); + + if (vlib_i2c_bus_timed_out (ib) || !sfp_eeprom_is_valid (&xd->sfp_eeprom)) + xd->sfp_eeprom.id = SFP_ID_unknown; + else + { + /* FIXME 5 => SR/LR eeprom ID. */ + clib_error_t *e = + ixge_sfp_phy_init_from_eeprom (xd, 5 + xd->pci_function); + if (e) + clib_error_report (e); + } + + phy->mdio_address = ~0; +} + +static void +ixge_phy_init (ixge_device_t * xd) +{ + ixge_main_t *xm = &ixge_main; + vlib_main_t *vm = xm->vlib_main; + ixge_phy_t *phy = xd->phys + xd->phy_index; + + switch (xd->device_id) + { + case IXGE_82599_sfp: + case IXGE_82599_sfp_em: + case IXGE_82599_sfp_fcoe: + /* others? */ + return ixge_sfp_phy_init (xd); + + default: + break; + } + + /* Probe address of phy. */ + { + u32 i, v; + + phy->mdio_address = ~0; + for (i = 0; i < 32; i++) + { + phy->mdio_address = i; + v = ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1); + if (v != 0xffff && v != 0) + break; + } + + /* No PHY found? */ + if (i >= 32) + return; + } + + phy->id = + ((ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1) << 16) | + ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID2)); + + { + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d, phy id 0x%d mdio address %d",.format_args = "i4i4i4",}; + struct + { + u32 instance, id, address; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->id = phy->id; + ed->address = phy->mdio_address; + } + + /* Reset phy. */ + ixge_write_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL, + XGE_PHY_CONTROL_RESET); + + /* Wait for self-clearning reset bit to clear. */ + do + { + vlib_process_suspend (vm, 1e-3); + } + while (ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL) & + XGE_PHY_CONTROL_RESET); +} + +static u8 * +format_ixge_rx_from_hw_descriptor (u8 * s, va_list * va) +{ + ixge_rx_from_hw_descriptor_t *d = + va_arg (*va, ixge_rx_from_hw_descriptor_t *); + u32 s0 = d->status[0], s2 = d->status[2]; + u32 is_ip4, is_ip6, is_ip, is_tcp, is_udp; + uword indent = format_get_indent (s); + + s = format (s, "%s-owned", + (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE) ? 
"sw" : + "hw"); + s = + format (s, ", length this descriptor %d, l3 offset %d", + d->n_packet_bytes_this_descriptor, + IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s0)); + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) + s = format (s, ", end-of-packet"); + + s = format (s, "\n%U", format_white_space, indent); + + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR) + s = format (s, "layer2 error"); + + if (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2) + { + s = format (s, "layer 2 type %d", (s0 & 0x1f)); + return s; + } + + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN) + s = format (s, "vlan header 0x%x\n%U", d->vlan_tag, + format_white_space, indent); + + if ((is_ip4 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4))) + { + s = format (s, "ip4%s", + (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT) ? " options" : + ""); + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED) + s = format (s, " checksum %s", + (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) ? + "bad" : "ok"); + } + if ((is_ip6 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6))) + s = format (s, "ip6%s", + (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT) ? " extended" : + ""); + is_tcp = is_udp = 0; + if ((is_ip = (is_ip4 | is_ip6))) + { + is_tcp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP) != 0; + is_udp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP) != 0; + if (is_tcp) + s = format (s, ", tcp"); + if (is_udp) + s = format (s, ", udp"); + } + + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED) + s = format (s, ", tcp checksum %s", + (s2 & IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR) ? "bad" : + "ok"); + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED) + s = format (s, ", udp checksum %s", + (s2 & IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR) ? "bad" : + "ok"); + + return s; +} + +static u8 * +format_ixge_tx_descriptor (u8 * s, va_list * va) +{ + ixge_tx_descriptor_t *d = va_arg (*va, ixge_tx_descriptor_t *); + u32 s0 = d->status0, s1 = d->status1; + uword indent = format_get_indent (s); + u32 v; + + s = format (s, "buffer 0x%Lx, %d packet bytes, %d bytes this buffer", + d->buffer_address, s1 >> 14, d->n_bytes_this_buffer); + + s = format (s, "\n%U", format_white_space, indent); + + if ((v = (s0 >> 0) & 3)) + s = format (s, "reserved 0x%x, ", v); + + if ((v = (s0 >> 2) & 3)) + s = format (s, "mac 0x%x, ", v); + + if ((v = (s0 >> 4) & 0xf) != 3) + s = format (s, "type 0x%x, ", v); + + s = format (s, "%s%s%s%s%s%s%s%s", + (s0 & (1 << 8)) ? "eop, " : "", + (s0 & (1 << 9)) ? "insert-fcs, " : "", + (s0 & (1 << 10)) ? "reserved26, " : "", + (s0 & (1 << 11)) ? "report-status, " : "", + (s0 & (1 << 12)) ? "reserved28, " : "", + (s0 & (1 << 13)) ? "is-advanced, " : "", + (s0 & (1 << 14)) ? "vlan-enable, " : "", + (s0 & (1 << 15)) ? "tx-segmentation, " : ""); + + if ((v = s1 & 0xf) != 0) + s = format (s, "status 0x%x, ", v); + + if ((v = (s1 >> 4) & 0xf)) + s = format (s, "context 0x%x, ", v); + + if ((v = (s1 >> 8) & 0x3f)) + s = format (s, "options 0x%x, ", v); + + return s; +} + +typedef struct +{ + ixge_descriptor_t before, after; + + u32 buffer_index; + + u16 device_index; + + u8 queue_index; + + u8 is_start_of_packet; + + /* Copy of VLIB buffer; packet data stored in pre_data. 
*/ + vlib_buffer_t buffer; +} ixge_rx_dma_trace_t; + +static u8 * +format_ixge_rx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + vlib_node_t *node = va_arg (*va, vlib_node_t *); + vnet_main_t *vnm = vnet_get_main (); + ixge_rx_dma_trace_t *t = va_arg (*va, ixge_rx_dma_trace_t *); + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index); + format_function_t *f; + uword indent = format_get_indent (s); + + { + vnet_sw_interface_t *sw = + vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + s = + format (s, "%U rx queue %d", format_vnet_sw_interface_name, vnm, sw, + t->queue_index); + } + + s = format (s, "\n%Ubefore: %U", + format_white_space, indent, + format_ixge_rx_from_hw_descriptor, &t->before); + s = format (s, "\n%Uafter : head/tail address 0x%Lx/0x%Lx", + format_white_space, indent, + t->after.rx_to_hw.head_address, t->after.rx_to_hw.tail_address); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U", format_white_space, indent); + + f = node->format_buffer; + if (!f || !t->is_start_of_packet) + f = format_hex_bytes; + s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); + + return s; +} + +#define foreach_ixge_error \ + _ (none, "no error") \ + _ (tx_full_drops, "tx ring full drops") \ + _ (ip4_checksum_error, "ip4 checksum errors") \ + _ (rx_alloc_fail, "rx buf alloc from free list failed") \ + _ (rx_alloc_no_physmem, "rx buf alloc failed no physmem") + +typedef enum +{ +#define _(f,s) IXGE_ERROR_##f, + foreach_ixge_error +#undef _ + IXGE_N_ERROR, +} ixge_error_t; + +always_inline void +ixge_rx_next_and_error_from_status_x1 (ixge_device_t * xd, + u32 s00, u32 s02, + u8 * next0, u8 * error0, u32 * flags0) +{ + u8 is0_ip4, is0_ip6, n0, e0; + u32 f0; + + e0 = IXGE_ERROR_none; + n0 = IXGE_RX_NEXT_ETHERNET_INPUT; + + is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; + n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0; + + e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) + ? IXGE_ERROR_ip4_checksum_error : e0); + + is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; + n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0; + + n0 = (xd->per_interface_next_index != ~0) ? + xd->per_interface_next_index : n0; + + /* Check for error. */ + n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0; + + f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED + | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) + ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); + + f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR + | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) + ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); + + *error0 = e0; + *next0 = n0; + *flags0 = f0; +} + +always_inline void +ixge_rx_next_and_error_from_status_x2 (ixge_device_t * xd, + u32 s00, u32 s02, + u32 s10, u32 s12, + u8 * next0, u8 * error0, u32 * flags0, + u8 * next1, u8 * error1, u32 * flags1) +{ + u8 is0_ip4, is0_ip6, n0, e0; + u8 is1_ip4, is1_ip6, n1, e1; + u32 f0, f1; + + e0 = e1 = IXGE_ERROR_none; + n0 = n1 = IXGE_RX_NEXT_IP4_INPUT; + + is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; + is1_ip4 = s12 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; + + n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0; + n1 = is1_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n1; + + e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) + ? 
IXGE_ERROR_ip4_checksum_error : e0); + e1 = (is1_ip4 && (s12 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) + ? IXGE_ERROR_ip4_checksum_error : e1); + + is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; + is1_ip6 = s10 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; + + n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0; + n1 = is1_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n1; + + n0 = (xd->per_interface_next_index != ~0) ? + xd->per_interface_next_index : n0; + n1 = (xd->per_interface_next_index != ~0) ? + xd->per_interface_next_index : n1; + + /* Check for error. */ + n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0; + n1 = e1 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n1; + + *error0 = e0; + *error1 = e1; + + *next0 = n0; + *next1 = n1; + + f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED + | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) + ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); + f1 = ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED + | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) + ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); + + f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR + | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) + ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); + f1 |= ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR + | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) + ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); + + *flags0 = f0; + *flags1 = f1; +} + +static void +ixge_rx_trace (ixge_main_t * xm, + ixge_device_t * xd, + ixge_dma_queue_t * dq, + ixge_descriptor_t * before_descriptors, + u32 * before_buffers, + ixge_descriptor_t * after_descriptors, uword n_descriptors) +{ + vlib_main_t *vm = xm->vlib_main; + vlib_node_runtime_t *node = dq->rx.node; + ixge_rx_from_hw_descriptor_t *bd; + ixge_rx_to_hw_descriptor_t *ad; + u32 *b, n_left, is_sop, next_index_sop; + + n_left = n_descriptors; + b = before_buffers; + bd = &before_descriptors->rx_from_hw; + ad = &after_descriptors->rx_to_hw; + is_sop = dq->rx.is_start_of_packet; + next_index_sop = dq->rx.saved_start_of_packet_next_index; + + while (n_left >= 2) + { + u32 bi0, bi1, flags0, flags1; + vlib_buffer_t *b0, *b1; + ixge_rx_dma_trace_t *t0, *t1; + u8 next0, error0, next1, error1; + + bi0 = b[0]; + bi1 = b[1]; + n_left -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + ixge_rx_next_and_error_from_status_x2 (xd, + bd[0].status[0], bd[0].status[2], + bd[1].status[0], bd[1].status[2], + &next0, &error0, &flags0, + &next1, &error1, &flags1); + + next_index_sop = is_sop ? next0 : next_index_sop; + vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0); + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->is_start_of_packet = is_sop; + is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + next_index_sop = is_sop ? 
next1 : next_index_sop; + vlib_trace_buffer (vm, node, next_index_sop, b1, /* follow_chain */ 0); + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + t1->is_start_of_packet = is_sop; + is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + t0->queue_index = dq->queue_index; + t1->queue_index = dq->queue_index; + t0->device_index = xd->device_index; + t1->device_index = xd->device_index; + t0->before.rx_from_hw = bd[0]; + t1->before.rx_from_hw = bd[1]; + t0->after.rx_to_hw = ad[0]; + t1->after.rx_to_hw = ad[1]; + t0->buffer_index = bi0; + t1->buffer_index = bi1; + memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data)); + memcpy (t0->buffer.pre_data, b0->data + b0->current_data, + sizeof (t0->buffer.pre_data)); + memcpy (t1->buffer.pre_data, b1->data + b1->current_data, + sizeof (t1->buffer.pre_data)); + + b += 2; + bd += 2; + ad += 2; + } + + while (n_left >= 1) + { + u32 bi0, flags0; + vlib_buffer_t *b0; + ixge_rx_dma_trace_t *t0; + u8 next0, error0; + + bi0 = b[0]; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ixge_rx_next_and_error_from_status_x1 (xd, + bd[0].status[0], bd[0].status[2], + &next0, &error0, &flags0); + + next_index_sop = is_sop ? next0 : next_index_sop; + vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0); + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->is_start_of_packet = is_sop; + is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + t0->queue_index = dq->queue_index; + t0->device_index = xd->device_index; + t0->before.rx_from_hw = bd[0]; + t0->after.rx_to_hw = ad[0]; + t0->buffer_index = bi0; + memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + memcpy (t0->buffer.pre_data, b0->data + b0->current_data, + sizeof (t0->buffer.pre_data)); + + b += 1; + bd += 1; + ad += 1; + } +} + +typedef struct +{ + ixge_tx_descriptor_t descriptor; + + u32 buffer_index; + + u16 device_index; + + u8 queue_index; + + u8 is_start_of_packet; + + /* Copy of VLIB buffer; packet data stored in pre_data. 
*/ + vlib_buffer_t buffer; +} ixge_tx_dma_trace_t; + +static u8 * +format_ixge_tx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ixge_tx_dma_trace_t *t = va_arg (*va, ixge_tx_dma_trace_t *); + vnet_main_t *vnm = vnet_get_main (); + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index); + format_function_t *f; + uword indent = format_get_indent (s); + + { + vnet_sw_interface_t *sw = + vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + s = + format (s, "%U tx queue %d", format_vnet_sw_interface_name, vnm, sw, + t->queue_index); + } + + s = format (s, "\n%Udescriptor: %U", + format_white_space, indent, + format_ixge_tx_descriptor, &t->descriptor); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U", format_white_space, indent); + + f = format_ethernet_header_with_length; + if (!f || !t->is_start_of_packet) + f = format_hex_bytes; + s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); + + return s; +} + +typedef struct +{ + vlib_node_runtime_t *node; + + u32 is_start_of_packet; + + u32 n_bytes_in_packet; + + ixge_tx_descriptor_t *start_of_packet_descriptor; +} ixge_tx_state_t; + +static void +ixge_tx_trace (ixge_main_t * xm, + ixge_device_t * xd, + ixge_dma_queue_t * dq, + ixge_tx_state_t * tx_state, + ixge_tx_descriptor_t * descriptors, + u32 * buffers, uword n_descriptors) +{ + vlib_main_t *vm = xm->vlib_main; + vlib_node_runtime_t *node = tx_state->node; + ixge_tx_descriptor_t *d; + u32 *b, n_left, is_sop; + + n_left = n_descriptors; + b = buffers; + d = descriptors; + is_sop = tx_state->is_start_of_packet; + + while (n_left >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + ixge_tx_dma_trace_t *t0, *t1; + + bi0 = b[0]; + bi1 = b[1]; + n_left -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->is_start_of_packet = is_sop; + is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + t1->is_start_of_packet = is_sop; + is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + t0->queue_index = dq->queue_index; + t1->queue_index = dq->queue_index; + t0->device_index = xd->device_index; + t1->device_index = xd->device_index; + t0->descriptor = d[0]; + t1->descriptor = d[1]; + t0->buffer_index = bi0; + t1->buffer_index = bi1; + memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data)); + memcpy (t0->buffer.pre_data, b0->data + b0->current_data, + sizeof (t0->buffer.pre_data)); + memcpy (t1->buffer.pre_data, b1->data + b1->current_data, + sizeof (t1->buffer.pre_data)); + + b += 2; + d += 2; + } + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t *b0; + ixge_tx_dma_trace_t *t0; + + bi0 = b[0]; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->is_start_of_packet = is_sop; + is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + t0->queue_index = dq->queue_index; + t0->device_index = xd->device_index; + t0->descriptor = d[0]; + t0->buffer_index = bi0; + memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + memcpy (t0->buffer.pre_data, b0->data + b0->current_data, + sizeof (t0->buffer.pre_data)); + + b += 1; + d += 1; + } +} 
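+
+/*
+ * Worked example of the ring arithmetic implemented below (values are
+ * illustrative): with q->n_descriptors = 512, head = 510 and tail = 4,
+ * ixge_ring_sub (q, 510, 4) = (4 - 510) + 512 = 6 descriptors in use,
+ * and ixge_ring_add (q, 510, 4) = (510 + 4) - 512 = 2; both helpers
+ * wrap indices modulo the ring size.
+ */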
+ +always_inline uword +ixge_ring_sub (ixge_dma_queue_t * q, u32 i0, u32 i1) +{ + i32 d = i1 - i0; + ASSERT (i0 < q->n_descriptors); + ASSERT (i1 < q->n_descriptors); + return d < 0 ? q->n_descriptors + d : d; +} + +always_inline uword +ixge_ring_add (ixge_dma_queue_t * q, u32 i0, u32 i1) +{ + u32 d = i0 + i1; + ASSERT (i0 < q->n_descriptors); + ASSERT (i1 < q->n_descriptors); + d -= d >= q->n_descriptors ? q->n_descriptors : 0; + return d; +} + +always_inline uword +ixge_tx_descriptor_matches_template (ixge_main_t * xm, + ixge_tx_descriptor_t * d) +{ + u32 cmp; + + cmp = ((d->status0 & xm->tx_descriptor_template_mask.status0) + ^ xm->tx_descriptor_template.status0); + if (cmp) + return 0; + cmp = ((d->status1 & xm->tx_descriptor_template_mask.status1) + ^ xm->tx_descriptor_template.status1); + if (cmp) + return 0; + + return 1; +} + +static uword +ixge_tx_no_wrap (ixge_main_t * xm, + ixge_device_t * xd, + ixge_dma_queue_t * dq, + u32 * buffers, + u32 start_descriptor_index, + u32 n_descriptors, ixge_tx_state_t * tx_state) +{ + vlib_main_t *vm = xm->vlib_main; + ixge_tx_descriptor_t *d, *d_sop; + u32 n_left = n_descriptors; + u32 *to_free = vec_end (xm->tx_buffers_pending_free); + u32 *to_tx = + vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index); + u32 is_sop = tx_state->is_start_of_packet; + u32 len_sop = tx_state->n_bytes_in_packet; + u16 template_status = xm->tx_descriptor_template.status0; + u32 descriptor_prefetch_rotor = 0; + + ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors); + d = &dq->descriptors[start_descriptor_index].tx; + d_sop = is_sop ? d : tx_state->start_of_packet_descriptor; + + while (n_left >= 4) + { + vlib_buffer_t *b0, *b1; + u32 bi0, fi0, len0; + u32 bi1, fi1, len1; + u8 is_eop0, is_eop1; + + /* Prefetch next iteration. */ + vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD); + vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD); + + if ((descriptor_prefetch_rotor & 0x3) == 0) + CLIB_PREFETCH (d + 4, CLIB_CACHE_LINE_BYTES, STORE); + + descriptor_prefetch_rotor += 2; + + bi0 = buffers[0]; + bi1 = buffers[1]; + + to_free[0] = fi0 = to_tx[0]; + to_tx[0] = bi0; + to_free += fi0 != 0; + + to_free[0] = fi1 = to_tx[1]; + to_tx[1] = bi1; + to_free += fi1 != 0; + + buffers += 2; + n_left -= 2; + to_tx += 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + is_eop1 = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + len0 = b0->current_length; + len1 = b1->current_length; + + ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0)); + ASSERT (ixge_tx_descriptor_matches_template (xm, d + 1)); + + d[0].buffer_address = + vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data; + d[1].buffer_address = + vlib_get_buffer_data_physical_address (vm, bi1) + b1->current_data; + + d[0].n_bytes_this_buffer = len0; + d[1].n_bytes_this_buffer = len1; + + d[0].status0 = + template_status | (is_eop0 << + IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); + d[1].status0 = + template_status | (is_eop1 << + IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); + + len_sop = (is_sop ? 0 : len_sop) + len0; + d_sop[0].status1 = + IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); + d += 1; + d_sop = is_eop0 ? d : d_sop; + + is_sop = is_eop0; + + len_sop = (is_sop ? 0 : len_sop) + len1; + d_sop[0].status1 = + IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); + d += 1; + d_sop = is_eop1 ? 
d : d_sop; + + is_sop = is_eop1; + } + + while (n_left > 0) + { + vlib_buffer_t *b0; + u32 bi0, fi0, len0; + u8 is_eop0; + + bi0 = buffers[0]; + + to_free[0] = fi0 = to_tx[0]; + to_tx[0] = bi0; + to_free += fi0 != 0; + + buffers += 1; + n_left -= 1; + to_tx += 1; + + b0 = vlib_get_buffer (vm, bi0); + + is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + len0 = b0->current_length; + + ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0)); + + d[0].buffer_address = + vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data; + + d[0].n_bytes_this_buffer = len0; + + d[0].status0 = + template_status | (is_eop0 << + IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); + + len_sop = (is_sop ? 0 : len_sop) + len0; + d_sop[0].status1 = + IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); + d += 1; + d_sop = is_eop0 ? d : d_sop; + + is_sop = is_eop0; + } + + if (tx_state->node->flags & VLIB_NODE_FLAG_TRACE) + { + to_tx = + vec_elt_at_index (dq->descriptor_buffer_indices, + start_descriptor_index); + ixge_tx_trace (xm, xd, dq, tx_state, + &dq->descriptors[start_descriptor_index].tx, to_tx, + n_descriptors); + } + + _vec_len (xm->tx_buffers_pending_free) = + to_free - xm->tx_buffers_pending_free; + + /* When we are done d_sop can point to end of ring. Wrap it if so. */ + { + ixge_tx_descriptor_t *d_start = &dq->descriptors[0].tx; + + ASSERT (d_sop - d_start <= dq->n_descriptors); + d_sop = d_sop - d_start == dq->n_descriptors ? d_start : d_sop; + } + + tx_state->is_start_of_packet = is_sop; + tx_state->start_of_packet_descriptor = d_sop; + tx_state->n_bytes_in_packet = len_sop; + + return n_descriptors; +} + +static uword +ixge_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * f) +{ + ixge_main_t *xm = &ixge_main; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + ixge_device_t *xd = vec_elt_at_index (xm->devices, rd->dev_instance); + ixge_dma_queue_t *dq; + u32 *from, n_left_tx, n_descriptors_to_tx, n_tail_drop; + u32 queue_index = 0; /* fixme parameter */ + ixge_tx_state_t tx_state; + + tx_state.node = node; + tx_state.is_start_of_packet = 1; + tx_state.start_of_packet_descriptor = 0; + tx_state.n_bytes_in_packet = 0; + + from = vlib_frame_vector_args (f); + + dq = vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index); + + dq->head_index = dq->tx.head_index_write_back[0]; + + /* Since head == tail means ring is empty we can send up to dq->n_descriptors - 1. 
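+ Keeping one slot unused makes head == tail unambiguous: a completely
+ full ring would otherwise be indistinguishable from an empty one.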
*/ + n_left_tx = dq->n_descriptors - 1; + n_left_tx -= ixge_ring_sub (dq, dq->head_index, dq->tail_index); + + _vec_len (xm->tx_buffers_pending_free) = 0; + + n_descriptors_to_tx = f->n_vectors; + n_tail_drop = 0; + if (PREDICT_FALSE (n_descriptors_to_tx > n_left_tx)) + { + i32 i, n_ok, i_eop, i_sop; + + i_sop = i_eop = ~0; + for (i = n_left_tx - 1; i >= 0; i--) + { + vlib_buffer_t *b = vlib_get_buffer (vm, from[i]); + if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + if (i_sop != ~0 && i_eop != ~0) + break; + i_eop = i; + i_sop = i + 1; + } + } + if (i == 0) + n_ok = 0; + else + n_ok = i_eop + 1; + + { + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d, ring full to tx %d head %d tail %d",.format_args = + "i2i2i2i2",}; + struct + { + u16 instance, to_tx, head, tail; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->to_tx = n_descriptors_to_tx; + ed->head = dq->head_index; + ed->tail = dq->tail_index; + } + + if (n_ok < n_descriptors_to_tx) + { + n_tail_drop = n_descriptors_to_tx - n_ok; + vec_add (xm->tx_buffers_pending_free, from + n_ok, n_tail_drop); + vlib_error_count (vm, ixge_input_node.index, + IXGE_ERROR_tx_full_drops, n_tail_drop); + } + + n_descriptors_to_tx = n_ok; + } + + dq->tx.n_buffers_on_ring += n_descriptors_to_tx; + + /* Process from tail to end of descriptor ring. */ + if (n_descriptors_to_tx > 0 && dq->tail_index < dq->n_descriptors) + { + u32 n = + clib_min (dq->n_descriptors - dq->tail_index, n_descriptors_to_tx); + n = ixge_tx_no_wrap (xm, xd, dq, from, dq->tail_index, n, &tx_state); + from += n; + n_descriptors_to_tx -= n; + dq->tail_index += n; + ASSERT (dq->tail_index <= dq->n_descriptors); + if (dq->tail_index == dq->n_descriptors) + dq->tail_index = 0; + } + + if (n_descriptors_to_tx > 0) + { + u32 n = + ixge_tx_no_wrap (xm, xd, dq, from, 0, n_descriptors_to_tx, &tx_state); + from += n; + ASSERT (n == n_descriptors_to_tx); + dq->tail_index += n; + ASSERT (dq->tail_index <= dq->n_descriptors); + if (dq->tail_index == dq->n_descriptors) + dq->tail_index = 0; + } + + /* We should only get full packets. */ + ASSERT (tx_state.is_start_of_packet); + + /* Report status when last descriptor is done. */ + { + u32 i = dq->tail_index == 0 ? dq->n_descriptors - 1 : dq->tail_index - 1; + ixge_tx_descriptor_t *d = &dq->descriptors[i].tx; + d->status0 |= IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS; + } + + /* Give new descriptors to hardware. */ + { + ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_TX, queue_index); + + CLIB_MEMORY_BARRIER (); + + dr->tail_index = dq->tail_index; + } + + /* Free any buffers that are done. 
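+ * The pending-free vector holds both buffers recycled off the ring and
+ * buffers tail-dropped above that never reached it, hence the
+ * n - n_tail_drop adjustment just below.  Accounting sketch with
+ * illustrative numbers, not from this patch:
+ */
+
+static inline u32
+ring_population_after_free (u32 n_on_ring, u32 n_freed, u32 n_tail_drop)
+{
+  /* sketch: 64 recycled + 8 tail-dropped frees n_freed = 72 buffers,
+     but the ring itself only shrinks by 72 - 8 = 64 */
+  return n_on_ring - (n_freed - n_tail_drop);
+}
+
+/*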
*/ + { + u32 n = _vec_len (xm->tx_buffers_pending_free); + if (n > 0) + { + vlib_buffer_free_no_next (vm, xm->tx_buffers_pending_free, n); + _vec_len (xm->tx_buffers_pending_free) = 0; + ASSERT (dq->tx.n_buffers_on_ring >= n); + dq->tx.n_buffers_on_ring -= (n - n_tail_drop); + } + } + + return f->n_vectors; +} + +static uword +ixge_rx_queue_no_wrap (ixge_main_t * xm, + ixge_device_t * xd, + ixge_dma_queue_t * dq, + u32 start_descriptor_index, u32 n_descriptors) +{ + vlib_main_t *vm = xm->vlib_main; + vlib_node_runtime_t *node = dq->rx.node; + ixge_descriptor_t *d; + static ixge_descriptor_t *d_trace_save; + static u32 *d_trace_buffers; + u32 n_descriptors_left = n_descriptors; + u32 *to_rx = + vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index); + u32 *to_add; + u32 bi_sop = dq->rx.saved_start_of_packet_buffer_index; + u32 bi_last = dq->rx.saved_last_buffer_index; + u32 next_index_sop = dq->rx.saved_start_of_packet_next_index; + u32 is_sop = dq->rx.is_start_of_packet; + u32 next_index, n_left_to_next, *to_next; + u32 n_packets = 0; + u32 n_bytes = 0; + u32 n_trace = vlib_get_trace_count (vm, node); + vlib_buffer_t *b_last, b_dummy; + + ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors); + d = &dq->descriptors[start_descriptor_index]; + + b_last = bi_last != ~0 ? vlib_get_buffer (vm, bi_last) : &b_dummy; + next_index = dq->rx.next_index; + + if (n_trace > 0) + { + u32 n = clib_min (n_trace, n_descriptors); + if (d_trace_save) + { + _vec_len (d_trace_save) = 0; + _vec_len (d_trace_buffers) = 0; + } + vec_add (d_trace_save, (ixge_descriptor_t *) d, n); + vec_add (d_trace_buffers, to_rx, n); + } + + { + uword l = vec_len (xm->rx_buffers_to_add); + + if (l < n_descriptors_left) + { + u32 n_to_alloc = 2 * dq->n_descriptors - l; + u32 n_allocated; + + vec_resize (xm->rx_buffers_to_add, n_to_alloc); + + _vec_len (xm->rx_buffers_to_add) = l; + n_allocated = vlib_buffer_alloc_from_free_list + (vm, xm->rx_buffers_to_add + l, n_to_alloc, + xm->vlib_buffer_free_list_index); + _vec_len (xm->rx_buffers_to_add) += n_allocated; + + /* Handle transient allocation failure */ + if (PREDICT_FALSE (l + n_allocated <= n_descriptors_left)) + { + if (n_allocated == 0) + vlib_error_count (vm, ixge_input_node.index, + IXGE_ERROR_rx_alloc_no_physmem, 1); + else + vlib_error_count (vm, ixge_input_node.index, + IXGE_ERROR_rx_alloc_fail, 1); + + n_descriptors_left = l + n_allocated; + } + n_descriptors = n_descriptors_left; + } + + /* Add buffers from end of vector going backwards. */ + to_add = vec_end (xm->rx_buffers_to_add) - 1; + } + + while (n_descriptors_left > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_descriptors_left >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t *b0, *b1; + u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0; + u32 bi1, fi1, len1, l3_offset1, s21, s01, flags1; + u8 is_eop0, error0, next0; + u8 is_eop1, error1, next1; + ixge_descriptor_t d0, d1; + + vlib_prefetch_buffer_with_index (vm, to_rx[2], STORE); + vlib_prefetch_buffer_with_index (vm, to_rx[3], STORE); + + CLIB_PREFETCH (d + 2, 32, STORE); + + d0.as_u32x4 = d[0].as_u32x4; + d1.as_u32x4 = d[1].as_u32x4; + + s20 = d0.rx_from_hw.status[2]; + s21 = d1.rx_from_hw.status[2]; + + s00 = d0.rx_from_hw.status[0]; + s01 = d1.rx_from_hw.status[0]; + + if (! 
+ ((s20 & s21) & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE)) + goto found_hw_owned_descriptor_x2; + + bi0 = to_rx[0]; + bi1 = to_rx[1]; + + ASSERT (to_add - 1 >= xm->rx_buffers_to_add); + fi0 = to_add[0]; + fi1 = to_add[-1]; + + to_rx[0] = fi0; + to_rx[1] = fi1; + to_rx += 2; + to_add -= 2; + + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, bi0)); + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, bi1)); + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, fi0)); + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, fi1)); + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + + CLIB_PREFETCH (b0->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b1->data, CLIB_CACHE_LINE_BYTES, LOAD); + + is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; + is_eop1 = (s21 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; + + ixge_rx_next_and_error_from_status_x2 (xd, s00, s20, s01, s21, + &next0, &error0, &flags0, + &next1, &error1, &flags1); + + next0 = is_sop ? next0 : next_index_sop; + next1 = is_eop0 ? next1 : next0; + next_index_sop = next1; + + b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT); + b1->flags |= flags1 | (!is_eop1 << VLIB_BUFFER_LOG2_NEXT_PRESENT); + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + b0->error = node->errors[error0]; + b1->error = node->errors[error1]; + + len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor; + len1 = d1.rx_from_hw.n_packet_bytes_this_descriptor; + n_bytes += len0 + len1; + n_packets += is_eop0 + is_eop1; + + /* Give new buffers to hardware. */ + d0.rx_to_hw.tail_address = + vlib_get_buffer_data_physical_address (vm, fi0); + d1.rx_to_hw.tail_address = + vlib_get_buffer_data_physical_address (vm, fi1); + d0.rx_to_hw.head_address = d[0].rx_to_hw.tail_address; + d1.rx_to_hw.head_address = d[1].rx_to_hw.tail_address; + d[0].as_u32x4 = d0.as_u32x4; + d[1].as_u32x4 = d1.as_u32x4; + + d += 2; + n_descriptors_left -= 2; + + /* Point to either l2 or l3 header depending on next. */ + l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT)) + ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0; + l3_offset1 = (is_eop0 && (next1 != IXGE_RX_NEXT_ETHERNET_INPUT)) + ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s01) : 0; + + b0->current_length = len0 - l3_offset0; + b1->current_length = len1 - l3_offset1; + b0->current_data = l3_offset0; + b1->current_data = l3_offset1; + + b_last->next_buffer = is_sop ? ~0 : bi0; + b0->next_buffer = is_eop0 ? ~0 : bi1; + bi_last = bi1; + b_last = b1; + + if (CLIB_DEBUG > 0) + { + u32 bi_sop0 = is_sop ? bi0 : bi_sop; + u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0; + + if (is_eop0) + { + u8 *msg = vlib_validate_buffer (vm, bi_sop0, + /* follow_buffer_next */ 1); + ASSERT (!msg); + } + if (is_eop1) + { + u8 *msg = vlib_validate_buffer (vm, bi_sop1, + /* follow_buffer_next */ 1); + ASSERT (!msg); + } + } + if (0) /* "Dave" version */ + { + u32 bi_sop0 = is_sop ? bi0 : bi_sop; + u32 bi_sop1 = is_eop0 ? 
bi1 : bi_sop0; + + if (is_eop0) + { + to_next[0] = bi_sop0; + to_next++; + n_left_to_next--; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi_sop0, next0); + } + if (is_eop1) + { + to_next[0] = bi_sop1; + to_next++; + n_left_to_next--; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi_sop1, next1); + } + is_sop = is_eop1; + bi_sop = bi_sop1; + } + if (1) /* "Eliot" version */ + { + /* Speculatively enqueue to cached next. */ + u8 saved_is_sop = is_sop; + u32 bi_sop_save = bi_sop; + + bi_sop = saved_is_sop ? bi0 : bi_sop; + to_next[0] = bi_sop; + to_next += is_eop0; + n_left_to_next -= is_eop0; + + bi_sop = is_eop0 ? bi1 : bi_sop; + to_next[0] = bi_sop; + to_next += is_eop1; + n_left_to_next -= is_eop1; + + is_sop = is_eop1; + + if (PREDICT_FALSE + (!(next0 == next_index && next1 == next_index))) + { + /* Undo speculation. */ + to_next -= is_eop0 + is_eop1; + n_left_to_next += is_eop0 + is_eop1; + + /* Re-do both descriptors being careful about where we enqueue. */ + bi_sop = saved_is_sop ? bi0 : bi_sop_save; + if (is_eop0) + { + if (next0 != next_index) + vlib_set_next_frame_buffer (vm, node, next0, bi_sop); + else + { + to_next[0] = bi_sop; + to_next += 1; + n_left_to_next -= 1; + } + } + + bi_sop = is_eop0 ? bi1 : bi_sop; + if (is_eop1) + { + if (next1 != next_index) + vlib_set_next_frame_buffer (vm, node, next1, bi_sop); + else + { + to_next[0] = bi_sop; + to_next += 1; + n_left_to_next -= 1; + } + } + + /* Switch cached next index when next for both packets is the same. */ + if (is_eop0 && is_eop1 && next0 == next1) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + next_index = next0; + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + } + } + } + } + + /* Bail out of dual loop and proceed with single loop. */ + found_hw_owned_descriptor_x2: + + while (n_descriptors_left > 0 && n_left_to_next > 0) + { + vlib_buffer_t *b0; + u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0; + u8 is_eop0, error0, next0; + ixge_descriptor_t d0; + + d0.as_u32x4 = d[0].as_u32x4; + + s20 = d0.rx_from_hw.status[2]; + s00 = d0.rx_from_hw.status[0]; + + if (!(s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE)) + goto found_hw_owned_descriptor_x1; + + bi0 = to_rx[0]; + ASSERT (to_add >= xm->rx_buffers_to_add); + fi0 = to_add[0]; + + to_rx[0] = fi0; + to_rx += 1; + to_add -= 1; + + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, bi0)); + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, fi0)); + + b0 = vlib_get_buffer (vm, bi0); + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; + ixge_rx_next_and_error_from_status_x1 + (xd, s00, s20, &next0, &error0, &flags0); + + next0 = is_sop ? next0 : next_index_sop; + next_index_sop = next0; + + b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT); + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + b0->error = node->errors[error0]; + + len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor; + n_bytes += len0; + n_packets += is_eop0; + + /* Give new buffer to hardware. 
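+ * Each serviced slot trades the just-filled buffer out and a fresh one
+ * in, so the ring never runs dry.  The swap pattern below, sketched
+ * with hypothetical names (illustrative only, not part of this patch):
+ */
+
+static inline u32
+rx_slot_recycle_sketch (u32 * slot_bi, u32 fresh_bi, u64 fresh_pa,
+			ixge_rx_to_hw_descriptor_t * d)
+{
+  u32 filled_bi = slot_bi[0];	/* buffer hardware just wrote into */
+  slot_bi[0] = fresh_bi;	/* slot now owns a fresh buffer */
+  d->tail_address = fresh_pa;	/* where hardware writes next */
+  d->head_address = fresh_pa;
+  return filled_bi;		/* caller enqueues this one */
+}
+
+/*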
*/ + d0.rx_to_hw.tail_address = + vlib_get_buffer_data_physical_address (vm, fi0); + d0.rx_to_hw.head_address = d0.rx_to_hw.tail_address; + d[0].as_u32x4 = d0.as_u32x4; + + d += 1; + n_descriptors_left -= 1; + + /* Point to either l2 or l3 header depending on next. */ + l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT)) + ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0; + b0->current_length = len0 - l3_offset0; + b0->current_data = l3_offset0; + + b_last->next_buffer = is_sop ? ~0 : bi0; + bi_last = bi0; + b_last = b0; + + bi_sop = is_sop ? bi0 : bi_sop; + + if (CLIB_DEBUG > 0 && is_eop0) + { + u8 *msg = + vlib_validate_buffer (vm, bi_sop, /* follow_buffer_next */ 1); + ASSERT (!msg); + } + + if (0) /* "Dave" version */ + { + if (is_eop0) + { + to_next[0] = bi_sop; + to_next++; + n_left_to_next--; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi_sop, next0); + } + } + if (1) /* "Eliot" version */ + { + if (PREDICT_TRUE (next0 == next_index)) + { + to_next[0] = bi_sop; + to_next += is_eop0; + n_left_to_next -= is_eop0; + } + else + { + if (next0 != next_index && is_eop0) + vlib_set_next_frame_buffer (vm, node, next0, bi_sop); + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + next_index = next0; + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + } + } + is_sop = is_eop0; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + +found_hw_owned_descriptor_x1: + if (n_descriptors_left > 0) + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + + _vec_len (xm->rx_buffers_to_add) = (to_add + 1) - xm->rx_buffers_to_add; + + { + u32 n_done = n_descriptors - n_descriptors_left; + + if (n_trace > 0 && n_done > 0) + { + u32 n = clib_min (n_trace, n_done); + ixge_rx_trace (xm, xd, dq, + d_trace_save, + d_trace_buffers, + &dq->descriptors[start_descriptor_index], n); + vlib_set_trace_count (vm, node, n_trace - n); + } + if (d_trace_save) + { + _vec_len (d_trace_save) = 0; + _vec_len (d_trace_buffers) = 0; + } + + /* Don't keep a reference to b_last if we don't have to. + Otherwise we can over-write a next_buffer pointer after having already + enqueued a packet. */ + if (is_sop) + { + b_last->next_buffer = ~0; + bi_last = ~0; + } + + dq->rx.n_descriptors_done_this_call = n_done; + dq->rx.n_descriptors_done_total += n_done; + dq->rx.is_start_of_packet = is_sop; + dq->rx.saved_start_of_packet_buffer_index = bi_sop; + dq->rx.saved_last_buffer_index = bi_last; + dq->rx.saved_start_of_packet_next_index = next_index_sop; + dq->rx.next_index = next_index; + dq->rx.n_bytes += n_bytes; + + return n_packets; + } +} + +static uword +ixge_rx_queue (ixge_main_t * xm, + ixge_device_t * xd, + vlib_node_runtime_t * node, u32 queue_index) +{ + ixge_dma_queue_t *dq = + vec_elt_at_index (xd->dma_queues[VLIB_RX], queue_index); + ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, dq->queue_index); + uword n_packets = 0; + u32 hw_head_index, sw_head_index; + + /* One time initialization. */ + if (!dq->rx.node) + { + dq->rx.node = node; + dq->rx.is_start_of_packet = 1; + dq->rx.saved_start_of_packet_buffer_index = ~0; + dq->rx.saved_last_buffer_index = ~0; + } + + dq->rx.next_index = node->cached_next_index; + + dq->rx.n_descriptors_done_total = 0; + dq->rx.n_descriptors_done_this_call = 0; + dq->rx.n_bytes = 0; + + /* Fetch head from hardware and compare to where we think we are.
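+ * When the hardware head has wrapped past the end of the ring, the
+ * work below is split into two contiguous runs.  Worked example with
+ * illustrative values: n_descriptors = 512, sw_head_index = 500,
+ * hw_head_index = 20.  The first no-wrap pass covers [500, 512), the
+ * second covers [0, 20), i.e. 12 + 20 = 32 descriptors in total --
+ * exactly ixge_ring_sub (dq, 500, 20) = (20 - 500) + 512 = 32.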
*/ + hw_head_index = dr->head_index; + sw_head_index = dq->head_index; + + if (hw_head_index == sw_head_index) + goto done; + + if (hw_head_index < sw_head_index) + { + u32 n_tried = dq->n_descriptors - sw_head_index; + n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried); + sw_head_index = + ixge_ring_add (dq, sw_head_index, + dq->rx.n_descriptors_done_this_call); + + if (dq->rx.n_descriptors_done_this_call != n_tried) + goto done; + } + if (hw_head_index >= sw_head_index) + { + u32 n_tried = hw_head_index - sw_head_index; + n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried); + sw_head_index = + ixge_ring_add (dq, sw_head_index, + dq->rx.n_descriptors_done_this_call); + } + +done: + dq->head_index = sw_head_index; + dq->tail_index = + ixge_ring_add (dq, dq->tail_index, dq->rx.n_descriptors_done_total); + + /* Give tail back to hardware. */ + CLIB_MEMORY_BARRIER (); + + dr->tail_index = dq->tail_index; + + vlib_increment_combined_counter (vnet_main. + interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + 0 /* cpu_index */ , + xd->vlib_sw_if_index, n_packets, + dq->rx.n_bytes); + + return n_packets; +} + +static void +ixge_interrupt (ixge_main_t * xm, ixge_device_t * xd, u32 i) +{ + vlib_main_t *vm = xm->vlib_main; + ixge_regs_t *r = xd->regs; + + if (i != 20) + { + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d, %s",.format_args = "i1t1",.n_enum_strings = + 16,.enum_strings = + { + "flow director", + "rx miss", + "pci exception", + "mailbox", + "link status change", + "linksec key exchange", + "manageability event", + "reserved23", + "sdp0", + "sdp1", + "sdp2", + "sdp3", + "ecc", "descriptor handler error", "tcp timer", "other",},}; + struct + { + u8 instance; + u8 index; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->index = i - 16; + } + else + { + u32 v = r->xge_mac.link_status; + uword is_up = (v & (1 << 30)) != 0; + + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d, link status change 0x%x",.format_args = "i4i4",}; + struct + { + u32 instance, link_status; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->link_status = v; + xd->link_status_at_last_link_change = v; + + vlib_process_signal_event (vm, ixge_process_node.index, + EVENT_SET_FLAGS, + ((is_up << 31) | xd->vlib_hw_if_index)); + } +} + +always_inline u32 +clean_block (u32 * b, u32 * t, u32 n_left) +{ + u32 *t0 = t; + + while (n_left >= 4) + { + u32 bi0, bi1, bi2, bi3; + + t[0] = bi0 = b[0]; + b[0] = 0; + t += bi0 != 0; + + t[0] = bi1 = b[1]; + b[1] = 0; + t += bi1 != 0; + + t[0] = bi2 = b[2]; + b[2] = 0; + t += bi2 != 0; + + t[0] = bi3 = b[3]; + b[3] = 0; + t += bi3 != 0; + + b += 4; + n_left -= 4; + } + + while (n_left > 0) + { + u32 bi0; + + t[0] = bi0 = b[0]; + b[0] = 0; + t += bi0 != 0; + b += 1; + n_left -= 1; + } + + return t - t0; +} + +static void +ixge_tx_queue (ixge_main_t * xm, ixge_device_t * xd, u32 queue_index) +{ + vlib_main_t *vm = xm->vlib_main; + ixge_dma_queue_t *dq = + vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index); + u32 n_clean, *b, *t, *t0; + i32 n_hw_owned_descriptors; + i32 first_to_clean, last_to_clean; + u64 hwbp_race = 0; + + /* Handle case where head write back pointer update + * arrives after the interrupt during high PCI bus loads. 
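+ * The loop below simply spins until the DMA'd write-back value moves
+ * while descriptors are known to be outstanding.  A stripped-down
+ * sketch of the same idea (elog bookkeeping and the head != tail
+ * full-ring test omitted; 'wb' is the device-updated cell):
+ */
+
+static inline u32
+wait_for_head_write_back_sketch (volatile u32 * wb, u32 cached_head,
+				 u32 n_outstanding)
+{
+  /* illustrative sketch, not part of this patch */
+  while (n_outstanding && wb[0] == cached_head)
+    ;				/* interrupt beat the PCIe write-back */
+  return wb[0];
+}
+
+/*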
+ */ + while ((dq->head_index == dq->tx.head_index_write_back[0]) && + dq->tx.n_buffers_on_ring && (dq->head_index != dq->tail_index)) + { + hwbp_race++; + if (IXGE_HWBP_RACE_ELOG && (hwbp_race == 1)) + { + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d tx head index race: head %4d, tail %4d, buffs %4d",.format_args + = "i4i4i4i4",}; + struct + { + u32 instance, head_index, tail_index, n_buffers_on_ring; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->head_index = dq->head_index; + ed->tail_index = dq->tail_index; + ed->n_buffers_on_ring = dq->tx.n_buffers_on_ring; + } + } + + dq->head_index = dq->tx.head_index_write_back[0]; + n_hw_owned_descriptors = ixge_ring_sub (dq, dq->head_index, dq->tail_index); + ASSERT (dq->tx.n_buffers_on_ring >= n_hw_owned_descriptors); + n_clean = dq->tx.n_buffers_on_ring - n_hw_owned_descriptors; + + if (IXGE_HWBP_RACE_ELOG && hwbp_race) + { + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d tx head index race: head %4d, hw_owned %4d, n_clean %4d, retries %d",.format_args + = "i4i4i4i4i4",}; + struct + { + u32 instance, head_index, n_hw_owned_descriptors, n_clean, retries; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->head_index = dq->head_index; + ed->n_hw_owned_descriptors = n_hw_owned_descriptors; + ed->n_clean = n_clean; + ed->retries = hwbp_race; + } + + /* + * This function used to wait until hardware owned zero descriptors. + * At high PPS rates, that doesn't happen until the TX ring is + * completely full of descriptors which need to be cleaned up. + * That, in turn, causes TX ring-full drops and/or long RX service + * interruptions. + */ + if (n_clean == 0) + return; + + /* Clean the n_clean descriptors prior to the reported hardware head */ + last_to_clean = dq->head_index - 1; + last_to_clean = (last_to_clean < 0) ? last_to_clean + dq->n_descriptors : + last_to_clean; + + first_to_clean = (last_to_clean) - (n_clean - 1); + first_to_clean = (first_to_clean < 0) ? first_to_clean + dq->n_descriptors : + first_to_clean; + + vec_resize (xm->tx_buffers_pending_free, dq->n_descriptors - 1); + t0 = t = xm->tx_buffers_pending_free; + b = dq->descriptor_buffer_indices + first_to_clean; + + /* Wrap case: clean from first to end, then start to last */ + if (first_to_clean > last_to_clean) + { + t += clean_block (b, t, (dq->n_descriptors - 1) - first_to_clean); + first_to_clean = 0; + b = dq->descriptor_buffer_indices; + } + + /* Typical case: clean from first to last */ + if (first_to_clean <= last_to_clean) + t += clean_block (b, t, (last_to_clean - first_to_clean) + 1); + + if (t > t0) + { + u32 n = t - t0; + vlib_buffer_free_no_next (vm, t0, n); + ASSERT (dq->tx.n_buffers_on_ring >= n); + dq->tx.n_buffers_on_ring -= n; + _vec_len (xm->tx_buffers_pending_free) = 0; + } +} + +/* RX queue interrupts 0 thru 7; TX 8 thru 15. 
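+ * So status bit 3 means RX queue 3 and status bit 11 means TX queue
+ * 11 - 8 = 3; bits 16 and up are the miscellaneous causes decoded by
+ * ixge_interrupt () above.  Equivalent masks, for illustration only:
+ */
+
+#define EXAMPLE_RX_QUEUE_IRQ_MASK 0x00ff	/* interrupt bits 0..7, sketch */
+#define EXAMPLE_TX_QUEUE_IRQ_MASK 0xff00	/* interrupt bits 8..15, sketch */
+
+/*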
*/ +always_inline uword +ixge_interrupt_is_rx_queue (uword i) +{ + return i < 8; +} + +always_inline uword +ixge_interrupt_is_tx_queue (uword i) +{ + return i >= 8 && i < 16; +} + +always_inline uword +ixge_tx_queue_to_interrupt (uword i) +{ + return 8 + i; +} + +always_inline uword +ixge_rx_queue_to_interrupt (uword i) +{ + return 0 + i; +} + +always_inline uword +ixge_interrupt_rx_queue (uword i) +{ + ASSERT (ixge_interrupt_is_rx_queue (i)); + return i - 0; +} + +always_inline uword +ixge_interrupt_tx_queue (uword i) +{ + ASSERT (ixge_interrupt_is_tx_queue (i)); + return i - 8; +} + +static uword +ixge_device_input (ixge_main_t * xm, + ixge_device_t * xd, vlib_node_runtime_t * node) +{ + ixge_regs_t *r = xd->regs; + u32 i, s; + uword n_rx_packets = 0; + + s = r->interrupt.status_write_1_to_set; + if (s) + r->interrupt.status_write_1_to_clear = s; + + /* *INDENT-OFF* */ + foreach_set_bit (i, s, ({ + if (ixge_interrupt_is_rx_queue (i)) + n_rx_packets += ixge_rx_queue (xm, xd, node, ixge_interrupt_rx_queue (i)); + + else if (ixge_interrupt_is_tx_queue (i)) + ixge_tx_queue (xm, xd, ixge_interrupt_tx_queue (i)); + + else + ixge_interrupt (xm, xd, i); + })); + /* *INDENT-ON* */ + + return n_rx_packets; +} + +static uword +ixge_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) +{ + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd; + uword n_rx_packets = 0; + + if (node->state == VLIB_NODE_STATE_INTERRUPT) + { + uword i; + + /* Loop over devices with interrupts. */ + /* *INDENT-OFF* */ + foreach_set_bit (i, node->runtime_data[0], ({ + xd = vec_elt_at_index (xm->devices, i); + n_rx_packets += ixge_device_input (xm, xd, node); + + /* Re-enable interrupts since we're going to stay in interrupt mode. */ + if (! (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) + xd->regs->interrupt.enable_write_1_to_set = ~0; + })); + /* *INDENT-ON* */ + + /* Clear mask of devices with pending interrupts. */ + node->runtime_data[0] = 0; + } + else + { + /* Poll all devices for input/interrupts. */ + vec_foreach (xd, xm->devices) + { + n_rx_packets += ixge_device_input (xm, xd, node); + + /* Re-enable interrupts when switching out of polling mode. */ + if (node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) + xd->regs->interrupt.enable_write_1_to_set = ~0; + } + } + + return n_rx_packets; +} + +static char *ixge_error_strings[] = { +#define _(n,s) s, + foreach_ixge_error +#undef _ +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ixge_input_node, static) = { + .function = ixge_input, + .type = VLIB_NODE_TYPE_INPUT, + .name = "ixge-input", + + /* Will be enabled if/when hardware is detected. 
*/ + .state = VLIB_NODE_STATE_DISABLED, + + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_ixge_rx_dma_trace, + + .n_errors = IXGE_N_ERROR, + .error_strings = ixge_error_strings, + + .n_next_nodes = IXGE_RX_N_NEXT, + .next_nodes = { + [IXGE_RX_NEXT_DROP] = "error-drop", + [IXGE_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", + [IXGE_RX_NEXT_IP4_INPUT] = "ip4-input", + [IXGE_RX_NEXT_IP6_INPUT] = "ip6-input", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH_CLONE (ixge_input) +CLIB_MULTIARCH_SELECT_FN (ixge_input) +/* *INDENT-ON* */ + +static u8 * +format_ixge_device_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, i); + return format (s, "TenGigabitEthernet%U", + format_vlib_pci_handle, &xd->pci_device.bus_address); +} + +#define IXGE_COUNTER_IS_64_BIT (1 << 0) +#define IXGE_COUNTER_NOT_CLEAR_ON_READ (1 << 1) + +static u8 ixge_counter_flags[] = { +#define _(a,f) 0, +#define _64(a,f) IXGE_COUNTER_IS_64_BIT, + foreach_ixge_counter +#undef _ +#undef _64 +}; + +static void +ixge_update_counters (ixge_device_t * xd) +{ + /* Byte offset for counter registers. */ + static u32 reg_offsets[] = { +#define _(a,f) (a) / sizeof (u32), +#define _64(a,f) _(a,f) + foreach_ixge_counter +#undef _ +#undef _64 + }; + volatile u32 *r = (volatile u32 *) xd->regs; + int i; + + for (i = 0; i < ARRAY_LEN (xd->counters); i++) + { + u32 o = reg_offsets[i]; + xd->counters[i] += r[o]; + if (ixge_counter_flags[i] & IXGE_COUNTER_NOT_CLEAR_ON_READ) + r[o] = 0; + if (ixge_counter_flags[i] & IXGE_COUNTER_IS_64_BIT) + xd->counters[i] += (u64) r[o + 1] << (u64) 32; + } +} + +static u8 * +format_ixge_device_id (u8 * s, va_list * args) +{ + u32 device_id = va_arg (*args, u32); + char *t = 0; + switch (device_id) + { +#define _(f,n) case n: t = #f; break; + foreach_ixge_pci_device_id; +#undef _ + default: + t = 0; + break; + } + if (t == 0) + s = format (s, "unknown 0x%x", device_id); + else + s = format (s, "%s", t); + return s; +} + +static u8 * +format_ixge_link_status (u8 * s, va_list * args) +{ + ixge_device_t *xd = va_arg (*args, ixge_device_t *); + u32 v = xd->link_status_at_last_link_change; + + s = format (s, "%s", (v & (1 << 30)) ? 
"up" : "down"); + + { + char *modes[] = { + "1g", "10g parallel", "10g serial", "autoneg", + }; + char *speeds[] = { + "unknown", "100m", "1g", "10g", + }; + s = format (s, ", mode %s, speed %s", + modes[(v >> 26) & 3], speeds[(v >> 28) & 3]); + } + + return s; +} + +static u8 * +format_ixge_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, dev_instance); + ixge_phy_t *phy = xd->phys + xd->phy_index; + uword indent = format_get_indent (s); + + ixge_update_counters (xd); + xd->link_status_at_last_link_change = xd->regs->xge_mac.link_status; + + s = format (s, "Intel 8259X: id %U\n%Ulink %U", + format_ixge_device_id, xd->device_id, + format_white_space, indent + 2, format_ixge_link_status, xd); + + { + + s = format (s, "\n%UPCIe %U", format_white_space, indent + 2, + format_vlib_pci_link_speed, &xd->pci_device); + } + + s = format (s, "\n%U", format_white_space, indent + 2); + if (phy->mdio_address != ~0) + s = format (s, "PHY address %d, id 0x%x", phy->mdio_address, phy->id); + else if (xd->sfp_eeprom.id == SFP_ID_sfp) + s = format (s, "SFP %U", format_sfp_eeprom, &xd->sfp_eeprom); + else + s = format (s, "PHY not found"); + + /* FIXME */ + { + ixge_dma_queue_t *dq = vec_elt_at_index (xd->dma_queues[VLIB_RX], 0); + ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0); + u32 hw_head_index = dr->head_index; + u32 sw_head_index = dq->head_index; + u32 nitems; + + nitems = ixge_ring_sub (dq, hw_head_index, sw_head_index); + s = format (s, "\n%U%d unprocessed, %d total buffers on rx queue 0 ring", + format_white_space, indent + 2, nitems, dq->n_descriptors); + + s = format (s, "\n%U%d buffers in driver rx cache", + format_white_space, indent + 2, + vec_len (xm->rx_buffers_to_add)); + + s = format (s, "\n%U%d buffers on tx queue 0 ring", + format_white_space, indent + 2, + xd->dma_queues[VLIB_TX][0].tx.n_buffers_on_ring); + } + { + u32 i; + u64 v; + static char *names[] = { +#define _(a,f) #f, +#define _64(a,f) _(a,f) + foreach_ixge_counter +#undef _ +#undef _64 + }; + + for (i = 0; i < ARRAY_LEN (names); i++) + { + v = xd->counters[i] - xd->counters_last_clear[i]; + if (v != 0) + s = format (s, "\n%U%-40U%16Ld", + format_white_space, indent + 2, + format_c_identifier, names[i], v); + } + } + + return s; +} + +static void +ixge_clear_hw_interface_counters (u32 instance) +{ + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, instance); + ixge_update_counters (xd); + memcpy (xd->counters_last_clear, xd->counters, sizeof (xd->counters)); +} + +/* + * Dynamically redirect all pkts from a specific interface + * to the specified node + */ +static void +ixge_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + ixge_main_t *xm = &ixge_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + ixge_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + xd->per_interface_next_index = node_index; + return; + } + + xd->per_interface_next_index = + vlib_node_add_next (xm->vlib_main, ixge_input_node.index, node_index); +} + + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (ixge_device_class) = { + .name = "ixge", + .tx_function = ixge_interface_tx, + .format_device_name = format_ixge_device_name, + .format_device = format_ixge_device, + .format_tx_trace = format_ixge_tx_dma_trace, + .clear_counters = 
ixge_clear_hw_interface_counters, + .admin_up_down_function = ixge_interface_admin_up_down, + .rx_redirect_to_node = ixge_set_interface_next_node, + .flatten_output_chains = 1, +}; +/* *INDENT-ON* */ + +#define IXGE_N_BYTES_IN_RX_BUFFER (2048) // DAW-HACK: Set Rx buffer size so all packets < ETH_MTU_SIZE fit in the buffer (i.e. sop & eop for all descriptors). + +static clib_error_t * +ixge_dma_init (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 queue_index) +{ + ixge_main_t *xm = &ixge_main; + vlib_main_t *vm = xm->vlib_main; + ixge_dma_queue_t *dq; + clib_error_t *error = 0; + + vec_validate (xd->dma_queues[rt], queue_index); + dq = vec_elt_at_index (xd->dma_queues[rt], queue_index); + + if (!xm->n_descriptors_per_cache_line) + xm->n_descriptors_per_cache_line = + CLIB_CACHE_LINE_BYTES / sizeof (dq->descriptors[0]); + + if (!xm->n_bytes_in_rx_buffer) + xm->n_bytes_in_rx_buffer = IXGE_N_BYTES_IN_RX_BUFFER; + xm->n_bytes_in_rx_buffer = round_pow2 (xm->n_bytes_in_rx_buffer, 1024); + if (!xm->vlib_buffer_free_list_index) + { + xm->vlib_buffer_free_list_index = + vlib_buffer_get_or_create_free_list (vm, xm->n_bytes_in_rx_buffer, + "ixge rx"); + ASSERT (xm->vlib_buffer_free_list_index != 0); + } + + if (!xm->n_descriptors[rt]) + xm->n_descriptors[rt] = 4 * VLIB_FRAME_SIZE; + + dq->queue_index = queue_index; + dq->n_descriptors = + round_pow2 (xm->n_descriptors[rt], xm->n_descriptors_per_cache_line); + dq->head_index = dq->tail_index = 0; + + dq->descriptors = vlib_physmem_alloc_aligned (vm, &error, + dq->n_descriptors * + sizeof (dq->descriptors[0]), + 128 /* per chip spec */ ); + if (error) + return error; + + memset (dq->descriptors, 0, + dq->n_descriptors * sizeof (dq->descriptors[0])); + vec_resize (dq->descriptor_buffer_indices, dq->n_descriptors); + + if (rt == VLIB_RX) + { + u32 n_alloc, i; + + n_alloc = vlib_buffer_alloc_from_free_list + (vm, dq->descriptor_buffer_indices, + vec_len (dq->descriptor_buffer_indices), + xm->vlib_buffer_free_list_index); + ASSERT (n_alloc == vec_len (dq->descriptor_buffer_indices)); + for (i = 0; i < n_alloc; i++) + { + vlib_buffer_t *b = + vlib_get_buffer (vm, dq->descriptor_buffer_indices[i]); + dq->descriptors[i].rx_to_hw.tail_address = + vlib_physmem_virtual_to_physical (vm, b->data); + } + } + else + { + u32 i; + + dq->tx.head_index_write_back = + vlib_physmem_alloc (vm, &error, CLIB_CACHE_LINE_BYTES); + + for (i = 0; i < dq->n_descriptors; i++) + dq->descriptors[i].tx = xm->tx_descriptor_template; + + vec_validate (xm->tx_buffers_pending_free, dq->n_descriptors - 1); + } + + { + ixge_dma_regs_t *dr = get_dma_regs (xd, rt, queue_index); + u64 a; + + a = vlib_physmem_virtual_to_physical (vm, dq->descriptors); + dr->descriptor_address[0] = a & 0xFFFFFFFF; + dr->descriptor_address[1] = a >> (u64) 32; + dr->n_descriptor_bytes = dq->n_descriptors * sizeof (dq->descriptors[0]); + dq->head_index = dq->tail_index = 0; + + if (rt == VLIB_RX) + { + ASSERT ((xm->n_bytes_in_rx_buffer / 1024) < 32); + dr->rx_split_control = + ( /* buffer size */ ((xm->n_bytes_in_rx_buffer / 1024) << 0) + | ( /* lo free descriptor threshold (units of 64 descriptors) */ + (1 << 22)) | ( /* descriptor type: advanced one buffer */ + (1 << 25)) | ( /* drop if no descriptors available */ + (1 << 28))); + + /* Give hardware all but last 16 cache lines' worth of descriptors. */ + dq->tail_index = dq->n_descriptors - + 16 * xm->n_descriptors_per_cache_line; + } + else + { + /* Make sure its initialized before hardware can get to it. 
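+ * The 64-bit physical address of the write-back cell is then split
+ * across two 32-bit registers, and since the cell is cache-line
+ * aligned the low bit of the low word is free to carry the enable
+ * flag.  A sketch of the split (also the shape used for
+ * descriptor_address[0]/[1] above; illustrative, not from this patch):
+ */
+
+static inline void
+program_write_back_address_sketch (volatile u32 * reg, u64 pa)
+{
+  reg[0] = (u32) pa | 1;	/* low 32 bits plus enable bit */
+  reg[1] = (u32) (pa >> 32);	/* high 32 bits */
+}
+
+/*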
*/ + dq->tx.head_index_write_back[0] = dq->head_index; + + a = + vlib_physmem_virtual_to_physical (vm, dq->tx.head_index_write_back); + dr->tx.head_index_write_back_address[0] = /* enable bit */ 1 | a; + dr->tx.head_index_write_back_address[1] = (u64) a >> (u64) 32; + } + + /* DMA on 82599 does not work with [13] rx data write relaxed ordering + and [12] undocumented set. */ + if (rt == VLIB_RX) + dr->dca_control &= ~((1 << 13) | (1 << 12)); + + CLIB_MEMORY_BARRIER (); + + if (rt == VLIB_TX) + { + xd->regs->tx_dma_control |= (1 << 0); + dr->control |= ((32 << 0) /* prefetch threshold */ + | (64 << 8) /* host threshold */ + | (0 << 16) /* writeback threshold */ ); + } + + /* Enable this queue and wait for hardware to initialize + before adding to tail. */ + if (rt == VLIB_TX) + { + dr->control |= 1 << 25; + while (!(dr->control & (1 << 25))) + ; + } + + /* Set head/tail indices and enable DMA. */ + dr->head_index = dq->head_index; + dr->tail_index = dq->tail_index; + } + + return error; +} + +static u32 +ixge_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags) +{ + ixge_device_t *xd; + ixge_regs_t *r; + u32 old; + ixge_main_t *xm = &ixge_main; + + xd = vec_elt_at_index (xm->devices, hw->dev_instance); + r = xd->regs; + + old = r->filter_control; + + if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) + r->filter_control = old | (1 << 9) /* unicast promiscuous */ ; + else + r->filter_control = old & ~(1 << 9); + + return old; +} + +static void +ixge_device_init (ixge_main_t * xm) +{ + vnet_main_t *vnm = vnet_get_main (); + ixge_device_t *xd; + + /* Reset chip(s). */ + vec_foreach (xd, xm->devices) + { + ixge_regs_t *r = xd->regs; + const u32 reset_bit = (1 << 26) | (1 << 3); + + r->control |= reset_bit; + + /* No need to suspend. Timed to take ~1e-6 secs */ + while (r->control & reset_bit) + ; + + /* Software loaded. */ + r->extended_control |= (1 << 28); + + ixge_phy_init (xd); + + /* Register ethernet interface. */ + { + u8 addr8[6]; + u32 i, addr32[2]; + clib_error_t *error; + + addr32[0] = r->rx_ethernet_address0[0][0]; + addr32[1] = r->rx_ethernet_address0[0][1]; + for (i = 0; i < 6; i++) + addr8[i] = addr32[i / 4] >> ((i % 4) * 8); + + error = ethernet_register_interface + (vnm, ixge_device_class.index, xd->device_index, + /* ethernet address */ addr8, + &xd->vlib_hw_if_index, ixge_flag_change); + if (error) + clib_error_report (error); + } + + { + vnet_sw_interface_t *sw = + vnet_get_hw_sw_interface (vnm, xd->vlib_hw_if_index); + xd->vlib_sw_if_index = sw->sw_if_index; + } + + ixge_dma_init (xd, VLIB_RX, /* queue_index */ 0); + + xm->n_descriptors[VLIB_TX] = 20 * VLIB_FRAME_SIZE; + + ixge_dma_init (xd, VLIB_TX, /* queue_index */ 0); + + /* RX/TX queue 0 gets mapped to interrupt bits 0 & 8. */ + r->interrupt.queue_mapping[0] = (( /* valid bit */ (1 << 7) | + ixge_rx_queue_to_interrupt (0)) << 0); + + r->interrupt.queue_mapping[0] |= (( /* valid bit */ (1 << 7) | + ixge_tx_queue_to_interrupt (0)) << 8); + + /* No use in getting too many interrupts. + Limit them to one every 3/4 ring size at line rate + min sized packets. + No need for this since kernel/vlib main loop provides adequate interrupt + limiting scheme. */ + if (0) + { + f64 line_rate_max_pps = + 10e9 / (8 * (64 + /* interframe padding */ 20)); + ixge_throttle_queue_interrupt (r, 0, + .75 * xm->n_descriptors[VLIB_RX] / + line_rate_max_pps); + } + + /* Accept all multicast and broadcast packets. Should really add them + to the dst_ethernet_address register array. 
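+ * With the bits named (hypothetical macros, not from this patch), the
+ * write below reads as follows:
+ */
+
+#define FILTER_CTL_ACCEPT_ALL_MULTICAST (1 << 8)	/* per ixge.h bit list */
+#define FILTER_CTL_ACCEPT_ALL_BROADCAST (1 << 10)
+/* r->filter_control |= FILTER_CTL_ACCEPT_ALL_BROADCAST
+                      | FILTER_CTL_ACCEPT_ALL_MULTICAST; */
+
+/*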
*/ + r->filter_control |= (1 << 10) | (1 << 8); + + /* Enable frames up to size in mac frame size register. */ + r->xge_mac.control |= 1 << 2; + r->xge_mac.rx_max_frame_size = (9216 + 14) << 16; + + /* Enable all interrupts. */ + if (!IXGE_ALWAYS_POLL) + r->interrupt.enable_write_1_to_set = ~0; + } +} + +static uword +ixge_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + vnet_main_t *vnm = vnet_get_main (); + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd; + uword event_type, *event_data = 0; + f64 timeout, link_debounce_deadline; + + ixge_device_init (xm); + + /* Clear all counters. */ + vec_foreach (xd, xm->devices) + { + ixge_update_counters (xd); + memset (xd->counters, 0, sizeof (xd->counters)); + } + + timeout = 30.0; + link_debounce_deadline = 1e70; + + while (1) + { + /* 36 bit stat counters could overflow in ~50 secs. + We poll every 30 secs to be conservative. */ + vlib_process_wait_for_event_or_clock (vm, timeout); + + event_type = vlib_process_get_events (vm, &event_data); + + switch (event_type) + { + case EVENT_SET_FLAGS: + /* 1 ms */ + link_debounce_deadline = vlib_time_now (vm) + 1e-3; + timeout = 1e-3; + break; + + case ~0: + /* No events found: timer expired. */ + if (vlib_time_now (vm) > link_debounce_deadline) + { + vec_foreach (xd, xm->devices) + { + ixge_regs_t *r = xd->regs; + u32 v = r->xge_mac.link_status; + uword is_up = (v & (1 << 30)) != 0; + + vnet_hw_interface_set_flags + (vnm, xd->vlib_hw_if_index, + is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + } + link_debounce_deadline = 1e70; + timeout = 30.0; + } + break; + + default: + ASSERT (0); + } + + if (event_data) + _vec_len (event_data) = 0; + + /* Query stats every 30 secs. */ + { + f64 now = vlib_time_now (vm); + if (now - xm->time_last_stats_update > 30) + { + xm->time_last_stats_update = now; + vec_foreach (xd, xm->devices) ixge_update_counters (xd); + } + } + } + + return 0; +} + +static vlib_node_registration_t ixge_process_node = { + .function = ixge_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "ixge-process", +}; + +clib_error_t * +ixge_init (vlib_main_t * vm) +{ + ixge_main_t *xm = &ixge_main; + clib_error_t *error; + + xm->vlib_main = vm; + memset (&xm->tx_descriptor_template, 0, + sizeof (xm->tx_descriptor_template)); + memset (&xm->tx_descriptor_template_mask, 0, + sizeof (xm->tx_descriptor_template_mask)); + xm->tx_descriptor_template.status0 = + (IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED | + IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED | + IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS); + xm->tx_descriptor_template_mask.status0 = 0xffff; + xm->tx_descriptor_template_mask.status1 = 0x00003fff; + + xm->tx_descriptor_template_mask.status0 &= + ~(IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET + | IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS); + xm->tx_descriptor_template_mask.status1 &= + ~(IXGE_TX_DESCRIPTOR_STATUS1_DONE); + + error = vlib_call_init_function (vm, pci_bus_init); + + return error; +} + +VLIB_INIT_FUNCTION (ixge_init); + + +static void +ixge_pci_intr_handler (vlib_pci_device_t * dev) +{ + ixge_main_t *xm = &ixge_main; + vlib_main_t *vm = xm->vlib_main; + + vlib_node_set_interrupt_pending (vm, ixge_input_node.index); + + /* Let node know which device is interrupting. 
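+ * runtime_data[0] doubles as a per-device pending bitmap: if devices 0
+ * and 2 interrupt before the node runs, the mask accumulates
+ * (1 << 0) | (1 << 2) = 0x5, ixge_input () services exactly those two
+ * devices via foreach_set_bit, then clears the mask.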
*/ + { + vlib_node_runtime_t *rt = + vlib_node_get_runtime (vm, ixge_input_node.index); + rt->runtime_data[0] |= 1 << dev->private_data; + } +} + +static clib_error_t * +ixge_pci_init (vlib_main_t * vm, vlib_pci_device_t * dev) +{ + ixge_main_t *xm = &ixge_main; + clib_error_t *error; + void *r; + ixge_device_t *xd; + + /* Device found: make sure we have dma memory. */ + if (unix_physmem_is_fake (vm)) + return clib_error_return (0, "no physical memory available"); + + error = vlib_pci_map_resource (dev, 0, &r); + if (error) + return error; + + vec_add2 (xm->devices, xd, 1); + + if (vec_len (xm->devices) == 1) + { + ixge_input_node.function = ixge_input_multiarch_select (); + } + + xd->pci_device = dev[0]; + xd->device_id = xd->pci_device.config0.header.device_id; + xd->regs = r; + xd->device_index = xd - xm->devices; + xd->pci_function = dev->bus_address.function; + xd->per_interface_next_index = ~0; + + + /* Chip found so enable node. */ + { + vlib_node_set_state (vm, ixge_input_node.index, + (IXGE_ALWAYS_POLL + ? VLIB_NODE_STATE_POLLING + : VLIB_NODE_STATE_INTERRUPT)); + + dev->private_data = xd->device_index; + } + + if (vec_len (xm->devices) == 1) + { + vlib_register_node (vm, &ixge_process_node); + xm->process_node_index = ixge_process_node.index; + } + + error = vlib_pci_bus_master_enable (dev); + + if (error) + return error; + + return vlib_pci_intr_enable (dev); +} + +/* *INDENT-OFF* */ +PCI_REGISTER_DEVICE (ixge_pci_device_registration,static) = { + .init_function = ixge_pci_init, + .interrupt_handler = ixge_pci_intr_handler, + .supported_devices = { +#define _(t,i) { .vendor_id = PCI_VENDOR_ID_INTEL, .device_id = i, }, + foreach_ixge_pci_device_id +#undef _ + { 0 }, + }, +}; +/* *INDENT-ON* */ + +void +ixge_set_next_node (ixge_rx_next_t next, char *name) +{ + vlib_node_registration_t *r = &ixge_input_node; + + switch (next) + { + case IXGE_RX_NEXT_IP4_INPUT: + case IXGE_RX_NEXT_IP6_INPUT: + case IXGE_RX_NEXT_ETHERNET_INPUT: + r->next_nodes[next] = name; + break; + + default: + clib_warning ("%s: illegal next %d\n", __FUNCTION__, next); + break; + } +} +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/nic/ixge.h b/src/vnet/devices/nic/ixge.h new file mode 100644 index 00000000..a8e652dc --- /dev/null +++ b/src/vnet/devices/nic/ixge.h @@ -0,0 +1,1293 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_ixge_h +#define included_ixge_h + +#include +#include +#include +#include +#include +#include + +typedef volatile struct +{ + /* [31:7] 128 byte aligned. 
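+ * This struct is a memory-mapped register overlay, so member offsets
+ * must match the datasheet exactly; the CLIB_PAD_FROM_TO entries fill
+ * the gaps.  One way to check such a layout at compile time (a C11
+ * sketch, not part of this patch):
+ *
+ *   _Static_assert (offsetof (ixge_dma_regs_t, control) == 0x28,
+ *                   "dma control register offset");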
*/ + u32 descriptor_address[2]; + u32 n_descriptor_bytes; + + /* [5] rx/tx descriptor dca enable + [6] rx packet head dca enable + [7] rx packet tail dca enable + [9] rx/tx descriptor relaxed order + [11] rx/tx descriptor write back relaxed order + [13] rx/tx data write/read relaxed order + [15] rx head data write relaxed order + [31:24] apic id for cpu's cache. */ + u32 dca_control; + + u32 head_index; + + /* [4:0] tail buffer size (in 1k byte units) + [13:8] head buffer size (in 64 byte units) + [24:22] lo free descriptors threshold (units of 64 descriptors) + [27:25] descriptor type 0 = legacy, 1 = advanced one buffer (e.g. tail), + 2 = advanced header splitting (head + tail), 5 = advanced header + splitting (head only). + [28] drop if no descriptors available. */ + u32 rx_split_control; + + u32 tail_index; + CLIB_PAD_FROM_TO (0x1c, 0x28); + + /* [7:0] rx/tx prefetch threshold + [15:8] rx/tx host threshold + [24:16] rx/tx write back threshold + [25] rx/tx enable + [26] tx descriptor writeback flush + [30] rx strip vlan enable */ + u32 control; + + u32 rx_coallesce_control; + + union + { + struct + { + /* packets bytes lo hi */ + u32 stats[3]; + + u32 unused; + } rx; + + struct + { + u32 unused[2]; + + /* [0] enables head write back. */ + u32 head_index_write_back_address[2]; + } tx; + }; +} ixge_dma_regs_t; + +/* Only advanced descriptors are supported. */ +typedef struct +{ + u64 tail_address; + u64 head_address; +} ixge_rx_to_hw_descriptor_t; + +typedef struct +{ + u32 status[3]; + u16 n_packet_bytes_this_descriptor; + u16 vlan_tag; +} ixge_rx_from_hw_descriptor_t; + +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2 (1 << (4 + 11)) +/* Valid if not layer2. */ +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4 (1 << (4 + 0)) +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT (1 << (4 + 1)) +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6 (1 << (4 + 2)) +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT (1 << (4 + 3)) +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP (1 << (4 + 4)) +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP (1 << (4 + 5)) +#define IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET(s) (((s) >> 21) & 0x3ff) + +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE (1 << (0 + 0)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET (1 << (0 + 1)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN (1 << (0 + 3)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED (1 << (0 + 4)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED (1 << (0 + 5)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED (1 << (0 + 6)) +#define IXGE_RX_DESCRIPTOR_STATUS2_NOT_UNICAST (1 << (0 + 7)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_DOUBLE_VLAN (1 << (0 + 9)) +#define IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR (1 << (0 + 10)) +#define IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR (1 << (20 + 9)) +#define IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR (1 << (20 + 10)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR (1 << (20 + 11)) + +/* For layer2 packets stats0 bottom 3 bits give ether type index from filter. 
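+
+   Decoding sketch for the status[2] bits above (illustrative value):
+   a word of 0x3 has IS_OWNED_BY_SOFTWARE (bit 0) and IS_END_OF_PACKET
+   (bit 1) set -- a complete single-descriptor packet: */
+
+static inline int
+rx_status2_is_complete_packet_sketch (u32 s2)
+{
+  /* illustrative helper, not part of this patch */
+  return (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE)
+    && (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET);
+}
+
+/*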
*/ +#define IXGE_RX_DESCRIPTOR_STATUS0_LAYER2_ETHERNET_TYPE(s) ((s) & 7) + +typedef struct +{ + u64 buffer_address; + u16 n_bytes_this_buffer; + u16 status0; + u32 status1; +#define IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED (3 << 4) +#define IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED (1 << (8 + 5)) +#define IXGE_TX_DESCRIPTOR_STATUS0_LOG2_REPORT_STATUS (8 + 3) +#define IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS (1 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_REPORT_STATUS) +#define IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS (1 << (8 + 1)) +#define IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET (8 + 0) +#define IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET (1 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET) +#define IXGE_TX_DESCRIPTOR_STATUS1_DONE (1 << 0) +#define IXGE_TX_DESCRIPTOR_STATUS1_CONTEXT(i) (/* valid */ (1 << 7) | ((i) << 4)) +#define IXGE_TX_DESCRIPTOR_STATUS1_IPSEC_OFFLOAD (1 << (8 + 2)) +#define IXGE_TX_DESCRIPTOR_STATUS1_INSERT_TCP_UDP_CHECKSUM (1 << (8 + 1)) +#define IXGE_TX_DESCRIPTOR_STATUS1_INSERT_IP4_CHECKSUM (1 << (8 + 0)) +#define IXGE_TX_DESCRIPTOR_STATUS0_N_BYTES_THIS_BUFFER(l) ((l) << 0) +#define IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET(l) ((l) << 14) +} ixge_tx_descriptor_t; + +typedef struct +{ + struct + { + u8 checksum_start_offset; + u8 checksum_insert_offset; + u16 checksum_end_offset; + } ip, tcp; + u32 status0; + + u8 status1; + + /* Byte offset after UDP/TCP header. */ + u8 payload_offset; + + u16 max_tcp_segment_size; +} __attribute__ ((packed)) ixge_tx_context_descriptor_t; + +typedef union +{ + ixge_rx_to_hw_descriptor_t rx_to_hw; + ixge_rx_from_hw_descriptor_t rx_from_hw; + ixge_tx_descriptor_t tx; + u32x4 as_u32x4; +} ixge_descriptor_t; + +typedef volatile struct +{ + /* [2] pcie master disable + [3] mac reset + [26] global device reset */ + u32 control; + u32 control_alias; + /* [3:2] device id (0 or 1 for dual port chips) + [7] link is up + [17:10] num vfs + [18] io active + [19] pcie master enable status */ + u32 status_read_only; + CLIB_PAD_FROM_TO (0xc, 0x18); + /* [14] pf reset done + [17] relaxed ordering disable + [26] extended vlan enable + [28] driver loaded */ + u32 extended_control; + CLIB_PAD_FROM_TO (0x1c, 0x20); + + /* software definable pins. + sdp_data [7:0] + sdp_is_output [15:8] + sdp_is_native [23:16] + sdp_function [31:24]. + */ + u32 sdp_control; + CLIB_PAD_FROM_TO (0x24, 0x28); + + /* [0] i2c clock in + [1] i2c clock out + [2] i2c data in + [3] i2c data out */ + u32 i2c_control; + CLIB_PAD_FROM_TO (0x2c, 0x4c); + u32 tcp_timer; + + CLIB_PAD_FROM_TO (0x50, 0x200); + + u32 led_control; + + CLIB_PAD_FROM_TO (0x204, 0x600); + u32 core_spare; + CLIB_PAD_FROM_TO (0x604, 0x700); + + struct + { + u32 vflr_events_clear[4]; + u32 mailbox_interrupt_status[4]; + u32 mailbox_interrupt_enable[4]; + CLIB_PAD_FROM_TO (0x730, 0x800); + } pf_foo; + + struct + { + u32 status_write_1_to_clear; + CLIB_PAD_FROM_TO (0x804, 0x808); + u32 status_write_1_to_set; + CLIB_PAD_FROM_TO (0x80c, 0x810); + u32 status_auto_clear_enable; + CLIB_PAD_FROM_TO (0x814, 0x820); + + /* [11:3] minimum inter-interrupt interval + (2e-6 units; 20e-6 units for fast ethernet). + [15] low-latency interrupt moderation enable + [20:16] low-latency interrupt credit + [27:21] interval counter + [31] write disable for credit and counter (write only). 
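+ * Worked example of the 2e-6 s units, assuming the init.c defaults
+ * above (4 * VLIB_FRAME_SIZE = 1024 rx descriptors with a 256-entry
+ * frame): 10 Gb/s of minimum-size frames is 10e9 / (8 * (64 + 20))
+ * ~= 14.9e6 packets/sec, so an interrupt every 3/4 ring is
+ * 768 / 14.9e6 ~= 51.6e-6 s, i.e. a programmed interval of roughly
+ * 51.6e-6 / 2e-6 ~= 26 units.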
*/ + u32 throttle0[24]; + + u32 enable_write_1_to_set; + CLIB_PAD_FROM_TO (0x884, 0x888); + u32 enable_write_1_to_clear; + CLIB_PAD_FROM_TO (0x88c, 0x890); + u32 enable_auto_clear; + u32 msi_to_eitr_select; + /* [3:0] spd 0-3 interrupt detection enable + [4] msi-x enable + [5] other clear disable (makes other bits in status not clear on read) + etc. */ + u32 control; + CLIB_PAD_FROM_TO (0x89c, 0x900); + + /* Defines interrupt mapping for 128 rx + 128 tx queues. + 64 x 4 8 bit entries. + For register [i]: + [5:0] bit in interrupt status for rx queue 2*i + 0 + [7] valid bit + [13:8] bit for tx queue 2*i + 0 + [15] valid bit + similar for rx 2*i + 1 and tx 2*i + 1. */ + u32 queue_mapping[64]; + + /* tcp timer [7:0] and other interrupts [15:8] */ + u32 misc_mapping; + CLIB_PAD_FROM_TO (0xa04, 0xa90); + + /* 64 interrupts determined by mappings. */ + u32 status1_write_1_to_clear[4]; + u32 enable1_write_1_to_set[4]; + u32 enable1_write_1_to_clear[4]; + CLIB_PAD_FROM_TO (0xac0, 0xad0); + u32 status1_enable_auto_clear[4]; + CLIB_PAD_FROM_TO (0xae0, 0x1000); + } interrupt; + + ixge_dma_regs_t rx_dma0[64]; + + CLIB_PAD_FROM_TO (0x2000, 0x2140); + u32 dcb_rx_packet_plane_t4_config[8]; + u32 dcb_rx_packet_plane_t4_status[8]; + CLIB_PAD_FROM_TO (0x2180, 0x2300); + + /* reg i defines mapping for 4 rx queues starting at 4*i + 0. */ + u32 rx_queue_stats_mapping[32]; + u32 rx_queue_stats_control; + + CLIB_PAD_FROM_TO (0x2384, 0x2410); + u32 fc_user_descriptor_ptr[2]; + u32 fc_buffer_control; + CLIB_PAD_FROM_TO (0x241c, 0x2420); + u32 fc_rx_dma; + CLIB_PAD_FROM_TO (0x2424, 0x2430); + u32 dcb_packet_plane_control; + CLIB_PAD_FROM_TO (0x2434, 0x2f00); + + u32 rx_dma_control; + u32 pf_queue_drop_enable; + CLIB_PAD_FROM_TO (0x2f08, 0x2f20); + u32 rx_dma_descriptor_cache_config; + CLIB_PAD_FROM_TO (0x2f24, 0x3000); + + /* 1 bit. */ + u32 rx_enable; + CLIB_PAD_FROM_TO (0x3004, 0x3008); + /* [15:0] ether type (little endian) + [31:16] opcode (big endian) */ + u32 flow_control_control; + CLIB_PAD_FROM_TO (0x300c, 0x3020); + /* 3 bit traffic class for each of 8 priorities. */ + u32 rx_priority_to_traffic_class; + CLIB_PAD_FROM_TO (0x3024, 0x3028); + u32 rx_coallesce_data_buffer_control; + CLIB_PAD_FROM_TO (0x302c, 0x3190); + u32 rx_packet_buffer_flush_detect; + CLIB_PAD_FROM_TO (0x3194, 0x3200); + u32 flow_control_tx_timers[4]; /* 2 timer values */ + CLIB_PAD_FROM_TO (0x3210, 0x3220); + u32 flow_control_rx_threshold_lo[8]; + CLIB_PAD_FROM_TO (0x3240, 0x3260); + u32 flow_control_rx_threshold_hi[8]; + CLIB_PAD_FROM_TO (0x3280, 0x32a0); + u32 flow_control_refresh_threshold; + CLIB_PAD_FROM_TO (0x32a4, 0x3c00); + /* For each of 8 traffic classes (units of bytes). 
*/ + u32 rx_packet_buffer_size[8]; + CLIB_PAD_FROM_TO (0x3c20, 0x3d00); + u32 flow_control_config; + CLIB_PAD_FROM_TO (0x3d04, 0x4200); + + struct + { + u32 pcs_config; + CLIB_PAD_FROM_TO (0x4204, 0x4208); + u32 link_control; + u32 link_status; + u32 pcs_debug[2]; + u32 auto_negotiation; + u32 link_partner_ability; + u32 auto_negotiation_tx_next_page; + u32 auto_negotiation_link_partner_next_page; + CLIB_PAD_FROM_TO (0x4228, 0x4240); + } gige_mac; + + struct + { + /* [0] tx crc enable + [2] enable frames up to max frame size register [31:16] + [10] pad frames < 64 bytes if specified by user + [15] loopback enable + [16] mdc hi speed + [17] turn off mdc between mdio packets */ + u32 control; + + /* [5] rx symbol error (all bits clear on read) + [6] rx illegal symbol + [7] rx idle error + [8] rx local fault + [9] rx remote fault */ + u32 status; + + u32 pause_and_pace_control; + CLIB_PAD_FROM_TO (0x424c, 0x425c); + u32 phy_command; + u32 phy_data; + CLIB_PAD_FROM_TO (0x4264, 0x4268); + + /* [31:16] max frame size in bytes. */ + u32 rx_max_frame_size; + CLIB_PAD_FROM_TO (0x426c, 0x4288); + + /* [0] + [2] pcs receive link up? (latch lo) + [7] local fault + [1] + [0] pcs 10g base r capable + [1] pcs 10g base x capable + [2] pcs 10g base w capable + [10] rx local fault + [11] tx local fault + [15:14] 2 => device present at this address (else not present) */ + u32 xgxs_status[2]; + + u32 base_x_pcs_status; + + /* [0] pass unrecognized flow control frames + [1] discard pause frames + [2] rx priority flow control enable (only in dcb mode) + [3] rx flow control enable. */ + u32 flow_control; + + /* [3:0] tx lanes change polarity + [7:4] rx lanes change polarity + [11:8] swizzle tx lanes + [15:12] swizzle rx lanes + 4 x 2 bit tx lane swap + 4 x 2 bit rx lane swap. */ + u32 serdes_control; + + u32 fifo_control; + + /* [0] force link up + [1] autoneg ack2 bit to transmit + [6:2] autoneg selector field to transmit + [8:7] 10g pma/pmd type 0 => xaui, 1 kx4, 2 cx4 + [9] 1g pma/pmd type 0 => sfi, 1 => kx/bx + [10] disable 10g on without main power + [11] restart autoneg on transition to dx power state + [12] restart autoneg + [15:13] link mode: + 0 => 1g no autoneg + 1 => 10g kx4 parallel link no autoneg + 2 => 1g bx autoneg + 3 => 10g sfi serdes + 4 => kx4/kx/kr + 5 => xgmii 1g/100m + 6 => kx4/kx/kr 1g an + 7 kx4/kx/kr sgmii. + [16] kr support + [17] fec requested + [18] fec ability + etc. */ + u32 auto_negotiation_control; + + /* [0] signal detect 1g/100m + [1] fec signal detect + [2] 10g serial pcs fec block lock + [3] 10g serial high error rate + [4] 10g serial pcs block lock + [5] kx/kx4/kr autoneg next page received + [6] kx/kx4/kr backplane autoneg next page received + [7] link status clear to read + [11:8] 10g signal detect (4 lanes) (for serial just lane 0) + [12] 10g serial signal detect + [16:13] 10g parallel lane sync status + [17] 10g parallel align status + [18] 1g sync status + [19] kx/kx4/kr backplane autoneg is idle + [20] 1g autoneg enabled + [21] 1g pcs enabled for sgmii + [22] 10g xgxs enabled + [23] 10g serial fec enabled (forward error detection) + [24] 10g kr pcs enabled + [25] sgmii enabled + [27:26] mac link mode + 0 => 1g + 1 => 10g parallel + 2 => 10g serial + 3 => autoneg + [29:28] link speed + 1 => 100m + 2 => 1g + 3 => 10g + [30] link is up + [31] kx/kx4/kr backplane autoneg completed successfully. 
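+ * A worked decode, matching format_ixge_link_status () in ixge.c
+ * above: for v = 0x78000000, bit 30 is set so the link is up,
+ * (v >> 26) & 3 = 2 selects mode "10g serial", and (v >> 28) & 3 = 3
+ * selects speed "10g".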
*/ + u32 link_status; + + /* [17:16] pma/pmd for 10g serial + 0 => kr, 2 => sfi + [18] disable dme pages */ + u32 auto_negotiation_control2; + + CLIB_PAD_FROM_TO (0x42ac, 0x42b0); + u32 link_partner_ability[2]; + CLIB_PAD_FROM_TO (0x42b8, 0x42d0); + u32 manageability_control; + u32 link_partner_next_page[2]; + CLIB_PAD_FROM_TO (0x42dc, 0x42e0); + u32 kr_pcs_control; + u32 kr_pcs_status; + u32 fec_status[2]; + CLIB_PAD_FROM_TO (0x42f0, 0x4314); + u32 sgmii_control; + CLIB_PAD_FROM_TO (0x4318, 0x4324); + u32 link_status2; + CLIB_PAD_FROM_TO (0x4328, 0x4900); + } xge_mac; + + u32 tx_dcb_control; + u32 tx_dcb_descriptor_plane_queue_select; + u32 tx_dcb_descriptor_plane_t1_config; + u32 tx_dcb_descriptor_plane_t1_status; + CLIB_PAD_FROM_TO (0x4910, 0x4950); + + /* For each TC in units of 1k bytes. */ + u32 tx_packet_buffer_thresholds[8]; + CLIB_PAD_FROM_TO (0x4970, 0x4980); + struct + { + u32 mmw; + u32 config; + u32 status; + u32 rate_drift; + } dcb_tx_rate_scheduler; + CLIB_PAD_FROM_TO (0x4990, 0x4a80); + u32 tx_dma_control; + CLIB_PAD_FROM_TO (0x4a84, 0x4a88); + u32 tx_dma_tcp_flags_control[2]; + CLIB_PAD_FROM_TO (0x4a90, 0x4b00); + u32 pf_mailbox[64]; + CLIB_PAD_FROM_TO (0x4c00, 0x5000); + + /* RX */ + u32 checksum_control; + CLIB_PAD_FROM_TO (0x5004, 0x5008); + u32 rx_filter_control; + CLIB_PAD_FROM_TO (0x500c, 0x5010); + u32 management_vlan_tag[8]; + u32 management_udp_tcp_ports[8]; + CLIB_PAD_FROM_TO (0x5050, 0x5078); + /* little endian. */ + u32 extended_vlan_ether_type; + CLIB_PAD_FROM_TO (0x507c, 0x5080); + /* [1] store/dma bad packets + [8] accept all multicast + [9] accept all unicast + [10] accept all broadcast. */ + u32 filter_control; + CLIB_PAD_FROM_TO (0x5084, 0x5088); + /* [15:0] vlan ethernet type (0x8100) little endian + [28] cfi bit expected + [29] drop packets with unexpected cfi bit + [30] vlan filter enable. */ + u32 vlan_control; + CLIB_PAD_FROM_TO (0x508c, 0x5090); + /* [1:0] hi bit of ethernet address for 12 bit index into multicast table + 0 => 47, 1 => 46, 2 => 45, 3 => 43. + [2] enable multicast filter + */ + u32 multicast_control; + CLIB_PAD_FROM_TO (0x5094, 0x5100); + u32 fcoe_rx_control; + CLIB_PAD_FROM_TO (0x5104, 0x5108); + u32 fc_flt_context; + CLIB_PAD_FROM_TO (0x510c, 0x5110); + u32 fc_filter_control; + CLIB_PAD_FROM_TO (0x5114, 0x5120); + u32 rx_message_type_lo; + CLIB_PAD_FROM_TO (0x5124, 0x5128); + /* [15:0] ethernet type (little endian) + [18:16] match pri in vlan tag + [19] priority match enable + [25:20] virtualization pool + [26] pool enable + [27] is fcoe + [30] ieee 1588 timestamp enable + [31] filter enable. + (See ethernet_type_queue_select.) */ + u32 ethernet_type_queue_filter[8]; + CLIB_PAD_FROM_TO (0x5148, 0x5160); + /* [7:0] l2 ethernet type and + [15:8] l2 ethernet type or */ + u32 management_decision_filters1[8]; + u32 vf_vm_tx_switch_loopback_enable[2]; + u32 rx_time_sync_control; + CLIB_PAD_FROM_TO (0x518c, 0x5190); + u32 management_ethernet_type_filters[4]; + u32 rx_timestamp_attributes_lo; + u32 rx_timestamp_hi; + u32 rx_timestamp_attributes_hi; + CLIB_PAD_FROM_TO (0x51ac, 0x51b0); + u32 pf_virtual_control; + CLIB_PAD_FROM_TO (0x51b4, 0x51d8); + u32 fc_offset_parameter; + CLIB_PAD_FROM_TO (0x51dc, 0x51e0); + u32 vf_rx_enable[2]; + u32 rx_timestamp_lo; + CLIB_PAD_FROM_TO (0x51ec, 0x5200); + /* 12 bits determined by multicast_control + lookup bits in this vector. */ + u32 multicast_enable[128]; + + /* [0] ethernet address [31:0] + [1] [15:0] ethernet address [47:32] + [31] valid bit. + Index 0 is read from eeprom after reset.
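+ + Illustrative packing, assuming the usual Intel ral/rah byte order: the address 00:11:22:33:44:55 would be written as + rx_ethernet_address0[i][0] = 0x33221100; + rx_ethernet_address0[i][1] = (1 << 31) | 0x5544;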
*/ + u32 rx_ethernet_address0[16][2]; + + CLIB_PAD_FROM_TO (0x5480, 0x5800); + u32 wake_up_control; + CLIB_PAD_FROM_TO (0x5804, 0x5808); + u32 wake_up_filter_control; + CLIB_PAD_FROM_TO (0x580c, 0x5818); + u32 multiple_rx_queue_command_82598; + CLIB_PAD_FROM_TO (0x581c, 0x5820); + u32 management_control; + u32 management_filter_control; + CLIB_PAD_FROM_TO (0x5828, 0x5838); + u32 wake_up_ip4_address_valid; + CLIB_PAD_FROM_TO (0x583c, 0x5840); + u32 wake_up_ip4_address_table[4]; + u32 management_control_to_host; + CLIB_PAD_FROM_TO (0x5854, 0x5880); + u32 wake_up_ip6_address_table[4]; + + /* unicast_and broadcast_and vlan_and ip_address_and + etc. */ + u32 management_decision_filters[8]; + + u32 management_ip4_or_ip6_address_filters[4][4]; + CLIB_PAD_FROM_TO (0x58f0, 0x5900); + u32 wake_up_packet_length; + CLIB_PAD_FROM_TO (0x5904, 0x5910); + u32 management_ethernet_address_filters[4][2]; + CLIB_PAD_FROM_TO (0x5930, 0x5a00); + u32 wake_up_packet_memory[32]; + CLIB_PAD_FROM_TO (0x5a80, 0x5c00); + u32 redirection_table_82598[32]; + u32 rss_random_keys_82598[10]; + CLIB_PAD_FROM_TO (0x5ca8, 0x6000); + + ixge_dma_regs_t tx_dma[128]; + + u32 pf_vm_vlan_insert[64]; + u32 tx_dma_tcp_max_alloc_size_requests; + CLIB_PAD_FROM_TO (0x8104, 0x8110); + u32 vf_tx_enable[2]; + CLIB_PAD_FROM_TO (0x8118, 0x8120); + /* [0] dcb mode enable + [1] virtualization mode enable + [3:2] number of tcs/qs per pool. */ + u32 multiple_tx_queues_command; + CLIB_PAD_FROM_TO (0x8124, 0x8200); + u32 pf_vf_anti_spoof[8]; + u32 pf_dma_tx_switch_control; + CLIB_PAD_FROM_TO (0x8224, 0x82e0); + u32 tx_strict_low_latency_queues[4]; + CLIB_PAD_FROM_TO (0x82f0, 0x8600); + u32 tx_queue_stats_mapping_82599[32]; + u32 tx_queue_packet_counts[32]; + u32 tx_queue_byte_counts[32][2]; + + struct + { + u32 control; + u32 status; + u32 buffer_almost_full; + CLIB_PAD_FROM_TO (0x880c, 0x8810); + u32 buffer_min_ifg; + CLIB_PAD_FROM_TO (0x8814, 0x8900); + } tx_security; + + struct + { + u32 index; + u32 salt; + u32 key[4]; + CLIB_PAD_FROM_TO (0x8918, 0x8a00); + } tx_ipsec; + + struct + { + u32 capabilities; + u32 control; + u32 tx_sci[2]; + u32 sa; + u32 sa_pn[2]; + u32 key[2][4]; + /* untagged packets, encrypted packets, protected packets, + encrypted bytes, protected bytes */ + u32 stats[5]; + CLIB_PAD_FROM_TO (0x8a50, 0x8c00); + } tx_link_security; + + struct + { + u32 control; + u32 timestamp_value[2]; + u32 system_time[2]; + u32 increment_attributes; + u32 time_adjustment_offset[2]; + u32 aux_control; + u32 target_time[2][2]; + CLIB_PAD_FROM_TO (0x8c34, 0x8c3c); + u32 aux_time_stamp[2][2]; + CLIB_PAD_FROM_TO (0x8c4c, 0x8d00); + } tx_timesync; + + struct + { + u32 control; + u32 status; + CLIB_PAD_FROM_TO (0x8d08, 0x8e00); + } rx_security; + + struct + { + u32 index; + u32 ip_address[4]; + u32 spi; + u32 ip_index; + u32 key[4]; + u32 salt; + u32 mode; + CLIB_PAD_FROM_TO (0x8e34, 0x8f00); + } rx_ipsec; + + struct + { + u32 capabilities; + u32 control; + u32 sci[2]; + u32 sa[2]; + u32 sa_pn[2]; + u32 key[2][4]; + /* see datasheet */ + u32 stats[17]; + CLIB_PAD_FROM_TO (0x8f84, 0x9000); + } rx_link_security; + + /* 4 wake up, 2 management, 2 wake up. */ + u32 flexible_filters[8][16][4]; + CLIB_PAD_FROM_TO (0x9800, 0xa000); + + /* 4096 bits. */ + u32 vlan_filter[128]; + + /* [0] ethernet address [31:0] + [1] [15:0] ethernet address [47:32] + [31] valid bit. + Index 0 is read from eeprom after reset. */ + u32 rx_ethernet_address1[128][2]; + + /* select one of 64 pools for each rx address. 
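+ Presumably a 64 bit pool-enable bitmap per address: word [0] covers pools 31:0 and word [1] pools 63:32, so enabling pool p for address a would set bit (p & 31) of rx_ethernet_address_pool_select[a][p >> 5].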
*/ + u32 rx_ethernet_address_pool_select[128][2]; + CLIB_PAD_FROM_TO (0xaa00, 0xc800); + u32 tx_priority_to_traffic_class; + CLIB_PAD_FROM_TO (0xc804, 0xcc00); + + /* In bytes units of 1k. Total packet buffer is 160k. */ + u32 tx_packet_buffer_size[8]; + + CLIB_PAD_FROM_TO (0xcc20, 0xcd10); + u32 tx_manageability_tc_mapping; + CLIB_PAD_FROM_TO (0xcd14, 0xcd20); + u32 dcb_tx_packet_plane_t2_config[8]; + u32 dcb_tx_packet_plane_t2_status[8]; + CLIB_PAD_FROM_TO (0xcd60, 0xce00); + + u32 tx_flow_control_status; + CLIB_PAD_FROM_TO (0xce04, 0xd000); + + ixge_dma_regs_t rx_dma1[64]; + + struct + { + /* Bigendian ip4 src/dst address. */ + u32 src_address[128]; + u32 dst_address[128]; + + /* TCP/UDP ports [15:0] src [31:16] dst; bigendian. */ + u32 tcp_udp_port[128]; + + /* [1:0] protocol tcp, udp, sctp, other + [4:2] match priority (highest wins) + [13:8] pool + [25] src address match disable + [26] dst address match disable + [27] src port match disable + [28] dst port match disable + [29] protocol match disable + [30] pool match disable + [31] enable. */ + u32 control[128]; + + /* [12] size bypass + [19:13] must be 0x80 + [20] low-latency interrupt + [27:21] rx queue. */ + u32 interrupt[128]; + } ip4_filters; + + CLIB_PAD_FROM_TO (0xea00, 0xeb00); + /* 4 bit rss output index indexed by 7 bit hash. + 128 8 bit fields = 32 registers. */ + u32 redirection_table_82599[32]; + + u32 rss_random_key_82599[10]; + CLIB_PAD_FROM_TO (0xeba8, 0xec00); + /* [15:0] reserved + [22:16] rx queue index + [29] low-latency interrupt on match + [31] enable */ + u32 ethernet_type_queue_select[8]; + CLIB_PAD_FROM_TO (0xec20, 0xec30); + u32 syn_packet_queue_filter; + CLIB_PAD_FROM_TO (0xec34, 0xec60); + u32 immediate_interrupt_rx_vlan_priority; + CLIB_PAD_FROM_TO (0xec64, 0xec70); + u32 rss_queues_per_traffic_class; + CLIB_PAD_FROM_TO (0xec74, 0xec90); + u32 lli_size_threshold; + CLIB_PAD_FROM_TO (0xec94, 0xed00); + + struct + { + u32 control; + CLIB_PAD_FROM_TO (0xed04, 0xed10); + u32 table[8]; + CLIB_PAD_FROM_TO (0xed30, 0xee00); + } fcoe_redirection; + + struct + { + /* [1:0] packet buffer allocation 0 => disabled, else 64k*2^(f-1) + [3] packet buffer initialization done + [4] prefetch match mode + [5] report status in rss field of rx descriptors + [7] report status always + [14:8] drop queue + [20:16] flex 2 byte packet offset (units of 2 bytes) + [27:24] max linked list length + [31:28] full threshold. */ + u32 control; + CLIB_PAD_FROM_TO (0xee04, 0xee0c); + + u32 data[8]; + + /* [1:0] 0 => no action, 1 => add, 2 => remove, 3 => query. + [2] valid filter found by query command + [3] filter update override + [4] ip6 address table + [6:5] l4 protocol reserved, udp, tcp, sctp + [7] is ip6 + [8] clear head/tail + [9] packet drop action + [10] matched packet generates low-latency interrupt + [11] last in linked list + [12] collision + [15] rx queue enable + [22:16] rx queue + [29:24] pool. */ + u32 command; + + CLIB_PAD_FROM_TO (0xee30, 0xee3c); + /* ip4 dst/src address, tcp ports, udp ports. + set bits mean bit is ignored. */ + u32 ip4_masks[4]; + u32 filter_length; + u32 usage_stats; + u32 failed_usage_stats; + u32 filters_match_stats; + u32 filters_miss_stats; + CLIB_PAD_FROM_TO (0xee60, 0xee68); + /* Lookup, signature. */ + u32 hash_keys[2]; + /* [15:0] ip6 src address 1 bit per byte + [31:16] ip6 dst address. */ + u32 ip6_mask; + /* [0] vlan id + [1] vlan priority + [2] pool + [3] ip protocol + [4] flex + [5] dst ip6.
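+ As with ip4_masks above, a set bit presumably means the corresponding field is ignored when flow director entries are matched.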
*/ + u32 other_mask; + CLIB_PAD_FROM_TO (0xee78, 0xf000); + } flow_director; + + struct + { + u32 l2_control[64]; + u32 vlan_pool_filter[64]; + u32 vlan_pool_filter_bitmap[128]; + u32 dst_ethernet_address[128]; + u32 mirror_rule[4]; + u32 mirror_rule_vlan[8]; + u32 mirror_rule_pool[8]; + CLIB_PAD_FROM_TO (0xf650, 0x10010); + } pf_bar; + + u32 eeprom_flash_control; + /* [0] start + [1] done + [15:2] address + [31:16] read data. */ + u32 eeprom_read; + CLIB_PAD_FROM_TO (0x10018, 0x1001c); + u32 flash_access; + CLIB_PAD_FROM_TO (0x10020, 0x10114); + u32 flash_data; + u32 flash_control; + u32 flash_read_data; + CLIB_PAD_FROM_TO (0x10120, 0x1013c); + u32 flash_opcode; + u32 software_semaphore; + CLIB_PAD_FROM_TO (0x10144, 0x10148); + u32 firmware_semaphore; + CLIB_PAD_FROM_TO (0x1014c, 0x10160); + u32 software_firmware_sync; + CLIB_PAD_FROM_TO (0x10164, 0x10200); + u32 general_rx_control; + CLIB_PAD_FROM_TO (0x10204, 0x11000); + + struct + { + u32 control; + CLIB_PAD_FROM_TO (0x11004, 0x11010); + /* [3:0] enable counters + [7:4] leaky bucket counter mode + [29] reset + [30] stop + [31] start. */ + u32 counter_control; + /* [7:0],[15:8],[23:16],[31:24] event for counters 0-3. + event codes: + 0x0 bad tlp + 0x10 reqs that reached timeout + etc. */ + u32 counter_event; + CLIB_PAD_FROM_TO (0x11018, 0x11020); + u32 counters_clear_on_read[4]; + u32 counter_config[4]; + struct + { + u32 address; + u32 data; + } indirect_access; + CLIB_PAD_FROM_TO (0x11048, 0x11050); + u32 extended_control; + CLIB_PAD_FROM_TO (0x11054, 0x11064); + u32 mirrored_revision_id; + CLIB_PAD_FROM_TO (0x11068, 0x11070); + u32 dca_requester_id_information; + + /* [0] global disable + [4:1] mode: 0 => legacy, 1 => dca 1.0. */ + u32 dca_control; + CLIB_PAD_FROM_TO (0x11078, 0x110b0); + /* [0] pci completion abort + [1] unsupported i/o address + [2] wrong byte enable + [3] pci timeout */ + u32 pcie_interrupt_status; + CLIB_PAD_FROM_TO (0x110b4, 0x110b8); + u32 pcie_interrupt_enable; + CLIB_PAD_FROM_TO (0x110bc, 0x110c0); + u32 msi_x_pba_clear[8]; + CLIB_PAD_FROM_TO (0x110e0, 0x12300); + } pcie; + + u32 interrupt_throttle1[128 - 24]; + CLIB_PAD_FROM_TO (0x124a0, 0x14f00); + + u32 core_analog_config; + CLIB_PAD_FROM_TO (0x14f04, 0x14f10); + u32 core_common_config; + CLIB_PAD_FROM_TO (0x14f14, 0x15f14); + + u32 link_sec_software_firmware_interface; +} ixge_regs_t; + +typedef union +{ + struct + { + /* Addresses bigendian. */ + union + { + struct + { + ip6_address_t src_address; + u32 unused[1]; + } ip6; + struct + { + u32 unused[3]; + ip4_address_t src_address, dst_address; + } ip4; + }; + + /* [15:0] src port (little endian). + [31:16] dst port. */ + u32 tcp_udp_ports; + + /* [15:0] vlan (cfi bit set to 0). + [31:16] flex bytes. bigendian. */ + u32 vlan_and_flex_word; + + /* [14:0] hash + [15] bucket valid + [31:16] signature (signature filters)/sw-index (perfect match). */ + u32 hash; + }; + + u32 as_u32[8]; +} ixge_flow_director_key_t; + +always_inline void +ixge_throttle_queue_interrupt (ixge_regs_t * r, + u32 queue_interrupt_index, + f64 inter_interrupt_interval_in_secs) +{ + volatile u32 *tr = + (queue_interrupt_index < ARRAY_LEN (r->interrupt.throttle0) + ? &r->interrupt.throttle0[queue_interrupt_index] + : &r->interrupt_throttle1[queue_interrupt_index - ARRAY_LEN (r->interrupt.throttle0)]); + ASSERT (queue_interrupt_index < 128); + u32 v; + i32 i, mask = (1 << 9) - 1; + + i = flt_round_nearest (inter_interrupt_interval_in_secs / 2e-6); + i = i < 1 ? 1 : i; + i = i >= mask ?
mask : i; + + v = tr[0]; + v &= ~(mask << 3); + v |= i << 3; + tr[0] = v; +} + +#define foreach_ixge_counter \ + _ (0x40d0, rx_total_packets) \ + _64 (0x40c0, rx_total_bytes) \ + _ (0x41b0, rx_good_packets_before_filtering) \ + _64 (0x41b4, rx_good_bytes_before_filtering) \ + _ (0x2f50, rx_dma_good_packets) \ + _64 (0x2f54, rx_dma_good_bytes) \ + _ (0x2f5c, rx_dma_duplicated_good_packets) \ + _64 (0x2f60, rx_dma_duplicated_good_bytes) \ + _ (0x2f68, rx_dma_good_loopback_packets) \ + _64 (0x2f6c, rx_dma_good_loopback_bytes) \ + _ (0x2f74, rx_dma_good_duplicated_loopback_packets) \ + _64 (0x2f78, rx_dma_good_duplicated_loopback_bytes) \ + _ (0x4074, rx_good_packets) \ + _64 (0x4088, rx_good_bytes) \ + _ (0x407c, rx_multicast_packets) \ + _ (0x4078, rx_broadcast_packets) \ + _ (0x405c, rx_64_byte_packets) \ + _ (0x4060, rx_65_127_byte_packets) \ + _ (0x4064, rx_128_255_byte_packets) \ + _ (0x4068, rx_256_511_byte_packets) \ + _ (0x406c, rx_512_1023_byte_packets) \ + _ (0x4070, rx_gt_1023_byte_packets) \ + _ (0x4000, rx_crc_errors) \ + _ (0x4120, rx_ip_checksum_errors) \ + _ (0x4004, rx_illegal_symbol_errors) \ + _ (0x4008, rx_error_symbol_errors) \ + _ (0x4034, rx_mac_local_faults) \ + _ (0x4038, rx_mac_remote_faults) \ + _ (0x4040, rx_length_errors) \ + _ (0x41a4, rx_xons) \ + _ (0x41a8, rx_xoffs) \ + _ (0x40a4, rx_undersize_packets) \ + _ (0x40a8, rx_fragments) \ + _ (0x40ac, rx_oversize_packets) \ + _ (0x40b0, rx_jabbers) \ + _ (0x40b4, rx_management_packets) \ + _ (0x40b8, rx_management_drops) \ + _ (0x3fa0, rx_missed_packets_pool_0) \ + _ (0x40d4, tx_total_packets) \ + _ (0x4080, tx_good_packets) \ + _64 (0x4090, tx_good_bytes) \ + _ (0x40f0, tx_multicast_packets) \ + _ (0x40f4, tx_broadcast_packets) \ + _ (0x87a0, tx_dma_good_packets) \ + _64 (0x87a4, tx_dma_good_bytes) \ + _ (0x40d8, tx_64_byte_packets) \ + _ (0x40dc, tx_65_127_byte_packets) \ + _ (0x40e0, tx_128_255_byte_packets) \ + _ (0x40e4, tx_256_511_byte_packets) \ + _ (0x40e8, tx_512_1023_byte_packets) \ + _ (0x40ec, tx_gt_1023_byte_packets) \ + _ (0x4010, tx_undersize_drops) \ + _ (0x8780, switch_security_violation_packets) \ + _ (0x5118, fc_crc_errors) \ + _ (0x241c, fc_rx_drops) \ + _ (0x2424, fc_last_error_count) \ + _ (0x2428, fcoe_rx_packets) \ + _ (0x242c, fcoe_rx_dwords) \ + _ (0x8784, fcoe_tx_packets) \ + _ (0x8788, fcoe_tx_dwords) \ + _ (0x1030, queue_0_rx_count) \ + _ (0x1430, queue_0_drop_count) \ + _ (0x1070, queue_1_rx_count) \ + _ (0x1470, queue_1_drop_count) \ + _ (0x10b0, queue_2_rx_count) \ + _ (0x14b0, queue_2_drop_count) \ + _ (0x10f0, queue_3_rx_count) \ + _ (0x14f0, queue_3_drop_count) \ + _ (0x1130, queue_4_rx_count) \ + _ (0x1530, queue_4_drop_count) \ + _ (0x1170, queue_5_rx_count) \ + _ (0x1570, queue_5_drop_count) \ + _ (0x11b0, queue_6_rx_count) \ + _ (0x15b0, queue_6_drop_count) \ + _ (0x11f0, queue_7_rx_count) \ + _ (0x15f0, queue_7_drop_count) \ + _ (0x1230, queue_8_rx_count) \ + _ (0x1630, queue_8_drop_count) \ + _ (0x1270, queue_9_rx_count) \ + _ (0x1670, queue_9_drop_count) + + + + +typedef enum +{ +#define _(a,f) IXGE_COUNTER_##f, +#define _64(a,f) _(a,f) + foreach_ixge_counter +#undef _ +#undef _64 + IXGE_N_COUNTER, +} ixge_counter_type_t; + +typedef struct +{ + u32 mdio_address; + + /* 32 bit ID read from ID registers. */ + u32 id; +} ixge_phy_t; + +typedef struct +{ + /* Cache aligned descriptors. */ + ixge_descriptor_t *descriptors; + + /* Number of descriptors in table. */ + u32 n_descriptors; + + /* Software head and tail pointers into descriptor ring.
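+ Presumed invariant (not spelled out in the original): for a power-of-2 ring, occupancy is (tail_index - head_index) & (n_descriptors - 1), with head advancing as the hardware completes descriptors.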
*/ + u32 head_index, tail_index; + + /* Index into dma_queues vector. */ + u32 queue_index; + + /* Buffer indices corresponding to each active descriptor. */ + u32 *descriptor_buffer_indices; + + union + { + struct + { + u32 *volatile head_index_write_back; + + u32 n_buffers_on_ring; + } tx; + + struct + { + /* Buffer indices to use to replenish each descriptor. */ + u32 *replenish_buffer_indices; + + vlib_node_runtime_t *node; + u32 next_index; + + u32 saved_start_of_packet_buffer_index; + + u32 saved_start_of_packet_next_index; + u32 saved_last_buffer_index; + + u32 is_start_of_packet; + + u32 n_descriptors_done_total; + + u32 n_descriptors_done_this_call; + + u32 n_bytes; + } rx; + }; +} ixge_dma_queue_t; + +#define foreach_ixge_pci_device_id \ + _ (82598, 0x10b6) \ + _ (82598_bx, 0x1508) \ + _ (82598af_dual_port, 0x10c6) \ + _ (82598af_single_port, 0x10c7) \ + _ (82598at, 0x10c8) \ + _ (82598at2, 0x150b) \ + _ (82598eb_sfp_lom, 0x10db) \ + _ (82598eb_cx4, 0x10dd) \ + _ (82598_cx4_dual_port, 0x10ec) \ + _ (82598_da_dual_port, 0x10f1) \ + _ (82598_sr_dual_port_em, 0x10e1) \ + _ (82598eb_xf_lr, 0x10f4) \ + _ (82599_kx4, 0x10f7) \ + _ (82599_kx4_mezz, 0x1514) \ + _ (82599_kr, 0x1517) \ + _ (82599_combo_backplane, 0x10f8) \ + _ (82599_cx4, 0x10f9) \ + _ (82599_sfp, 0x10fb) \ + _ (82599_backplane_fcoe, 0x152a) \ + _ (82599_sfp_fcoe, 0x1529) \ + _ (82599_sfp_em, 0x1507) \ + _ (82599_xaui_lom, 0x10fc) \ + _ (82599_t3_lom, 0x151c) \ + _ (x540t, 0x1528) + +typedef enum +{ +#define _(f,n) IXGE_##f = n, + foreach_ixge_pci_device_id +#undef _ +} ixge_pci_device_id_t; + +typedef struct +{ + /* registers */ + ixge_regs_t *regs; + + /* Specific next index when using dynamic redirection */ + u32 per_interface_next_index; + + /* PCI bus info. */ + vlib_pci_device_t pci_device; + + /* From PCI config space header. */ + ixge_pci_device_id_t device_id; + + u16 device_index; + + /* 0 or 1. */ + u16 pci_function; + + /* VLIB interface for this instance. */ + u32 vlib_hw_if_index, vlib_sw_if_index; + + ixge_dma_queue_t *dma_queues[VLIB_N_RX_TX]; + + /* Phy index (0 or 1) and address on MDI bus. */ + u32 phy_index; + ixge_phy_t phys[2]; + + /* Value of link_status register at last link change. */ + u32 link_status_at_last_link_change; + + i2c_bus_t i2c_bus; + sfp_eeprom_t sfp_eeprom; + + /* Counters. */ + u64 counters[IXGE_N_COUNTER], counters_last_clear[IXGE_N_COUNTER]; +} ixge_device_t; + +typedef struct +{ + vlib_main_t *vlib_main; + + /* Vector of devices. */ + ixge_device_t *devices; + + /* Descriptor ring sizes. */ + u32 n_descriptors[VLIB_N_RX_TX]; + + /* RX buffer size. Must be at least 1k; will be rounded to + next largest 1k size. */ + u32 n_bytes_in_rx_buffer; + + u32 n_descriptors_per_cache_line; + + u32 vlib_buffer_free_list_index; + + u32 process_node_index; + + /* Template and mask for initializing/validating TX descriptors. */ + ixge_tx_descriptor_t tx_descriptor_template, tx_descriptor_template_mask; + + /* Vector of buffers for which TX is done and can be freed. 
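+ (Presumably accumulated here so completed tx buffers can be handed back with one bulk vlib_buffer_free () call rather than freed one at a time.)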
*/ + u32 *tx_buffers_pending_free; + + u32 *rx_buffers_to_add; + + f64 time_last_stats_update; +} ixge_main_t; + +ixge_main_t ixge_main; +vnet_device_class_t ixge_device_class; + +typedef enum +{ + IXGE_RX_NEXT_IP4_INPUT, + IXGE_RX_NEXT_IP6_INPUT, + IXGE_RX_NEXT_ETHERNET_INPUT, + IXGE_RX_NEXT_DROP, + IXGE_RX_N_NEXT, +} ixge_rx_next_t; + +void ixge_set_next_node (ixge_rx_next_t, char *); + +#endif /* included_ixge_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/nic/sfp.c b/src/vnet/devices/nic/sfp.c new file mode 100644 index 00000000..9e9c008d --- /dev/null +++ b/src/vnet/devices/nic/sfp.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +static u8 * +format_space_terminated (u8 * s, va_list * args) +{ + u32 l = va_arg (*args, u32); + u8 *v = va_arg (*args, u8 *); + u8 *p; + + for (p = v + l - 1; p >= v && p[0] == ' '; p--) + ; + vec_add (s, v, clib_min (p - v + 1, l)); + return s; +} + +static u8 * +format_sfp_id (u8 * s, va_list * args) +{ + u32 id = va_arg (*args, u32); + char *t = 0; + switch (id) + { +#define _(f) case SFP_ID_##f: t = #f; break; + foreach_sfp_id +#undef _ + default: + return format (s, "unknown 0x%x", id); + } + return format (s, "%s", t); +} + +static u8 * +format_sfp_compatibility (u8 * s, va_list * args) +{ + u32 c = va_arg (*args, u32); + char *t = 0; + switch (c) + { +#define _(a,b,f) case SFP_COMPATIBILITY_##f: t = #f; break; + foreach_sfp_compatibility +#undef _ + default: + return format (s, "unknown 0x%x", c); + } + return format (s, "%s", t); +} + +u32 +sfp_is_comatible (sfp_eeprom_t * e, sfp_compatibility_t c) +{ + static struct + { + u8 byte, bit; + } t[] = + { +#define _(a,b,f) { .byte = a, .bit = b, }, + foreach_sfp_compatibility +#undef _ + }; + + ASSERT (c < ARRAY_LEN (t)); + return (e->compatibility[t[c].byte] & (1 << t[c].bit)) != 0; +} + +u8 * +format_sfp_eeprom (u8 * s, va_list * args) +{ + sfp_eeprom_t *e = va_arg (*args, sfp_eeprom_t *); + uword indent = format_get_indent (s); + int i; + + if (e->id != SFP_ID_sfp) + s = format (s, "id %U, ", format_sfp_id, e->id); + + s = format (s, "compatibility:"); + for (i = 0; i < SFP_N_COMPATIBILITY; i++) + if (sfp_is_comatible (e, i)) + s = format (s, " %U", format_sfp_compatibility, i); + + s = format (s, "\n%Uvendor: %U, part %U", + format_white_space, indent, + format_space_terminated, sizeof (e->vendor_name), + e->vendor_name, format_space_terminated, + sizeof (e->vendor_part_number), e->vendor_part_number); + s = + format (s, "\n%Urevision: %U, serial: %U, date code: %U", + format_white_space, indent, format_space_terminated, + sizeof (e->vendor_revision), e->vendor_revision, + format_space_terminated, sizeof (e->vendor_serial_number), + e->vendor_serial_number, format_space_terminated, + sizeof (e->vendor_date_code), e->vendor_date_code); + + return s; +} + +/* + * fd.io 
coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/nic/sfp.h b/src/vnet/devices/nic/sfp.h new file mode 100644 index 00000000..a1ac7997 --- /dev/null +++ b/src/vnet/devices/nic/sfp.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_optics_sfp_h +#define included_vnet_optics_sfp_h + +#include + +#define foreach_sfp_id \ + _ (unknown) \ + _ (gbic) \ + _ (on_motherboard) \ + _ (sfp) + +typedef enum +{ +#define _(f) SFP_ID_##f, + foreach_sfp_id +#undef _ +} sfp_id_t; + +typedef struct +{ + u8 id; + u8 extended_id; + u8 connector_type; + u8 compatibility[8]; + u8 encoding; + u8 nominal_bit_rate_100mbits_per_sec; + u8 reserved13; + u8 link_length[5]; + u8 reserved19; + u8 vendor_name[16]; + u8 reserved36; + u8 vendor_oui[3]; + u8 vendor_part_number[16]; + u8 vendor_revision[4]; + /* 16 bit value network byte order. */ + u8 laser_wavelength_in_nm[2]; + u8 reserved62; + u8 checksum_0_to_62; + + u8 options[2]; + u8 max_bit_rate_margin_percent; + u8 min_bit_rate_margin_percent; + u8 vendor_serial_number[16]; + u8 vendor_date_code[8]; + u8 reserved92[3]; + u8 checksum_63_to_94; + u8 vendor_specific[32]; + u8 reserved128[384]; + + /* Vendor specific data follows. */ + u8 vendor_specific1[0]; +} sfp_eeprom_t; + +always_inline uword +sfp_eeprom_is_valid (sfp_eeprom_t * e) +{ + int i; + u8 sum = 0; + for (i = 0; i < 63; i++) + sum += ((u8 *) e)[i]; + return sum == e->checksum_0_to_62; +} + +/* _ (byte_index, bit_index, name) */ +#define foreach_sfp_compatibility \ + _ (0, 4, 10g_base_sr) \ + _ (0, 5, 10g_base_lr) \ + _ (1, 2, oc48_long_reach) \ + _ (1, 1, oc48_intermediate_reach) \ + _ (1, 0, oc48_short_reach) \ + _ (2, 6, oc12_long_reach) \ + _ (2, 5, oc12_intermediate_reach) \ + _ (2, 4, oc12_short_reach) \ + _ (2, 2, oc3_long_reach) \ + _ (2, 1, oc3_intermediate_reach) \ + _ (2, 0, oc3_short_reach) \ + _ (3, 3, 1g_base_t) \ + _ (3, 2, 1g_base_cx) \ + _ (3, 1, 1g_base_lx) \ + _ (3, 0, 1g_base_sx) + +typedef enum +{ +#define _(a,b,f) SFP_COMPATIBILITY_##f, + foreach_sfp_compatibility +#undef _ + SFP_N_COMPATIBILITY, +} sfp_compatibility_t; + +u32 sfp_is_comatible (sfp_eeprom_t * e, sfp_compatibility_t c); + +format_function_t format_sfp_eeprom; + +#endif /* included_vnet_optics_sfp_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/ssvm/node.c b/src/vnet/devices/ssvm/node.c new file mode 100644 index 00000000..3a695b1d --- /dev/null +++ b/src/vnet/devices/ssvm/node.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "ssvm_eth.h" + +vlib_node_registration_t ssvm_eth_input_node; + +typedef struct +{ + u32 next_index; + u32 sw_if_index; +} ssvm_eth_input_trace_t; + +/* packet trace format function */ +static u8 * +format_ssvm_eth_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ssvm_eth_input_trace_t *t = va_arg (*args, ssvm_eth_input_trace_t *); + + s = format (s, "SSVM_ETH_INPUT: sw_if_index %d, next index %d", + t->sw_if_index, t->next_index); + return s; +} + +vlib_node_registration_t ssvm_eth_input_node; + +#define foreach_ssvm_eth_input_error \ +_(NO_BUFFERS, "Rx packet drops (no buffers)") + +typedef enum +{ +#define _(sym,str) SSVM_ETH_INPUT_ERROR_##sym, + foreach_ssvm_eth_input_error +#undef _ + SSVM_ETH_INPUT_N_ERROR, +} ssvm_eth_input_error_t; + +static char *ssvm_eth_input_error_strings[] = { +#define _(sym,string) string, + foreach_ssvm_eth_input_error +#undef _ +}; + +typedef enum +{ + SSVM_ETH_INPUT_NEXT_DROP, + SSVM_ETH_INPUT_NEXT_ETHERNET_INPUT, + SSVM_ETH_INPUT_NEXT_IP4_INPUT, + SSVM_ETH_INPUT_NEXT_IP6_INPUT, + SSVM_ETH_INPUT_NEXT_MPLS_INPUT, + SSVM_ETH_INPUT_N_NEXT, +} ssvm_eth_input_next_t; + +static inline uword +ssvm_eth_device_input (ssvm_eth_main_t * em, + ssvm_private_t * intfc, vlib_node_runtime_t * node) +{ + ssvm_shared_header_t *sh = intfc->sh; + vlib_main_t *vm = em->vlib_main; + unix_shared_memory_queue_t *q; + ssvm_eth_queue_elt_t *elt, *elts; + u32 elt_index; + u32 my_pid = intfc->my_pid; + int rx_queue_index; + u32 n_to_alloc = VLIB_FRAME_SIZE * 2; + u32 n_allocated, n_present_in_cache; + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + vlib_buffer_free_list_t *fl; + u32 n_left_to_next, *to_next; + u32 next0; + u32 n_buffers; + u32 n_available; + u32 bi0, saved_bi0; + vlib_buffer_t *b0, *prev; + u32 saved_cache_size = 0; + ethernet_header_t *eh0; + u16 type0; + u32 n_rx_bytes = 0, l3_offset0; + u32 cpu_index = os_get_cpu_number (); + u32 trace_cnt __attribute__ ((unused)) = vlib_get_trace_count (vm, node); + volatile u32 *lock; + u32 *elt_indices; + uword n_trace = vlib_get_trace_count (vm, node); + + /* Either side down? buh-bye... */ + if (pointer_to_uword (sh->opaque[MASTER_ADMIN_STATE_INDEX]) == 0 || + pointer_to_uword (sh->opaque[SLAVE_ADMIN_STATE_INDEX]) == 0) + return 0; + + if (intfc->i_am_master) + q = (unix_shared_memory_queue_t *) (sh->opaque[TO_MASTER_Q_INDEX]); + else + q = (unix_shared_memory_queue_t *) (sh->opaque[TO_SLAVE_Q_INDEX]); + + /* Nothing to do? 
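+ (q->cursize is read here without taking the queue lock; a stale zero is presumably harmless, since any packets missed now are picked up on the next poll of this input node.)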
*/ + if (q->cursize == 0) + return 0; + + fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + vec_reset_length (intfc->rx_queue); + + lock = (u32 *) q; + while (__sync_lock_test_and_set (lock, 1)) + ; + while (q->cursize > 0) + { + unix_shared_memory_queue_sub_raw (q, (u8 *) & elt_index); + ASSERT (elt_index < 2048); + vec_add1 (intfc->rx_queue, elt_index); + } + CLIB_MEMORY_BARRIER (); + *lock = 0; + + n_present_in_cache = vec_len (em->buffer_cache); + + if (vec_len (em->buffer_cache) < vec_len (intfc->rx_queue) * 2) + { + vec_validate (em->buffer_cache, + n_to_alloc + vec_len (em->buffer_cache) - 1); + n_allocated = + vlib_buffer_alloc (vm, &em->buffer_cache[n_present_in_cache], + n_to_alloc); + + n_present_in_cache += n_allocated; + _vec_len (em->buffer_cache) = n_present_in_cache; + } + + elts = (ssvm_eth_queue_elt_t *) (sh->opaque[CHUNK_POOL_INDEX]); + + n_buffers = vec_len (intfc->rx_queue); + rx_queue_index = 0; + + while (n_buffers > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_buffers > 0 && n_left_to_next > 0) + { + elt = elts + intfc->rx_queue[rx_queue_index]; + + saved_cache_size = n_present_in_cache; + if (PREDICT_FALSE (saved_cache_size == 0)) + { + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + goto out; + } + saved_bi0 = bi0 = em->buffer_cache[--n_present_in_cache]; + b0 = vlib_get_buffer (vm, bi0); + prev = 0; + + while (1) + { + vlib_buffer_init_for_free_list (b0, fl); + + b0->current_data = elt->current_data_hint; + b0->current_length = elt->length_this_buffer; + b0->total_length_not_including_first_buffer = + elt->total_length_not_including_first_buffer; + + clib_memcpy (b0->data + b0->current_data, elt->data, + b0->current_length); + + if (PREDICT_FALSE (prev != 0)) + prev->next_buffer = bi0; + + if (PREDICT_FALSE (elt->flags & SSVM_BUFFER_NEXT_PRESENT)) + { + prev = b0; + if (PREDICT_FALSE (n_present_in_cache == 0)) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + goto out; + } + bi0 = em->buffer_cache[--n_present_in_cache]; + b0 = vlib_get_buffer (vm, bi0); + } + else + break; + } + + saved_cache_size = n_present_in_cache; + + to_next[0] = saved_bi0; + to_next++; + n_left_to_next--; + + b0 = vlib_get_buffer (vm, saved_bi0); + eh0 = vlib_buffer_get_current (b0); + + type0 = clib_net_to_host_u16 (eh0->type); + + next0 = SSVM_ETH_INPUT_NEXT_ETHERNET_INPUT; + + if (type0 == ETHERNET_TYPE_IP4) + next0 = SSVM_ETH_INPUT_NEXT_IP4_INPUT; + else if (type0 == ETHERNET_TYPE_IP6) + next0 = SSVM_ETH_INPUT_NEXT_IP6_INPUT; + else if (type0 == ETHERNET_TYPE_MPLS_UNICAST) + next0 = SSVM_ETH_INPUT_NEXT_MPLS_INPUT; + + l3_offset0 = ((next0 == SSVM_ETH_INPUT_NEXT_IP4_INPUT || + next0 == SSVM_ETH_INPUT_NEXT_IP6_INPUT || + next0 == SSVM_ETH_INPUT_NEXT_MPLS_INPUT) ? + sizeof (ethernet_header_t) : 0); + + n_rx_bytes += b0->current_length + + b0->total_length_not_including_first_buffer; + + b0->current_data += l3_offset0; + b0->current_length -= l3_offset0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = intfc->vlib_hw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... 
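+ * (VLIB_BUFFER_TRACE_TRAJECTORY_INIT below compiles to nothing unless the build enables VLIB_BUFFER_TRACE_TRAJECTORY.)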
+ */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + if (PREDICT_FALSE (n_trace > 0)) + { + ssvm_eth_input_trace_t *tr; + + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 1); + vlib_set_trace_count (vm, node, --n_trace); + + tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + + tr->next_index = next0; + tr->sw_if_index = intfc->vlib_hw_if_index; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + n_buffers--; + rx_queue_index++; + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + +out: + if (em->buffer_cache) + _vec_len (em->buffer_cache) = saved_cache_size; + else + ASSERT (saved_cache_size == 0); + + ssvm_lock (sh, my_pid, 2); + + ASSERT (vec_len (intfc->rx_queue) > 0); + + n_available = (u32) pointer_to_uword (sh->opaque[CHUNK_POOL_NFREE]); + elt_indices = (u32 *) (sh->opaque[CHUNK_POOL_FREELIST_INDEX]); + + clib_memcpy (&elt_indices[n_available], intfc->rx_queue, + vec_len (intfc->rx_queue) * sizeof (u32)); + + n_available += vec_len (intfc->rx_queue); + sh->opaque[CHUNK_POOL_NFREE] = uword_to_pointer (n_available, void *); + + ssvm_unlock (sh); + + vlib_error_count (vm, node->node_index, SSVM_ETH_INPUT_ERROR_NO_BUFFERS, + n_buffers); + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, cpu_index, + intfc->vlib_hw_if_index, rx_queue_index, n_rx_bytes); + + return rx_queue_index; +} + +static uword +ssvm_eth_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + ssvm_eth_main_t *em = &ssvm_eth_main; + ssvm_private_t *intfc; + uword n_rx_packets = 0; + + vec_foreach (intfc, em->intfcs) + { + n_rx_packets += ssvm_eth_device_input (em, intfc, node); + } + + return n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ssvm_eth_input_node) = { + .function = ssvm_eth_input_node_fn, + .name = "ssvm_eth_input", + .vector_size = sizeof (u32), + .format_trace = format_ssvm_eth_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, + + .n_errors = ARRAY_LEN(ssvm_eth_input_error_strings), + .error_strings = ssvm_eth_input_error_strings, + + .n_next_nodes = SSVM_ETH_INPUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SSVM_ETH_INPUT_NEXT_DROP] = "error-drop", + [SSVM_ETH_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input", + [SSVM_ETH_INPUT_NEXT_IP4_INPUT] = "ip4-input", + [SSVM_ETH_INPUT_NEXT_IP6_INPUT] = "ip6-input", + [SSVM_ETH_INPUT_NEXT_MPLS_INPUT] = "mpls-input", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ssvm_eth_input_node, ssvm_eth_input_node_fn) +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/ssvm/ssvm_eth.c b/src/vnet/devices/ssvm/ssvm_eth.c new file mode 100644 index 00000000..db4fafa9 --- /dev/null +++ b/src/vnet/devices/ssvm/ssvm_eth.c @@ -0,0 +1,491 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "ssvm_eth.h" + +ssvm_eth_main_t ssvm_eth_main; + +#define foreach_ssvm_eth_tx_func_error \ +_(RING_FULL, "Tx packet drops (ring full)") \ +_(NO_BUFFERS, "Tx packet drops (no buffers)") \ +_(ADMIN_DOWN, "Tx packet drops (admin down)") + +typedef enum +{ +#define _(f,s) SSVM_ETH_TX_ERROR_##f, + foreach_ssvm_eth_tx_func_error +#undef _ + SSVM_ETH_TX_N_ERROR, +} ssvm_eth_tx_func_error_t; + +static u32 ssvm_eth_flag_change (vnet_main_t * vnm, + vnet_hw_interface_t * hi, u32 flags); + +int +ssvm_eth_create (ssvm_eth_main_t * em, u8 * name, int is_master) +{ + ssvm_private_t *intfc; + void *oldheap; + clib_error_t *e; + unix_shared_memory_queue_t *q; + ssvm_shared_header_t *sh; + ssvm_eth_queue_elt_t *elts; + u32 *elt_indices; + u8 enet_addr[6]; + int i, rv; + + vec_add2 (em->intfcs, intfc, 1); + + intfc->ssvm_size = em->segment_size; + intfc->i_am_master = 1; + intfc->name = name; + intfc->my_pid = getpid (); + if (is_master == 0) + { + rv = ssvm_slave_init (intfc, 20 /* timeout in seconds */ ); + if (rv < 0) + return rv; + goto create_vnet_interface; + } + + intfc->requested_va = em->next_base_va; + em->next_base_va += em->segment_size; + rv = ssvm_master_init (intfc, intfc - em->intfcs /* master index */ ); + + if (rv < 0) + return rv; + + /* OK, segment created, set up queues and so forth. */ + + sh = intfc->sh; + oldheap = ssvm_push_heap (sh); + + q = unix_shared_memory_queue_init (em->queue_elts, sizeof (u32), + 0 /* consumer pid not interesting */ , + 0 /* signal not sent */ ); + sh->opaque[TO_MASTER_Q_INDEX] = (void *) q; + q = unix_shared_memory_queue_init (em->queue_elts, sizeof (u32), + 0 /* consumer pid not interesting */ , + 0 /* signal not sent */ ); + sh->opaque[TO_SLAVE_Q_INDEX] = (void *) q; + + /* + * Preallocate the requested number of buffer chunks + * There must be a better way to do this, etc. + * Add some slop to avoid pool reallocation, which will not go well + */ + elts = 0; + elt_indices = 0; + + vec_validate_aligned (elts, em->nbuffers - 1, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (elt_indices, em->nbuffers - 1, CLIB_CACHE_LINE_BYTES); + + for (i = 0; i < em->nbuffers; i++) + elt_indices[i] = i; + + sh->opaque[CHUNK_POOL_INDEX] = (void *) elts; + sh->opaque[CHUNK_POOL_FREELIST_INDEX] = (void *) elt_indices; + sh->opaque[CHUNK_POOL_NFREE] = (void *) (uword) em->nbuffers; + + ssvm_pop_heap (oldheap); + +create_vnet_interface: + + sh = intfc->sh; + + memset (enet_addr, 0, sizeof (enet_addr)); + enet_addr[0] = 2; + enet_addr[1] = 0xFE; + enet_addr[2] = is_master; + enet_addr[5] = sh->master_index; + + e = ethernet_register_interface + (em->vnet_main, ssvm_eth_device_class.index, intfc - em->intfcs, + /* ethernet address */ enet_addr, + &intfc->vlib_hw_if_index, ssvm_eth_flag_change); + + if (e) + { + clib_error_report (e); + /* $$$$ unmap offending region? */ + return VNET_API_ERROR_INVALID_INTERFACE; + } + + /* Declare link up */ + vnet_hw_interface_set_flags (em->vnet_main, intfc->vlib_hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + + /* Let the games begin... 
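+ (Setting sh->ready below is what releases the slave side: ssvm_slave_init (), called above with a 20 second timeout, presumably spins until the master has finished laying out the queues and chunk pool.)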
*/ + if (is_master) + sh->ready = 1; + return 0; +} + +static clib_error_t * +ssvm_config (vlib_main_t * vm, unformat_input_t * input) +{ + u8 *name; + int is_master = 1; + int i, rv; + ssvm_eth_main_t *em = &ssvm_eth_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "base-va %llx", &em->next_base_va)) + ; + else if (unformat (input, "segment-size %lld", &em->segment_size)) + em->segment_size = 1ULL << (max_log2 (em->segment_size)); + else if (unformat (input, "nbuffers %lld", &em->nbuffers)) + ; + else if (unformat (input, "queue-elts %lld", &em->queue_elts)) + ; + else if (unformat (input, "slave")) + is_master = 0; + else if (unformat (input, "%s", &name)) + vec_add1 (em->names, name); + else + break; + } + + /* No configured instances, we're done... */ + if (vec_len (em->names) == 0) + return 0; + + for (i = 0; i < vec_len (em->names); i++) + { + rv = ssvm_eth_create (em, em->names[i], is_master); + if (rv < 0) + return clib_error_return (0, "ssvm_eth_create '%s' failed, error %d", + em->names[i], rv); + } + + vlib_node_set_state (vm, ssvm_eth_input_node.index, + VLIB_NODE_STATE_POLLING); + + return 0; +} + +VLIB_CONFIG_FUNCTION (ssvm_config, "ssvm_eth"); + + +static clib_error_t * +ssvm_eth_init (vlib_main_t * vm) +{ + ssvm_eth_main_t *em = &ssvm_eth_main; + + if (((sizeof (ssvm_eth_queue_elt_t) / CLIB_CACHE_LINE_BYTES) + * CLIB_CACHE_LINE_BYTES) != sizeof (ssvm_eth_queue_elt_t)) + clib_warning ("ssvm_eth_queue_elt_t size %d not a multiple of %d", + sizeof (ssvm_eth_queue_elt_t), CLIB_CACHE_LINE_BYTES); + + em->vlib_main = vm; + em->vnet_main = vnet_get_main (); + em->elog_main = &vm->elog_main; + + /* default config param values... */ + + em->next_base_va = 0x600000000ULL; + /* + * Allocate 2 full superframes in each dir (256 x 2 x 2 x 2048 bytes), + * 2mb; double that so we have plenty of space... 
4mb + */ + em->segment_size = 8 << 20; + em->nbuffers = 1024; + em->queue_elts = 512; + return 0; +} + +VLIB_INIT_FUNCTION (ssvm_eth_init); + +static char *ssvm_eth_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_ssvm_eth_tx_func_error +#undef _ +}; + +static u8 * +format_ssvm_eth_device_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + + s = format (s, "ssvmEthernet%d", i); + return s; +} + +static u8 * +format_ssvm_eth_device (u8 * s, va_list * args) +{ + s = format (s, "SSVM Ethernet"); + return s; +} + +static u8 * +format_ssvm_eth_tx_trace (u8 * s, va_list * args) +{ + s = format (s, "Unimplemented..."); + return s; +} + + +static uword +ssvm_eth_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * f) +{ + ssvm_eth_main_t *em = &ssvm_eth_main; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + ssvm_private_t *intfc = vec_elt_at_index (em->intfcs, rd->dev_instance); + ssvm_shared_header_t *sh = intfc->sh; + unix_shared_memory_queue_t *q; + u32 *from; + u32 n_left; + ssvm_eth_queue_elt_t *elts, *elt, *prev_elt; + u32 my_pid = intfc->my_pid; + vlib_buffer_t *b0; + u32 bi0; + u32 size_this_buffer; + u32 chunks_this_buffer; + u8 i_am_master = intfc->i_am_master; + u32 elt_index; + int is_ring_full, interface_down; + int i; + volatile u32 *queue_lock; + u32 n_to_alloc = VLIB_FRAME_SIZE; + u32 n_allocated, n_present_in_cache, n_available; + u32 *elt_indices; + + if (i_am_master) + q = (unix_shared_memory_queue_t *) sh->opaque[TO_SLAVE_Q_INDEX]; + else + q = (unix_shared_memory_queue_t *) sh->opaque[TO_MASTER_Q_INDEX]; + + queue_lock = (u32 *) q; + + from = vlib_frame_vector_args (f); + n_left = f->n_vectors; + is_ring_full = 0; + interface_down = 0; + + n_present_in_cache = vec_len (em->chunk_cache); + + /* admin / link up/down check */ + if (sh->opaque[MASTER_ADMIN_STATE_INDEX] == 0 || + sh->opaque[SLAVE_ADMIN_STATE_INDEX] == 0) + { + interface_down = 1; + goto out; + } + + ssvm_lock (sh, my_pid, 1); + + elts = (ssvm_eth_queue_elt_t *) (sh->opaque[CHUNK_POOL_INDEX]); + elt_indices = (u32 *) (sh->opaque[CHUNK_POOL_FREELIST_INDEX]); + n_available = (u32) pointer_to_uword (sh->opaque[CHUNK_POOL_NFREE]); + + if (n_present_in_cache < n_left * 2) + { + vec_validate (em->chunk_cache, n_to_alloc + n_present_in_cache - 1); + + n_allocated = n_to_alloc < n_available ? n_to_alloc : n_available; + + if (PREDICT_TRUE (n_allocated > 0)) + { + clib_memcpy (&em->chunk_cache[n_present_in_cache], + &elt_indices[n_available - n_allocated], + sizeof (u32) * n_allocated); + } + + n_present_in_cache += n_allocated; + n_available -= n_allocated; + sh->opaque[CHUNK_POOL_NFREE] = uword_to_pointer (n_available, void *); + _vec_len (em->chunk_cache) = n_present_in_cache; + } + + ssvm_unlock (sh); + + while (n_left) + { + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + + size_this_buffer = vlib_buffer_length_in_chain (vm, b0); + chunks_this_buffer = (size_this_buffer + (SSVM_BUFFER_SIZE - 1)) + / SSVM_BUFFER_SIZE; + + /* If we're not going to be able to enqueue the buffer, tail drop. 
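+ (q->cursize is sampled without the queue lock, so in the worst case a frame is dropped even though a slot freed up concurrently; presumably an acceptable trade for keeping the check lock-free on the tx path.)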
*/ + if (q->cursize >= q->maxsize) + { + is_ring_full = 1; + break; + } + + prev_elt = 0; + elt_index = ~0; + for (i = 0; i < chunks_this_buffer; i++) + { + if (PREDICT_FALSE (n_present_in_cache == 0)) + goto out; + + elt_index = em->chunk_cache[--n_present_in_cache]; + elt = elts + elt_index; + + elt->type = SSVM_PACKET_TYPE; + elt->flags = 0; + elt->total_length_not_including_first_buffer = + b0->total_length_not_including_first_buffer; + elt->length_this_buffer = b0->current_length; + elt->current_data_hint = b0->current_data; + elt->owner = !i_am_master; + elt->tag = 1; + + clib_memcpy (elt->data, b0->data + b0->current_data, + b0->current_length); + + if (PREDICT_FALSE (prev_elt != 0)) + prev_elt->next_index = elt - elts; + + if (PREDICT_FALSE (i < (chunks_this_buffer - 1))) + { + elt->flags = SSVM_BUFFER_NEXT_PRESENT; + ASSERT (b0->flags & VLIB_BUFFER_NEXT_PRESENT); + b0 = vlib_get_buffer (vm, b0->next_buffer); + } + prev_elt = elt; + } + + while (__sync_lock_test_and_set (queue_lock, 1)) + ; + + unix_shared_memory_queue_add_raw (q, (u8 *) & elt_index); + CLIB_MEMORY_BARRIER (); + *queue_lock = 0; + + from++; + n_left--; + } + +out: + if (PREDICT_FALSE (n_left)) + { + if (is_ring_full) + vlib_error_count (vm, node->node_index, SSVM_ETH_TX_ERROR_RING_FULL, + n_left); + else if (interface_down) + vlib_error_count (vm, node->node_index, SSVM_ETH_TX_ERROR_ADMIN_DOWN, + n_left); + else + vlib_error_count (vm, node->node_index, SSVM_ETH_TX_ERROR_NO_BUFFERS, + n_left); + + vlib_buffer_free (vm, from, n_left); + } + else + vlib_buffer_free (vm, vlib_frame_vector_args (f), f->n_vectors); + + if (PREDICT_TRUE (vec_len (em->chunk_cache))) + _vec_len (em->chunk_cache) = n_present_in_cache; + + return f->n_vectors; +} + +static void +ssvm_eth_clear_hw_interface_counters (u32 instance) +{ + /* Nothing for now */ +} + +static clib_error_t * +ssvm_eth_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, + u32 flags) +{ + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + ssvm_eth_main_t *em = &ssvm_eth_main; + ssvm_private_t *intfc = vec_elt_at_index (em->intfcs, hif->dev_instance); + ssvm_shared_header_t *sh; + + /* publish link-state in shared-memory, to discourage buffer-wasting */ + sh = intfc->sh; + if (intfc->i_am_master) + sh->opaque[MASTER_ADMIN_STATE_INDEX] = (void *) is_up; + else + sh->opaque[SLAVE_ADMIN_STATE_INDEX] = (void *) is_up; + + return 0; +} + +static clib_error_t * +ssvm_eth_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + /* Nothing for now */ + return 0; +} + +/* + * Dynamically redirect all pkts from a specific interface + * to the specified node + */ +static void +ssvm_eth_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + ssvm_eth_main_t *em = &ssvm_eth_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + ssvm_private_t *intfc = pool_elt_at_index (em->intfcs, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + intfc->per_interface_next_index = node_index; + return; + } + + intfc->per_interface_next_index = + vlib_node_add_next (em->vlib_main, ssvm_eth_input_node.index, node_index); +} + +static u32 +ssvm_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) +{ + /* nothing for now */ + return 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (ssvm_eth_device_class) = { + .name = "ssvm-eth", + .tx_function = 
ssvm_eth_interface_tx, + .tx_function_n_errors = SSVM_ETH_TX_N_ERROR, + .tx_function_error_strings = ssvm_eth_tx_func_error_strings, + .format_device_name = format_ssvm_eth_device_name, + .format_device = format_ssvm_eth_device, + .format_tx_trace = format_ssvm_eth_tx_trace, + .clear_counters = ssvm_eth_clear_hw_interface_counters, + .admin_up_down_function = ssvm_eth_interface_admin_up_down, + .subif_add_del_function = ssvm_eth_subif_add_del_function, + .rx_redirect_to_node = ssvm_eth_set_interface_next_node, +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (ssvm_eth_device_class, + ssvm_eth_interface_tx) +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/ssvm/ssvm_eth.h b/src/vnet/devices/ssvm/ssvm_eth.h new file mode 100644 index 00000000..f877df3c --- /dev/null +++ b/src/vnet/devices/ssvm/ssvm_eth.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_ssvm_eth_h__ +#define __included_ssvm_eth_h__ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +extern vnet_device_class_t ssvm_eth_device_class; +extern vlib_node_registration_t ssvm_eth_input_node; + +#define SSVM_BUFFER_SIZE \ + (VLIB_BUFFER_DATA_SIZE + VLIB_BUFFER_PRE_DATA_SIZE) +#define SSVM_PACKET_TYPE 1 + +typedef struct +{ + /* Type of queue element */ + u8 type; + u8 flags; +#define SSVM_BUFFER_NEXT_PRESENT (1<<0) + u8 owner; + u8 tag; + i16 current_data_hint; + u16 length_this_buffer; + u16 total_length_not_including_first_buffer; + u16 pad; + u32 next_index; + /* offset 16 */ + u8 data[SSVM_BUFFER_SIZE]; + /* pad to an even multiple of 64 octets */ + u8 pad2[CLIB_CACHE_LINE_BYTES - 16]; +} ssvm_eth_queue_elt_t; + +typedef struct +{ + /* vector of point-to-point connections */ + ssvm_private_t *intfcs; + + u32 *buffer_cache; + u32 *chunk_cache; + + /* Configurable parameters */ + /* base address for next placement */ + u64 next_base_va; + u64 segment_size; + u64 nbuffers; + u64 queue_elts; + + /* Segment names */ + u8 **names; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + elog_main_t *elog_main; +} ssvm_eth_main_t; + +ssvm_eth_main_t ssvm_eth_main; + +typedef enum +{ + CHUNK_POOL_FREELIST_INDEX = 0, + CHUNK_POOL_INDEX, + CHUNK_POOL_NFREE, + TO_MASTER_Q_INDEX, + TO_SLAVE_Q_INDEX, + MASTER_ADMIN_STATE_INDEX, + SLAVE_ADMIN_STATE_INDEX, +} ssvm_eth_opaque_index_t; + +/* + * debug scaffolding. 
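+ * E.g. call ssvm_eth_validate_freelists (1) from gdb or a debug hook to sanity-check that every free chunk index in every segment is in range; the body compiles away unless CLIB_DEBUG > 0.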
*/ +static inline void +ssvm_eth_validate_freelists (int need_lock) +{ +#if CLIB_DEBUG > 0 + ssvm_eth_main_t *em = &ssvm_eth_main; + ssvm_private_t *intfc; + ssvm_shared_header_t *sh; + u32 *elt_indices; + u32 n_available; + int i, j; + + for (i = 0; i < vec_len (em->intfcs); i++) + { + intfc = em->intfcs + i; + sh = intfc->sh; + u32 my_pid = intfc->my_pid; + + if (need_lock) + ssvm_lock (sh, my_pid, 15); + + elt_indices = (u32 *) (sh->opaque[CHUNK_POOL_FREELIST_INDEX]); + n_available = (u32) (uword) (sh->opaque[CHUNK_POOL_NFREE]); + + for (j = 0; j < n_available; j++) + ASSERT (elt_indices[j] < 2048); + + if (need_lock) + ssvm_unlock (sh); + } +#endif +} + +#endif /* __included_ssvm_eth_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/dir.dox b/src/vnet/devices/virtio/dir.dox new file mode 100644 index 00000000..50150799 --- /dev/null +++ b/src/vnet/devices/virtio/dir.dox @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Doxygen directory documentation */ + +/** +@dir +@brief vHost User Interface Implementation. + +This directory contains the source code for the vHost User driver. + +*/ +/*? %%clicmd:group_label vHost User %% ?*/ +/*? %%syscfg:group_label vHost User %% ?*/ diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c new file mode 100644 index 00000000..bde8106c --- /dev/null +++ b/src/vnet/devices/virtio/vhost-user.c @@ -0,0 +1,3314 @@ +/* + *------------------------------------------------------------------ + * vhost.c - vhost-user + * + * Copyright (c) 2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <fcntl.h> /* for open */ +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/uio.h> /* for iovec */ +#include <netinet/in.h> +#include <sys/vfs.h> + +#include <linux/if_arp.h> +#include <linux/if_tun.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> + +#include <vnet/ip/ip.h> + +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/devices.h> +#include <vnet/feature/feature.h> + +#include <vnet/devices/virtio/vhost-user.h> + +/** + * @file + * @brief vHost User Device Driver. + * + * This file contains the source code for the vHost User interface. + */ + + +#define VHOST_USER_DEBUG_SOCKET 0 +#define VHOST_DEBUG_VQ 0 + +#if VHOST_USER_DEBUG_SOCKET == 1 +#define DBG_SOCK(args...) clib_warning(args); +#else +#define DBG_SOCK(args...) +#endif + +#if VHOST_DEBUG_VQ == 1 +#define DBG_VQ(args...) clib_warning(args); +#else +#define DBG_VQ(args...)
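+ +/* Usage sketch, with a hypothetical index variable idx: + DBG_VQ ("desc %d out of range", idx) + logs via clib_warning only when VHOST_DEBUG_VQ is set to 1 above; otherwise + the macro expands to nothing and its arguments are never evaluated. */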
+#endif + +/* + * When an RX queue is down but active, received packets + * must be discarded. This value controls up to how many + * packets will be discarded during each round. + */ +#define VHOST_USER_DOWN_DISCARD_COUNT 256 + +/* + * When the number of available buffers gets under this threshold, + * RX node will start discarding packets. + */ +#define VHOST_USER_RX_BUFFER_STARVATION 32 + +/* + * On the receive side, the host should free descriptors as soon + * as possible in order to avoid TX drop in the VM. + * This value controls the number of copy operations that are stacked + * before copy is done for all and descriptors are given back to + * the guest. + * The value 64 was obtained by testing (48 and 128 were not as good). + */ +#define VHOST_USER_RX_COPY_THRESHOLD 64 + +#define UNIX_GET_FD(unixfd_idx) \ + (unixfd_idx != ~0) ? \ + pool_elt_at_index (unix_main.file_pool, \ + unixfd_idx)->file_descriptor : -1; + +#define foreach_virtio_trace_flags \ + _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \ + _ (SINGLE_DESC, 1, "Single descriptor packet") \ + _ (INDIRECT, 2, "Indirect descriptor") \ + _ (MAP_ERROR, 4, "Memory mapping error") + +typedef enum +{ +#define _(n,i,s) VIRTIO_TRACE_F_##n, + foreach_virtio_trace_flags +#undef _ +} virtio_trace_flag_t; + +vlib_node_registration_t vhost_user_input_node; + +#define foreach_vhost_user_tx_func_error \ + _(NONE, "no error") \ + _(NOT_READY, "vhost vring not ready") \ + _(DOWN, "vhost interface is down") \ + _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \ + _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)") \ + _(MMAP_FAIL, "mmap failure") \ + _(INDIRECT_OVERFLOW, "indirect descriptor table overflow") + +typedef enum +{ +#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f, + foreach_vhost_user_tx_func_error +#undef _ + VHOST_USER_TX_FUNC_N_ERROR, +} vhost_user_tx_func_error_t; + +static char *vhost_user_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_vhost_user_tx_func_error +#undef _ +}; + +#define foreach_vhost_user_input_func_error \ + _(NO_ERROR, "no error") \ + _(NO_BUFFER, "no available buffer") \ + _(MMAP_FAIL, "mmap failure") \ + _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \ + _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \ + _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)") + +typedef enum +{ +#define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f, + foreach_vhost_user_input_func_error +#undef _ + VHOST_USER_INPUT_FUNC_N_ERROR, +} vhost_user_input_func_error_t; + +static char *vhost_user_input_func_error_strings[] = { +#define _(n,s) s, + foreach_vhost_user_input_func_error +#undef _ +}; + +/* *INDENT-OFF* */ +static vhost_user_main_t vhost_user_main = { + .mtu_bytes = 1518, +}; + +VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = { + .name = "vhost-user", +}; +/* *INDENT-ON* */ + +static u8 * +format_vhost_user_interface_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u32 show_dev_instance = ~0; + vhost_user_main_t *vum = &vhost_user_main; + + if (i < vec_len (vum->show_dev_instance_by_real_dev_instance)) + show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i]; + + if (show_dev_instance != ~0) + i = show_dev_instance; + + s = format (s, "VirtualEthernet0/0/%d", i); + return s; +} + +static int +vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance) +{ + // FIXME: check if the new dev instance is already used + vhost_user_main_t *vum = &vhost_user_main; + vec_validate_init_empty 
(vum->show_dev_instance_by_real_dev_instance, + hi->dev_instance, ~0); + + vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] = + new_dev_instance; + + DBG_SOCK ("renumbered vhost-user interface dev_instance %d to %d", + hi->dev_instance, new_dev_instance); + + return 0; +} + +static_always_inline void * +map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint) +{ + int i = *hint; + if (PREDICT_TRUE ((vui->regions[i].guest_phys_addr <= addr) && + ((vui->regions[i].guest_phys_addr + + vui->regions[i].memory_size) > addr))) + { + return (void *) (vui->region_mmap_addr[i] + addr - + vui->regions[i].guest_phys_addr); + } +#if __SSE4_2__ + __m128i rl, rh, al, ah, r; + al = _mm_set1_epi64x (addr + 1); + ah = _mm_set1_epi64x (addr); + + rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[0]); + rl = _mm_cmpgt_epi64 (al, rl); + rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[0]); + rh = _mm_cmpgt_epi64 (rh, ah); + r = _mm_and_si128 (rl, rh); + + rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[2]); + rl = _mm_cmpgt_epi64 (al, rl); + rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[2]); + rh = _mm_cmpgt_epi64 (rh, ah); + r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22); + + rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[4]); + rl = _mm_cmpgt_epi64 (al, rl); + rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[4]); + rh = _mm_cmpgt_epi64 (rh, ah); + r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44); + + rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[6]); + rl = _mm_cmpgt_epi64 (al, rl); + rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[6]); + rh = _mm_cmpgt_epi64 (rh, ah); + r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88); + + r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800)); + i = __builtin_ctzll (_mm_movemask_epi8 (r)); + + if (i < vui->nregions) + { + *hint = i; + return (void *) (vui->region_mmap_addr[i] + addr - + vui->regions[i].guest_phys_addr); + } + +#else + for (i = 0; i < vui->nregions; i++) + { + if ((vui->regions[i].guest_phys_addr <= addr) && + ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) > + addr)) + { + *hint = i; + return (void *) (vui->region_mmap_addr[i] + addr - + vui->regions[i].guest_phys_addr); + } + } +#endif + DBG_VQ ("failed to map guest mem addr %llx", addr); + *hint = 0; + return 0; +} + +static inline void * +map_user_mem (vhost_user_intf_t * vui, uword addr) +{ + int i; + for (i = 0; i < vui->nregions; i++) + { + if ((vui->regions[i].userspace_addr <= addr) && + ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) > + addr)) + { + return (void *) (vui->region_mmap_addr[i] + addr - + vui->regions[i].userspace_addr); + } + } + return 0; +} + +static long +get_huge_page_size (int fd) +{ + struct statfs s; + fstatfs (fd, &s); + return s.f_bsize; +} + +static void +unmap_all_mem_regions (vhost_user_intf_t * vui) +{ + int i, r; + for (i = 0; i < vui->nregions; i++) + { + if (vui->region_mmap_addr[i] != (void *) -1) + { + + long page_sz = get_huge_page_size (vui->region_mmap_fd[i]); + + ssize_t map_sz = (vui->regions[i].memory_size + + vui->regions[i].mmap_offset + + page_sz) & ~(page_sz - 1); + + r = + munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset, + map_sz); + + DBG_SOCK + ("unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i, + vui->region_mmap_addr[i], map_sz, page_sz); + + vui->region_mmap_addr[i] = (void *) -1; + + if (r == -1) + { + clib_warning ("failed to unmap 
memory region (errno %d)", + errno); + } + close (vui->region_mmap_fd[i]); + } + } + vui->nregions = 0; +} + +static void +vhost_user_tx_thread_placement (vhost_user_intf_t * vui) +{ + //Let's try to assign one queue to each thread + u32 qid = 0; + u32 cpu_index = 0; + vui->use_tx_spinlock = 0; + while (1) + { + for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++) + { + vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)]; + if (!rxvq->started || !rxvq->enabled) + continue; + + vui->per_cpu_tx_qid[cpu_index] = qid; + cpu_index++; + if (cpu_index == vlib_get_thread_main ()->n_vlib_mains) + return; + } + //We need to loop, meaning the spinlock has to be used + vui->use_tx_spinlock = 1; + if (cpu_index == 0) + { + //Could not find a single valid one + for (cpu_index = 0; + cpu_index < vlib_get_thread_main ()->n_vlib_mains; cpu_index++) + { + vui->per_cpu_tx_qid[cpu_index] = 0; + } + return; + } + } +} + +static void +vhost_user_rx_thread_placement () +{ + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui; + vhost_cpu_t *vhc; + u32 *workers = 0; + + //Let's list all workers cpu indexes + u32 i; + for (i = vum->input_cpu_first_index; + i < vum->input_cpu_first_index + vum->input_cpu_count; i++) + { + vlib_node_set_state (vlib_mains ? vlib_mains[i] : &vlib_global_main, + vhost_user_input_node.index, + VLIB_NODE_STATE_DISABLED); + vec_add1 (workers, i); + } + + vec_foreach (vhc, vum->cpus) + { + vec_reset_length (vhc->rx_queues); + } + + i = 0; + vhost_iface_and_queue_t iaq; + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + u32 *vui_workers = vec_len (vui->workers) ? vui->workers : workers; + u32 qid; + for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++) + { + vhost_user_vring_t *txvq = + &vui->vrings[VHOST_VRING_IDX_TX (qid)]; + if (!txvq->started) + continue; + + i %= vec_len (vui_workers); + u32 cpu_index = vui_workers[i]; + i++; + vhc = &vum->cpus[cpu_index]; + + iaq.qid = qid; + iaq.vhost_iface_index = vui - vum->vhost_user_interfaces; + vec_add1 (vhc->rx_queues, iaq); + vlib_node_set_state (vlib_mains ? 
vlib_mains[cpu_index] : + &vlib_global_main, vhost_user_input_node.index, + VLIB_NODE_STATE_POLLING); + } + }); + /* *INDENT-ON* */ +} + +static int +vhost_user_thread_placement (u32 sw_if_index, u32 worker_thread_index, u8 del) +{ + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui; + vnet_hw_interface_t *hw; + + if (worker_thread_index < vum->input_cpu_first_index || + worker_thread_index >= + vum->input_cpu_first_index + vum->input_cpu_count) + return -1; + + if (!(hw = vnet_get_sup_hw_interface (vnet_get_main (), sw_if_index))) + return -2; + + vui = pool_elt_at_index (vum->vhost_user_interfaces, hw->dev_instance); + u32 found = ~0, *w; + vec_foreach (w, vui->workers) + { + if (*w == worker_thread_index) + { + found = w - vui->workers; + break; + } + } + + if (del) + { + if (found == ~0) + return -3; + vec_del1 (vui->workers, found); + } + else if (found == ~0) + { + vec_add1 (vui->workers, worker_thread_index); + } + + vhost_user_rx_thread_placement (); + return 0; +} + +/** @brief Returns whether at least one TX and one RX vring are enabled */ +int +vhost_user_intf_ready (vhost_user_intf_t * vui) +{ + int i, found[2] = { }; //RX + TX + + for (i = 0; i < VHOST_VRING_MAX_N; i++) + if (vui->vrings[i].started && vui->vrings[i].enabled) + found[i & 1] = 1; + + return found[0] && found[1]; +} + +static void +vhost_user_update_iface_state (vhost_user_intf_t * vui) +{ + /* if we have pointers to descriptor table, go up */ + int is_up = vhost_user_intf_ready (vui); + if (is_up != vui->is_up) + { + DBG_SOCK ("interface %d %s", vui->sw_if_index, + is_up ? "ready" : "down"); + vnet_hw_interface_set_flags (vnet_get_main (), vui->hw_if_index, + is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : + 0); + vui->is_up = is_up; + } + vhost_user_rx_thread_placement (); + vhost_user_tx_thread_placement (vui); +} + +static clib_error_t * +vhost_user_callfd_read_ready (unix_file_t * uf) +{ + __attribute__ ((unused)) int n; + u8 buff[8]; + n = read (uf->file_descriptor, ((char *) &buff), 8); + return 0; +} + +static clib_error_t * +vhost_user_kickfd_read_ready (unix_file_t * uf) +{ + __attribute__ ((unused)) int n; + u8 buff[8]; + vhost_user_intf_t *vui = + pool_elt_at_index (vhost_user_main.vhost_user_interfaces, + uf->private_data >> 8); + u32 qid = uf->private_data & 0xff; + n = read (uf->file_descriptor, ((char *) &buff), 8); + DBG_SOCK ("if %d KICK queue %d", uf->private_data >> 8, qid); + + vlib_worker_thread_barrier_sync (vlib_get_main ()); + vui->vrings[qid].started = 1; + vhost_user_update_iface_state (vui); + vlib_worker_thread_barrier_release (vlib_get_main ()); + return 0; +} + +/** + * @brief Try once to lock the vring + * @return 0 on success, non-zero on failure. + */ +static inline int +vhost_user_vring_try_lock (vhost_user_intf_t * vui, u32 qid) +{ + return __sync_lock_test_and_set (vui->vring_locks[qid], 1); +} + +/** + * @brief Spin until the vring is successfully locked + */ +static inline void +vhost_user_vring_lock (vhost_user_intf_t * vui, u32 qid) +{ + while (vhost_user_vring_try_lock (vui, qid)) + ; +} + +/** + * @brief Unlock the vring lock + */ +static inline void +vhost_user_vring_unlock (vhost_user_intf_t * vui, u32 qid) +{ + *vui->vring_locks[qid] = 0; +} + +static inline void +vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid) +{ + vhost_user_vring_t *vring = &vui->vrings[qid]; + memset (vring, 0, sizeof (*vring)); + vring->kickfd_idx = ~0; + vring->callfd_idx = ~0; + vring->errfd = -1; + + /* + * We have a bug with some qemu 2.5, and this may be a fix. 
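+   * (Concretely: only qid 0 and qid 1 get enabled = 1 below; every
+   * other vring stays disabled until VHOST_USER_SET_VRING_ENABLE.)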
+ * Feel like interpretation holy text, but this is from vhost-user.txt. + * " + * One queue pair is enabled initially. More queues are enabled + * dynamically, by sending message VHOST_USER_SET_VRING_ENABLE. + * " + * Don't know who's right, but this is what DPDK does. + */ + if (qid == 0 || qid == 1) + vring->enabled = 1; +} + +static inline void +vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid) +{ + vhost_user_vring_t *vring = &vui->vrings[qid]; + if (vring->kickfd_idx != ~0) + { + unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + vring->kickfd_idx); + unix_file_del (&unix_main, uf); + vring->kickfd_idx = ~0; + } + if (vring->callfd_idx != ~0) + { + unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + vring->callfd_idx); + unix_file_del (&unix_main, uf); + vring->callfd_idx = ~0; + } + if (vring->errfd != -1) + close (vring->errfd); + vhost_user_vring_init (vui, qid); +} + +static inline void +vhost_user_if_disconnect (vhost_user_intf_t * vui) +{ + vnet_main_t *vnm = vnet_get_main (); + int q; + + vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); + + if (vui->unix_file_index != ~0) + { + unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index); + vui->unix_file_index = ~0; + } + + vui->is_up = 0; + + for (q = 0; q < VHOST_VRING_MAX_N; q++) + vhost_user_vring_close (vui, q); + + unmap_all_mem_regions (vui); + DBG_SOCK ("interface ifindex %d disconnected", vui->sw_if_index); +} + +#define VHOST_LOG_PAGE 0x1000 +static_always_inline void +vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui, + u64 addr, u64 len, u8 is_host_address) +{ + if (PREDICT_TRUE (vui->log_base_addr == 0 + || !(vui->features & (1 << FEAT_VHOST_F_LOG_ALL)))) + { + return; + } + if (is_host_address) + { + addr = (u64) map_user_mem (vui, (uword) addr); + } + if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size)) + { + DBG_SOCK ("vhost_user_log_dirty_pages(): out of range\n"); + return; + } + + CLIB_MEMORY_BARRIER (); + u64 page = addr / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < addr + len) + { + ((u8 *) vui->log_base_addr)[page / 8] |= 1 << page % 8; + page++; + } +} + +static_always_inline void +vhost_user_log_dirty_pages (vhost_user_intf_t * vui, u64 addr, u64 len) +{ + vhost_user_log_dirty_pages_2 (vui, addr, len, 0); +} + +#define vhost_user_log_dirty_ring(vui, vq, member) \ + if (PREDICT_FALSE(vq->log_used)) { \ + vhost_user_log_dirty_pages(vui, vq->log_guest_addr + STRUCT_OFFSET_OF(vring_used_t, member), \ + sizeof(vq->used->member)); \ + } + +static clib_error_t * +vhost_user_socket_read (unix_file_t * uf) +{ + int n, i; + int fd, number_of_fds = 0; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + vhost_user_msg_t msg; + struct msghdr mh; + struct iovec iov[1]; + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui; + struct cmsghdr *cmsg; + u8 q; + unix_file_t template = { 0 }; + vnet_main_t *vnm = vnet_get_main (); + + vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data); + + char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))]; + + memset (&mh, 0, sizeof (mh)); + memset (control, 0, sizeof (control)); + + for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) + fds[i] = -1; + + /* set the payload */ + iov[0].iov_base = (void *) &msg; + iov[0].iov_len = VHOST_USER_MSG_HDR_SZ; + + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = control; + mh.msg_controllen = sizeof (control); + + n = recvmsg (uf->file_descriptor, &mh, 0); + + /* Stop workers to avoid end of the world */ + 
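+  /*
+   * Locking contract assumed throughout this handler: with all workers
+   * held at the barrier, vring and memory-map state can be rewritten
+   * without per-ring locks, and every exit path (including
+   * close_socket) must release the barrier exactly once.
+   */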
vlib_worker_thread_barrier_sync (vlib_get_main ()); + + if (n != VHOST_USER_MSG_HDR_SZ) + { + if (n == -1) + { + DBG_SOCK ("recvmsg returned error %d %s", errno, strerror (errno)); + } + else + { + DBG_SOCK ("n (%d) != VHOST_USER_MSG_HDR_SZ (%d)", + n, VHOST_USER_MSG_HDR_SZ); + } + goto close_socket; + } + + if (mh.msg_flags & MSG_CTRUNC) + { + DBG_SOCK ("MSG_CTRUNC is set"); + goto close_socket; + } + + cmsg = CMSG_FIRSTHDR (&mh); + + if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) && + (cmsg->cmsg_type == SCM_RIGHTS) && + (cmsg->cmsg_len - CMSG_LEN (0) <= + VHOST_MEMORY_MAX_NREGIONS * sizeof (int))) + { + number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int); + clib_memcpy (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int)); + } + + /* version 1, no reply bit set */ + if ((msg.flags & 7) != 1) + { + DBG_SOCK ("malformed message received. closing socket"); + goto close_socket; + } + + { + int rv; + rv = + read (uf->file_descriptor, ((char *) &msg) + VHOST_USER_MSG_HDR_SZ, + msg.size); + if (rv < 0) + { + DBG_SOCK ("read failed %s", strerror (errno)); + goto close_socket; + } + else if (rv != msg.size) + { + DBG_SOCK ("message too short (read %dB should be %dB)", rv, msg.size); + goto close_socket; + } + } + + switch (msg.request) + { + case VHOST_USER_GET_FEATURES: + msg.flags |= 4; + msg.u64 = (1ULL << FEAT_VIRTIO_NET_F_MRG_RXBUF) | + (1ULL << FEAT_VIRTIO_NET_F_CTRL_VQ) | + (1ULL << FEAT_VIRTIO_F_ANY_LAYOUT) | + (1ULL << FEAT_VIRTIO_F_INDIRECT_DESC) | + (1ULL << FEAT_VHOST_F_LOG_ALL) | + (1ULL << FEAT_VIRTIO_NET_F_GUEST_ANNOUNCE) | + (1ULL << FEAT_VIRTIO_NET_F_MQ) | + (1ULL << FEAT_VHOST_USER_F_PROTOCOL_FEATURES) | + (1ULL << FEAT_VIRTIO_F_VERSION_1); + msg.u64 &= vui->feature_mask; + msg.size = sizeof (msg.u64); + DBG_SOCK ("if %d msg VHOST_USER_GET_FEATURES - reply 0x%016llx", + vui->hw_if_index, msg.u64); + break; + + case VHOST_USER_SET_FEATURES: + DBG_SOCK ("if %d msg VHOST_USER_SET_FEATURES features 0x%016llx", + vui->hw_if_index, msg.u64); + + vui->features = msg.u64; + + if (vui->features & + ((1 << FEAT_VIRTIO_NET_F_MRG_RXBUF) | + (1ULL << FEAT_VIRTIO_F_VERSION_1))) + vui->virtio_net_hdr_sz = 12; + else + vui->virtio_net_hdr_sz = 10; + + vui->is_any_layout = + (vui->features & (1 << FEAT_VIRTIO_F_ANY_LAYOUT)) ? 
1 : 0; + + ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE); + vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); + vui->is_up = 0; + + /*for (q = 0; q < VHOST_VRING_MAX_N; q++) + vhost_user_vring_close(&vui->vrings[q]); */ + + break; + + case VHOST_USER_SET_MEM_TABLE: + DBG_SOCK ("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d", + vui->hw_if_index, msg.memory.nregions); + + if ((msg.memory.nregions < 1) || + (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS)) + { + + DBG_SOCK ("number of mem regions must be between 1 and %i", + VHOST_MEMORY_MAX_NREGIONS); + + goto close_socket; + } + + if (msg.memory.nregions != number_of_fds) + { + DBG_SOCK ("each memory region must have FD"); + goto close_socket; + } + unmap_all_mem_regions (vui); + for (i = 0; i < msg.memory.nregions; i++) + { + clib_memcpy (&(vui->regions[i]), &msg.memory.regions[i], + sizeof (vhost_user_memory_region_t)); + + long page_sz = get_huge_page_size (fds[i]); + + /* align size to 2M page */ + ssize_t map_sz = (vui->regions[i].memory_size + + vui->regions[i].mmap_offset + + page_sz) & ~(page_sz - 1); + + vui->region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE, + MAP_SHARED, fds[i], 0); + vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr; + vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr + + vui->regions[i].memory_size; + + DBG_SOCK + ("map memory region %d addr 0 len 0x%lx fd %d mapped 0x%lx " + "page_sz 0x%x", i, map_sz, fds[i], vui->region_mmap_addr[i], + page_sz); + + if (vui->region_mmap_addr[i] == MAP_FAILED) + { + clib_warning ("failed to map memory. errno is %d", errno); + goto close_socket; + } + vui->region_mmap_addr[i] += vui->regions[i].mmap_offset; + vui->region_mmap_fd[i] = fds[i]; + } + vui->nregions = msg.memory.nregions; + break; + + case VHOST_USER_SET_VRING_NUM: + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d", + vui->hw_if_index, msg.state.index, msg.state.num); + + if ((msg.state.num > 32768) || /* maximum ring size is 32768 */ + (msg.state.num == 0) || /* it cannot be zero */ + ((msg.state.num - 1) & msg.state.num)) /* must be power of 2 */ + goto close_socket; + vui->vrings[msg.state.index].qsz = msg.state.num; + break; + + case VHOST_USER_SET_VRING_ADDR: + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ADDR idx %d", + vui->hw_if_index, msg.state.index); + + if (msg.state.index >= VHOST_VRING_MAX_N) + { + DBG_SOCK ("invalid vring index VHOST_USER_SET_VRING_ADDR:" + " %d >= %d", msg.state.index, VHOST_VRING_MAX_N); + goto close_socket; + } + + if (msg.size < sizeof (msg.addr)) + { + DBG_SOCK ("vhost message is too short (%d < %d)", + msg.size, sizeof (msg.addr)); + goto close_socket; + } + + vui->vrings[msg.state.index].desc = (vring_desc_t *) + map_user_mem (vui, msg.addr.desc_user_addr); + vui->vrings[msg.state.index].used = (vring_used_t *) + map_user_mem (vui, msg.addr.used_user_addr); + vui->vrings[msg.state.index].avail = (vring_avail_t *) + map_user_mem (vui, msg.addr.avail_user_addr); + + if ((vui->vrings[msg.state.index].desc == NULL) || + (vui->vrings[msg.state.index].used == NULL) || + (vui->vrings[msg.state.index].avail == NULL)) + { + DBG_SOCK ("failed to map user memory for hw_if_index %d", + vui->hw_if_index); + goto close_socket; + } + + vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr; + vui->vrings[msg.state.index].log_used = + (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0; + + /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated, + the ring is initialized in an enabled state. 
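+         When it has been negotiated, rings start out disabled instead,
+         and the master must enable them explicitly with
+         VHOST_USER_SET_VRING_ENABLE (handled further below).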
*/ + if (!(vui->features & (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES))) + { + vui->vrings[msg.state.index].enabled = 1; + } + + vui->vrings[msg.state.index].last_used_idx = + vui->vrings[msg.state.index].last_avail_idx = + vui->vrings[msg.state.index].used->idx; + + /* tell driver that we don't want interrupts */ + vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY; + break; + + case VHOST_USER_SET_OWNER: + DBG_SOCK ("if %d msg VHOST_USER_SET_OWNER", vui->hw_if_index); + break; + + case VHOST_USER_RESET_OWNER: + DBG_SOCK ("if %d msg VHOST_USER_RESET_OWNER", vui->hw_if_index); + break; + + case VHOST_USER_SET_VRING_CALL: + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL u64 %d", + vui->hw_if_index, msg.u64); + + q = (u8) (msg.u64 & 0xFF); + + /* if there is old fd, delete and close it */ + if (vui->vrings[q].callfd_idx != ~0) + { + unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + vui->vrings[q].callfd_idx); + unix_file_del (&unix_main, uf); + vui->vrings[q].callfd_idx = ~0; + } + + if (!(msg.u64 & 0x100)) + { + if (number_of_fds != 1) + { + DBG_SOCK ("More than one fd received !"); + goto close_socket; + } + + template.read_function = vhost_user_callfd_read_ready; + template.file_descriptor = fds[0]; + template.private_data = + ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q; + vui->vrings[q].callfd_idx = unix_file_add (&unix_main, &template); + } + else + vui->vrings[q].callfd_idx = ~0; + break; + + case VHOST_USER_SET_VRING_KICK: + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK u64 %d", + vui->hw_if_index, msg.u64); + + q = (u8) (msg.u64 & 0xFF); + + if (vui->vrings[q].kickfd_idx != ~0) + { + unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + vui->vrings[q].kickfd_idx); + unix_file_del (&unix_main, uf); + vui->vrings[q].kickfd_idx = ~0; + } + + if (!(msg.u64 & 0x100)) + { + if (number_of_fds != 1) + { + DBG_SOCK ("More than one fd received !"); + goto close_socket; + } + + template.read_function = vhost_user_kickfd_read_ready; + template.file_descriptor = fds[0]; + template.private_data = + (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) + + q; + vui->vrings[q].kickfd_idx = unix_file_add (&unix_main, &template); + } + else + { + //When no kickfd is set, the queue is initialized as started + vui->vrings[q].kickfd_idx = ~0; + vui->vrings[q].started = 1; + } + + break; + + case VHOST_USER_SET_VRING_ERR: + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR u64 %d", + vui->hw_if_index, msg.u64); + + q = (u8) (msg.u64 & 0xFF); + + if (vui->vrings[q].errfd != -1) + close (vui->vrings[q].errfd); + + if (!(msg.u64 & 0x100)) + { + if (number_of_fds != 1) + goto close_socket; + + vui->vrings[q].errfd = fds[0]; + } + else + vui->vrings[q].errfd = -1; + + break; + + case VHOST_USER_SET_VRING_BASE: + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d", + vui->hw_if_index, msg.state.index, msg.state.num); + + vui->vrings[msg.state.index].last_avail_idx = msg.state.num; + break; + + case VHOST_USER_GET_VRING_BASE: + DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d", + vui->hw_if_index, msg.state.index, msg.state.num); + + if (msg.state.index >= VHOST_VRING_MAX_N) + { + DBG_SOCK ("invalid vring index VHOST_USER_GET_VRING_BASE:" + " %d >= %d", msg.state.index, VHOST_VRING_MAX_N); + goto close_socket; + } + + /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. 
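+         Closing the vring below also releases its kick/call file
+         descriptors and resets the ring state via vhost_user_vring_init().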
*/ + vhost_user_vring_close (vui, msg.state.index); + + msg.state.num = vui->vrings[msg.state.index].last_avail_idx; + msg.flags |= 4; + msg.size = sizeof (msg.state); + break; + + case VHOST_USER_NONE: + DBG_SOCK ("if %d msg VHOST_USER_NONE", vui->hw_if_index); + + break; + + case VHOST_USER_SET_LOG_BASE: + { + DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE", vui->hw_if_index); + + if (msg.size != sizeof (msg.log)) + { + DBG_SOCK + ("invalid msg size for VHOST_USER_SET_LOG_BASE: %d instead of %d", + msg.size, sizeof (msg.log)); + goto close_socket; + } + + if (! + (vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD))) + { + DBG_SOCK + ("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received"); + goto close_socket; + } + + fd = fds[0]; + /* align size to 2M page */ + long page_sz = get_huge_page_size (fd); + ssize_t map_sz = + (msg.log.size + msg.log.offset + page_sz) & ~(page_sz - 1); + + vui->log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + + DBG_SOCK + ("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped 0x%lx", + map_sz, msg.log.offset, fd, vui->log_base_addr); + + if (vui->log_base_addr == MAP_FAILED) + { + clib_warning ("failed to map memory. errno is %d", errno); + goto close_socket; + } + + vui->log_base_addr += msg.log.offset; + vui->log_size = msg.log.size; + + msg.flags |= 4; + msg.size = sizeof (msg.u64); + + break; + } + + case VHOST_USER_SET_LOG_FD: + DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index); + + break; + + case VHOST_USER_GET_PROTOCOL_FEATURES: + DBG_SOCK ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES", + vui->hw_if_index); + + msg.flags |= 4; + msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | + (1 << VHOST_USER_PROTOCOL_F_MQ); + msg.size = sizeof (msg.u64); + break; + + case VHOST_USER_SET_PROTOCOL_FEATURES: + DBG_SOCK ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%lx", + vui->hw_if_index, msg.u64); + + vui->protocol_features = msg.u64; + + break; + + case VHOST_USER_GET_QUEUE_NUM: + DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM", vui->hw_if_index); + msg.flags |= 4; + msg.u64 = VHOST_VRING_MAX_N; + msg.size = sizeof (msg.u64); + break; + + case VHOST_USER_SET_VRING_ENABLE: + DBG_SOCK ("if %d VHOST_USER_SET_VRING_ENABLE: %s queue %d", + vui->hw_if_index, msg.state.num ? "enable" : "disable", + msg.state.index); + if (msg.state.index >= VHOST_VRING_MAX_N) + { + DBG_SOCK ("invalid vring index VHOST_USER_SET_VRING_ENABLE:" + " %d >= %d", msg.state.index, VHOST_VRING_MAX_N); + goto close_socket; + } + + vui->vrings[msg.state.index].enabled = msg.state.num; + break; + + default: + DBG_SOCK ("unknown vhost-user message %d received. 
closing socket", + msg.request); + goto close_socket; + } + + /* if we need to reply */ + if (msg.flags & 4) + { + n = + send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0); + if (n != (msg.size + VHOST_USER_MSG_HDR_SZ)) + { + DBG_SOCK ("could not send message response"); + goto close_socket; + } + } + + vhost_user_update_iface_state (vui); + vlib_worker_thread_barrier_release (vlib_get_main ()); + return 0; + +close_socket: + vhost_user_if_disconnect (vui); + vhost_user_update_iface_state (vui); + vlib_worker_thread_barrier_release (vlib_get_main ()); + return 0; +} + +static clib_error_t * +vhost_user_socket_error (unix_file_t * uf) +{ + vlib_main_t *vm = vlib_get_main (); + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui = + pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data); + + DBG_SOCK ("socket error on if %d", vui->sw_if_index); + vlib_worker_thread_barrier_sync (vm); + vhost_user_if_disconnect (vui); + vhost_user_rx_thread_placement (); + vlib_worker_thread_barrier_release (vm); + return 0; +} + +static clib_error_t * +vhost_user_socksvr_accept_ready (unix_file_t * uf) +{ + int client_fd, client_len; + struct sockaddr_un client; + unix_file_t template = { 0 }; + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui; + + vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data); + + client_len = sizeof (client); + client_fd = accept (uf->file_descriptor, + (struct sockaddr *) &client, + (socklen_t *) & client_len); + + if (client_fd < 0) + return clib_error_return_unix (0, "accept"); + + DBG_SOCK ("New client socket for vhost interface %d", vui->sw_if_index); + template.read_function = vhost_user_socket_read; + template.error_function = vhost_user_socket_error; + template.file_descriptor = client_fd; + template.private_data = vui - vhost_user_main.vhost_user_interfaces; + vui->unix_file_index = unix_file_add (&unix_main, &template); + return 0; +} + +static clib_error_t * +vhost_user_init (vlib_main_t * vm) +{ + clib_error_t *error; + vhost_user_main_t *vum = &vhost_user_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + uword *p; + + error = vlib_call_init_function (vm, ip4_init); + if (error) + return error; + + vum->coalesce_frames = 32; + vum->coalesce_time = 1e-3; + + vec_validate (vum->cpus, tm->n_vlib_mains - 1); + + vhost_cpu_t *cpu; + vec_foreach (cpu, vum->cpus) + { + /* This is actually not necessary as validate already zeroes it + * Just keeping the loop here for later because I am lazy. */ + cpu->rx_buffers_len = 0; + } + + /* find out which cpus will be used for input */ + vum->input_cpu_first_index = 0; + vum->input_cpu_count = 1; + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? 
(vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + vum->input_cpu_first_index = tr->first_index; + vum->input_cpu_count = tr->count; + } + + vum->random = random_default_seed (); + + return 0; +} + +VLIB_INIT_FUNCTION (vhost_user_init); + +static clib_error_t * +vhost_user_exit (vlib_main_t * vm) +{ + /* TODO cleanup */ + return 0; +} + +VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit); + +static u8 * +format_vhost_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); + vhost_user_main_t *vum = &vhost_user_main; + vhost_trace_t *t = va_arg (*va, vhost_trace_t *); + vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces, + t->device_index); + + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, vui->sw_if_index); + + uword indent = format_get_indent (s); + + s = format (s, "%U %U queue %d\n", format_white_space, indent, + format_vnet_sw_interface_name, vnm, sw, t->qid); + + s = format (s, "%U virtio flags:\n", format_white_space, indent); +#define _(n,i,st) \ + if (t->virtio_ring_flags & (1 << VIRTIO_TRACE_F_##n)) \ + s = format (s, "%U %s %s\n", format_white_space, indent, #n, st); + foreach_virtio_trace_flags +#undef _ + s = format (s, "%U virtio_net_hdr first_desc_len %u\n", + format_white_space, indent, t->first_desc_len); + + s = format (s, "%U flags 0x%02x gso_type %u\n", + format_white_space, indent, + t->hdr.hdr.flags, t->hdr.hdr.gso_type); + + if (vui->virtio_net_hdr_sz == 12) + s = format (s, "%U num_buff %u", + format_white_space, indent, t->hdr.num_buffers); + + return s; +} + +void +vhost_user_rx_trace (vhost_trace_t * t, + vhost_user_intf_t * vui, u16 qid, + vlib_buffer_t * b, vhost_user_vring_t * txvq) +{ + vhost_user_main_t *vum = &vhost_user_main; + u32 qsz_mask = txvq->qsz - 1; + u32 last_avail_idx = txvq->last_avail_idx; + u32 desc_current = txvq->avail->ring[last_avail_idx & qsz_mask]; + vring_desc_t *hdr_desc = 0; + virtio_net_hdr_mrg_rxbuf_t *hdr; + u32 hint = 0; + + memset (t, 0, sizeof (*t)); + t->device_index = vui - vum->vhost_user_interfaces; + t->qid = qid; + + hdr_desc = &txvq->desc[desc_current]; + if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT) + { + t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT; + /* Header is the first here */ + hdr_desc = map_guest_mem (vui, txvq->desc[desc_current].addr, &hint); + } + if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) + { + t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED; + } + if (!(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) && + !(txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)) + { + t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC; + } + + t->first_desc_len = hdr_desc ? hdr_desc->len : 0; + + if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint))) + { + t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR; + } + else + { + u32 len = vui->virtio_net_hdr_sz; + memcpy (&t->hdr, hdr, len > hdr_desc->len ? 
hdr_desc->len : len); + } +} + +static inline void +vhost_user_send_call (vlib_main_t * vm, vhost_user_vring_t * vq) +{ + vhost_user_main_t *vum = &vhost_user_main; + u64 x = 1; + int fd = UNIX_GET_FD (vq->callfd_idx); + int rv __attribute__ ((unused)); + /* TODO: pay attention to rv */ + rv = write (fd, &x, sizeof (x)); + vq->n_since_last_int = 0; + vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time; +} + +static_always_inline u32 +vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy, + u16 copy_len, u32 * map_hint) +{ + void *src0, *src1, *src2, *src3; + if (PREDICT_TRUE (copy_len >= 4)) + { + if (PREDICT_FALSE (!(src2 = map_guest_mem (vui, cpy[0].src, map_hint)))) + return 1; + if (PREDICT_FALSE (!(src3 = map_guest_mem (vui, cpy[1].src, map_hint)))) + return 1; + + while (PREDICT_TRUE (copy_len >= 4)) + { + src0 = src2; + src1 = src3; + + if (PREDICT_FALSE + (!(src2 = map_guest_mem (vui, cpy[2].src, map_hint)))) + return 1; + if (PREDICT_FALSE + (!(src3 = map_guest_mem (vui, cpy[3].src, map_hint)))) + return 1; + + CLIB_PREFETCH (src2, 64, LOAD); + CLIB_PREFETCH (src3, 64, LOAD); + + clib_memcpy ((void *) cpy[0].dst, src0, cpy[0].len); + clib_memcpy ((void *) cpy[1].dst, src1, cpy[1].len); + copy_len -= 2; + cpy += 2; + } + } + while (copy_len) + { + if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint)))) + return 1; + clib_memcpy ((void *) cpy->dst, src0, cpy->len); + copy_len -= 1; + cpy += 1; + } + return 0; +} + +/** + * Try to discard packets from the tx ring (VPP RX path). + * Returns the number of discarded packets. + */ +u32 +vhost_user_rx_discard_packet (vlib_main_t * vm, + vhost_user_intf_t * vui, + vhost_user_vring_t * txvq, u32 discard_max) +{ + /* + * On the RX side, each packet corresponds to one descriptor + * (it is the same whether it is a shallow descriptor, chained, or indirect). + * Therefore, discarding a packet is like discarding a descriptor. + */ + u32 discarded_packets = 0; + u32 avail_idx = txvq->avail->idx; + u16 qsz_mask = txvq->qsz - 1; + while (discarded_packets != discard_max) + { + if (avail_idx == txvq->last_avail_idx) + goto out; + + u16 desc_chain_head = + txvq->avail->ring[txvq->last_avail_idx & qsz_mask]; + txvq->last_avail_idx++; + txvq->used->ring[txvq->last_used_idx & qsz_mask].id = desc_chain_head; + txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0; + vhost_user_log_dirty_ring (vui, txvq, + ring[txvq->last_used_idx & qsz_mask]); + txvq->last_used_idx++; + discarded_packets++; + } + +out: + CLIB_MEMORY_BARRIER (); + txvq->used->idx = txvq->last_used_idx; + vhost_user_log_dirty_ring (vui, txvq, idx); + return discarded_packets; +} + +/* + * In case of overflow, we need to rewind the array of allocated buffers. 
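+ * Buffers are handed out from the tail of the per-cpu rx_buffers
+ * vector (rx_buffers_len is decremented per buffer), so rewinding
+ * walks the chain from b_head and increments rx_buffers_len back
+ * for every buffer returned.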
+ */ +static void +vhost_user_input_rewind_buffers (vlib_main_t * vm, + vhost_cpu_t * cpu, vlib_buffer_t * b_head) +{ + u32 bi_current = cpu->rx_buffers[cpu->rx_buffers_len]; + vlib_buffer_t *b_current = vlib_get_buffer (vm, bi_current); + b_current->current_length = 0; + b_current->flags = 0; + while (b_current != b_head) + { + cpu->rx_buffers_len++; + bi_current = cpu->rx_buffers[cpu->rx_buffers_len]; + b_current = vlib_get_buffer (vm, bi_current); + b_current->current_length = 0; + b_current->flags = 0; + } +} + +static u32 +vhost_user_if_input (vlib_main_t * vm, + vhost_user_main_t * vum, + vhost_user_intf_t * vui, + u16 qid, vlib_node_runtime_t * node) +{ + vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; + u16 n_rx_packets = 0; + u32 n_rx_bytes = 0; + u16 n_left; + u32 n_left_to_next, *to_next; + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + u32 n_trace = vlib_get_trace_count (vm, node); + u16 qsz_mask; + u32 map_hint = 0; + u16 cpu_index = os_get_cpu_number (); + u16 copy_len = 0; + + { + /* do we have pending interrupts ? */ + vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)]; + f64 now = vlib_time_now (vm); + + if ((txvq->n_since_last_int) && (txvq->int_deadline < now)) + vhost_user_send_call (vm, txvq); + + if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now)) + vhost_user_send_call (vm, rxvq); + } + + if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE)) + return 0; + + n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx); + + /* nothing to do */ + if (PREDICT_FALSE (n_left == 0)) + return 0; + + if (PREDICT_FALSE (!vui->admin_up || !(txvq->enabled))) + { + /* + * Discard input packet if interface is admin down or vring is not + * enabled. + * "For example, for a networking device, in the disabled state + * client must not supply any new RX packets, but must process + * and discard any TX packets." + */ + vhost_user_rx_discard_packet (vm, vui, txvq, + VHOST_USER_DOWN_DISCARD_COUNT); + return 0; + } + + if (PREDICT_FALSE (n_left == txvq->qsz)) + { + /* + * Informational error logging when VPP is not + * receiving packets fast enough. + */ + vlib_error_count (vm, node->node_index, + VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1); + } + + qsz_mask = txvq->qsz - 1; + + if (n_left > VLIB_FRAME_SIZE) + n_left = VLIB_FRAME_SIZE; + + /* + * For small packets (<2kB), we will not need more than one vlib buffer + * per packet. In case packets are bigger, we will just yeld at some point + * in the loop and come back later. This is not an issue as for big packet, + * processing cost really comes from the memory copy. + */ + if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len < n_left + 1)) + { + u32 curr_len = vum->cpus[cpu_index].rx_buffers_len; + vum->cpus[cpu_index].rx_buffers_len += + vlib_buffer_alloc_from_free_list (vm, + vum->cpus[cpu_index].rx_buffers + + curr_len, + VHOST_USER_RX_BUFFERS_N - curr_len, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + if (PREDICT_FALSE + (vum->cpus[cpu_index].rx_buffers_len < + VHOST_USER_RX_BUFFER_STARVATION)) + { + /* In case of buffer starvation, discard some packets from the queue + * and log the event. + * We keep doing best effort for the remaining packets. */ + u32 flush = (n_left + 1 > vum->cpus[cpu_index].rx_buffers_len) ? + n_left + 1 - vum->cpus[cpu_index].rx_buffers_len : 1; + flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush); + + n_left -= flush; + vlib_increment_simple_counter (vnet_main. 
+ interface_main.sw_if_counters + + VNET_INTERFACE_COUNTER_DROP, + os_get_cpu_number (), + vui->sw_if_index, flush); + + vlib_error_count (vm, vhost_user_input_node.index, + VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush); + } + } + + while (n_left > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left > 0 && n_left_to_next > 0) + { + vlib_buffer_t *b_head, *b_current; + u32 bi_current; + u16 desc_current; + u32 desc_data_offset; + vring_desc_t *desc_table = txvq->desc; + + if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len <= 1)) + { + /* Not enough rx_buffers + * Note: We yeld on 1 so we don't need to do an additional + * check for the next buffer prefetch. + */ + n_left = 0; + break; + } + + desc_current = txvq->avail->ring[txvq->last_avail_idx & qsz_mask]; + vum->cpus[cpu_index].rx_buffers_len--; + bi_current = (vum->cpus[cpu_index].rx_buffers) + [vum->cpus[cpu_index].rx_buffers_len]; + b_head = b_current = vlib_get_buffer (vm, bi_current); + to_next[0] = bi_current; //We do that now so we can forget about bi_current + to_next++; + n_left_to_next--; + + vlib_prefetch_buffer_with_index (vm, + (vum->cpus[cpu_index].rx_buffers) + [vum->cpus[cpu_index]. + rx_buffers_len - 1], LOAD); + + /* Just preset the used descriptor id and length for later */ + txvq->used->ring[txvq->last_used_idx & qsz_mask].id = desc_current; + txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0; + vhost_user_log_dirty_ring (vui, txvq, + ring[txvq->last_used_idx & qsz_mask]); + + /* The buffer should already be initialized */ + b_head->total_length_not_including_first_buffer = 0; + b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + if (PREDICT_FALSE (n_trace)) + { + //TODO: next_index is not exactly known at that point + vlib_trace_buffer (vm, node, next_index, b_head, + /* follow_chain */ 0); + vhost_trace_t *t0 = + vlib_add_trace (vm, node, b_head, sizeof (t0[0])); + vhost_user_rx_trace (t0, vui, qid, b_head, txvq); + n_trace--; + vlib_set_trace_count (vm, node, n_trace); + } + + /* This depends on the setup but is very consistent + * So I think the CPU branch predictor will make a pretty good job + * at optimizing the decision. */ + if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT) + { + desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr, + &map_hint); + desc_current = 0; + if (PREDICT_FALSE (desc_table == 0)) + { + //FIXME: Handle error by shutdown the queue + goto out; + } + } + + if (PREDICT_TRUE (vui->is_any_layout) || + (!(desc_table[desc_current].flags & VIRTQ_DESC_F_NEXT))) + { + /* ANYLAYOUT or single buffer */ + desc_data_offset = vui->virtio_net_hdr_sz; + } + else + { + /* CSR case without ANYLAYOUT, skip 1st buffer */ + desc_data_offset = desc_table[desc_current].len; + } + + while (1) + { + /* Get more input if necessary. Or end of packet. */ + if (desc_data_offset == desc_table[desc_current].len) + { + if (PREDICT_FALSE (desc_table[desc_current].flags & + VIRTQ_DESC_F_NEXT)) + { + desc_current = desc_table[desc_current].next; + desc_data_offset = 0; + } + else + { + goto out; + } + } + + /* Get more output if necessary. Or end of packet. */ + if (PREDICT_FALSE + (b_current->current_length == VLIB_BUFFER_DATA_SIZE)) + { + if (PREDICT_FALSE + (vum->cpus[cpu_index].rx_buffers_len == 0)) + { + /* + * Checking if there are some left buffers. + * If not, just rewind the used buffers and stop. + * Note: Scheduled copies are not cancelled. This is + * not an issue as they would still be valid. Useless, + * but valid. 
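+                   * The rewound vlib buffers themselves stay allocated
+                   * (only rx_buffers_len moves back), so a pending copy
+                   * still targets memory that VPP owns.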
+ */ + vhost_user_input_rewind_buffers (vm, + &vum->cpus[cpu_index], + b_head); + n_left = 0; + goto stop; + } + + /* Get next output */ + vum->cpus[cpu_index].rx_buffers_len--; + u32 bi_next = + (vum->cpus[cpu_index].rx_buffers)[vum->cpus + [cpu_index].rx_buffers_len]; + b_current->next_buffer = bi_next; + b_current->flags |= VLIB_BUFFER_NEXT_PRESENT; + bi_current = bi_next; + b_current = vlib_get_buffer (vm, bi_current); + } + + /* Prepare a copy order executed later for the data */ + vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len]; + copy_len++; + u32 desc_data_l = + desc_table[desc_current].len - desc_data_offset; + cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length; + cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len; + cpy->dst = (uword) vlib_buffer_get_current (b_current); + cpy->src = desc_table[desc_current].addr + desc_data_offset; + + desc_data_offset += cpy->len; + + b_current->current_length += cpy->len; + b_head->total_length_not_including_first_buffer += cpy->len; + } + + out: + CLIB_PREFETCH (&n_left, sizeof (n_left), LOAD); + + n_rx_bytes += b_head->total_length_not_including_first_buffer; + n_rx_packets++; + + b_head->total_length_not_including_first_buffer -= + b_head->current_length; + + /* consume the descriptor and return it as used */ + txvq->last_avail_idx++; + txvq->last_used_idx++; + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head); + + vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index; + vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0; + b_head->error = 0; + + { + u32 next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + + /* redirect if feature path enabled */ + vnet_feature_start_device_input_x1 (vui->sw_if_index, &next0, + b_head, 0); + + u32 bi = to_next[-1]; //Cannot use to_next[-1] in the macro + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi, next0); + } + + n_left--; + + /* + * Although separating memory copies from virtio ring parsing + * is beneficial, we can offer to perform the copies from time + * to time in order to free some space in the ring. 
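+	   * VHOST_USER_RX_COPY_THRESHOLD (64, defined at the top of this
+	   * file) bounds the number of outstanding copy orders; flushing
+	   * also publishes used->idx so the guest can reclaim its
+	   * descriptors sooner.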
+ */ + if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD)) + { + if (PREDICT_FALSE + (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy, + copy_len, &map_hint))) + { + clib_warning + ("Memory mapping error on interface hw_if_index=%d " + "(Shutting down - Switch interface down and up to restart)", + vui->hw_if_index); + vui->admin_up = 0; + copy_len = 0; + break; + } + copy_len = 0; + + /* give buffers back to driver */ + CLIB_MEMORY_BARRIER (); + txvq->used->idx = txvq->last_used_idx; + vhost_user_log_dirty_ring (vui, txvq, idx); + } + } + stop: + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Do the memory copies */ + if (PREDICT_FALSE + (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy, + copy_len, &map_hint))) + { + clib_warning ("Memory mapping error on interface hw_if_index=%d " + "(Shutting down - Switch interface down and up to restart)", + vui->hw_if_index); + vui->admin_up = 0; + } + + /* give buffers back to driver */ + CLIB_MEMORY_BARRIER (); + txvq->used->idx = txvq->last_used_idx; + vhost_user_log_dirty_ring (vui, txvq, idx); + + /* interrupt (call) handling */ + if ((txvq->callfd_idx != ~0) && !(txvq->avail->flags & 1)) + { + txvq->n_since_last_int += n_rx_packets; + + if (txvq->n_since_last_int > vum->coalesce_frames) + vhost_user_send_call (vm, txvq); + } + + /* increase rx counters */ + vlib_increment_combined_counter + (vnet_main.interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + os_get_cpu_number (), vui->sw_if_index, n_rx_packets, n_rx_bytes); + + return n_rx_packets; +} + +static uword +vhost_user_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * f) +{ + vhost_user_main_t *vum = &vhost_user_main; + uword n_rx_packets = 0; + u32 cpu_index = os_get_cpu_number (); + + + vhost_iface_and_queue_t *vhiq; + vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues) + { + vhost_user_intf_t *vui = + &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; + n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node); + } + + return n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vhost_user_input_node) = { + .function = vhost_user_input, + .type = VLIB_NODE_TYPE_INPUT, + .name = "vhost-user-input", + .sibling_of = "device-input", + + /* Will be enabled if/when hardware is detected. 
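+     The node stays disabled until vhost_user_rx_thread_placement()
+     switches it to polling state on the cpus that were assigned
+     rx queues.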
*/ + .state = VLIB_NODE_STATE_DISABLED, + + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_vhost_trace, + + .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR, + .error_strings = vhost_user_input_func_error_strings, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (vhost_user_input_node, vhost_user_input) +/* *INDENT-ON* */ + + +void +vhost_user_tx_trace (vhost_trace_t * t, + vhost_user_intf_t * vui, u16 qid, + vlib_buffer_t * b, vhost_user_vring_t * rxvq) +{ + vhost_user_main_t *vum = &vhost_user_main; + u32 qsz_mask = rxvq->qsz - 1; + u32 last_avail_idx = rxvq->last_avail_idx; + u32 desc_current = rxvq->avail->ring[last_avail_idx & qsz_mask]; + vring_desc_t *hdr_desc = 0; + u32 hint = 0; + + memset (t, 0, sizeof (*t)); + t->device_index = vui - vum->vhost_user_interfaces; + t->qid = qid; + + hdr_desc = &rxvq->desc[desc_current]; + if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT) + { + t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT; + /* Header is the first here */ + hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint); + } + if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) + { + t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED; + } + if (!(rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) && + !(rxvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)) + { + t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC; + } + + t->first_desc_len = hdr_desc ? hdr_desc->len : 0; +} + +static_always_inline u32 +vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy, + u16 copy_len, u32 * map_hint) +{ + void *dst0, *dst1, *dst2, *dst3; + if (PREDICT_TRUE (copy_len >= 4)) + { + if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint)))) + return 1; + if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint)))) + return 1; + while (PREDICT_TRUE (copy_len >= 4)) + { + dst0 = dst2; + dst1 = dst3; + + if (PREDICT_FALSE + (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint)))) + return 1; + if (PREDICT_FALSE + (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint)))) + return 1; + + CLIB_PREFETCH ((void *) cpy[2].src, 64, LOAD); + CLIB_PREFETCH ((void *) cpy[3].src, 64, LOAD); + + clib_memcpy (dst0, (void *) cpy[0].src, cpy[0].len); + clib_memcpy (dst1, (void *) cpy[1].src, cpy[1].len); + + vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1); + vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1); + copy_len -= 2; + cpy += 2; + } + } + while (copy_len) + { + if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint)))) + return 1; + clib_memcpy (dst0, (void *) cpy->src, cpy->len); + vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1); + copy_len -= 1; + cpy += 1; + } + return 0; +} + + +static uword +vhost_user_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 *buffers = vlib_frame_args (frame); + u32 n_left = frame->n_vectors; + vhost_user_main_t *vum = &vhost_user_main; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + vhost_user_intf_t *vui = + pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance); + u32 qid = ~0; + vhost_user_vring_t *rxvq; + u16 qsz_mask; + u8 error; + u32 cpu_index = os_get_cpu_number (); + u32 map_hint = 0; + u8 retry = 8; + u16 copy_len; + u16 tx_headers_len; + + if (PREDICT_FALSE (!vui->admin_up)) + { + error = VHOST_USER_TX_FUNC_ERROR_DOWN; + goto done3; + } + + if (PREDICT_FALSE (!vui->is_up)) + { + error = VHOST_USER_TX_FUNC_ERROR_NOT_READY; + goto done3; + } + + qid = + VHOST_VRING_IDX_RX 
(*vec_elt_at_index + (vui->per_cpu_tx_qid, os_get_cpu_number ())); + rxvq = &vui->vrings[qid]; + if (PREDICT_FALSE (vui->use_tx_spinlock)) + vhost_user_vring_lock (vui, qid); + + qsz_mask = rxvq->qsz - 1; /* qsz is always power of 2 */ + +retry: + error = VHOST_USER_TX_FUNC_ERROR_NONE; + tx_headers_len = 0; + copy_len = 0; + while (n_left > 0) + { + vlib_buffer_t *b0, *current_b0; + u16 desc_head, desc_index, desc_len; + vring_desc_t *desc_table; + uword buffer_map_addr; + u32 buffer_len; + u16 bytes_left; + + if (PREDICT_TRUE (n_left > 1)) + vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD); + + b0 = vlib_get_buffer (vm, buffers[0]); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vum->cpus[cpu_index].current_trace = + vlib_add_trace (vm, node, b0, + sizeof (*vum->cpus[cpu_index].current_trace)); + vhost_user_tx_trace (vum->cpus[cpu_index].current_trace, + vui, qid / 2, b0, rxvq); + } + + if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx)) + { + error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF; + goto done; + } + + desc_table = rxvq->desc; + desc_head = desc_index = + rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask]; + + /* Go deeper in case of indirect descriptor + * I don't know of any driver providing indirect for RX. */ + if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT)) + { + if (PREDICT_FALSE + (rxvq->desc[desc_head].len < sizeof (vring_desc_t))) + { + error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW; + goto done; + } + if (PREDICT_FALSE + (!(desc_table = + map_guest_mem (vui, rxvq->desc[desc_index].addr, + &map_hint)))) + { + error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL; + goto done; + } + desc_index = 0; + } + + desc_len = vui->virtio_net_hdr_sz; + buffer_map_addr = desc_table[desc_index].addr; + buffer_len = desc_table[desc_index].len; + + { + // Get a header from the header array + virtio_net_hdr_mrg_rxbuf_t *hdr = + &vum->cpus[cpu_index].tx_headers[tx_headers_len]; + tx_headers_len++; + hdr->hdr.flags = 0; + hdr->hdr.gso_type = 0; + hdr->num_buffers = 1; //This is local, no need to check + + // Prepare a copy order executed later for the header + vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len]; + copy_len++; + cpy->len = vui->virtio_net_hdr_sz; + cpy->dst = buffer_map_addr; + cpy->src = (uword) hdr; + } + + buffer_map_addr += vui->virtio_net_hdr_sz; + buffer_len -= vui->virtio_net_hdr_sz; + bytes_left = b0->current_length; + current_b0 = b0; + while (1) + { + if (buffer_len == 0) + { //Get new output + if (desc_table[desc_index].flags & VIRTQ_DESC_F_NEXT) + { + //Next one is chained + desc_index = desc_table[desc_index].next; + buffer_map_addr = desc_table[desc_index].addr; + buffer_len = desc_table[desc_index].len; + } + else if (vui->virtio_net_hdr_sz == 12) //MRG is available + { + virtio_net_hdr_mrg_rxbuf_t *hdr = + &vum->cpus[cpu_index].tx_headers[tx_headers_len - 1]; + + //Move from available to used buffer + rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id = + desc_head; + rxvq->used->ring[rxvq->last_used_idx & qsz_mask].len = + desc_len; + vhost_user_log_dirty_ring (vui, rxvq, + ring[rxvq->last_used_idx & + qsz_mask]); + + rxvq->last_avail_idx++; + rxvq->last_used_idx++; + hdr->num_buffers++; + desc_len = 0; + + if (PREDICT_FALSE + (rxvq->last_avail_idx == rxvq->avail->idx)) + { + //Dequeue queued descriptors for this packet + rxvq->last_used_idx -= hdr->num_buffers - 1; + rxvq->last_avail_idx -= hdr->num_buffers - 1; + error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF; + goto done; + } + + desc_table = 
rxvq->desc; + desc_head = desc_index = + rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask]; + if (PREDICT_FALSE + (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT)) + { + //It is seriously unlikely that a driver will put indirect descriptor + //after non-indirect descriptor. + if (PREDICT_FALSE + (rxvq->desc[desc_head].len < sizeof (vring_desc_t))) + { + error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW; + goto done; + } + if (PREDICT_FALSE + (!(desc_table = + map_guest_mem (vui, + rxvq->desc[desc_index].addr, + &map_hint)))) + { + error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL; + goto done; + } + desc_index = 0; + } + buffer_map_addr = desc_table[desc_index].addr; + buffer_len = desc_table[desc_index].len; + } + else + { + error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG; + goto done; + } + } + + { + vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len]; + copy_len++; + cpy->len = bytes_left; + cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len; + cpy->dst = buffer_map_addr; + cpy->src = (uword) vlib_buffer_get_current (current_b0) + + current_b0->current_length - bytes_left; + + bytes_left -= cpy->len; + buffer_len -= cpy->len; + buffer_map_addr += cpy->len; + desc_len += cpy->len; + + CLIB_PREFETCH (&rxvq->desc, CLIB_CACHE_LINE_BYTES, LOAD); + } + + // Check if vlib buffer has more data. If not, get more or break. + if (PREDICT_TRUE (!bytes_left)) + { + if (PREDICT_FALSE + (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + current_b0 = vlib_get_buffer (vm, current_b0->next_buffer); + bytes_left = current_b0->current_length; + } + else + { + //End of packet + break; + } + } + } + + //Move from available to used ring + rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id = desc_head; + rxvq->used->ring[rxvq->last_used_idx & qsz_mask].len = desc_len; + vhost_user_log_dirty_ring (vui, rxvq, + ring[rxvq->last_used_idx & qsz_mask]); + rxvq->last_avail_idx++; + rxvq->last_used_idx++; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vum->cpus[cpu_index].current_trace->hdr = + vum->cpus[cpu_index].tx_headers[tx_headers_len - 1]; + } + + n_left--; //At the end for error counting when 'goto done' is invoked + buffers++; + } + +done: + //Do the memory copies + if (PREDICT_FALSE + (vhost_user_tx_copy (vui, vum->cpus[cpu_index].copy, + copy_len, &map_hint))) + { + clib_warning ("Memory mapping error on interface hw_if_index=%d " + "(Shutting down - Switch interface down and up to restart)", + vui->hw_if_index); + vui->admin_up = 0; + } + + CLIB_MEMORY_BARRIER (); + rxvq->used->idx = rxvq->last_used_idx; + vhost_user_log_dirty_ring (vui, rxvq, idx); + + /* + * When n_left is set, error is always set to something too. + * In case error is due to lack of remaining buffers, we go back up and + * retry. + * The idea is that it is better to waste some time on packets + * that have been processed already than dropping them and get + * more fresh packets with a good likelyhood that they will be dropped too. + * This technique also gives more time to VM driver to pick-up packets. + * In case the traffic flows from physical to virtual interfaces, this + * technique will end-up leveraging the physical NIC buffer in order to + * absorb the VM's CPU jitter. 
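+ * Retries are bounded (retry starts at 8 above), so a guest that
+ * never drains its ring cannot stall the worker thread indefinitely.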
+ */ + if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry) + { + retry--; + goto retry; + } + + /* interrupt (call) handling */ + if ((rxvq->callfd_idx != ~0) && !(rxvq->avail->flags & 1)) + { + rxvq->n_since_last_int += frame->n_vectors - n_left; + + if (rxvq->n_since_last_int > vum->coalesce_frames) + vhost_user_send_call (vm, rxvq); + } + + vhost_user_vring_unlock (vui, qid); + +done3: + if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE)) + { + vlib_error_count (vm, node->node_index, error, n_left); + vlib_increment_simple_counter + (vnet_main.interface_main.sw_if_counters + + VNET_INTERFACE_COUNTER_DROP, + os_get_cpu_number (), vui->sw_if_index, n_left); + } + + vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); + return frame->n_vectors; +} + +static clib_error_t * +vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, + u32 flags) +{ + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui = + pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance); + + vui->admin_up = is_up; + + if (is_up) + vnet_hw_interface_set_flags (vnm, vui->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + + return /* no error */ 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (vhost_user_dev_class,static) = { + .name = "vhost-user", + .tx_function = vhost_user_tx, + .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR, + .tx_function_error_strings = vhost_user_tx_func_error_strings, + .format_device_name = format_vhost_user_interface_name, + .name_renumber = vhost_user_name_renumber, + .admin_up_down_function = vhost_user_interface_admin_up_down, + .format_tx_trace = format_vhost_trace, +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (vhost_user_dev_class, + vhost_user_tx) +/* *INDENT-ON* */ + +static uword +vhost_user_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui; + struct sockaddr_un sun; + int sockfd; + unix_file_t template = { 0 }; + f64 timeout = 3153600000.0 /* 100 years */ ; + uword *event_data = 0; + + sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + sun.sun_family = AF_UNIX; + template.read_function = vhost_user_socket_read; + template.error_function = vhost_user_socket_error; + + if (sockfd < 0) + return 0; + + while (1) + { + vlib_process_wait_for_event_or_clock (vm, timeout); + vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + timeout = 3.0; + + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + + if (vui->unix_server_index == ~0) { //Nothing to do for server sockets + if (vui->unix_file_index == ~0) + { + /* try to connect */ + strncpy (sun.sun_path, (char *) vui->sock_filename, + sizeof (sun.sun_path) - 1); + + if (connect (sockfd, (struct sockaddr *) &sun, + sizeof (struct sockaddr_un)) == 0) + { + vui->sock_errno = 0; + template.file_descriptor = sockfd; + template.private_data = + vui - vhost_user_main.vhost_user_interfaces; + vui->unix_file_index = unix_file_add (&unix_main, &template); + + //Re-open for next connect + if ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) { + clib_warning("Critical: Could not open unix socket"); + return 0; + } + } + else + { + vui->sock_errno = errno; + } + } + else + { + /* check if socket is alive */ + int error = 0; + socklen_t len = sizeof (error); + int fd = 
UNIX_GET_FD(vui->unix_file_index); + int retval = + getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len); + + if (retval) + { + DBG_SOCK ("getsockopt returned %d", retval); + vhost_user_if_disconnect (vui); + } + } + } + }); + /* *INDENT-ON* */ + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vhost_user_process_node,static) = { + .function = vhost_user_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "vhost-user-process", +}; +/* *INDENT-ON* */ + +/** + * Disables and resets the interface structure. + * It can then be either initialized again, or removed from the set of + * used interfaces. + */ +static void +vhost_user_term_if (vhost_user_intf_t * vui) +{ + // Delete configured thread pinning + vec_reset_length (vui->workers); + // disconnect interface sockets + vhost_user_if_disconnect (vui); + vhost_user_update_iface_state (vui); + + if (vui->unix_server_index != ~0) + { + //Close server socket + unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + vui->unix_server_index); + unix_file_del (&unix_main, uf); + vui->unix_server_index = ~0; + } +} + +int +vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) +{ + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui; + int rv = 0; + vnet_hw_interface_t *hwif; + + if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) || + hwif->dev_class_index != vhost_user_dev_class.index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + DBG_SOCK ("Deleting vhost-user interface %s (instance %d)", + hwif->name, hwif->dev_instance); + + vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance); + + // Disable and reset interface + vhost_user_term_if (vui); + + // Back to pool + pool_put (vum->vhost_user_interfaces, vui); + + // Reset renumbered iface + if (hwif->dev_instance < + vec_len (vum->show_dev_instance_by_real_dev_instance)) + vum->show_dev_instance_by_real_dev_instance[hwif->dev_instance] = ~0; + + // Delete ethernet interface + ethernet_delete_interface (vnm, vui->hw_if_index); + return rv; +} + +/** + * Open server unix socket on specified sock_filename. + */ +static int +vhost_user_init_server_sock (const char *sock_filename, int *sock_fd) +{ + int rv = 0; + struct sockaddr_un un = { }; + int fd; + /* create listening socket */ + if ((fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) + return VNET_API_ERROR_SYSCALL_ERROR_1; + + un.sun_family = AF_UNIX; + strncpy ((char *) un.sun_path, (char *) sock_filename, + sizeof (un.sun_path) - 1); + + /* remove if exists */ + unlink ((char *) sock_filename); + + if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_2; + goto error; + } + + if (listen (fd, 1) == -1) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_3; + goto error; + } + + *sock_fd = fd; + return 0; + +error: + close (fd); + return rv; +} + +/** + * Create ethernet interface for vhost user interface.
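For reference, the connect side of the socket created by vhost_user_init_server_sock() above (normally performed by a frontend such as QEMU) is a plain AF_UNIX stream connect. A minimal sketch, with error handling reduced to the essentials:

#include <sys/socket.h>
#include <sys/un.h>
#include <string.h>
#include <unistd.h>

/* Connect to a vhost-user server socket; returns the fd, or -1 on error. */
static int
vhost_client_connect (const char *path)
{
  struct sockaddr_un sun = {.sun_family = AF_UNIX };
  int fd = socket (AF_UNIX, SOCK_STREAM, 0);

  if (fd < 0)
    return -1;
  strncpy (sun.sun_path, path, sizeof (sun.sun_path) - 1);
  if (connect (fd, (struct sockaddr *) &sun, sizeof (sun)) < 0)
    {
      close (fd);
      return -1;
    }
  return fd;
}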
+ */ +static void +vhost_user_create_ethernet (vnet_main_t * vnm, vlib_main_t * vm, + vhost_user_intf_t * vui, u8 * hwaddress) +{ + vhost_user_main_t *vum = &vhost_user_main; + u8 hwaddr[6]; + clib_error_t *error; + + /* create hw and sw interface */ + if (hwaddress) + { + clib_memcpy (hwaddr, hwaddress, 6); + } + else + { + random_u32 (&vum->random); + clib_memcpy (hwaddr + 2, &vum->random, sizeof (vum->random)); + hwaddr[0] = 2; + hwaddr[1] = 0xfe; + } + + error = ethernet_register_interface + (vnm, + vhost_user_dev_class.index, + vui - vum->vhost_user_interfaces /* device instance */ , + hwaddr /* ethernet address */ , + &vui->hw_if_index, 0 /* flag change */ ); + + if (error) + clib_error_report (error); + + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, vui->hw_if_index); + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000; +} + +/* + * Initialize vui with specified attributes + */ +static void +vhost_user_vui_init (vnet_main_t * vnm, + vhost_user_intf_t * vui, + int server_sock_fd, + const char *sock_filename, + u64 feature_mask, u32 * sw_if_index) +{ + vnet_sw_interface_t *sw; + sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); + int q; + + if (server_sock_fd != -1) + { + unix_file_t template = { 0 }; + template.read_function = vhost_user_socksvr_accept_ready; + template.file_descriptor = server_sock_fd; + template.private_data = vui - vhost_user_main.vhost_user_interfaces; //hw index + vui->unix_server_index = unix_file_add (&unix_main, &template); + } + else + { + vui->unix_server_index = ~0; + } + + vui->sw_if_index = sw->sw_if_index; + strncpy (vui->sock_filename, sock_filename, + ARRAY_LEN (vui->sock_filename) - 1); + vui->sock_errno = 0; + vui->is_up = 0; + vui->feature_mask = feature_mask; + vui->unix_file_index = ~0; + vui->log_base_addr = 0; + + for (q = 0; q < VHOST_VRING_MAX_N; q++) + vhost_user_vring_init (vui, q); + + vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); + + if (sw_if_index) + *sw_if_index = vui->sw_if_index; + + for (q = 0; q < VHOST_VRING_MAX_N; q++) + { + vui->vring_locks[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) vui->vring_locks[q], 0, CLIB_CACHE_LINE_BYTES); + } + + vec_validate (vui->per_cpu_tx_qid, + vlib_get_thread_main ()->n_vlib_mains - 1); + vhost_user_tx_thread_placement (vui); +} + +int +vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, + const char *sock_filename, + u8 is_server, + u32 * sw_if_index, + u64 feature_mask, + u8 renumber, u32 custom_dev_instance, u8 * hwaddr) +{ + vhost_user_intf_t *vui = NULL; + u32 sw_if_idx = ~0; + int rv = 0; + int server_sock_fd = -1; + + if (is_server) + { + if ((rv = + vhost_user_init_server_sock (sock_filename, &server_sock_fd)) != 0) + { + return rv; + } + } + + pool_get (vhost_user_main.vhost_user_interfaces, vui); + + vhost_user_create_ethernet (vnm, vm, vui, hwaddr); + vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename, + feature_mask, &sw_if_idx); + + if (renumber) + vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); + + if (sw_if_index) + *sw_if_index = sw_if_idx; + + // Process node must connect + vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0); + return rv; +} + +int +vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, + const char *sock_filename, + u8 is_server, + u32 sw_if_index, + u64 feature_mask, u8 renumber, u32 custom_dev_instance) +{ + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui = NULL; + u32 sw_if_idx = ~0; + int 
server_sock_fd = -1; + int rv = 0; + vnet_hw_interface_t *hwif; + + if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) || + hwif->dev_class_index != vhost_user_dev_class.index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance); + + // First try to open server socket + if (is_server) + if ((rv = vhost_user_init_server_sock (sock_filename, + &server_sock_fd)) != 0) + return rv; + + vhost_user_term_if (vui); + vhost_user_vui_init (vnm, vui, server_sock_fd, + sock_filename, feature_mask, &sw_if_idx); + + if (renumber) + vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); + + // Process node must connect + vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0); + return rv; +} + +clib_error_t * +vhost_user_connect_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 *sock_filename = NULL; + u32 sw_if_index; + u8 is_server = 0; + u64 feature_mask = (u64) ~ (0ULL); + u8 renumber = 0; + u32 custom_dev_instance = ~0; + u8 hwaddr[6]; + u8 *hw = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "socket %s", &sock_filename)) + ; + else if (unformat (line_input, "server")) + is_server = 1; + else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask)) + ; + else + if (unformat + (line_input, "hwaddr %U", unformat_ethernet_address, hwaddr)) + hw = hwaddr; + else if (unformat (line_input, "renumber %d", &custom_dev_instance)) + { + renumber = 1; + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + vnet_main_t *vnm = vnet_get_main (); + + int rv; + if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename, + is_server, &sw_if_index, feature_mask, + renumber, custom_dev_instance, hw))) + { + vec_free (sock_filename); + return clib_error_return (0, "vhost_user_create_if returned %d", rv); + } + + vec_free (sock_filename); + vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), + sw_if_index); + return 0; +} + +clib_error_t * +vhost_user_delete_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 sw_if_index = ~0; + vnet_main_t *vnm = vnet_get_main (); + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat + (line_input, "%U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + { + vnet_hw_interface_t *hwif = + vnet_get_sup_hw_interface (vnm, sw_if_index); + if (hwif == NULL || + vhost_user_dev_class.index != hwif->dev_class_index) + return clib_error_return (0, "Not a vhost interface"); + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + vhost_user_delete_if (vnm, vm, sw_if_index); + return 0; +} + +int +vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, + vhost_user_intf_details_t ** out_vuids) +{ + int rv = 0; + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui; + vhost_user_intf_details_t *r_vuids = NULL; + vhost_user_intf_details_t *vuid = NULL; + u32 *hw_if_indices = 0; + vnet_hw_interface_t *hi; + u8 *s = NULL; + int i; + + if (!out_vuids) + return -1; + + pool_foreach (vui, vum->vhost_user_interfaces, + vec_add1 (hw_if_indices, vui->hw_if_index); + ); + + for (i = 0; i < vec_len (hw_if_indices); i++) + { + hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); + vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance); + + vec_add2 (r_vuids, vuid, 1); + vuid->sw_if_index = vui->sw_if_index; + vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz; + vuid->features = vui->features; + vuid->num_regions = vui->nregions; + vuid->sock_errno = vui->sock_errno; + strncpy ((char *) vuid->sock_filename, (char *) vui->sock_filename, + ARRAY_LEN (vuid->sock_filename) - 1); + + s = format (s, "%v%c", hi->name, 0); + + strncpy ((char *) vuid->if_name, (char *) s, + ARRAY_LEN (vuid->if_name) - 1); + _vec_len (s) = 0; + } + + vec_free (s); + vec_free (hw_if_indices); + + *out_vuids = r_vuids; + + return rv; +} + +clib_error_t * +show_vhost_user_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + vnet_main_t *vnm = vnet_get_main (); + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui; + u32 hw_if_index, *hw_if_indices = 0; + vnet_hw_interface_t *hi; + vhost_cpu_t *vhc; + vhost_iface_and_queue_t *vhiq; + u32 ci; + + int i, j, q; + int show_descr = 0; + struct feat_struct + { + u8 bit; + char *str; + }; + struct feat_struct *feat_entry; + + static struct feat_struct feat_array[] = { +#define _(s,b) { .str = #s, .bit = b, }, + foreach_virtio_net_feature +#undef _ + {.str = NULL} + }; + +#define foreach_protocol_feature \ + _(VHOST_USER_PROTOCOL_F_MQ) \ + _(VHOST_USER_PROTOCOL_F_LOG_SHMFD) + + static struct feat_struct proto_feat_array[] = { +#define _(s) { .str = #s, .bit = s}, + foreach_protocol_feature +#undef _ + {.str = NULL} + }; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + { + vec_add1 (hw_if_indices, hw_if_index); + } + else if (unformat (input, "descriptors") || unformat (input, "desc")) + show_descr = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + if (vec_len (hw_if_indices) == 0) + { + pool_foreach (vui, vum->vhost_user_interfaces, + vec_add1 (hw_if_indices, vui->hw_if_index); + ); + } + vlib_cli_output (vm, "Virtio vhost-user interfaces"); + vlib_cli_output (vm, "Global:\n coalesce 
frames %d time %e", + vum->coalesce_frames, vum->coalesce_time); + + for (i = 0; i < vec_len (hw_if_indices); i++) + { + hi = vnet_get_hw_interface (vnm, hw_if_indices[i]); + vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance); + vlib_cli_output (vm, "Interface: %s (ifindex %d)", + hi->name, hw_if_indices[i]); + + vlib_cli_output (vm, "virtio_net_hdr_sz %d\n" + " features mask (0x%llx): \n" + " features (0x%llx): \n", + vui->virtio_net_hdr_sz, vui->feature_mask, + vui->features); + + feat_entry = (struct feat_struct *) &feat_array; + while (feat_entry->str) + { + if (vui->features & (1ULL << feat_entry->bit)) + vlib_cli_output (vm, " %s (%d)", feat_entry->str, + feat_entry->bit); + feat_entry++; + } + + vlib_cli_output (vm, " protocol features (0x%llx)", + vui->protocol_features); + feat_entry = (struct feat_struct *) &proto_feat_array; + while (feat_entry->str) + { + if (vui->protocol_features & (1ULL << feat_entry->bit)) + vlib_cli_output (vm, " %s (%d)", feat_entry->str, + feat_entry->bit); + feat_entry++; + } + + vlib_cli_output (vm, "\n"); + + vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n", + vui->sock_filename, + (vui->unix_server_index != ~0) ? "server" : "client", + strerror (vui->sock_errno)); + + vlib_cli_output (vm, " rx placement: "); + vec_foreach (vhc, vum->cpus) + { + vec_foreach (vhiq, vhc->rx_queues) + { + if (vhiq->vhost_iface_index == vui - vum->vhost_user_interfaces) + vlib_cli_output (vm, " thread %d on vring %d\n", + vhc - vum->cpus, VHOST_VRING_IDX_TX (vhiq->qid)); + } + } + + vlib_cli_output (vm, " tx placement: %s\n", + vui->use_tx_spinlock ? "spin-lock" : "lock-free"); + + vec_foreach_index (ci, vui->per_cpu_tx_qid) + { + vlib_cli_output (vm, " thread %d on vring %d\n", ci, + VHOST_VRING_IDX_RX (vui->per_cpu_tx_qid[ci])); + } + + vlib_cli_output (vm, "\n"); + + vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions); + + if (vui->nregions) + { + vlib_cli_output (vm, + " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n"); + vlib_cli_output (vm, + " ====== ===== ================== ================== ================== ================== ==================\n"); + } + for (j = 0; j < vui->nregions; j++) + { + vlib_cli_output (vm, + " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", + j, vui->region_mmap_fd[j], + vui->regions[j].guest_phys_addr, + vui->regions[j].memory_size, + vui->regions[j].userspace_addr, + vui->regions[j].mmap_offset, + pointer_to_uword (vui->region_mmap_addr[j])); + } + for (q = 0; q < VHOST_VRING_MAX_N; q++) + { + if (!vui->vrings[q].started) + continue; + + vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q, + (q & 1) ? "RX" : "TX", + vui->vrings[q].enabled ? 
"" : " disabled"); + + vlib_cli_output (vm, + " qsz %d last_avail_idx %d last_used_idx %d\n", + vui->vrings[q].qsz, vui->vrings[q].last_avail_idx, + vui->vrings[q].last_used_idx); + + if (vui->vrings[q].avail && vui->vrings[q].used) + vlib_cli_output (vm, + " avail.flags %x avail.idx %d used.flags %x used.idx %d\n", + vui->vrings[q].avail->flags, + vui->vrings[q].avail->idx, + vui->vrings[q].used->flags, + vui->vrings[q].used->idx); + + int kickfd = UNIX_GET_FD (vui->vrings[q].kickfd_idx); + int callfd = UNIX_GET_FD (vui->vrings[q].callfd_idx); + vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n", + kickfd, callfd, vui->vrings[q].errfd); + + if (show_descr) + { + vlib_cli_output (vm, "\n descriptor table:\n"); + vlib_cli_output (vm, + " id addr len flags next user_addr\n"); + vlib_cli_output (vm, + " ===== ================== ===== ====== ===== ==================\n"); + for (j = 0; j < vui->vrings[q].qsz; j++) + { + u32 mem_hint = 0; + vlib_cli_output (vm, + " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", + j, vui->vrings[q].desc[j].addr, + vui->vrings[q].desc[j].len, + vui->vrings[q].desc[j].flags, + vui->vrings[q].desc[j].next, + pointer_to_uword (map_guest_mem + (vui, + vui->vrings[q].desc[j]. + addr, &mem_hint))); + } + } + } + vlib_cli_output (vm, "\n"); + } +done: + vec_free (hw_if_indices); + return error; +} + +/* + * CLI functions + */ + +/*? + * Create a vHost User interface. Once created, a new virtual interface + * will exist with the name 'VirtualEthernet0/0/x', where 'x' + * is the next free index. + * + * There are several parameters associated with a vHost interface: + * + * - socket - Name of the linux socket used by QEMU/VM and + * VPP to manage the vHost interface. If socket does not already exist, VPP will + * create the socket. + * + * - server - Optional flag to indicate that VPP should be the server for the + * linux socket. If not provided, VPP will be the client. + * + * - feature-mask - Optional virtio/vhost feature set negotiated at + * startup. By default, all supported features will be advertised. Otherwise, + * provide the set of features desired. + * - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF + * - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ + * - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE + * - 0x000400000 (22) - VIRTIO_NET_F_MQ + * - 0x004000000 (26) - VHOST_F_LOG_ALL + * - 0x008000000 (27) - VIRTIO_F_ANY_LAYOUT + * - 0x010000000 (28) - VIRTIO_F_INDIRECT_DESC + * - 0x040000000 (30) - VHOST_USER_F_PROTOCOL_FEATURES + * - 0x100000000 (32) - VIRTIO_F_VERSION_1 + * + * - hwaddr - Optional ethernet address, can be in either + * X:X:X:X:X:X unix or X.X.X cisco format. + * + * - renumber - Optional parameter which allows the instance + * in the name to be specified. If instance already exists, name will be used + * anyway and multiple instances will have the same name. Use with caution. 
+ * + * @cliexpar + * Example of how to create a vhost interface with VPP as the client and all features enabled: + * @cliexstart{create vhost-user socket /tmp/vhost1.sock} + * VirtualEthernet0/0/0 + * @cliexend + * Example of how to create a vhost interface with VPP as the server and with just + * multiple queues enabled: + * @cliexstart{create vhost-user socket /tmp/vhost2.sock server feature-mask 0x40400000} + * VirtualEthernet0/0/1 + * @cliexend + * Once the vHost interface is created, enable the interface using: + * @cliexcmd{set interface state VirtualEthernet0/0/0 up} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vhost_user_connect_command, static) = { + .path = "create vhost-user", + .short_help = "create vhost-user socket [server] [feature-mask ] [hwaddr ] [renumber ]", + .function = vhost_user_connect_command_fn, +}; +/* *INDENT-ON* */ + +/*? + * Delete a vHost User interface using the interface name or the + * software interface index. Use the 'show interfaces' + * command to determine the software interface index. On deletion, + * the linux socket will not be deleted. + * + * @cliexpar + * Example of how to delete a vhost interface by name: + * @cliexcmd{delete vhost-user VirtualEthernet0/0/1} + * Example of how to delete a vhost interface by software interface index: + * @cliexcmd{delete vhost-user sw_if_index 1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vhost_user_delete_command, static) = { + .path = "delete vhost-user", + .short_help = "delete vhost-user { | sw_if_index }", + .function = vhost_user_delete_command_fn, +}; + +/*? + * Display the attributes of a single vHost User interface (provide interface + * name), multiple vHost User interfaces (provide a list of interface names separated + * by spaces) or all vHost User interfaces (omit an interface name to display all + * vHost interfaces).
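In the sample output below, the features value 0x50408000 decomposes as (1 << 15) | (1 << 22) | (1 << 28) | (1 << 30) = 0x8000 + 0x400000 + 0x10000000 + 0x40000000, matching the four feature names printed underneath it.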
+ * + * @cliexpar + * @parblock + * Example of how to display a vhost interface: + * @cliexstart{show vhost-user VirtualEthernet0/0/0} + * Virtio vhost-user interfaces + * Global: + * coalesce frames 32 time 1e-3 + * Interface: VirtualEthernet0/0/0 (ifindex 1) + * virtio_net_hdr_sz 12 + * features mask (0xffffffffffffffff): + * features (0x50408000): + * VIRTIO_NET_F_MRG_RXBUF (15) + * VIRTIO_NET_F_MQ (22) + * VIRTIO_F_INDIRECT_DESC (28) + * VHOST_USER_F_PROTOCOL_FEATURES (30) + * protocol features (0x3) + * VHOST_USER_PROTOCOL_F_MQ (0) + * VHOST_USER_PROTOCOL_F_LOG_SHMFD (1) + * + * socket filename /tmp/vhost1.sock type client errno "Success" + * + * rx placement: + * thread 1 on vring 1 + * thread 1 on vring 5 + * thread 2 on vring 3 + * thread 2 on vring 7 + * tx placement: spin-lock + * thread 0 on vring 0 + * thread 1 on vring 2 + * thread 2 on vring 0 + * + * Memory regions (total 2) + * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr + * ====== ===== ================== ================== ================== ================== ================== + * 0 60 0x0000000000000000 0x00000000000a0000 0x00002aaaaac00000 0x0000000000000000 0x00002aab2b400000 + * 1 61 0x00000000000c0000 0x000000003ff40000 0x00002aaaaacc0000 0x00000000000c0000 0x00002aababcc0000 + * + * Virtqueue 0 (TX) + * qsz 256 last_avail_idx 0 last_used_idx 0 + * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0 + * kickfd 62 callfd 64 errfd -1 + * + * Virtqueue 1 (RX) + * qsz 256 last_avail_idx 0 last_used_idx 0 + * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 + * kickfd 65 callfd 66 errfd -1 + * + * Virtqueue 2 (TX) + * qsz 256 last_avail_idx 0 last_used_idx 0 + * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0 + * kickfd 63 callfd 70 errfd -1 + * + * Virtqueue 3 (RX) + * qsz 256 last_avail_idx 0 last_used_idx 0 + * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 + * kickfd 72 callfd 74 errfd -1 + * + * Virtqueue 4 (TX disabled) + * qsz 256 last_avail_idx 0 last_used_idx 0 + * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 + * kickfd 76 callfd 78 errfd -1 + * + * Virtqueue 5 (RX disabled) + * qsz 256 last_avail_idx 0 last_used_idx 0 + * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 + * kickfd 80 callfd 82 errfd -1 + * + * Virtqueue 6 (TX disabled) + * qsz 256 last_avail_idx 0 last_used_idx 0 + * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 + * kickfd 84 callfd 86 errfd -1 + * + * Virtqueue 7 (RX disabled) + * qsz 256 last_avail_idx 0 last_used_idx 0 + * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0 + * kickfd 88 callfd 90 errfd -1 + * + * @cliexend + * + * The optional 'descriptors' parameter will display the same output as + * the previous example but will include the descriptor table for each queue. 
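A note on reading the descriptor dump that follows: each len of 2060 appears consistent with a 2048-byte data buffer plus the 12-byte virtio net header (virtio_net_hdr_sz 12 above), and in the virtio ring ABI a flags value of 0x0002 marks a device-writable descriptor, while 0x0001 and 0x0004 correspond to VIRTQ_DESC_F_NEXT and VIRTQ_DESC_F_INDIRECT as defined in vhost-user.h below.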
+ * The output is truncated below: + * @cliexstart{show vhost-user VirtualEthernet0/0/0 descriptors} + * Virtio vhost-user interfaces + * Global: + * coalesce frames 32 time 1e-3 + * Interface: VirtualEthernet0/0/0 (ifindex 1) + * virtio_net_hdr_sz 12 + * features mask (0xffffffffffffffff): + * features (0x50408000): + * VIRTIO_NET_F_MRG_RXBUF (15) + * VIRTIO_NET_F_MQ (22) + * : + * Virtqueue 0 (TX) + * qsz 256 last_avail_idx 0 last_used_idx 0 + * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0 + * kickfd 62 callfd 64 errfd -1 + * + * descriptor table: + * id addr len flags next user_addr + * ===== ================== ===== ====== ===== ================== + * 0 0x0000000010b6e974 2060 0x0002 1 0x00002aabbc76e974 + * 1 0x0000000010b6e034 2060 0x0002 2 0x00002aabbc76e034 + * 2 0x0000000010b6d6f4 2060 0x0002 3 0x00002aabbc76d6f4 + * 3 0x0000000010b6cdb4 2060 0x0002 4 0x00002aabbc76cdb4 + * 4 0x0000000010b6c474 2060 0x0002 5 0x00002aabbc76c474 + * 5 0x0000000010b6bb34 2060 0x0002 6 0x00002aabbc76bb34 + * 6 0x0000000010b6b1f4 2060 0x0002 7 0x00002aabbc76b1f4 + * 7 0x0000000010b6a8b4 2060 0x0002 8 0x00002aabbc76a8b4 + * 8 0x0000000010b69f74 2060 0x0002 9 0x00002aabbc769f74 + * 9 0x0000000010b69634 2060 0x0002 10 0x00002aabbc769634 + * 10 0x0000000010b68cf4 2060 0x0002 11 0x00002aabbc768cf4 + * : + * 249 0x0000000000000000 0 0x0000 250 0x00002aab2b400000 + * 250 0x0000000000000000 0 0x0000 251 0x00002aab2b400000 + * 251 0x0000000000000000 0 0x0000 252 0x00002aab2b400000 + * 252 0x0000000000000000 0 0x0000 253 0x00002aab2b400000 + * 253 0x0000000000000000 0 0x0000 254 0x00002aab2b400000 + * 254 0x0000000000000000 0 0x0000 255 0x00002aab2b400000 + * 255 0x0000000000000000 0 0x0000 32768 0x00002aab2b400000 + * + * Virtqueue 1 (RX) + * qsz 256 last_avail_idx 0 last_used_idx 0 + * : + * @cliexend + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_vhost_user_command, static) = { + .path = "show vhost-user", + .short_help = "show vhost-user [ [ [..]]] [descriptors]", + .function = show_vhost_user_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +vhost_user_config (vlib_main_t * vm, unformat_input_t * input) +{ + vhost_user_main_t *vum = &vhost_user_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames)) + ; + else if (unformat (input, "coalesce-time %f", &vum->coalesce_time)) + ; + else if (unformat (input, "dont-dump-memory")) + vum->dont_dump_vhost_user_memory = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + return 0; +} + +/* vhost-user { ... } configuration. */ +VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user"); + +void +vhost_user_unmap_all (void) +{ + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui; + + if (vum->dont_dump_vhost_user_memory) + { + pool_foreach (vui, vum->vhost_user_interfaces, + unmap_all_mem_regions (vui); + ); + } +} + +static clib_error_t * +vhost_thread_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 worker_thread_index; + u32 sw_if_index; + u8 del = 0; + int rv; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + if (!unformat + (line_input, "%U %d", unformat_vnet_sw_interface, vnet_get_main (), + &sw_if_index, &worker_thread_index)) + { + unformat_free (line_input); + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (unformat (line_input, "del")) + del = 1; + + if ((rv = + vhost_user_thread_placement (sw_if_index, worker_thread_index, del))) + return clib_error_return (0, "vhost_user_thread_placement returned %d", + rv); + return 0; +} + + +/*? + * This command is used to move the RX processing for the given + * interface to the provided thread. If the 'del' option is used, + * the forced thread assignment is removed and the thread is + * reassigned automatically. Use 'show vhost-user ' + * to see the thread assignment. + * + * @cliexpar + * Example of how to move the RX processing for a given interface to a given thread: + * @cliexcmd{vhost thread VirtualEthernet0/0/0 1} + * Example of how to remove the forced thread assignment for a given interface: + * @cliexcmd{vhost thread VirtualEthernet0/0/0 1 del} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vhost_user_thread_command, static) = { + .path = "vhost thread", + .short_help = "vhost thread [del]", + .function = vhost_thread_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h new file mode 100644 index 00000000..3083b614 --- /dev/null +++ b/src/vnet/devices/virtio/vhost-user.h @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __VIRTIO_VHOST_USER_H__ +#define __VIRTIO_VHOST_USER_H__ +/* vhost-user data structures */ + +#define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_USER_MSG_HDR_SZ 12 +#define VHOST_VRING_MAX_SIZE 32768 +#define VHOST_VRING_MAX_N 16 //8TX + 8RX +#define VHOST_VRING_IDX_RX(qid) (2*qid) +#define VHOST_VRING_IDX_TX(qid) (2*qid + 1) + +#define VIRTQ_DESC_F_NEXT 1 +#define VIRTQ_DESC_F_INDIRECT 4 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + +#define VHOST_USER_PROTOCOL_F_MQ 0 +#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#define VHOST_VRING_F_LOG 0 + +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD)) + +/* If multiqueue is provided by host, then we support it.
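To illustrate the ring-numbering convention established by the VHOST_VRING_IDX_RX/TX macros above: queue pair 0 maps to vrings 0 and 1, pair 1 to vrings 2 and 3, and so on, i.e. VHOST_VRING_IDX_RX(0) == 0, VHOST_VRING_IDX_TX(0) == 1, VHOST_VRING_IDX_RX(1) == 2, VHOST_VRING_IDX_TX(1) == 3. With VHOST_VRING_MAX_N of 16, this accommodates 8 queue pairs, matching the '8TX + 8RX' note.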
*/ +#define VIRTIO_NET_CTRL_MQ 4 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 + +#define VRING_USED_F_NO_NOTIFY 1 + +#define foreach_virtio_net_feature \ + _ (VIRTIO_NET_F_MRG_RXBUF, 15) \ + _ (VIRTIO_NET_F_CTRL_VQ, 17) \ + _ (VIRTIO_NET_F_GUEST_ANNOUNCE, 21) \ + _ (VIRTIO_NET_F_MQ, 22) \ + _ (VHOST_F_LOG_ALL, 26) \ + _ (VIRTIO_F_ANY_LAYOUT, 27) \ + _ (VIRTIO_F_INDIRECT_DESC, 28) \ + _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \ + _ (VIRTIO_F_VERSION_1, 32) + + +typedef enum +{ +#define _(f,n) FEAT_##f = (n), + foreach_virtio_net_feature +#undef _ +} virtio_net_feature_t; + +int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, + const char *sock_filename, u8 is_server, + u32 * sw_if_index, u64 feature_mask, + u8 renumber, u32 custom_dev_instance, u8 * hwaddr); +int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, + const char *sock_filename, u8 is_server, + u32 sw_if_index, u64 feature_mask, + u8 renumber, u32 custom_dev_instance); +int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, + u32 sw_if_index); + +/* *INDENT-OFF* */ +typedef struct vhost_user_memory_region +{ + u64 guest_phys_addr; + u64 memory_size; + u64 userspace_addr; + u64 mmap_offset; +} __attribute ((packed)) vhost_user_memory_region_t; + +typedef struct vhost_user_memory +{ + u32 nregions; + u32 padding; + vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute ((packed)) vhost_user_memory_t; + +typedef struct +{ + u32 index, num; +} __attribute ((packed)) vhost_vring_state_t; + +typedef struct +{ + u32 index, flags; + u64 desc_user_addr, used_user_addr, avail_user_addr, log_guest_addr; +} __attribute ((packed)) vhost_vring_addr_t; + +typedef struct vhost_user_log +{ + u64 size; + u64 offset; +} __attribute ((packed)) vhost_user_log_t; + +typedef enum vhost_user_req +{ + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_MAX +} vhost_user_req_t; + +// vring_desc I/O buffer descriptor +typedef struct +{ + uint64_t addr; // packet data buffer address + uint32_t len; // packet data buffer size + uint16_t flags; // (see below) + uint16_t next; // optional index next descriptor in chain +} __attribute ((packed)) vring_desc_t; + +typedef struct +{ + uint16_t flags; + volatile uint16_t idx; + uint16_t ring[VHOST_VRING_MAX_SIZE]; +} __attribute ((packed)) vring_avail_t; + +typedef struct +{ + uint16_t flags; + uint16_t idx; + struct /* vring_used_elem */ + { + uint32_t id; + uint32_t len; + } ring[VHOST_VRING_MAX_SIZE]; +} __attribute ((packed)) vring_used_t; + +typedef struct +{ + u8 flags; + u8 gso_type; + u16 hdr_len; + u16 gso_size; + u16 csum_start; + u16 csum_offset; +} __attribute ((packed)) virtio_net_hdr_t; + +typedef struct { + virtio_net_hdr_t hdr; + u16 num_buffers; +} __attribute ((packed)) virtio_net_hdr_mrg_rxbuf_t; + +typedef struct vhost_user_msg { + vhost_user_req_t request; + u32 flags; + u32 size; + union + 
{ + u64 u64; + vhost_vring_state_t state; + vhost_vring_addr_t addr; + vhost_user_memory_t memory; + vhost_user_log_t log; + }; +} __attribute ((packed)) vhost_user_msg_t; +/* *INDENT-ON* */ + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u16 qsz; + u16 last_avail_idx; + u16 last_used_idx; + u16 n_since_last_int; + vring_desc_t *desc; + vring_avail_t *avail; + vring_used_t *used; + f64 int_deadline; + u8 started; + u8 enabled; + u8 log_used; + //Put non-runtime in a different cache line + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); + int errfd; + u32 callfd_idx; + u32 kickfd_idx; + u64 log_guest_addr; +} vhost_user_vring_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u32 is_up; + u32 admin_up; + u32 unix_server_index; + u32 unix_file_index; + char sock_filename[256]; + int sock_errno; + u32 hw_if_index, sw_if_index; + + //Feature negotiation + u64 features; + u64 feature_mask; + u64 protocol_features; + + //Memory region information + u32 nregions; + vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS]; + void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS]; + u64 region_guest_addr_lo[VHOST_MEMORY_MAX_NREGIONS]; + u64 region_guest_addr_hi[VHOST_MEMORY_MAX_NREGIONS]; + u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS]; + + //Virtual rings + vhost_user_vring_t vrings[VHOST_VRING_MAX_N]; + volatile u32 *vring_locks[VHOST_VRING_MAX_N]; + + int virtio_net_hdr_sz; + int is_any_layout; + + void *log_base_addr; + u64 log_size; + + /* Whether to use spinlock or per_cpu_tx_qid assignment */ + u8 use_tx_spinlock; + u16 *per_cpu_tx_qid; + + /* Vector of workers for this interface */ + u32 *workers; +} vhost_user_intf_t; + +typedef struct +{ + u16 vhost_iface_index; + u16 qid; +} vhost_iface_and_queue_t; + +typedef struct +{ + uword dst; + uword src; + u32 len; +} vhost_copy_t; + +typedef struct +{ + u16 qid; /** The interface queue index (Not the virtio vring idx) */ + u16 device_index; /** The device index */ + u32 virtio_ring_flags; /** Runtime queue flags **/ + u16 first_desc_len; /** Length of the first data descriptor **/ + virtio_net_hdr_mrg_rxbuf_t hdr; /** Virtio header **/ +} vhost_trace_t; + + +#define VHOST_USER_RX_BUFFERS_N (2 * VLIB_FRAME_SIZE + 2) +#define VHOST_USER_COPY_ARRAY_N (4 * VLIB_FRAME_SIZE) + +typedef struct +{ + vhost_iface_and_queue_t *rx_queues; + u32 rx_buffers_len; + u32 rx_buffers[VHOST_USER_RX_BUFFERS_N]; + + virtio_net_hdr_mrg_rxbuf_t tx_headers[VLIB_FRAME_SIZE]; + vhost_copy_t copy[VHOST_USER_COPY_ARRAY_N]; + + /* This is here so it doesn't end up + * using stack or registers.
*/ + vhost_trace_t *current_trace; +} vhost_cpu_t; + +typedef struct +{ + u32 mtu_bytes; + vhost_user_intf_t *vhost_user_interfaces; + u32 *show_dev_instance_by_real_dev_instance; + u32 coalesce_frames; + f64 coalesce_time; + int dont_dump_vhost_user_memory; + + /** first cpu index */ + u32 input_cpu_first_index; + + /** total cpu count */ + u32 input_cpu_count; + + /** Per-CPU data for vhost-user */ + vhost_cpu_t *cpus; + + /** Pseudo random iterator */ + u32 random; +} vhost_user_main_t; + +typedef struct +{ + u8 if_name[64]; + u32 sw_if_index; + u32 virtio_net_hdr_sz; + u64 features; + u8 is_server; + u8 sock_filename[256]; + u32 num_regions; + int sock_errno; +} vhost_user_intf_details_t; + +int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, + vhost_user_intf_details_t ** out_vuids); + +// CLI commands to be used from dpdk +clib_error_t *vhost_user_connect_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd); +clib_error_t *vhost_user_delete_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd); +clib_error_t *show_vhost_user_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd); + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api new file mode 100644 index 00000000..21e42298 --- /dev/null +++ b/src/vnet/devices/virtio/vhost_user.api @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** \brief vhost-user interface create request + @param client_index - opaque cookie to identify the sender + @param is_server - our side is socket server + @param sock_filename - unix socket filename, used to speak with frontend + @param use_custom_mac - enable or disable the use of the provided hardware address + @param mac_address - hardware address to use if 'use_custom_mac' is set +*/ +define create_vhost_user_if +{ + u32 client_index; + u32 context; + u8 is_server; + u8 sock_filename[256]; + u8 renumber; + u32 custom_dev_instance; + u8 use_custom_mac; + u8 mac_address[6]; + u8 tag[64]; +}; + +/** \brief vhost-user interface create response + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param sw_if_index - interface the operation is applied to +*/ +define create_vhost_user_if_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief vhost-user interface modify request + @param client_index - opaque cookie to identify the sender + @param is_server - our side is socket server + @param sock_filename - unix socket filename, used to speak with frontend +*/ +define modify_vhost_user_if +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 is_server; + u8 sock_filename[256]; + u8 renumber; + u32 custom_dev_instance; +}; + +/** \brief vhost-user interface modify response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define modify_vhost_user_if_reply +{ + u32 context; + i32 retval; +}; + +/** \brief vhost-user interface delete request + @param client_index - opaque cookie to identify the sender +*/ +define delete_vhost_user_if +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief vhost-user interface delete response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define delete_vhost_user_if_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Vhost-user interface details structure (fix this) + @param sw_if_index - index of the interface + @param interface_name - name of interface + @param virtio_net_hdr_sz - net header size + @param features - interface features + @param is_server - vhost-user server socket + @param sock_filename - socket filename + @param num_regions - number of used memory regions +*/ +define sw_interface_vhost_user_details +{ + u32 context; + u32 sw_if_index; + u8 interface_name[64]; + u32 virtio_net_hdr_sz; + u64 features; + u8 is_server; + u8 sock_filename[256]; + u32 num_regions; + i32 sock_errno; +}; + +define sw_interface_vhost_user_dump +{ + u32 client_index; + u32 context; +}; +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c new file mode 100644 index 00000000..dd517c26 --- /dev/null +++ b/src/vnet/devices/virtio/vhost_user_api.c @@ -0,0 +1,262 @@ +/* + *------------------------------------------------------------------ + * vhost-user_api.c - vhost-user api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(CREATE_VHOST_USER_IF, create_vhost_user_if) \ +_(MODIFY_VHOST_USER_IF, modify_vhost_user_if) \ +_(DELETE_VHOST_USER_IF, delete_vhost_user_if) \ +_(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) \ +_(SW_INTERFACE_VHOST_USER_DETAILS, sw_interface_vhost_user_details) + +/* + * WARNING: replicated pending api refactor completion + */ +static void +send_sw_interface_flags_deleted (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + u32 sw_if_index) +{ + vl_api_sw_interface_set_flags_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_FLAGS); + mp->sw_if_index = ntohl (sw_if_index); + + mp->admin_up_down = 0; + mp->link_up_down = 0; + mp->deleted = 1; + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp) +{ + int rv = 0; + vl_api_create_vhost_user_if_reply_t *rmp; + u32 sw_if_index = (u32) ~ 0; + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + + rv = vhost_user_create_if (vnm, vm, (char *) mp->sock_filename, + mp->is_server, &sw_if_index, (u64) ~ 0, + mp->renumber, ntohl (mp->custom_dev_instance), + (mp->use_custom_mac) ? mp->mac_address : NULL); + + /* Remember an interface tag for the new interface */ + if (rv == 0) + { + /* If a tag was supplied... 
*/ + if (mp->tag[0]) + { + /* Make sure it's a proper C-string */ + mp->tag[ARRAY_LEN (mp->tag) - 1] = 0; + u8 *tag = format (0, "%s%c", mp->tag, 0); + vnet_set_sw_interface_tag (vnm, tag, sw_if_index); + } + } + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_REPLY, + ({ + rmp->sw_if_index = ntohl (sw_if_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_modify_vhost_user_if_t_handler (vl_api_modify_vhost_user_if_t * mp) +{ + int rv = 0; + vl_api_modify_vhost_user_if_reply_t *rmp; + u32 sw_if_index = ntohl (mp->sw_if_index); + + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + + rv = vhost_user_modify_if (vnm, vm, (char *) mp->sock_filename, + mp->is_server, sw_if_index, (u64) ~ 0, + mp->renumber, ntohl (mp->custom_dev_instance)); + + REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY); +} + +static void +vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp) +{ + int rv = 0; + vl_api_delete_vhost_user_if_reply_t *rmp; + vpe_api_main_t *vam = &vpe_api_main; + u32 sw_if_index = ntohl (mp->sw_if_index); + + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + + rv = vhost_user_delete_if (vnm, vm, sw_if_index); + + REPLY_MACRO (VL_API_DELETE_VHOST_USER_IF_REPLY); + if (!rv) + { + unix_shared_memory_queue_t *q = + vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + vnet_clear_sw_interface_tag (vnm, sw_if_index); + send_sw_interface_flags_deleted (vam, q, sw_if_index); + } +} + +static void + vl_api_sw_interface_vhost_user_details_t_handler + (vl_api_sw_interface_vhost_user_details_t * mp) +{ + clib_warning ("BUG"); +} + +static void +send_sw_interface_vhost_user_details (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + vhost_user_intf_details_t * vui, + u32 context) +{ + vl_api_sw_interface_vhost_user_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_VHOST_USER_DETAILS); + mp->sw_if_index = ntohl (vui->sw_if_index); + mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz); + mp->features = clib_net_to_host_u64 (vui->features); + mp->is_server = vui->is_server; + mp->num_regions = ntohl (vui->num_regions); + mp->sock_errno = ntohl (vui->sock_errno); + mp->context = context; + + strncpy ((char *) mp->sock_filename, + (char *) vui->sock_filename, ARRAY_LEN (mp->sock_filename) - 1); + strncpy ((char *) mp->interface_name, + (char *) vui->if_name, ARRAY_LEN (mp->interface_name) - 1); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void + vl_api_sw_interface_vhost_user_dump_t_handler + (vl_api_sw_interface_vhost_user_dump_t * mp) +{ + int rv = 0; + vpe_api_main_t *am = &vpe_api_main; + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + vhost_user_intf_details_t *ifaces = NULL; + vhost_user_intf_details_t *vuid = NULL; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + rv = vhost_user_dump_ifs (vnm, vm, &ifaces); + if (rv) + return; + + vec_foreach (vuid, ifaces) + { + send_sw_interface_vhost_user_details (am, q, vuid, mp->context); + } + vec_free (ifaces); +} + +/* + * vhost-user_api_hookup + * Add vpe's API message handlers to the table. + * vlib has already mapped shared memory and + * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_vhost_user; +#undef _ +} + +static clib_error_t * +vhost_user_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (vhost_user_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 3cd9eed64b82bd50735434e0679e7fd085ec2884 Mon Sep 17 00:00:00 2001 From: Wojciech Dec Date: Tue, 3 Jan 2017 10:38:37 +0100 Subject: Fix crash on null vhost-user socket - VPP-573 Fix for VPP-573. Change-Id: If7d9690901efebf62fdf28219097153d98c79c0c Signed-off-by: Wojciech Dec --- src/vnet/devices/virtio/vhost-user.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index bde8106c..f9bbae4f 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2578,6 +2578,11 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, int rv = 0; int server_sock_fd = -1; + if (sock_filename == NULL || !(strlen (sock_filename) > 0)) + { + return VNET_API_ERROR_INVALID_ARGUMENT; + } + if (is_server) { if ((rv = -- cgit 1.2.3-korg From 0c3d467df4c5651df03895d06444f6e830e9688c Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Tue, 3 Jan 2017 16:52:22 +0000 Subject: VPP-574: fix VPP hang during security group configuration on a suspended VM The unix connect() in vhost-user driver in VPP is blocking, and a non-expedient accept() on the other side causes the entire VPP to hang. Solution: set the nonblocking flag for the socket fd before calling connect(), and set the socket back to blocking after the accept() succeeds. 
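In isolation, the pattern this fix applies looks like the sketch below; the actual change (shown in the diff that follows) lives inside vhost_user_process(), whose periodic wakeup naturally provides the retry when a non-blocking connect fails.

#include <fcntl.h>
#include <sys/socket.h>
#include <sys/un.h>

/* Attempt a unix-socket connect without blocking the calling thread.
 * If the peer is not accepting, connect() fails immediately (e.g. with
 * EAGAIN) instead of hanging; blocking mode is restored only once the
 * connect has succeeded, mirroring the patch below. */
static int
connect_nonblocking (int sockfd, struct sockaddr_un *sun)
{
  int rv;

  fcntl (sockfd, F_SETFL, O_NONBLOCK);
  rv = connect (sockfd, (struct sockaddr *) sun,
                sizeof (struct sockaddr_un));
  if (rv == 0)
    fcntl (sockfd, F_SETFL, 0); /* back to blocking, as before */
  return rv;
}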
Change-Id: Ia5ee782037eeffabdad71db8241f476a048a4f6f Signed-off-by: Andrew Yourtchenko --- src/vnet/devices/virtio/vhost-user.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index f9bbae4f..3fbcee90 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2325,9 +2325,13 @@ vhost_user_process (vlib_main_t * vm, strncpy (sun.sun_path, (char *) vui->sock_filename, sizeof (sun.sun_path) - 1); + /* Avoid hanging VPP if the other end does not accept */ + fcntl(sockfd, F_SETFL, O_NONBLOCK); if (connect (sockfd, (struct sockaddr *) &sun, sizeof (struct sockaddr_un)) == 0) { + /* Set the socket to blocking as it was before */ + fcntl(sockfd, F_SETFL, 0); vui->sock_errno = 0; template.file_descriptor = sockfd; template.private_data = -- cgit 1.2.3-korg From a1b99dada3190d793555a075aa4eb3560c68fec7 Mon Sep 17 00:00:00 2001 From: Billy McFall Date: Fri, 6 Jan 2017 12:40:14 -0500 Subject: VPP-279: doxygen documentation for host interface CLI commands Change-Id: I2c6c16688be35e2e122c2377ded467c68a4c5a97 Signed-off-by: Billy McFall --- src/vnet/devices/af_packet/cli.c | 38 ++++++++++++++++++++++++++++++++++++-- src/vnet/devices/af_packet/dir.dox | 29 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 src/vnet/devices/af_packet/dir.dox (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c index 2cbd4152..5c55cb4f 100644 --- a/src/vnet/devices/af_packet/cli.c +++ b/src/vnet/devices/af_packet/cli.c @@ -32,6 +32,13 @@ #include +/** + * @file + * @brief CLI for Host Interface Device Driver. + * + * This file contains the source code for CLI for the host interface. + */ + static clib_error_t * af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -81,10 +88,27 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } +/*? + * Create a host interface that will attach to a linux AF_PACKET + * interface, one side of a veth pair. The veth pair must already + * exist. Once created, a new host interface will exist in VPP + * with the name 'host-', where '' + * is the name of the specified veth pair. Use the + * 'show interfaces' command to display host interface details. + * + * @cliexpar + * Example of how to create a host interface tied to one side of an + * existing linux veth pair named vpp1: + * @cliexstart{create host-interface name vpp1} + * host-vpp1 + * @cliexend + * Once the host interface is created, enable the interface using: + * @cliexcmd{set interface state host-vpp1 up} +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (af_packet_create_command, static) = { .path = "create host-interface", - .short_help = "create host-interface name [hw-addr ]", + .short_help = "create host-interface name [hw-addr ]", .function = af_packet_create_command_fn, }; /* *INDENT-ON* */ @@ -119,10 +143,20 @@ af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } +/*? + * Delete a host interface. Use the linux interface name to identify + * the host interface to be deleted. In VPP, host interfaces are + * named as 'host-', where '' + * is the name of the linux interface. 
+ * + * @cliexpar + * Example of how to delete a host interface named host-vpp1: + * @cliexcmd{delete host-interface name vpp1} +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (af_packet_delete_command, static) = { .path = "delete host-interface", - .short_help = "delete host-interface name ", + .short_help = "delete host-interface name ", .function = af_packet_delete_command_fn, }; /* *INDENT-ON* */ diff --git a/src/vnet/devices/af_packet/dir.dox b/src/vnet/devices/af_packet/dir.dox new file mode 100644 index 00000000..78991c6d --- /dev/null +++ b/src/vnet/devices/af_packet/dir.dox @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Doxygen directory documentation */ + +/** +@dir +@brief Host Interface Implementation. + +This directory contains the source code for Host Interface driver. The +Host Interface driver leverages the DPDK AF_PACKET driver. + + +*/ +/*? %%clicmd:group_label Host Interface %% ?*/ +/*? %%syscfg:group_label Host Interface %% ?*/ -- cgit 1.2.3-korg From 0d3c1cc7626ac39f25975ebf9d92ba903d470f79 Mon Sep 17 00:00:00 2001 From: Billy McFall Date: Fri, 6 Jan 2017 17:55:25 -0500 Subject: VPP-279: af_packet via Command-line Arg should no longer be supported With the CLI command 'create host-interface', no longer need to support af_packet interface creation via Command-line Arg. However, this is mostly implemented by passing arguments to DPDK. Instead of blocking functionality, put a warning in the log directing the user to the CLI. Change-Id: I6c6fba6096f32ef232f1da0c5d39396c6d13f54f Signed-off-by: Billy McFall --- src/vnet/devices/dpdk/init.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index 693ca985..60689463 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -1114,6 +1114,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) tmp = format (0, "--%s%c", #a, 0); \ vec_add1 (conf->eal_init_args, tmp); \ vec_add1 (s, 0); \ + if (!strncmp(#a, "vdev", 4)) \ + if (strstr((char*)s, "af_packet")) \ + clib_warning ("af_packet obsoleted. 
Use CLI 'create host-interface'."); \ vec_add1 (conf->eal_init_args, s); \ } foreach_eal_double_hyphen_arg -- cgit 1.2.3-korg From 738f3f2a170bace45180bc8718d5a7e75939b275 Mon Sep 17 00:00:00 2001 From: Pavel Kotucek Date: Mon, 9 Jan 2017 15:11:03 +0100 Subject: API refactoring : dpdk Change-Id: If2541be803a0303401b013390e117c26fd1d9739 Signed-off-by: Pavel Kotucek --- src/vat/api_format.c | 119 ++++++++++++++++--- src/vnet.am | 8 +- src/vnet/devices/dpdk/dpdk.api | 103 ++++++++++++++++ src/vnet/devices/dpdk/dpdk_api.c | 246 +++++++++++++++++++++++++++++++++++++++ src/vnet/vnet_all_api_h.h | 3 + src/vpp/api/api.c | 153 ------------------------ src/vpp/api/custom_dump.c | 17 ++- src/vpp/api/vpe.api | 83 +------------ 8 files changed, 475 insertions(+), 257 deletions(-) create mode 100644 src/vnet/devices/dpdk/dpdk.api create mode 100644 src/vnet/devices/dpdk/dpdk_api.c (limited to 'src/vnet/devices') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index e6c0f244..c00104de 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -3569,9 +3569,6 @@ _(sw_interface_set_mpls_enable_reply) \ _(sw_interface_set_vpath_reply) \ _(sw_interface_set_vxlan_bypass_reply) \ _(sw_interface_set_l2_bridge_reply) \ -_(sw_interface_set_dpdk_hqos_pipe_reply) \ -_(sw_interface_set_dpdk_hqos_subport_reply) \ -_(sw_interface_set_dpdk_hqos_tctbl_reply) \ _(bridge_domain_add_del_reply) \ _(sw_interface_set_l2_xconnect_reply) \ _(l2fib_add_del_reply) \ @@ -3671,6 +3668,13 @@ _(feature_enable_disable_reply) \ _(sw_interface_tag_add_del_reply) \ _(sw_interface_set_mtu_reply) +#if DPDK > 0 +#define foreach_standard_dpdk_reply_retval_handler \ +_(sw_interface_set_dpdk_hqos_pipe_reply) \ +_(sw_interface_set_dpdk_hqos_subport_reply) \ +_(sw_interface_set_dpdk_hqos_tctbl_reply) +#endif + #define _(n) \ static void vl_api_##n##_t_handler \ (vl_api_##n##_t * mp) \ @@ -3702,6 +3706,39 @@ foreach_standard_reply_retval_handler; foreach_standard_reply_retval_handler; #undef _ +#if DPDK > 0 +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = &vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_dpdk_reply_retval_handler; +#undef _ + +#define _(n) \ + static void vl_api_##n##_t_handler_json \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = &vat_main; \ + vat_json_node_t node; \ + vat_json_init_object(&node); \ + vat_json_object_add_int(&node, "retval", ntohl(mp->retval)); \ + vat_json_print(vam->ofp, &node); \ + vam->retval = ntohl(mp->retval); \ + vam->result_ready = 1; \ + } +foreach_standard_dpdk_reply_retval_handler; +#undef _ +#endif + /* * Table of message reply handlers, must include boilerplate handlers * we just generated @@ -3725,12 +3762,6 @@ _(SW_INTERFACE_SET_L2_XCONNECT_REPLY, \ sw_interface_set_l2_xconnect_reply) \ _(SW_INTERFACE_SET_L2_BRIDGE_REPLY, \ sw_interface_set_l2_bridge_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY, \ - sw_interface_set_dpdk_hqos_pipe_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY, \ - sw_interface_set_dpdk_hqos_subport_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \ - sw_interface_set_dpdk_hqos_tctbl_reply) \ _(BRIDGE_DOMAIN_ADD_DEL_REPLY, bridge_domain_add_del_reply) \ _(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ _(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ @@ -3924,6 +3955,16 @@ _(SW_INTERFACE_SET_MTU_REPLY, 
sw_interface_set_mtu_reply) \ _(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) +#if DPDK > 0 +#define foreach_vpe_dpdk_api_reply_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY, \ + sw_interface_set_dpdk_hqos_pipe_reply) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY, \ + sw_interface_set_dpdk_hqos_subport_reply) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \ + sw_interface_set_dpdk_hqos_tctbl_reply) +#endif + /* M: construct, but don't yet send a message */ #define M(T,t) \ @@ -4724,6 +4765,7 @@ api_sw_interface_clear_stats (vat_main_t * vam) W; } +#if DPDK >0 static int api_sw_interface_set_dpdk_hqos_pipe (vat_main_t * vam) { @@ -4944,6 +4986,7 @@ api_sw_interface_set_dpdk_hqos_tctbl (vat_main_t * vam) /* NOTREACHED */ return 0; } +#endif static int api_sw_interface_add_del_address (vat_main_t * vam) @@ -17434,14 +17477,6 @@ _(sw_interface_set_l2_bridge, \ " | sw_if_index bd_id \n" \ "[shg ] [bvi]\n" \ "enable | disable") \ -_(sw_interface_set_dpdk_hqos_pipe, \ - "rx | sw_if_index subport pipe \n" \ - "profile \n") \ -_(sw_interface_set_dpdk_hqos_subport, \ - "rx | sw_if_index subport [rate ]\n" \ - "[bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ]\n") \ -_(sw_interface_set_dpdk_hqos_tctbl, \ - "rx | sw_if_index entry tc queue \n") \ _(bridge_domain_add_del, \ "bd_id [flood 1|0] [uu-flood 1|0] [forward 1|0] [learn 1|0] [arp-term 1|0] [del]\n") \ _(bridge_domain_dump, "[bd_id ]\n") \ @@ -17739,6 +17774,18 @@ _(sw_interface_set_mtu, " | sw_if_index mtu ") \ _(ip_neighbor_dump, "[ip6] | sw_if_index ") \ _(sw_interface_get_table, " | sw_if_index [ipv6]") +#if DPDK > 0 +#define foreach_vpe_dpdk_api_msg \ +_(sw_interface_set_dpdk_hqos_pipe, \ + "rx | sw_if_index subport pipe \n" \ + "profile \n") \ +_(sw_interface_set_dpdk_hqos_subport, \ + "rx | sw_if_index subport [rate ]\n" \ + "[bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ]\n") \ +_(sw_interface_set_dpdk_hqos_tctbl, \ + "rx | sw_if_index entry tc queue \n") +#endif + /* List of command functions, CLI names map directly to functions */ #define foreach_cli_function \ _(comment, "usage: comment ") \ @@ -17776,6 +17823,22 @@ _(unset, "usage: unset ") foreach_vpe_api_reply_msg; #undef _ +#if DPDK > 0 +#define _(N,n) \ + static void vl_api_##n##_t_handler_uni \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = &vat_main; \ + if (vam->json_output) { \ + vl_api_##n##_t_handler_json(mp); \ + } else { \ + vl_api_##n##_t_handler(mp); \ + } \ + } +foreach_vpe_dpdk_api_reply_msg; +#undef _ +#endif + void vat_api_hookup (vat_main_t * vam) { @@ -17789,6 +17852,18 @@ vat_api_hookup (vat_main_t * vam) foreach_vpe_api_reply_msg; #undef _ +#if DPDK > 0 +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler_uni, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_dpdk_api_reply_msg; +#undef _ +#endif + #if (VPP_API_TEST_BUILTIN==0) vl_msg_api_set_first_available_msg_id (VL_MSG_FIRST_AVAILABLE); #endif @@ -17803,11 +17878,21 @@ vat_api_hookup (vat_main_t * vam) #define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); foreach_vpe_api_msg; #undef _ +#if DPDK >0 +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_dpdk_api_msg; +#undef _ +#endif /* Help strings */ #define _(n,h) hash_set_mem (vam->help_by_name, #n, h); foreach_vpe_api_msg; #undef _ +#if DPDK >0 +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_dpdk_api_msg; +#undef _ +#endif /* 
CLI functions */ #define _(n,h) hash_set_mem (vam->function_by_name, #n, n); diff --git a/src/vnet.am b/src/vnet.am index 16ade4d1..bc0820a3 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -759,10 +759,14 @@ libvnet_la_SOURCES += \ vnet/devices/dpdk/init.c \ vnet/devices/dpdk/node.c \ vnet/devices/dpdk/hqos.c \ - vnet/devices/dpdk/cli.c + vnet/devices/dpdk/cli.c \ + vnet/devices/dpdk/dpdk_api.c nobase_include_HEADERS += \ - vnet/devices/dpdk/dpdk.h + vnet/devices/dpdk/dpdk.h \ + vnet/devices/dpdk/dpdk.api.h + +API_FILES += vnet/devices/dpdk/dpdk.api else libvnet_la_SOURCES += \ vnet/devices/nic/ixge.c \ diff --git a/src/vnet/devices/dpdk/dpdk.api b/src/vnet/devices/dpdk/dpdk.api new file mode 100644 index 00000000..21215d45 --- /dev/null +++ b/src/vnet/devices/dpdk/dpdk.api @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief DPDK interface HQoS pipe profile set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param pipe - pipe ID within its subport + @param profile - pipe profile ID +*/ +define sw_interface_set_dpdk_hqos_pipe { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 pipe; + u32 profile; +}; + +/** \brief DPDK interface HQoS pipe profile set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_pipe_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS subport parameters set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param tb_rate - subport token bucket rate (measured in bytes/second) + @param tb_size - subport token bucket size (measured in credits) + @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) + @param tc_period - enforcement period for rates (measured in milliseconds) +*/ +define sw_interface_set_dpdk_hqos_subport { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 tb_rate; + u32 tb_size; + u32 tc_rate[4]; + u32 tc_period; +}; + +/** \brief DPDK interface HQoS subport parameters set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_subport_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS tctbl entry set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param entry - entry index ID + @param tc - traffic class (0 .. 3) + @param queue - traffic class queue (0 .. 
3) +*/ +define sw_interface_set_dpdk_hqos_tctbl { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 entry; + u32 tc; + u32 queue; +}; + +/** \brief DPDK interface HQoS tctbl entry set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_tctbl_reply { + u32 context; + i32 retval; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + \ No newline at end of file diff --git a/src/vnet/devices/dpdk/dpdk_api.c b/src/vnet/devices/dpdk/dpdk_api.c new file mode 100644 index 00000000..8faf5c2c --- /dev/null +++ b/src/vnet/devices/dpdk/dpdk_api.c @@ -0,0 +1,246 @@ +/* + *------------------------------------------------------------------ + * dpdk_api.c - dpdk interface api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#if DPDK > 0 +#include +#endif + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) + +static void + vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; + int rv = 0; + +#if DPDK > 0 + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + u32 pipe = ntohl (mp->pipe); + u32 profile = ntohl (mp->profile); + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); + + BAD_SW_IF_INDEX_LABEL; +#else + clib_warning ("setting HQoS pipe parameters without DPDK not implemented"); + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif /* DPDK */ + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); +} + +static void + vl_api_sw_interface_set_dpdk_hqos_subport_t_handler + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; + int rv = 0; + +#if DPDK > 0 + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + struct rte_sched_subport_params p; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + p.tb_rate = ntohl (mp->tb_rate); + p.tb_size = ntohl (mp->tb_size); + p.tc_rate[0] = ntohl (mp->tc_rate[0]); + p.tc_rate[1] = ntohl (mp->tc_rate[1]); + p.tc_rate[2] = ntohl (mp->tc_rate[2]); + p.tc_rate[3] = ntohl (mp->tc_rate[3]); + p.tc_period = ntohl (mp->tc_period); + + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); + + BAD_SW_IF_INDEX_LABEL; +#else + clib_warning + ("setting HQoS subport parameters without DPDK not implemented"); + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif /* DPDK */ + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); +} + +static void + vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; + int rv = 0; + +#if DPDK > 0 + dpdk_main_t *dm = &dpdk_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 entry = ntohl (mp->entry); + u32 tc = ntohl (mp->tc); + u32 queue = ntohl (mp->queue); + u32 val, i; + + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + { + clib_warning ("invalid traffic class !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + { + clib_warning ("invalid queue !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, 
"workers"); + + if (p == 0) + { + clib_warning ("worker thread registration AWOL !!"); + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto done; + } + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + for (i = 0; i < worker_thread_count; i++) + xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; + + BAD_SW_IF_INDEX_LABEL; +done: +#else + clib_warning ("setting HQoS DSCP table entry without DPDK not implemented"); + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif /* DPDK */ + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); +} + +/* + * dpdk_api_hookup + * Add vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_dpdk; +#undef _ +} + +static clib_error_t * +dpdk_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (dpdk_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 1b4d6c45..d48e1540 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -30,6 +30,9 @@ #endif /* included_from_layer_3 */ #include +#if DPDK > 0 +#include +#endif #include #include #include diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 6289249c..46e28e9d 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -86,10 +86,6 @@ #undef __included_bihash_template_h__ #include -#if DPDK > 0 -#include -#endif - #include #include @@ -131,9 +127,6 @@ _(SW_INTERFACE_SET_VPATH, sw_interface_set_vpath) \ _(SW_INTERFACE_SET_VXLAN_BYPASS, sw_interface_set_vxlan_bypass) \ _(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \ _(SW_INTERFACE_SET_L2_BRIDGE, sw_interface_set_l2_bridge) \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) \ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ _(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ @@ -697,152 +690,6 @@ static void REPLY_MACRO (VL_API_SW_INTERFACE_SET_L2_BRIDGE_REPLY); } -static void - vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - u32 pipe = ntohl (mp->pipe); - u32 profile = ntohl (mp->profile); - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = 
vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); - - BAD_SW_IF_INDEX_LABEL; -#else - clib_warning ("setting HQoS pipe parameters without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); -} - -static void - vl_api_sw_interface_set_dpdk_hqos_subport_t_handler - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - struct rte_sched_subport_params p; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - p.tb_rate = ntohl (mp->tb_rate); - p.tb_size = ntohl (mp->tb_size); - p.tc_rate[0] = ntohl (mp->tc_rate[0]); - p.tc_rate[1] = ntohl (mp->tc_rate[1]); - p.tc_rate[2] = ntohl (mp->tc_rate[2]); - p.tc_rate[3] = ntohl (mp->tc_rate[3]); - p.tc_period = ntohl (mp->tc_period); - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); - - BAD_SW_IF_INDEX_LABEL; -#else - clib_warning - ("setting HQoS subport parameters without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); -} - -static void - vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 entry = ntohl (mp->entry); - u32 tc = ntohl (mp->tc); - u32 queue = ntohl (mp->queue); - u32 val, i; - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) - { - clib_warning ("invalid traffic class !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) - { - clib_warning ("invalid queue !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - - if (p == 0) - { - clib_warning ("worker thread registration AWOL !!"); - rv = VNET_API_ERROR_INVALID_VALUE_2; - goto done; - } - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; - for (i = 0; i < worker_thread_count; i++) - xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; - - BAD_SW_IF_INDEX_LABEL; -done: -#else - clib_warning ("setting HQoS DSCP table entry without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); -} - static void vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) { diff --git a/src/vpp/api/custom_dump.c 
b/src/vpp/api/custom_dump.c index 1964533e..c2cd3d15 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -238,6 +238,7 @@ static void *vl_api_sw_interface_set_l2_bridge_t_print FINISH; } +#if DPDK > 0 static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) { @@ -287,6 +288,7 @@ static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print FINISH; } +#endif static void *vl_api_bridge_domain_add_del_t_print (vl_api_bridge_domain_add_del_t * mp, void *handle) @@ -3002,9 +3004,6 @@ _(BRIDGE_FLAGS, bridge_flags) \ _(CLASSIFY_ADD_DEL_TABLE, classify_add_del_table) \ _(CLASSIFY_ADD_DEL_SESSION, classify_add_del_session) \ _(SW_INTERFACE_SET_L2_BRIDGE, sw_interface_set_l2_bridge) \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport)\ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) \ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ _(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \ @@ -3128,6 +3127,18 @@ vl_msg_api_custom_dump_configure (api_main_t * am) = (void *) vl_api_##f##_t_print; foreach_custom_print_function; #undef _ + +#if DPDK > 0 + /* + * manually add DPDK hqos print handlers + */ + am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE] = + (void *) vl_api_sw_interface_set_dpdk_hqos_pipe_t_print; + am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT] = + (void *) vl_api_sw_interface_set_dpdk_hqos_subport_t_print; + am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL] = + (void *) vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print; +#endif } /* diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index abd0e8f1..3e4bcdf9 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -37,6 +37,7 @@ * IPSEC-GRE APIs: see .../vnet/vnet/ipsec-gre/{ipsec_gre.api, ipsec_gre_api.c} * LISP APIs: see .../vnet/vnet/lisp/{lisp.api, lisp_api.c} * LISP-GPE APIs: see .../vnet/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c} + * DPDK APIs: ... see /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} */ /** \brief Create a new subinterface with the given vlan id @@ -2606,88 +2607,6 @@ define delete_subif_reply { i32 retval; }; -/** \brief DPDK interface HQoS pipe profile set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param pipe - pipe ID within its subport - @param profile - pipe profile ID -*/ -define sw_interface_set_dpdk_hqos_pipe { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 pipe; - u32 profile; -}; - -/** \brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; -}; - -/** \brief DPDK interface HQoS subport parameters set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param tb_rate - subport token bucket rate (measured in bytes/second) - @param tb_size - subport token bucket size (measured in credits) - @param tc_rate - subport traffic class 0 .. 
3 rates (measured in bytes/second) - @param tc_period - enforcement period for rates (measured in milliseconds) -*/ -define sw_interface_set_dpdk_hqos_subport { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 tb_rate; - u32 tb_size; - u32 tc_rate[4]; - u32 tc_period; -}; - -/** \brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; -}; - -/** \brief DPDK interface HQoS tctbl entry set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param entry - entry index ID - @param tc - traffic class (0 .. 3) - @param queue - traffic class queue (0 .. 3) -*/ -define sw_interface_set_dpdk_hqos_tctbl { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 entry; - u32 tc; - u32 queue; -}; - -/** \brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; -}; - /** \brief L2 interface pbb tag rewrite configure request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request -- cgit 1.2.3-korg From 553a41190963099631a73fb0a77b07b871d65e70 Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Tue, 10 Jan 2017 10:07:04 +0100 Subject: VPP-585: vhost-user interface deletion leaks memory. Change-Id: I69bbc447e1989adea40f052eac4550036b6e2e1e Signed-off-by: Ole Troan --- src/vnet/devices/virtio/vhost-user.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 3fbcee90..ee41ee18 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2386,12 +2386,19 @@ VLIB_REGISTER_NODE (vhost_user_process_node,static) = { static void vhost_user_term_if (vhost_user_intf_t * vui) { + int q; + // Delete configured thread pinning vec_reset_length (vui->workers); // disconnect interface sockets vhost_user_if_disconnect (vui); vhost_user_update_iface_state (vui); + for (q = 0; q < VHOST_VRING_MAX_N; q++) + { + clib_mem_free ((void *) vui->vring_locks[q]); + } + if (vui->unix_server_index != ~0) { //Close server socket -- cgit 1.2.3-korg From 35b2607fcd033a18a32ba61b44e68d6e9e3eeeca Mon Sep 17 00:00:00 2001 From: Billy McFall Date: Thu, 5 Jan 2017 13:23:39 -0500 Subject: VPP-580: Host Interface created via Command Line Arg is misnamed Host interfaces created via command-line arguments are misnamed (i.e., UnknownEthernet0 instead of af_packet0). In DPDK 16.11, the driver names changed from eth_xxx to net_xxx. However, it looks like the AF_PACKET driver still returns "AF_PACKET PMD" as the driver name in the rte_eth_dev_info_get(..) call. I modified the driver-name lookup table in vnet/devices/dpdk/dpdk.h to revert the name back. 
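For illustration only, a simplified sketch of how an X-macro driver-name table like the one in dpdk.h is typically consumed; the names below are made up for the example and the real init.c code differs in detail:

/* Sketch: map the driver name reported by rte_eth_dev_info_get() to a
 * port type; a name with no table entry is what yields the
 * "UnknownEthernet" interface naming. */
#include <string.h>

typedef enum { PORT_TYPE_AF_PACKET, PORT_TYPE_UNKNOWN } port_type_t;

#define foreach_pmd_sketch _ ("AF_PACKET PMD", AF_PACKET)

static port_type_t
port_type_from_driver_name (const char *driver_name)
{
#define _(s, f) \
  if (driver_name && !strcmp (driver_name, (s))) \
    return PORT_TYPE_##f;
  foreach_pmd_sketch
#undef _
  return PORT_TYPE_UNKNOWN;
}

With the table entry changed to match "AF_PACKET PMD", the lookup classifies the device again and the interface is named af_packet0 rather than UnknownEthernet0.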
Change-Id: I2b0a9f6b4d5245b76548027891d40f81a56b230d Signed-off-by: Billy McFall --- src/vnet/devices/dpdk/dpdk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h index d8f378d2..e0436031 100644 --- a/src/vnet/devices/dpdk/dpdk.h +++ b/src/vnet/devices/dpdk/dpdk.h @@ -79,7 +79,7 @@ extern vlib_node_registration_t handoff_dispatch_node; _ ("net_virtio", VIRTIO) \ _ ("net_enic", ENIC) \ _ ("net_vmxnet3", VMXNET3) \ - _ ("net_af_packet", AF_PACKET) \ + _ ("AF_PACKET PMD", AF_PACKET) \ _ ("rte_bond_pmd", BOND) \ _ ("net_fm10k", FM10K) \ _ ("net_cxgbe", CXGBE) \ -- cgit 1.2.3-korg From 2d0b6e380be3acec05f4acea9e4f1b9bb730ee88 Mon Sep 17 00:00:00 2001 From: Billy McFall Date: Wed, 11 Jan 2017 08:44:52 -0500 Subject: VPP-279: Document changes for vnet/vnet/devices Add doxygen documentation for netmap CLI commands. Change-Id: I8d3ce12b1cfa5af30ddcd31cb476ca4652cfc2f3 Signed-off-by: Billy McFall --- src/vnet/devices/af_packet/cli.c | 5 +++ src/vnet/devices/netmap/cli.c | 68 ++++++++++++++++++++++++++++++++++++++-- src/vnet/devices/netmap/dir.dox | 27 ++++++++++++++++ 3 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 src/vnet/devices/netmap/dir.dox (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c index 5c55cb4f..6baa26e1 100644 --- a/src/vnet/devices/af_packet/cli.c +++ b/src/vnet/devices/af_packet/cli.c @@ -96,6 +96,11 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, * is the name of the specified veth pair. Use the * 'show interfaces' command to display host interface details. * + * This command has the following optional parameters: + * + * - hw-addr - Optional ethernet address, can be in either + * X:X:X:X:X:X unix or X.X.X cisco format. + * * @cliexpar * Example of how to create a host interface tied to one side of an * existing linux veth pair named vpp1: diff --git a/src/vnet/devices/netmap/cli.c b/src/vnet/devices/netmap/cli.c index 68695250..6157f27c 100644 --- a/src/vnet/devices/netmap/cli.c +++ b/src/vnet/devices/netmap/cli.c @@ -83,11 +83,57 @@ netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } +/*? + * 'netmap' is a framework for very fast packet I/O from userspace. + * 'VALE' is an equally fast in-kernel software switch using the + * netmap API. 'netmap' includes 'netmap pipes', a shared + * memory packet transport channel. Together, they provide a high-speed + * user-space interface that allows VPP to patch into a linux namespace, a + * linux container, or a physical NIC without the use of DPDK. The Netmap/VALE + * build generates the 'netmap.ko' kernel module, which must be loaded + * before netmap interfaces can be created. + * - https://github.com/luigirizzo/netmap - Netmap/VALE repo. + * - https://github.com/vpp-dev/netmap - VPP development package for Netmap/VALE, + * which is a snapshot of the Netmap/VALE repo with minor changes to work + * with containers and modified kernel drivers to work with NICs. + * + * Create a netmap interface that will attach to a linux interface. + * The interface must already exist. Once created, a new netmap interface + * will exist in VPP with the name 'netmap-', where + * '' takes one of two forms: + * - ifname - Linux interface to bind to. + * - valeXXX:YYY - + * - Where 'valeXXX' is an arbitrary name for a VALE + * interface that must start with 'vale' and is less + * than 16 characters. 
+ * - Where 'YYY' is an existing linux namespace. + * + * This command has the following optional parameters: + * + * - hw-addr - Optional ethernet address, can be in either + * X:X:X:X:X:X unix or X.X.X cisco format. + * + * - pipe - Optional flag to indicate that a 'netmap pipe' + * instance should be created. + * + * - master | slave - Optional flag to indicate whether VPP should + * be the master or slave of the 'netmap pipe'. Only considered + * if 'pipe' is entered. Defaults to 'slave' if not entered. + * + * @cliexpar + * Example of how to create a netmap interface tied to the linux + * namespace 'vpp1': + * @cliexstart{create netmap name vale00:vpp1 hw-addr 02:FE:3F:34:15:9B pipe master} + * netmap-vale00:vpp1 + * @cliexend + * Once the netmap interface is created, enable the interface using: + * @cliexcmd{set interface state netmap-vale00:vpp1 up} +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (netmap_create_command, static) = { .path = "create netmap", - .short_help = "create netmap name [|valeXXX:YYY] " - "[hw-addr ] [pipe] [master|slave]", + .short_help = "create netmap name |valeXXX:YYY " + "[hw-addr ] [pipe] [master|slave]", .function = netmap_create_command_fn, }; /* *INDENT-ON* */ @@ -121,10 +167,26 @@ netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } +/*? + * Delete a netmap interface. Use the '' to identify + * the netmap interface to be deleted. In VPP, netmap interfaces are + * named as 'netmap-', where '' + * takes one of two forms: + * - ifname - Linux interface to bind to. + * - valeXXX:YYY - + * - Where 'valeXXX' is an arbitrary name for a VALE + * interface that must start with 'vale' and is less + * than 16 characters. + * - Where 'YYY' is an existing linux namespace. + * + * @cliexpar + * Example of how to delete a netmap interface named 'netmap-vale00:vpp1': + * @cliexcmd{delete netmap name vale00:vpp1} +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (netmap_delete_command, static) = { .path = "delete netmap", - .short_help = "delete netmap name ", + .short_help = "delete netmap name |valeXXX:YYY", .function = netmap_delete_command_fn, }; /* *INDENT-ON* */ diff --git a/src/vnet/devices/netmap/dir.dox b/src/vnet/devices/netmap/dir.dox new file mode 100644 index 00000000..7ddbf947 --- /dev/null +++ b/src/vnet/devices/netmap/dir.dox @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Doxygen directory documentation */ + +/** +@dir +@brief netmap Interface Implementation. + +This directory contains the source code for the netmap driver. + +*/ +/*? %%clicmd:group_label netmap %% ?*/ +/*? 
%%syscfg:group_label netmap %% ?*/ -- cgit 1.2.3-korg From 878c609889dcdc58538d40d8b3f662320f88573d Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 4 Jan 2017 13:19:27 +0100 Subject: vlib: add buffer and thread callbacks Change-Id: I8e2e8f94a884ab2f9909d0c83ba00edd38cdab77 Signed-off-by: Damjan Marion --- src/plugins/flowperpkt/flowperpkt.c | 2 +- src/vlib.am | 1 + src/vlib/buffer.c | 736 +++--------------------------------- src/vlib/buffer.h | 45 ++- src/vlib/buffer_funcs.h | 82 ++-- src/vlib/buffer_serialize.c | 248 ++++++++++++ src/vlib/main.c | 7 +- src/vlib/threads.c | 112 +++--- src/vlib/threads.h | 17 +- src/vlib/threads_cli.c | 25 -- src/vlib/unix/physmem.c | 15 +- src/vnet.am | 2 + src/vnet/devices/dpdk/buffer.c | 729 +++++++++++++++++++++++++++++++++++ src/vnet/devices/dpdk/cli.c | 4 +- src/vnet/devices/dpdk/device.c | 7 +- src/vnet/devices/dpdk/dpdk.h | 3 + src/vnet/devices/dpdk/dpdk_priv.h | 3 + src/vnet/devices/dpdk/init.c | 6 +- src/vnet/devices/dpdk/thread.c | 85 +++++ src/vnet/sr/sr_replicate.c | 7 +- 20 files changed, 1304 insertions(+), 832 deletions(-) create mode 100644 src/vlib/buffer_serialize.c create mode 100644 src/vnet/devices/dpdk/buffer.c create mode 100644 src/vnet/devices/dpdk/thread.c (limited to 'src/vnet/devices') diff --git a/src/plugins/flowperpkt/flowperpkt.c b/src/plugins/flowperpkt/flowperpkt.c index fb71d5b0..cc351599 100644 --- a/src/plugins/flowperpkt/flowperpkt.c +++ b/src/plugins/flowperpkt/flowperpkt.c @@ -643,7 +643,7 @@ flowperpkt_init (vlib_main_t * vm) vec_free (name); /* Decide how many worker threads we have */ - num_threads = 1 /* main thread */ + tm->n_eal_threads; + num_threads = 1 /* main thread */ + tm->n_threads; /* Allocate per worker thread vectors */ vec_validate (fm->ipv4_buffers_per_worker, num_threads - 1); diff --git a/src/vlib.am b/src/vlib.am index 0154d841..c21f88c4 100644 --- a/src/vlib.am +++ b/src/vlib.am @@ -23,6 +23,7 @@ vlib/config.h: libvlib_la_SOURCES = \ vlib/buffer.c \ + vlib/buffer_serialize.c \ vlib/cli.c \ vlib/cli.h \ vlib/config.h \ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 4bf6d125..0b0e6054 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -38,50 +38,13 @@ */ /** - * @cond (!DPDK) * @file * * Allocate/free network buffers. 
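 *
 * Buffer alloc/free and free-list handling now dispatch through the
 * vlib_buffer_callbacks_t table in vlib_buffer_main_t:
 * vlib_buffer_cb_init() installs the functions in this file as the
 * defaults, and an external buffer manager (e.g. the DPDK code moved
 * to vnet/devices/dpdk/buffer.c) can override them, once, via
 * vlib_buffer_cb_register().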
*/ -#if DPDK > 0 -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#endif - #include -#if DPDK > 0 -#pragma weak rte_mem_virt2phy -#pragma weak rte_eal_has_hugepages -#pragma weak rte_socket_id -#pragma weak rte_pktmbuf_pool_create -#endif - uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, vlib_buffer_t * b_first) @@ -103,7 +66,6 @@ u8 * format_vlib_buffer (u8 * s, va_list * args) { vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *); -#if DPDK > 0 uword indent = format_get_indent (s); s = format (s, "current data %d, length %d, free-list %d", @@ -126,18 +88,6 @@ format_vlib_buffer (u8 * s, va_list * args) format_white_space, indent, next_buffer, b->current_length); } -#else - - s = format (s, "current data %d, length %d, free-list %d", - b->current_data, b->current_length, b->free_list_index); - - if (b->flags & VLIB_BUFFER_IS_TRACED) - s = format (s, ", trace 0x%x", b->trace_index); - - if (b->flags & VLIB_BUFFER_NEXT_PRESENT) - s = format (s, ", next-buffer 0x%x", b->next_buffer); -#endif - return s; } @@ -153,7 +103,6 @@ format_vlib_buffer_and_data (u8 * s, va_list * args) return s; } -#if DPDK == 0 static u8 * format_vlib_buffer_known_state (u8 * s, va_list * args) { @@ -181,7 +130,6 @@ format_vlib_buffer_known_state (u8 * s, va_list * args) return format (s, "%s", t); } -#endif u8 * format_vlib_buffer_contents (u8 * s, va_list * va) @@ -200,7 +148,6 @@ format_vlib_buffer_contents (u8 * s, va_list * va) return s; } -#if DPDK == 0 static u8 * vlib_validate_buffer_helper (vlib_main_t * vm, u32 bi, @@ -217,11 +164,10 @@ vlib_validate_buffer_helper (vlib_main_t * vm, if ((signed) b->current_data < (signed) -VLIB_BUFFER_PRE_DATA_SIZE) return format (0, "current data %d before pre-data", b->current_data); -#if DPDK == 0 + if (b->current_data + b->current_length > fl->n_data_bytes) return format (0, "%d-%d beyond end of buffer %d", b->current_data, b->current_length, fl->n_data_bytes); -#endif if (follow_buffer_next && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) { @@ -311,14 +257,12 @@ done: hash_free (hash); return result; } -#endif vlib_main_t **vlib_mains; -#if DPDK == 0 /* When dubugging validate that given buffers are either known allocated or known free. */ -static void +static void __attribute__ ((unused)) vlib_buffer_validate_alloc_free (vlib_main_t * vm, u32 * buffers, uword n_buffers, @@ -359,7 +303,6 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, is_free ? 
VLIB_BUFFER_KNOWN_FREE : VLIB_BUFFER_KNOWN_ALLOCATED); } } -#endif #define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) @@ -463,7 +406,6 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; -#if DPDK > 0 int i; ASSERT (os_get_cpu_number () == 0); @@ -519,47 +461,6 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, wf->unaligned_buffers = 0; wf->n_alloc = 0; } -#else - - if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0) - { - u32 default_free_free_list_index; - - default_free_free_list_index = vlib_buffer_create_free_list_helper (vm, - /* default buffer size */ - VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, - /* is_public */ - 1, - /* is_default */ - 1, - (u8 - *) - "default"); - ASSERT (default_free_free_list_index == - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - - if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public) - return default_free_free_list_index; - } - - pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES); - - memset (f, 0, sizeof (f[0])); - f->index = f - bm->buffer_free_list_pool; - f->n_data_bytes = vlib_buffer_round_size (n_data_bytes); - f->min_n_buffers_each_physmem_alloc = 256; - f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name); - - /* Setup free buffer template. */ - f->buffer_init_template.free_list_index = f->index; - - if (is_public) - { - uword *p = hash_get (bm->free_list_by_size, f->n_data_bytes); - if (!p) - hash_set (bm->free_list_by_size, f->n_data_bytes, f->index); - } -#endif return f->index; } @@ -609,50 +510,30 @@ static void del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) { u32 i; -#if DPDK > 0 - struct rte_mbuf *mb; - vlib_buffer_t *b; - - for (i = 0; i < vec_len (f->unaligned_buffers); i++) - { - b = vlib_get_buffer (vm, f->unaligned_buffers[i]); - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - for (i = 0; i < vec_len (f->aligned_buffers); i++) - { - b = vlib_get_buffer (vm, f->aligned_buffers[i]); - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - vec_free (f->name); -#else for (i = 0; i < vec_len (f->buffer_memory_allocated); i++) vm->os_physmem_free (f->buffer_memory_allocated[i]); vec_free (f->name); vec_free (f->buffer_memory_allocated); -#endif vec_free (f->unaligned_buffers); vec_free (f->aligned_buffers); } /* Add buffer free list. 
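   (The "add" wording above is historical; the function below deletes a
   free list. It is renamed vlib_buffer_delete_free_list_internal and
   installed by vlib_buffer_cb_init() as the default
   vlib_buffer_delete_free_list_cb.)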
*/ void -vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +vlib_buffer_delete_free_list_internal (vlib_main_t * vm, u32 free_list_index) { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; u32 merge_index; -#if DPDK > 0 int i; ASSERT (os_get_cpu_number () == 0); f = vlib_buffer_get_free_list (vm, free_list_index); + ASSERT (vec_len (f->unaligned_buffers) + vec_len (f->aligned_buffers) == + f->n_alloc); merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); if (merge_index != ~0 && merge_index != free_list_index) { @@ -674,26 +555,6 @@ vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) memset (f, 0xab, sizeof (f[0])); pool_put (bm->buffer_free_list_pool, f); } -#else - - f = vlib_buffer_get_free_list (vm, free_list_index); - - ASSERT (vec_len (f->unaligned_buffers) + vec_len (f->aligned_buffers) == - f->n_alloc); - merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); - if (merge_index != ~0 && merge_index != free_list_index) - { - merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool, - merge_index), f); - } - - del_free_list (vm, f); - - /* Poison it. */ - memset (f, 0xab, sizeof (f[0])); - - pool_put (bm->buffer_free_list_pool, f); -#endif } /* Make sure free list has at least given number of free buffers. */ @@ -701,63 +562,6 @@ static uword fill_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * fl, uword min_free_buffers) { -#if DPDK > 0 - vlib_buffer_t *b; - int n, i; - u32 bi; - u32 n_remaining = 0, n_alloc = 0; - unsigned socket_id = rte_socket_id ? rte_socket_id () : 0; - struct rte_mempool *rmp = vm->buffer_main->pktmbuf_pools[socket_id]; - struct rte_mbuf *mb; - - /* Too early? */ - if (PREDICT_FALSE (rmp == 0)) - return 0; - - trim_aligned (fl); - - /* Already have enough free buffers on free list? */ - n = min_free_buffers - vec_len (fl->aligned_buffers); - if (n <= 0) - return min_free_buffers; - - /* Always allocate round number of buffers. */ - n = round_pow2 (n, BUFFERS_PER_COPY); - - /* Always allocate new buffers in reasonably large sized chunks. 
*/ - n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); - - vec_validate (vm->mbuf_alloc_list, n - 1); - - if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) - return 0; - - _vec_len (vm->mbuf_alloc_list) = n; - - for (i = 0; i < n; i++) - { - mb = vm->mbuf_alloc_list[i]; - - ASSERT (rte_mbuf_refcnt_read (mb) == 0); - rte_mbuf_refcnt_set (mb, 1); - - b = vlib_buffer_from_rte_mbuf (mb); - bi = vlib_get_buffer_index (vm, b); - - vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t)); - n_alloc++; - n_remaining--; - - vlib_buffer_init_for_free_list (b, fl); - - if (fl->buffer_init_function) - fl->buffer_init_function (vm, fl, &bi, 1); - } - - fl->n_alloc += n; - - return n; -#else vlib_buffer_t *buffers, *b; int n, n_bytes, i; u32 *bi; @@ -824,7 +628,6 @@ fill_free_list (vlib_main_t * vm, fl->buffer_init_function (vm, fl, bi, n_this_chunk); } return n_alloc; -#endif } always_inline uword @@ -833,6 +636,7 @@ copy_alignment (u32 * x) return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; } + static u32 alloc_from_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * free_list, @@ -842,10 +646,6 @@ alloc_from_free_list (vlib_main_t * vm, uword u_len, n_left; uword n_unaligned_start, n_unaligned_end, n_filled; -#if DPDK == 0 - ASSERT (os_get_cpu_number () == 0); - -#endif n_left = n_alloc_buffers; dst = alloc_buffers; n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst)) @@ -945,25 +745,21 @@ alloc_from_free_list (vlib_main_t * vm, else _vec_len (free_list->unaligned_buffers) = u_len; -#if DPDK == 0 /* Verify that buffers are known free. */ vlib_buffer_validate_alloc_free (vm, alloc_buffers, n_alloc_buffers, VLIB_BUFFER_KNOWN_FREE); -#endif return n_alloc_buffers; } + /* Allocate a given number of buffers into given array. Returns number actually allocated which will be either zero or number requested. 
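   Made static and installed by vlib_buffer_cb_init() as the default
   vlib_buffer_alloc_cb, so callers are expected to reach it through the
   buffer callback table unless an external buffer manager has
   registered a replacement.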
*/ -u32 -vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +static u32 +vlib_buffer_alloc_internal (vlib_main_t * vm, u32 * buffers, u32 n_buffers) { vlib_buffer_main_t *bm = vm->buffer_main; -#if DPDK == 0 - ASSERT (os_get_cpu_number () == 0); -#endif return alloc_from_free_list (vm, @@ -972,10 +768,10 @@ vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) buffers, n_buffers); } -u32 -vlib_buffer_alloc_from_free_list (vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, u32 free_list_index) +static u32 +vlib_buffer_alloc_from_free_list_internal (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; @@ -1016,81 +812,10 @@ vlib_set_buffer_free_callback (vlib_main_t * vm, void *fp) return rv; } -#if DPDK == 0 -void vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) __attribute__ ((weak)); -void -vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) -{ -} - -#endif static_always_inline void vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, u32 follow_buffer_next) { -#if DPDK > 0 - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *fl; - u32 fi; - int i; - u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, - u32 follow_buffer_next); - - cb = bm->buffer_free_callback; - - if (PREDICT_FALSE (cb != 0)) - n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); - - if (!n_buffers) - return; - - for (i = 0; i < n_buffers; i++) - { - vlib_buffer_t *b; - struct rte_mbuf *mb; - - b = vlib_get_buffer (vm, buffers[i]); - - fl = buffer_get_free_list (vm, b, &fi); - - /* The only current use of this callback: multicast recycle */ - if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) - { - int j; - - add_buffer_to_free_list - (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); - - for (j = 0; j < vec_len (bm->announce_list); j++) - { - if (fl == bm->announce_list[j]) - goto already_announced; - } - vec_add1 (bm->announce_list, fl); - already_announced: - ; - } - else - { - if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) - { - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - } - } - if (vec_len (bm->announce_list)) - { - vlib_buffer_free_list_t *fl; - for (i = 0; i < vec_len (bm->announce_list); i++) - { - fl = bm->announce_list[i]; - fl->buffers_added_to_freelist_function (vm, fl); - } - _vec_len (bm->announce_list) = 0; - } -#else vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *fl; static u32 *next_to_free[2]; /* smp bad */ @@ -1315,26 +1040,25 @@ again: } _vec_len (announce_list) = 0; } -#endif } -void -vlib_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +static void +vlib_buffer_free_internal (vlib_main_t * vm, u32 * buffers, u32 n_buffers) { vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 1); } -void -vlib_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +static void +vlib_buffer_free_no_next_internal (vlib_main_t * vm, u32 * buffers, + u32 n_buffers) { vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 0); } -#if DPDK == 0 /* Copy template packet data into buffers as they are allocated. 
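   (Marked unused below: when an external buffer manager supplies a
   vlib_packet_template_init_cb, vlib_packet_template_init defers to it
   and this helper may be left unreferenced.)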
*/ -static void +static void __attribute__ ((unused)) vlib_packet_template_buffer_init (vlib_main_t * vm, vlib_buffer_free_list_t * fl, u32 * buffers, u32 n_buffers) @@ -1352,7 +1076,6 @@ vlib_packet_template_buffer_init (vlib_main_t * vm, b->current_length); } } -#endif void vlib_packet_template_init (vlib_main_t * vm, @@ -1362,28 +1085,22 @@ vlib_packet_template_init (vlib_main_t * vm, uword min_n_buffers_each_physmem_alloc, char *fmt, ...) { -#if DPDK > 0 + vlib_buffer_main_t *bm = vm->buffer_main; va_list va; __attribute__ ((unused)) u8 *name; + vlib_buffer_free_list_t *fl; va_start (va, fmt); name = va_format (0, fmt, &va); va_end (va); - vlib_worker_thread_barrier_sync (vm); - memset (t, 0, sizeof (t[0])); - - vec_add (t->packet_data, packet_data, n_packet_data_bytes); + if (bm->cb.vlib_packet_template_init_cb) + bm->cb.vlib_packet_template_init_cb (vm, (void *) t, packet_data, + n_packet_data_bytes, + min_n_buffers_each_physmem_alloc, + name); - vlib_worker_thread_barrier_release (vm); -#else - vlib_buffer_free_list_t *fl; - va_list va; - u8 *name; - - va_start (va, fmt); - name = va_format (0, fmt, &va); - va_end (va); + vlib_worker_thread_barrier_sync (vm); memset (t, 0, sizeof (t[0])); @@ -1406,7 +1123,7 @@ vlib_packet_template_init (vlib_main_t * vm, fl->buffer_init_template.current_data = 0; fl->buffer_init_template.current_length = n_packet_data_bytes; fl->buffer_init_template.flags = 0; -#endif + vlib_worker_thread_barrier_release (vm); } void * @@ -1429,7 +1146,6 @@ vlib_packet_template_get_packet (vlib_main_t * vm, return b->data; } -#if DPDK == 0 void vlib_packet_template_get_packet_helper (vlib_main_t * vm, vlib_packet_template_t * t) @@ -1447,7 +1163,6 @@ vlib_packet_template_get_packet_helper (vlib_main_t * vm, _vec_len (t->free_buffers) = n_alloc; } -#endif /* Append given data to end of buffer, possibly allocating new buffers. 
*/ u32 vlib_buffer_add_data (vlib_main_t * vm, @@ -1541,328 +1256,11 @@ vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm, return copied; } -#if DPDK > 0 -clib_error_t * -vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, - unsigned socket_id) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_physmem_main_t *vpm = &vm->physmem_main; - struct rte_mempool *rmp; - int i; - - if (!rte_pktmbuf_pool_create) - return clib_error_return (0, "not linked with DPDK"); - - vec_validate_aligned (bm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); - - /* pool already exists, nothing to do */ - if (bm->pktmbuf_pools[socket_id]) - return 0; - - u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); - - rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ - num_mbufs, /* number of mbufs */ - 512, /* cache size */ - VLIB_BUFFER_HDR_SIZE, /* priv size */ - VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ - socket_id); /* cpu socket */ - - if (rmp) - { - { - uword this_pool_end; - uword this_pool_start; - uword this_pool_size; - uword save_vpm_start, save_vpm_end, save_vpm_size; - struct rte_mempool_memhdr *memhdr; - - this_pool_start = ~0ULL; - this_pool_end = 0LL; - - STAILQ_FOREACH (memhdr, &rmp->mem_list, next) - { - if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) - this_pool_end = (uword) (memhdr->addr + memhdr->len); - if (((uword) memhdr->addr) < this_pool_start) - this_pool_start = (uword) (memhdr->addr); - } - ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); - this_pool_size = this_pool_end - this_pool_start; - - if (CLIB_DEBUG > 1) - { - clib_warning ("%s: pool start %llx pool end %llx pool size %lld", - pool_name, this_pool_start, this_pool_end, - this_pool_size); - clib_warning - ("before: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - save_vpm_start = vpm->virtual.start; - save_vpm_end = vpm->virtual.end; - save_vpm_size = vpm->virtual.size; - - if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) - vpm->virtual.start = this_pool_start; - if (this_pool_end > vpm->virtual.end) - vpm->virtual.end = this_pool_end; - - vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; - - if (CLIB_DEBUG > 1) - { - clib_warning - ("after: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - /* check if fits into buffer index range */ - if ((u64) vpm->virtual.size > - ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) - { - clib_warning ("physmem: virtual size out of range!"); - vpm->virtual.start = save_vpm_start; - vpm->virtual.end = save_vpm_end; - vpm->virtual.size = save_vpm_size; - rmp = 0; - } - } - if (rmp) - { - bm->pktmbuf_pools[socket_id] = rmp; - vec_free (pool_name); - return 0; - } - } - - vec_free (pool_name); - - /* no usable pool for this socket, try to use pool from another one */ - for (i = 0; i < vec_len (bm->pktmbuf_pools); i++) - { - if (bm->pktmbuf_pools[i]) - { - clib_warning - ("WARNING: Failed to allocate mempool for CPU socket %u. 
" - "Threads running on socket %u will use socket %u mempool.", - socket_id, socket_id, i); - bm->pktmbuf_pools[socket_id] = bm->pktmbuf_pools[i]; - return 0; - } - } - - return clib_error_return (0, "failed to allocate mempool on socket %u", - socket_id); -} -#endif - -static void -vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s) -{ - vlib_main_t *vm; - vlib_serialize_buffer_main_t *sm; - uword n, n_bytes_to_write; - vlib_buffer_t *last; - - n_bytes_to_write = s->current_buffer_index; - sm = - uword_to_pointer (s->data_function_opaque, - vlib_serialize_buffer_main_t *); - vm = sm->vlib_main; - - ASSERT (sm->tx.max_n_data_bytes_per_chain > 0); - if (serialize_stream_is_end_of_stream (s) - || sm->tx.n_total_data_bytes + n_bytes_to_write > - sm->tx.max_n_data_bytes_per_chain) - { - vlib_process_t *p = vlib_get_current_process (vm); - - last = vlib_get_buffer (vm, sm->last_buffer); - last->current_length = n_bytes_to_write; - - vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index, - sm->first_buffer); - - sm->first_buffer = sm->last_buffer = ~0; - sm->tx.n_total_data_bytes = 0; - } - - else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0) - { - ASSERT (sm->first_buffer == ~0); - ASSERT (sm->last_buffer == ~0); - n = - vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1, - sm->tx.free_list_index); - if (n != 1) - serialize_error (m, - clib_error_create - ("vlib_buffer_alloc_from_free_list fails")); - sm->last_buffer = sm->first_buffer; - s->n_buffer_bytes = - vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index); - } - - if (n_bytes_to_write > 0) - { - vlib_buffer_t *prev = vlib_get_buffer (vm, sm->last_buffer); - n = - vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1, - sm->tx.free_list_index); - if (n != 1) - serialize_error (m, - clib_error_create - ("vlib_buffer_alloc_from_free_list fails")); - sm->tx.n_total_data_bytes += n_bytes_to_write; - prev->current_length = n_bytes_to_write; - prev->next_buffer = sm->last_buffer; - prev->flags |= VLIB_BUFFER_NEXT_PRESENT; - } - - if (sm->last_buffer != ~0) - { - last = vlib_get_buffer (vm, sm->last_buffer); - s->buffer = vlib_buffer_get_current (last); - s->current_buffer_index = 0; - ASSERT (last->current_data == s->current_buffer_index); - } -} - -static void -vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s) -{ - vlib_main_t *vm; - vlib_serialize_buffer_main_t *sm; - vlib_buffer_t *last; - - sm = - uword_to_pointer (s->data_function_opaque, - vlib_serialize_buffer_main_t *); - vm = sm->vlib_main; - - if (serialize_stream_is_end_of_stream (s)) - return; - - if (sm->last_buffer != ~0) - { - last = vlib_get_buffer (vm, sm->last_buffer); - - if (last->flags & VLIB_BUFFER_NEXT_PRESENT) - sm->last_buffer = last->next_buffer; - else - { - vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1); - sm->first_buffer = sm->last_buffer = ~0; - } - } - - if (sm->last_buffer == ~0) - { - while (clib_fifo_elts (sm->rx.buffer_fifo) == 0) - { - sm->rx.ready_one_time_event = - vlib_process_create_one_time_event (vm, vlib_current_process (vm), - ~0); - vlib_process_wait_for_one_time_event (vm, /* no event data */ 0, - sm->rx.ready_one_time_event); - } - - clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer); - sm->last_buffer = sm->first_buffer; - } - - ASSERT (sm->last_buffer != ~0); - - last = vlib_get_buffer (vm, sm->last_buffer); - s->current_buffer_index = 0; - s->buffer = vlib_buffer_get_current (last); - s->n_buffer_bytes = last->current_length; -} - -static void 
-serialize_open_vlib_helper (serialize_main_t * m, - vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm, uword is_read) -{ - /* Initialize serialize main but save overflow buffer for re-use between calls. */ - { - u8 *save = m->stream.overflow_buffer; - memset (m, 0, sizeof (m[0])); - m->stream.overflow_buffer = save; - if (save) - _vec_len (save) = 0; - } - - sm->first_buffer = sm->last_buffer = ~0; - if (is_read) - clib_fifo_reset (sm->rx.buffer_fifo); - else - sm->tx.n_total_data_bytes = 0; - sm->vlib_main = vm; - m->header.data_function = is_read ? vlib_serialize_rx : vlib_serialize_tx; - m->stream.data_function_opaque = pointer_to_uword (sm); -} - -void -serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm) -{ - serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0); -} - -void -unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm) -{ - serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1); -} - -u32 -serialize_close_vlib_buffer (serialize_main_t * m) -{ - vlib_serialize_buffer_main_t *sm - = uword_to_pointer (m->stream.data_function_opaque, - vlib_serialize_buffer_main_t *); - vlib_buffer_t *last; - serialize_stream_t *s = &m->stream; - - last = vlib_get_buffer (sm->vlib_main, sm->last_buffer); - last->current_length = s->current_buffer_index; - - if (vec_len (s->overflow_buffer) > 0) - { - sm->last_buffer - = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index, - sm->last_buffer == ~0 ? 0 : sm->last_buffer, - s->overflow_buffer, - vec_len (s->overflow_buffer)); - _vec_len (s->overflow_buffer) = 0; - } - - return sm->first_buffer; -} - -void -unserialize_close_vlib_buffer (serialize_main_t * m) -{ - vlib_serialize_buffer_main_t *sm - = uword_to_pointer (m->stream.data_function_opaque, - vlib_serialize_buffer_main_t *); - if (sm->first_buffer != ~0) - vlib_buffer_free_one (sm->vlib_main, sm->first_buffer); - clib_fifo_reset (sm->rx.buffer_fifo); - if (m->stream.overflow_buffer) - _vec_len (m->stream.overflow_buffer) = 0; -} static u8 * format_vlib_buffer_free_list (u8 * s, va_list * va) { vlib_buffer_free_list_t *f = va_arg (*va, vlib_buffer_free_list_t *); -#if DPDK > 0 u32 threadnum = va_arg (*va, u32); uword bytes_alloc, bytes_free, n_free, size; @@ -1877,21 +1275,6 @@ format_vlib_buffer_free_list (u8 * s, va_list * va) bytes_free = size * n_free; s = format (s, "%7d%30s%12d%12d%=12U%=12U%=12d%=12d", threadnum, -#else - uword bytes_alloc, bytes_free, n_free, size; - - if (!f) - return format (s, "%=30s%=12s%=12s%=12s%=12s%=12s%=12s", - "Name", "Index", "Size", "Alloc", "Free", "#Alloc", - "#Free"); - - size = sizeof (vlib_buffer_t) + f->n_data_bytes; - n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers); - bytes_alloc = size * f->n_alloc; - bytes_free = size * n_free; - - s = format (s, "%30s%12d%12d%=12U%=12U%=12d%=12d", -#endif f->name, f->index, f->n_data_bytes, format_memory_size, bytes_alloc, format_memory_size, bytes_free, f->n_alloc, n_free); @@ -1903,7 +1286,6 @@ static clib_error_t * show_buffers (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { -#if DPDK > 0 vlib_buffer_main_t *bm; vlib_buffer_free_list_t *f; vlib_main_t *curr_vm; @@ -1926,18 +1308,6 @@ show_buffers (vlib_main_t * vm, } while (vm_index < vec_len (vlib_mains)); -#else - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *f; - - vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0); - /* *INDENT-OFF* */ - pool_foreach (f, 
bm->buffer_free_list_pool, ({ - vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f); - })); -/* *INDENT-ON* */ - -#endif return 0; } @@ -1949,34 +1319,38 @@ VLIB_CLI_COMMAND (show_buffers_command, static) = { }; /* *INDENT-ON* */ -#if DPDK > 0 -#if CLIB_DEBUG > 0 - -u32 *vlib_buffer_state_validation_lock; -uword *vlib_buffer_state_validation_hash; -void *vlib_buffer_state_heap; - -static clib_error_t * -buffer_state_validation_init (vlib_main_t * vm) +void +vlib_buffer_cb_init (struct vlib_main_t *vm) { - void *oldheap; - - vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); - - oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + vlib_buffer_main_t *bm = vm->buffer_main; + bm->cb.vlib_buffer_alloc_cb = &vlib_buffer_alloc_internal; + bm->cb.vlib_buffer_alloc_from_free_list_cb = + &vlib_buffer_alloc_from_free_list_internal; + bm->cb.vlib_buffer_free_cb = &vlib_buffer_free_internal; + bm->cb.vlib_buffer_free_no_next_cb = &vlib_buffer_free_no_next_internal; + bm->cb.vlib_buffer_delete_free_list_cb = + &vlib_buffer_delete_free_list_internal; + bm->extern_buffer_mgmt = 0; +} - vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); - vec_validate_aligned (vlib_buffer_state_validation_lock, 0, - CLIB_CACHE_LINE_BYTES); - clib_mem_set_heap (oldheap); +int +vlib_buffer_cb_register (struct vlib_main_t *vm, vlib_buffer_callbacks_t * cb) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + if (bm->extern_buffer_mgmt) + return -1; + +#define _(x) bm->cb.x = cb->x + _(vlib_buffer_alloc_cb); + _(vlib_buffer_alloc_from_free_list_cb); + _(vlib_buffer_free_cb); + _(vlib_buffer_free_no_next_cb); + _(vlib_buffer_delete_free_list_cb); +#undef _ + bm->extern_buffer_mgmt = 1; return 0; } -VLIB_INIT_FUNCTION (buffer_state_validation_init); -#endif -#endif - - /** @endcond */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 5f1e62f0..d270c08a 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -46,15 +46,9 @@ #include #include /* for vlib_error_t */ -#if DPDK > 0 -#include -#define VLIB_BUFFER_DATA_SIZE (2048) -#define VLIB_BUFFER_PRE_DATA_SIZE RTE_PKTMBUF_HEADROOM -#else #include /* for __PRE_DATA_SIZE */ -#define VLIB_BUFFER_DATA_SIZE (512) +#define VLIB_BUFFER_DATA_SIZE (2048) #define VLIB_BUFFER_PRE_DATA_SIZE __PRE_DATA_SIZE -#endif #if defined (CLIB_HAVE_VEC128) || defined (__aarch64__) typedef u8x16 vlib_copy_unit_t; @@ -296,6 +290,27 @@ typedef struct vlib_buffer_free_list_t uword buffer_init_function_opaque; } __attribute__ ((aligned (16))) vlib_buffer_free_list_t; +typedef struct +{ + u32 (*vlib_buffer_alloc_cb) (struct vlib_main_t * vm, u32 * buffers, + u32 n_buffers); + u32 (*vlib_buffer_alloc_from_free_list_cb) (struct vlib_main_t * vm, + u32 * buffers, u32 n_buffers, + u32 free_list_index); + void (*vlib_buffer_free_cb) (struct vlib_main_t * vm, u32 * buffers, + u32 n_buffers); + void (*vlib_buffer_free_no_next_cb) (struct vlib_main_t * vm, u32 * buffers, + u32 n_buffers); + void (*vlib_packet_template_init_cb) (struct vlib_main_t * vm, void *t, + void *packet_data, + uword n_packet_data_bytes, + uword + min_n_buffers_each_physmem_alloc, + u8 * name); + void (*vlib_buffer_delete_free_list_cb) (struct vlib_main_t * vm, + u32 free_list_index); +} vlib_buffer_callbacks_t; + typedef struct { /* Buffer free callback, for subversive activities */ @@ -323,12 +338,15 @@ typedef struct /* List of free-lists needing Blue Light Special announcements */ vlib_buffer_free_list_t **announce_list; - /* Vector of rte_mempools per socket 
*/ -#if DPDK == 1 - struct rte_mempool **pktmbuf_pools; -#endif + /* Callbacks */ + vlib_buffer_callbacks_t cb; + int extern_buffer_mgmt; } vlib_buffer_main_t; +void vlib_buffer_cb_init (struct vlib_main_t *vm); +int vlib_buffer_cb_register (struct vlib_main_t *vm, + vlib_buffer_callbacks_t * cb); + typedef struct { struct vlib_main_t *vlib_main; @@ -385,11 +403,6 @@ serialize_vlib_buffer_n_bytes (serialize_main_t * m) vec_len (s->overflow_buffer); } -#if DPDK > 0 -#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) -#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) -#endif - /* */ diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 75716eca..15d93c16 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -195,8 +195,6 @@ do { \ } while (0) #endif -#if DPDK == 0 - typedef enum { /* Index is unknown. */ @@ -232,8 +230,6 @@ vlib_buffer_set_known_state (vlib_main_t * vm, u8 *vlib_validate_buffer (vlib_main_t * vm, u32 buffer_index, uword follow_chain); -#endif /* DPDK == 0 */ - clib_error_t *vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, unsigned socket_id); @@ -245,7 +241,15 @@ clib_error_t *vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, @return - (u32) number of buffers actually allocated, may be less than the number requested or zero */ -u32 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers); +always_inline u32 +vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_alloc_cb); + + return bm->cb.vlib_buffer_alloc_cb (vm, buffers, n_buffers); +} always_inline u32 vlib_buffer_round_size (u32 size) @@ -261,9 +265,18 @@ vlib_buffer_round_size (u32 size) @return - (u32) number of buffers actually allocated, may be less than the number requested or zero */ -u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, u32 free_list_index); +always_inline u32 +vlib_buffer_alloc_from_free_list (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_alloc_from_free_list_cb); + + return bm->cb.vlib_buffer_alloc_from_free_list_cb (vm, buffers, n_buffers, + free_list_index); +} /** \brief Free buffers Frees the entire buffer chain for each buffer @@ -273,11 +286,19 @@ u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm, @param n_buffers - (u32) number of buffers to free */ -void vlib_buffer_free (vlib_main_t * vm, - /* pointer to first buffer */ - u32 * buffers, - /* number of buffers to free */ - u32 n_buffers); +always_inline void +vlib_buffer_free (vlib_main_t * vm, + /* pointer to first buffer */ + u32 * buffers, + /* number of buffers to free */ + u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_free_cb); + + return bm->cb.vlib_buffer_free_cb (vm, buffers, n_buffers); +} /** \brief Free buffers, does not free the buffer chain for each buffer @@ -286,11 +307,19 @@ void vlib_buffer_free (vlib_main_t * vm, @param n_buffers - (u32) number of buffers to free */ -void vlib_buffer_free_no_next (vlib_main_t * vm, - /* pointer to first buffer */ - u32 * buffers, - /* number of buffers to free */ - u32 n_buffers); +always_inline void +vlib_buffer_free_no_next (vlib_main_t * vm, + /* pointer to first buffer */ + u32 * buffers, + /* number of buffers to free */ + u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT 
(bm->cb.vlib_buffer_free_no_next_cb); + + return bm->cb.vlib_buffer_free_no_next_cb (vm, buffers, n_buffers); +} /** \brief Free one buffer Shorthand to free a single buffer chain. @@ -307,7 +336,15 @@ vlib_buffer_free_one (vlib_main_t * vm, u32 buffer_index) /* Add/delete buffer free lists. */ u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes, char *fmt, ...); -void vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index); +always_inline void +vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_delete_free_list_cb); + + bm->cb.vlib_buffer_delete_free_list_cb (vm, free_list_index); +} /* Find already existing public free list with given size or create one. */ u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes, @@ -453,11 +490,6 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b) return fd; } -/* - * vlib_buffer_chain_* functions provide a way to create long buffers. - * When DPDK is enabled, the 'hidden' DPDK header is taken care of transparently. - */ - /* Initializes the buffer as an empty packet with no chained buffers. */ always_inline void vlib_buffer_chain_init (vlib_buffer_t * first) @@ -537,8 +569,6 @@ typedef struct /* Vector of packet data. */ u8 *packet_data; - /* Note: the next three fields are unused if DPDK == 1 */ - /* Number of buffers to allocate in each call to physmem allocator. */ u32 min_n_buffers_each_physmem_alloc; diff --git a/src/vlib/buffer_serialize.c b/src/vlib/buffer_serialize.c new file mode 100644 index 00000000..96a5f0a0 --- /dev/null +++ b/src/vlib/buffer_serialize.c @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer.c: allocate/free network buffers. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
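With the buffer_funcs.h wrappers above, every allocate/free path now dispatches through the callback table in vlib_buffer_main_t: vlib_buffer_cb_init() installs the native implementations at startup, and an external buffer manager may take over exactly once via vlib_buffer_cb_register(). A minimal sketch of such a registration, mirroring the DPDK one added later in this patch (the my_* handlers are hypothetical placeholders):

static vlib_buffer_callbacks_t my_callbacks = {
  .vlib_buffer_alloc_cb = &my_buffer_alloc,
  .vlib_buffer_alloc_from_free_list_cb = &my_buffer_alloc_from_free_list,
  .vlib_buffer_free_cb = &my_buffer_free,
  .vlib_buffer_free_no_next_cb = &my_buffer_free_no_next,
  .vlib_buffer_delete_free_list_cb = &my_buffer_delete_free_list,
};

static clib_error_t *
my_buffer_init (vlib_main_t * vm)
{
  /* Returns -1 if another buffer manager registered first. */
  if (vlib_buffer_cb_register (vm, &my_callbacks))
    return clib_error_return (0, "buffer callbacks already registered");
  return 0;
}

VLIB_INIT_FUNCTION (my_buffer_init);
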
+ */ + +#include + +static void +vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s) +{ + vlib_main_t *vm; + vlib_serialize_buffer_main_t *sm; + uword n, n_bytes_to_write; + vlib_buffer_t *last; + + n_bytes_to_write = s->current_buffer_index; + sm = + uword_to_pointer (s->data_function_opaque, + vlib_serialize_buffer_main_t *); + vm = sm->vlib_main; + + ASSERT (sm->tx.max_n_data_bytes_per_chain > 0); + if (serialize_stream_is_end_of_stream (s) + || sm->tx.n_total_data_bytes + n_bytes_to_write > + sm->tx.max_n_data_bytes_per_chain) + { + vlib_process_t *p = vlib_get_current_process (vm); + + last = vlib_get_buffer (vm, sm->last_buffer); + last->current_length = n_bytes_to_write; + + vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index, + sm->first_buffer); + + sm->first_buffer = sm->last_buffer = ~0; + sm->tx.n_total_data_bytes = 0; + } + + else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0) + { + ASSERT (sm->first_buffer == ~0); + ASSERT (sm->last_buffer == ~0); + n = + vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1, + sm->tx.free_list_index); + if (n != 1) + serialize_error (m, + clib_error_create + ("vlib_buffer_alloc_from_free_list fails")); + sm->last_buffer = sm->first_buffer; + s->n_buffer_bytes = + vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index); + } + + if (n_bytes_to_write > 0) + { + vlib_buffer_t *prev = vlib_get_buffer (vm, sm->last_buffer); + n = + vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1, + sm->tx.free_list_index); + if (n != 1) + serialize_error (m, + clib_error_create + ("vlib_buffer_alloc_from_free_list fails")); + sm->tx.n_total_data_bytes += n_bytes_to_write; + prev->current_length = n_bytes_to_write; + prev->next_buffer = sm->last_buffer; + prev->flags |= VLIB_BUFFER_NEXT_PRESENT; + } + + if (sm->last_buffer != ~0) + { + last = vlib_get_buffer (vm, sm->last_buffer); + s->buffer = vlib_buffer_get_current (last); + s->current_buffer_index = 0; + ASSERT (last->current_data == s->current_buffer_index); + } +} + +static void +vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s) +{ + vlib_main_t *vm; + vlib_serialize_buffer_main_t *sm; + vlib_buffer_t *last; + + sm = + uword_to_pointer (s->data_function_opaque, + vlib_serialize_buffer_main_t *); + vm = sm->vlib_main; + + if (serialize_stream_is_end_of_stream (s)) + return; + + if (sm->last_buffer != ~0) + { + last = vlib_get_buffer (vm, sm->last_buffer); + + if (last->flags & VLIB_BUFFER_NEXT_PRESENT) + sm->last_buffer = last->next_buffer; + else + { + vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1); + sm->first_buffer = sm->last_buffer = ~0; + } + } + + if (sm->last_buffer == ~0) + { + while (clib_fifo_elts (sm->rx.buffer_fifo) == 0) + { + sm->rx.ready_one_time_event = + vlib_process_create_one_time_event (vm, vlib_current_process (vm), + ~0); + vlib_process_wait_for_one_time_event (vm, /* no event data */ 0, + sm->rx.ready_one_time_event); + } + + clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer); + sm->last_buffer = sm->first_buffer; + } + + ASSERT (sm->last_buffer != ~0); + + last = vlib_get_buffer (vm, sm->last_buffer); + s->current_buffer_index = 0; + s->buffer = vlib_buffer_get_current (last); + s->n_buffer_bytes = last->current_length; +} + +static void +serialize_open_vlib_helper (serialize_main_t * m, + vlib_main_t * vm, + vlib_serialize_buffer_main_t * sm, uword is_read) +{ + /* Initialize serialize main but save overflow buffer for re-use between calls. 
*/ + { + u8 *save = m->stream.overflow_buffer; + memset (m, 0, sizeof (m[0])); + m->stream.overflow_buffer = save; + if (save) + _vec_len (save) = 0; + } + + sm->first_buffer = sm->last_buffer = ~0; + if (is_read) + clib_fifo_reset (sm->rx.buffer_fifo); + else + sm->tx.n_total_data_bytes = 0; + sm->vlib_main = vm; + m->header.data_function = is_read ? vlib_serialize_rx : vlib_serialize_tx; + m->stream.data_function_opaque = pointer_to_uword (sm); +} + +void +serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, + vlib_serialize_buffer_main_t * sm) +{ + serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0); +} + +void +unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, + vlib_serialize_buffer_main_t * sm) +{ + serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1); +} + +u32 +serialize_close_vlib_buffer (serialize_main_t * m) +{ + vlib_serialize_buffer_main_t *sm + = uword_to_pointer (m->stream.data_function_opaque, + vlib_serialize_buffer_main_t *); + vlib_buffer_t *last; + serialize_stream_t *s = &m->stream; + + last = vlib_get_buffer (sm->vlib_main, sm->last_buffer); + last->current_length = s->current_buffer_index; + + if (vec_len (s->overflow_buffer) > 0) + { + sm->last_buffer + = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index, + sm->last_buffer == ~0 ? 0 : sm->last_buffer, + s->overflow_buffer, + vec_len (s->overflow_buffer)); + _vec_len (s->overflow_buffer) = 0; + } + + return sm->first_buffer; +} + +void +unserialize_close_vlib_buffer (serialize_main_t * m) +{ + vlib_serialize_buffer_main_t *sm + = uword_to_pointer (m->stream.data_function_opaque, + vlib_serialize_buffer_main_t *); + if (sm->first_buffer != ~0) + vlib_buffer_free_one (sm->vlib_main, sm->first_buffer); + clib_fifo_reset (sm->rx.buffer_fifo); + if (m->stream.overflow_buffer) + _vec_len (m->stream.overflow_buffer) = 0; +} + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/main.c b/src/vlib/main.c index 6c6cad98..09f34bbd 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -465,7 +465,7 @@ vlib_put_next_frame (vlib_main_t * vm, vlib_frame_t *f; u32 n_vectors_in_frame; - if (DPDK == 0 && CLIB_DEBUG > 0) + if (vm->buffer_main->extern_buffer_mgmt == 0 && CLIB_DEBUG > 0) vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left); nf = vlib_node_runtime_get_next_frame (vm, r, next_index); @@ -1012,8 +1012,8 @@ dispatch_node (vlib_main_t * vm, /* When in interrupt mode and vector rate crosses threshold switch to polling mode. */ - if ((DPDK == 0 && dispatch_state == VLIB_NODE_STATE_INTERRUPT) - || (DPDK == 0 && dispatch_state == VLIB_NODE_STATE_POLLING + if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT) + || (dispatch_state == VLIB_NODE_STATE_POLLING && (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))) { @@ -1615,6 +1615,7 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) vm->name = "VLIB"; vec_validate (vm->buffer_main, 0); + vlib_buffer_cb_init (vm); if ((error = vlib_thread_init (vm))) { diff --git a/src/vlib/threads.c b/src/vlib/threads.c index c5e58bc0..b3bbd30e 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -22,29 +22,10 @@ #include #include - -#if DPDK==1 -#include -#include -#include -#include -#include -#endif DECLARE_CJ_GLOBAL_LOG; #define FRAME_QUEUE_NELTS 32 - -#if DPDK==1 -/* - * Weak definitions of DPDK symbols used in this file. - * Needed for linking test programs without DPDK libs. 
- */ -unsigned __thread __attribute__ ((weak)) RTE_PER_LCORE (_lcore_id); -struct lcore_config __attribute__ ((weak)) lcore_config[]; -unsigned __attribute__ ((weak)) rte_socket_id (); -int __attribute__ ((weak)) rte_eal_remote_launch (); -#endif u32 vl (void *p) { @@ -194,14 +175,17 @@ vlib_thread_init (vlib_main_t * vm) tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1); /* pin main thread to main_lcore */ -#if DPDK==0 - { - cpu_set_t cpuset; - CPU_ZERO (&cpuset); - CPU_SET (tm->main_lcore, &cpuset); - pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset); - } -#endif + if (tm->cb.vlib_thread_set_lcore_cb) + { + tm->cb.vlib_thread_set_lcore_cb (0, tm->main_lcore); + } + else + { + cpu_set_t cpuset; + CPU_ZERO (&cpuset); + CPU_SET (tm->main_lcore, &cpuset); + pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset); + } /* as many threads as stacks... */ vec_validate_aligned (vlib_worker_threads, vec_len (vlib_thread_stacks) - 1, @@ -520,32 +504,29 @@ vlib_worker_thread_bootstrap_fn (void *arg) return rv; } -static int -vlib_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) +static clib_error_t * +vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) { + vlib_thread_main_t *tm = &vlib_thread_main; void *(*fp_arg) (void *) = fp; w->lcore_id = lcore_id; -#if DPDK==1 - if (!w->registration->use_pthreads) - if (rte_eal_remote_launch) /* do we have dpdk linked */ - return rte_eal_remote_launch (fp, (void *) w, lcore_id); - else - return -1; + if (tm->cb.vlib_launch_thread_cb && !w->registration->use_pthreads) + return tm->cb.vlib_launch_thread_cb (fp, (void *) w, lcore_id); else -#endif { - int ret; pthread_t worker; cpu_set_t cpuset; CPU_ZERO (&cpuset); CPU_SET (lcore_id, &cpuset); - ret = pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w); - if (ret == 0) - return pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset); - else - return ret; + if (pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w)) + return clib_error_return_unix (0, "pthread_create"); + + if (pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset)) + return clib_error_return_unix (0, "pthread_setaffinity_np"); + + return 0; } } @@ -769,6 +750,7 @@ start_workers (vlib_main_t * vm) for (i = 0; i < vec_len (tm->registrations); i++) { + clib_error_t *err; int j; tr = tm->registrations[i]; @@ -778,22 +760,24 @@ start_workers (vlib_main_t * vm) for (j = 0; j < tr->count; j++) { w = vlib_worker_threads + worker_thread_index++; - if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, 0) < - 0) - clib_warning ("Couldn't start '%s' pthread ", tr->name); + err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn, + w, 0); + if (err) + clib_error_report (err); } } else { uword c; - /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tr->coremask, ({ - w = vlib_worker_threads + worker_thread_index++; - if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, c) < 0) - clib_warning ("Couldn't start DPDK lcore %d", c); - - })); -/* *INDENT-ON* */ + /* *INDENT-OFF* */ + clib_bitmap_foreach (c, tr->coremask, ({ + w = vlib_worker_threads + worker_thread_index++; + err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn, + w, c); + if (err) + clib_error_report (err); + })); + /* *INDENT-ON* */ } } vlib_worker_thread_barrier_sync (vm); @@ -1105,7 +1089,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) { tm->n_thread_stacks += tr->count; tm->n_pthreads += tr->count * tr->use_pthreads; - tm->n_eal_threads += tr->count 
* (tr->use_pthreads == 0); + tm->n_threads += tr->count * (tr->use_pthreads == 0); tr = tr->next; } @@ -1423,6 +1407,7 @@ void vlib_worker_thread_fn (void *arg) { vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; + vlib_thread_main_t *tm = vlib_get_thread_main (); vlib_main_t *vm = vlib_get_main (); ASSERT (vm->cpu_index == os_get_cpu_number ()); @@ -1431,12 +1416,9 @@ vlib_worker_thread_fn (void *arg) clib_time_init (&vm->clib_time); clib_mem_set_heap (w->thread_mheap); -#if DPDK > 0 /* Wait until the dpdk init sequence is complete */ - vlib_thread_main_t *tm = vlib_get_thread_main (); - while (tm->worker_thread_release == 0) + while (tm->extern_thread_mgmt && tm->worker_thread_release == 0) vlib_worker_thread_barrier_check (); -#endif vlib_worker_thread_internal (vm); } @@ -1475,6 +1457,20 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts) return (fqm - tm->frame_queue_mains); } + +int +vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + + if (tm->extern_thread_mgmt) + return -1; + + tm->cb.vlib_launch_thread_cb = cb->vlib_launch_thread_cb; + tm->extern_thread_mgmt = 1; + return 0; +} + clib_error_t * threads_init (vlib_main_t * vm) { diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 34ab5be8..75a5a281 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -263,6 +263,13 @@ typedef enum SCHED_POLICY_N, } sched_policy_t; +typedef struct +{ + clib_error_t *(*vlib_launch_thread_cb) (void *fp, vlib_worker_thread_t * w, + unsigned lcore_id); + clib_error_t *(*vlib_thread_set_lcore_cb) (u32 thread, u16 lcore); +} vlib_thread_callbacks_t; + typedef struct { /* Link list of registrations, built by constructors */ @@ -290,8 +297,8 @@ typedef struct /* Number of pthreads */ u32 n_pthreads; - /* Number of DPDK eal threads */ - u32 n_eal_threads; + /* Number of threads */ + u32 n_threads; /* Number of cores to skip, must match the core mask */ u32 skip_cores; @@ -320,6 +327,9 @@ typedef struct /* scheduling policy priority */ u32 sched_priority; + /* callbacks */ + vlib_thread_callbacks_t cb; + int extern_thread_mgmt; } vlib_thread_main_t; extern vlib_thread_main_t vlib_thread_main; @@ -459,6 +469,9 @@ vlib_get_worker_handoff_queue_elt (u32 frame_queue_index, return elt; } +int vlib_thread_cb_register (struct vlib_main_t *vm, + vlib_thread_callbacks_t * cb); + #endif /* included_vlib_threads_h */ /* diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index ee632279..b64028c4 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -20,14 +20,6 @@ #include #include -#if DPDK==1 -#include -#include -#include -#include -#include -#endif - static u8 * format_sched_policy_and_priority (u8 * s, va_list * args) { @@ -116,23 +108,6 @@ show_threads_fn (vlib_main_t * vm, vec_free (p); line = format (line, "%-7u%-7u%-7u%", lcore, core_id, socket_id); -#if DPDK==1 - ASSERT (lcore <= RTE_MAX_LCORE); - switch (lcore_config[lcore].state) - { - case WAIT: - line = format (line, "wait"); - break; - case RUNNING: - line = format (line, "running"); - break; - case FINISHED: - line = format (line, "finished"); - break; - default: - line = format (line, "unknown"); - } -#endif } else { diff --git a/src/vlib/unix/physmem.c b/src/vlib/unix/physmem.c index 80ab7b9d..8d10ad2e 100644 --- a/src/vlib/unix/physmem.c +++ b/src/vlib/unix/physmem.c @@ -45,13 +45,13 @@ static void * unix_physmem_alloc_aligned (vlib_physmem_main_t * vpm, uword n_bytes, uword alignment) { + vlib_main_t *vm = 
vlib_get_main (); physmem_main_t *pm = &physmem_main; uword lo_offset, hi_offset; uword *to_free = 0; -#if DPDK > 0 - clib_warning ("unsafe alloc!"); -#endif + if (vm->buffer_main->extern_buffer_mgmt) + clib_warning ("unsafe alloc!"); /* IO memory is always at least cache aligned. */ alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES); @@ -269,16 +269,17 @@ static clib_error_t * show_physmem (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { -#if DPDK > 0 - vlib_cli_output (vm, "Not supported with DPDK drivers."); -#else physmem_main_t *pm = &physmem_main; + if (vm->buffer_main->extern_buffer_mgmt) + { + vlib_cli_output (vm, "Not supported with external buffer management."); + return 0; + } if (pm->heap) vlib_cli_output (vm, "%U", format_mheap, pm->heap, /* verbose */ 1); else vlib_cli_output (vm, "No physmem allocated."); -#endif return 0; } diff --git a/src/vnet.am b/src/vnet.am index 665a16ea..47c5eda7 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -761,11 +761,13 @@ nobase_include_HEADERS += \ ######################################## if WITH_DPDK libvnet_la_SOURCES += \ + vnet/devices/dpdk/buffer.c \ vnet/devices/dpdk/dpdk_priv.h \ vnet/devices/dpdk/device.c \ vnet/devices/dpdk/format.c \ vnet/devices/dpdk/init.c \ vnet/devices/dpdk/node.c \ + vnet/devices/dpdk/thread.c \ vnet/devices/dpdk/hqos.c \ vnet/devices/dpdk/cli.c \ vnet/devices/dpdk/dpdk_api.c diff --git a/src/vnet/devices/dpdk/buffer.c b/src/vnet/devices/dpdk/buffer.c new file mode 100644 index 00000000..214a9162 --- /dev/null +++ b/src/vnet/devices/dpdk/buffer.c @@ -0,0 +1,729 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer.c: allocate/free network buffers. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @file + * + * Allocate/free network buffers. 
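The DPDK buffer code that follows leans on a fixed element layout inside each mbuf pool, which is why the STATIC_ASSERT below ties VLIB_BUFFER_PRE_DATA_SIZE to RTE_PKTMBUF_HEADROOM. A sketch of the layout assumed by the rte_mbuf_from_vlib_buffer() / vlib_buffer_from_rte_mbuf() macros this patch moves into dpdk_priv.h:

/* Mempool element layout assumed throughout dpdk/buffer.c (sketch):
 *
 *   | struct rte_mbuf | vlib_buffer_t header | headroom  | packet data |
 *      (mbuf meta)       (priv area)          (pre-data)
 *
 * The two views sit exactly one mbuf header apart:
 *
 *   mb = ((struct rte_mbuf *) b) - 1;
 *   b  = (vlib_buffer_t *) (mb + 1);
 */
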
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, + "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); + +#define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) + +/* Make sure we have at least given number of unaligned buffers. */ +static void +fill_unaligned (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + uword n_unaligned_buffers) +{ + word la = vec_len (free_list->aligned_buffers); + word lu = vec_len (free_list->unaligned_buffers); + + /* Aligned come in aligned copy-sized chunks. */ + ASSERT (la % BUFFERS_PER_COPY == 0); + + ASSERT (la >= n_unaligned_buffers); + + while (lu < n_unaligned_buffers) + { + /* Copy 4 buffers from end of aligned vector to unaligned vector. */ + vec_add (free_list->unaligned_buffers, + free_list->aligned_buffers + la - BUFFERS_PER_COPY, + BUFFERS_PER_COPY); + la -= BUFFERS_PER_COPY; + lu += BUFFERS_PER_COPY; + } + _vec_len (free_list->aligned_buffers) = la; +} + +/* After free aligned buffers may not contain even sized chunks. */ +static void +trim_aligned (vlib_buffer_free_list_t * f) +{ + uword l, n_trim; + + /* Add unaligned to aligned before trim. */ + l = vec_len (f->unaligned_buffers); + if (l > 0) + { + vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, l, + /* align */ sizeof (vlib_copy_unit_t)); + + _vec_len (f->unaligned_buffers) = 0; + } + + /* Remove unaligned buffers from end of aligned vector and save for next trim. */ + l = vec_len (f->aligned_buffers); + n_trim = l % BUFFERS_PER_COPY; + if (n_trim) + { + /* Trim aligned -> unaligned. */ + vec_add (f->unaligned_buffers, f->aligned_buffers + l - n_trim, n_trim); + + /* Remove from aligned. */ + _vec_len (f->aligned_buffers) = l - n_trim; + } +} + +static void +merge_free_lists (vlib_buffer_free_list_t * dst, + vlib_buffer_free_list_t * src) +{ + uword l; + u32 *d; + + trim_aligned (src); + trim_aligned (dst); + + l = vec_len (src->aligned_buffers); + if (l > 0) + { + vec_add2_aligned (dst->aligned_buffers, d, l, + /* align */ sizeof (vlib_copy_unit_t)); + clib_memcpy (d, src->aligned_buffers, l * sizeof (d[0])); + vec_free (src->aligned_buffers); + } + + l = vec_len (src->unaligned_buffers); + if (l > 0) + { + vec_add (dst->unaligned_buffers, src->unaligned_buffers, l); + vec_free (src->unaligned_buffers); + } +} + +always_inline u32 +dpdk_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + size = vlib_buffer_round_size (size); + uword *p = hash_get (bm->free_list_by_size, size); + return p ? 
p[0] : ~0; +} + +static void +del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) +{ + u32 i; + struct rte_mbuf *mb; + vlib_buffer_t *b; + + for (i = 0; i < vec_len (f->unaligned_buffers); i++) + { + b = vlib_get_buffer (vm, f->unaligned_buffers[i]); + mb = rte_mbuf_from_vlib_buffer (b); + ASSERT (rte_mbuf_refcnt_read (mb) == 1); + rte_pktmbuf_free (mb); + } + for (i = 0; i < vec_len (f->aligned_buffers); i++) + { + b = vlib_get_buffer (vm, f->aligned_buffers[i]); + mb = rte_mbuf_from_vlib_buffer (b); + ASSERT (rte_mbuf_refcnt_read (mb) == 1); + rte_pktmbuf_free (mb); + } + vec_free (f->name); + vec_free (f->unaligned_buffers); + vec_free (f->aligned_buffers); +} + +/* Add buffer free list. */ +static void +dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + u32 merge_index; + int i; + + ASSERT (os_get_cpu_number () == 0); + + f = vlib_buffer_get_free_list (vm, free_list_index); + + merge_index = dpdk_buffer_get_free_list_with_size (vm, f->n_data_bytes); + if (merge_index != ~0 && merge_index != free_list_index) + { + merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool, + merge_index), f); + } + + del_free_list (vm, f); + + /* Poison it. */ + memset (f, 0xab, sizeof (f[0])); + + pool_put (bm->buffer_free_list_pool, f); + + for (i = 1; i < vec_len (vlib_mains); i++) + { + bm = vlib_mains[i]->buffer_main; + f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);; + memset (f, 0xab, sizeof (f[0])); + pool_put (bm->buffer_free_list_pool, f); + } +} + +/* Make sure free list has at least given number of free buffers. */ +static uword +fill_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * fl, uword min_free_buffers) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_buffer_t *b; + int n, i; + u32 bi; + u32 n_remaining = 0, n_alloc = 0; + unsigned socket_id = rte_socket_id (); + struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id]; + struct rte_mbuf *mb; + + /* Too early? */ + if (PREDICT_FALSE (rmp == 0)) + return 0; + + trim_aligned (fl); + + /* Already have enough free buffers on free list? */ + n = min_free_buffers - vec_len (fl->aligned_buffers); + if (n <= 0) + return min_free_buffers; + + /* Always allocate round number of buffers. */ + n = round_pow2 (n, BUFFERS_PER_COPY); + + /* Always allocate new buffers in reasonably large sized chunks. 
*/ + n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); + + vec_validate (vm->mbuf_alloc_list, n - 1); + + if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) + return 0; + + _vec_len (vm->mbuf_alloc_list) = n; + + for (i = 0; i < n; i++) + { + mb = vm->mbuf_alloc_list[i]; + + ASSERT (rte_mbuf_refcnt_read (mb) == 0); + rte_mbuf_refcnt_set (mb, 1); + + b = vlib_buffer_from_rte_mbuf (mb); + bi = vlib_get_buffer_index (vm, b); + + vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t)); + n_alloc++; + n_remaining--; + + vlib_buffer_init_for_free_list (b, fl); + + if (fl->buffer_init_function) + fl->buffer_init_function (vm, fl, &bi, 1); + } + + fl->n_alloc += n; + + return n; +} + +always_inline uword +copy_alignment (u32 * x) +{ + return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; +} + +static u32 +alloc_from_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + u32 * alloc_buffers, u32 n_alloc_buffers) +{ + u32 *dst, *u_src; + uword u_len, n_left; + uword n_unaligned_start, n_unaligned_end, n_filled; + + n_left = n_alloc_buffers; + dst = alloc_buffers; + n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst)) + & (BUFFERS_PER_COPY - 1)); + + n_filled = fill_free_list (vm, free_list, n_alloc_buffers); + if (n_filled == 0) + return 0; + + n_left = n_filled < n_left ? n_filled : n_left; + n_alloc_buffers = n_left; + + if (n_unaligned_start >= n_left) + { + n_unaligned_start = n_left; + n_unaligned_end = 0; + } + else + n_unaligned_end = copy_alignment (dst + n_alloc_buffers); + + fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end); + + u_len = vec_len (free_list->unaligned_buffers); + u_src = free_list->unaligned_buffers + u_len - 1; + + if (n_unaligned_start) + { + uword n_copy = n_unaligned_start; + if (n_copy > n_left) + n_copy = n_left; + n_left -= n_copy; + + while (n_copy > 0) + { + *dst++ = *u_src--; + n_copy--; + u_len--; + } + + /* Now dst should be aligned. */ + if (n_left > 0) + ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0); + } + + /* Aligned copy. */ + { + vlib_copy_unit_t *d, *s; + uword n_copy; + + if (vec_len (free_list->aligned_buffers) < + ((n_left / BUFFERS_PER_COPY) * BUFFERS_PER_COPY)) + abort (); + + n_copy = n_left / BUFFERS_PER_COPY; + n_left = n_left % BUFFERS_PER_COPY; + + /* Remove buffers from aligned free list. */ + _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY; + + s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers); + d = (vlib_copy_unit_t *) dst; + + /* Fast path loop. */ + while (n_copy >= 4) + { + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + d[3] = s[3]; + n_copy -= 4; + s += 4; + d += 4; + } + + while (n_copy >= 1) + { + d[0] = s[0]; + n_copy -= 1; + s += 1; + d += 1; + } + + dst = (void *) d; + } + + /* Unaligned copy. */ + ASSERT (n_unaligned_end == n_left); + while (n_left > 0) + { + *dst++ = *u_src--; + n_left--; + u_len--; + } + + if (!free_list->unaligned_buffers) + ASSERT (u_len == 0); + else + _vec_len (free_list->unaligned_buffers) = u_len; + + return n_alloc_buffers; +} + +/* Allocate a given number of buffers into given array. + Returns number actually allocated which will be either zero or + number requested. 
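Callers of the alloc API must be prepared for a short count. A typical caller-side pattern, shown here as a sketch only (error strategy is the caller's choice):

  u32 bi[VLIB_FRAME_SIZE];
  u32 n_alloc = vlib_buffer_alloc (vm, bi, VLIB_FRAME_SIZE);

  if (PREDICT_FALSE (n_alloc < VLIB_FRAME_SIZE))
    {
      /* Short allocation: free what we did get and treat it as a drop. */
      vlib_buffer_free (vm, bi, n_alloc);
      return 0;
    }
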
*/ +u32 +dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + return alloc_from_free_list + (vm, + pool_elt_at_index (bm->buffer_free_list_pool, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX), + buffers, n_buffers); +} + + +u32 +dpdk_buffer_alloc_from_free_list (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); + return alloc_from_free_list (vm, f, buffers, n_buffers); +} + +always_inline void +add_buffer_to_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * f, + u32 buffer_index, u8 do_init) +{ + vlib_buffer_t *b; + b = vlib_get_buffer (vm, buffer_index); + if (PREDICT_TRUE (do_init)) + vlib_buffer_init_for_free_list (b, f); + vec_add1_aligned (f->aligned_buffers, buffer_index, + sizeof (vlib_copy_unit_t)); +} + +always_inline vlib_buffer_free_list_t * +buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + u32 i; + + *index = i = b->free_list_index; + return pool_elt_at_index (bm->buffer_free_list_pool, i); +} + +static_always_inline void +vlib_buffer_free_inline (vlib_main_t * vm, + u32 * buffers, u32 n_buffers, u32 follow_buffer_next) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *fl; + u32 fi; + int i; + u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, + u32 follow_buffer_next); + + cb = bm->buffer_free_callback; + + if (PREDICT_FALSE (cb != 0)) + n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); + + if (!n_buffers) + return; + + for (i = 0; i < n_buffers; i++) + { + vlib_buffer_t *b; + struct rte_mbuf *mb; + + b = vlib_get_buffer (vm, buffers[i]); + + fl = buffer_get_free_list (vm, b, &fi); + + /* The only current use of this callback: multicast recycle */ + if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) + { + int j; + + add_buffer_to_free_list + (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); + + for (j = 0; j < vec_len (bm->announce_list); j++) + { + if (fl == bm->announce_list[j]) + goto already_announced; + } + vec_add1 (bm->announce_list, fl); + already_announced: + ; + } + else + { + if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) + { + mb = rte_mbuf_from_vlib_buffer (b); + ASSERT (rte_mbuf_refcnt_read (mb) == 1); + rte_pktmbuf_free (mb); + } + } + } + if (vec_len (bm->announce_list)) + { + vlib_buffer_free_list_t *fl; + for (i = 0; i < vec_len (bm->announce_list); i++) + { + fl = bm->announce_list[i]; + fl->buffers_added_to_freelist_function (vm, fl); + } + _vec_len (bm->announce_list) = 0; + } +} + +static void +dpdk_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 1); +} + +static void +dpdk_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 0); +} + +static void +dpdk_packet_template_init (vlib_main_t * vm, + void *vt, + void *packet_data, + uword n_packet_data_bytes, + uword min_n_buffers_each_physmem_alloc, u8 * name) +{ + vlib_packet_template_t *t = (vlib_packet_template_t *) vt; + + vlib_worker_thread_barrier_sync (vm); + memset (t, 0, sizeof (t[0])); + + vec_add (t->packet_data, packet_data, n_packet_data_bytes); + + vlib_worker_thread_barrier_release (vm); +} + +clib_error_t * 
+vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, + unsigned socket_id) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_physmem_main_t *vpm = &vm->physmem_main; + struct rte_mempool *rmp; + int i; + + vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); + + /* pool already exists, nothing to do */ + if (dm->pktmbuf_pools[socket_id]) + return 0; + + u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); + + rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ + num_mbufs, /* number of mbufs */ + 512, /* cache size */ + VLIB_BUFFER_HDR_SIZE, /* priv size */ + VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ + socket_id); /* cpu socket */ + + if (rmp) + { + { + uword this_pool_end; + uword this_pool_start; + uword this_pool_size; + uword save_vpm_start, save_vpm_end, save_vpm_size; + struct rte_mempool_memhdr *memhdr; + + this_pool_start = ~0ULL; + this_pool_end = 0LL; + + STAILQ_FOREACH (memhdr, &rmp->mem_list, next) + { + if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) + this_pool_end = (uword) (memhdr->addr + memhdr->len); + if (((uword) memhdr->addr) < this_pool_start) + this_pool_start = (uword) (memhdr->addr); + } + ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); + this_pool_size = this_pool_end - this_pool_start; + + if (CLIB_DEBUG > 1) + { + clib_warning ("%s: pool start %llx pool end %llx pool size %lld", + pool_name, this_pool_start, this_pool_end, + this_pool_size); + clib_warning + ("before: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + save_vpm_start = vpm->virtual.start; + save_vpm_end = vpm->virtual.end; + save_vpm_size = vpm->virtual.size; + + if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) + vpm->virtual.start = this_pool_start; + if (this_pool_end > vpm->virtual.end) + vpm->virtual.end = this_pool_end; + + vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; + + if (CLIB_DEBUG > 1) + { + clib_warning + ("after: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + /* check if fits into buffer index range */ + if ((u64) vpm->virtual.size > + ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) + { + clib_warning ("physmem: virtual size out of range!"); + vpm->virtual.start = save_vpm_start; + vpm->virtual.end = save_vpm_end; + vpm->virtual.size = save_vpm_size; + rmp = 0; + } + } + if (rmp) + { + dm->pktmbuf_pools[socket_id] = rmp; + vec_free (pool_name); + return 0; + } + } + + vec_free (pool_name); + + /* no usable pool for this socket, try to use pool from another one */ + for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) + { + if (dm->pktmbuf_pools[i]) + { + clib_warning + ("WARNING: Failed to allocate mempool for CPU socket %u. 
" + "Threads running on socket %u will use socket %u mempool.", + socket_id, socket_id, i); + dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i]; + return 0; + } + } + + return clib_error_return (0, "failed to allocate mempool on socket %u", + socket_id); +} + +#if CLIB_DEBUG > 0 + +u32 *vlib_buffer_state_validation_lock; +uword *vlib_buffer_state_validation_hash; +void *vlib_buffer_state_heap; + +static clib_error_t * +buffer_state_validation_init (vlib_main_t * vm) +{ + void *oldheap; + + vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); + + oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + + vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); + vec_validate_aligned (vlib_buffer_state_validation_lock, 0, + CLIB_CACHE_LINE_BYTES); + clib_mem_set_heap (oldheap); + return 0; +} + +VLIB_INIT_FUNCTION (buffer_state_validation_init); +#endif + +static vlib_buffer_callbacks_t callbacks = { + .vlib_buffer_alloc_cb = &dpdk_buffer_alloc, + .vlib_buffer_alloc_from_free_list_cb = &dpdk_buffer_alloc_from_free_list, + .vlib_buffer_free_cb = &dpdk_buffer_free, + .vlib_buffer_free_no_next_cb = &dpdk_buffer_free_no_next, + .vlib_packet_template_init_cb = &dpdk_packet_template_init, + .vlib_buffer_delete_free_list_cb = &dpdk_buffer_delete_free_list, +}; + +static clib_error_t * +dpdk_buffer_init (vlib_main_t * vm) +{ + vlib_buffer_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_buffer_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c index 538a00fd..22bd4b4f 100644 --- a/src/vnet/devices/dpdk/cli.c +++ b/src/vnet/devices/dpdk/cli.c @@ -164,9 +164,9 @@ show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, struct rte_mempool *rmp; int i; - for (i = 0; i < vec_len (vm->buffer_main->pktmbuf_pools); i++) + for (i = 0; i < vec_len (dpdk_main.pktmbuf_pools); i++) { - rmp = vm->buffer_main->pktmbuf_pools[i]; + rmp = dpdk_main.pktmbuf_pools[i]; if (rmp) { unsigned count = rte_mempool_avail_count (rmp); diff --git a/src/vnet/devices/dpdk/device.c b/src/vnet/devices/dpdk/device.c index b22fbf2e..0deab6aa 100644 --- a/src/vnet/devices/dpdk/device.c +++ b/src/vnet/devices/dpdk/device.c @@ -87,19 +87,18 @@ dpdk_set_mc_filter (vnet_hw_interface_t * hi, struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b) { - vlib_main_t *vm = vlib_get_main (); - vlib_buffer_main_t *bm = vm->buffer_main; + dpdk_main_t *dm = &dpdk_main; struct rte_mbuf **mbufs = 0, *s, *d; u8 nb_segs; unsigned socket_id = rte_socket_id (); int i; - ASSERT (bm->pktmbuf_pools[socket_id]); + ASSERT (dm->pktmbuf_pools[socket_id]); s = rte_mbuf_from_vlib_buffer (b); nb_segs = s->nb_segs; vec_validate (mbufs, nb_segs - 1); - if (rte_pktmbuf_alloc_bulk (bm->pktmbuf_pools[socket_id], mbufs, nb_segs)) + if (rte_pktmbuf_alloc_bulk (dm->pktmbuf_pools[socket_id], mbufs, nb_segs)) { vec_free (mbufs); return 0; diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h index e0436031..066ec6fa 100644 --- a/src/vnet/devices/dpdk/dpdk.h +++ b/src/vnet/devices/dpdk/dpdk.h @@ -425,6 +425,9 @@ typedef struct vlib_main_t *vlib_main; vnet_main_t *vnet_main; dpdk_config_main_t *conf; + + /* mempool */ + struct rte_mempool **pktmbuf_pools; } dpdk_main_t; dpdk_main_t dpdk_main; diff --git a/src/vnet/devices/dpdk/dpdk_priv.h b/src/vnet/devices/dpdk/dpdk_priv.h index 0c81dbc3..dd40ff48 100644 --- a/src/vnet/devices/dpdk/dpdk_priv.h +++ 
b/src/vnet/devices/dpdk/dpdk_priv.h @@ -13,6 +13,9 @@ * limitations under the License. */ +#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) +#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) + #define DPDK_NB_RX_DESC_DEFAULT 1024 #define DPDK_NB_TX_DESC_DEFAULT 1024 #define DPDK_NB_RX_DESC_VIRTIO 256 diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index 60689463..4c040d20 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -64,8 +64,6 @@ static struct rte_eth_conf port_conf_template = { clib_error_t * dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) { - vlib_main_t *vm = vlib_get_main (); - vlib_buffer_main_t *bm = vm->buffer_main; int rv; int j; @@ -107,7 +105,7 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, xd->cpu_socket, 0, - bm-> + dm-> pktmbuf_pools[xd->cpu_socket_id_by_queue [j]]); @@ -115,7 +113,7 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) if (rv < 0) rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, SOCKET_ID_ANY, 0, - bm-> + dm-> pktmbuf_pools[xd->cpu_socket_id_by_queue [j]]); if (rv < 0) diff --git a/src/vnet/devices/dpdk/thread.c b/src/vnet/devices/dpdk/thread.c new file mode 100644 index 00000000..475dd142 --- /dev/null +++ b/src/vnet/devices/dpdk/thread.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
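Before the thread glue below, a quick sizing check on the per-socket pools created by vlib_buffer_pool_create() earlier in this patch, assuming the stock RTE_PKTMBUF_HEADROOM of 128 (the value is build-time configurable):

/* Element sizing implied by the rte_pktmbuf_pool_create() call in
 * dpdk/buffer.c, assuming RTE_PKTMBUF_HEADROOM == 128:
 *
 *   priv size = VLIB_BUFFER_HDR_SIZE               (vlib_buffer_t header)
 *   dataroom  = VLIB_BUFFER_PRE_DATA_SIZE  (128)
 *             + VLIB_BUFFER_DATA_SIZE      (2048)  = 2176 bytes per buffer
 *
 * plus a 512-entry per-lcore cache, all allocated on the given socket. */
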
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static clib_error_t * +dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) +{ + int r; + r = rte_eal_remote_launch (fp, (void *) w, lcore_id); + if (r) + return clib_error_return (0, "Failed to launch thread %u", lcore_id); + return 0; +} + +static clib_error_t * +dpdk_thread_set_lcore (u32 thread, u16 lcore) +{ + return 0; +} + +static vlib_thread_callbacks_t callbacks = { + .vlib_launch_thread_cb = &dpdk_launch_thread, + .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore, +}; + +static clib_error_t * +dpdk_thread_init (vlib_main_t * vm) +{ + vlib_thread_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_thread_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sr/sr_replicate.c b/src/vnet/sr/sr_replicate.c index 5f9de504..fa5a68c3 100644 --- a/src/vnet/sr/sr_replicate.c +++ b/src/vnet/sr/sr_replicate.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,7 @@ static uword sr_replicate_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { + dpdk_main_t *dm = &dpdk_main; u32 n_left_from, *from, *to_next; sr_replicate_next_t next_index; int pkts_replicated = 0; @@ -149,7 +151,6 @@ sr_replicate_node_fn (vlib_main_t * vm, int no_buffer_drops = 0; vlib_buffer_free_list_t *fl; unsigned socket_id = rte_socket_id (); - vlib_buffer_main_t *bm = vm->buffer_main; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -246,13 +247,13 @@ sr_replicate_node_fn (vlib_main_t * vm, vlib_buffer_t *clone0_c, *clone_b0; t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]); - hdr_mb0 = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]); + hdr_mb0 = rte_pktmbuf_alloc (dm->pktmbuf_pools[socket_id]); if (i < (num_replicas - 1)) { /* Not the last tunnel to process */ clone0 = rte_pktmbuf_clone - (orig_mb0, bm->pktmbuf_pools[socket_id]); + (orig_mb0, dm->pktmbuf_pools[socket_id]); if (clone0 == 0) goto clone_fail; nb_seg = 0; -- cgit 1.2.3-korg From ca80025805230b34daa10fc1eb16600080c2a54f Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 16 Jan 2017 21:29:39 +0100 Subject: dpdk: register rte_delay_us callback from vnet Change-Id: Ibf7fc9a54d3fbee431b4814fa8abc5ba29ed9eef Signed-off-by: Damjan Marion --- src/vnet.am | 1 + src/vnet/devices/dpdk/main.c | 90 ++++++++++++++++++++++++++++++++++++++++++++ src/vpp/vnet/main.c | 62 ------------------------------ 3 files changed, 91 insertions(+), 62 deletions(-) create mode 100644 src/vnet/devices/dpdk/main.c (limited to 'src/vnet/devices') diff --git a/src/vnet.am b/src/vnet.am index 76824fdb..3b2a25e8 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -772,6 +772,7 @@ libvnet_la_SOURCES += \ vnet/devices/dpdk/device.c \ vnet/devices/dpdk/format.c \ vnet/devices/dpdk/init.c \ + vnet/devices/dpdk/main.c \ vnet/devices/dpdk/node.c \ vnet/devices/dpdk/thread.c \ vnet/devices/dpdk/hqos.c \ diff --git a/src/vnet/devices/dpdk/main.c b/src/vnet/devices/dpdk/main.c new file mode 100644 index 00000000..1e6ec2f8 --- /dev/null +++ b/src/vnet/devices/dpdk/main.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + + +/* + * Called by the dpdk driver's rte_delay_us() function. + * Return 0 to have the dpdk do a regular delay loop. + * Return 1 if to skip the delay loop because we are suspending + * the calling vlib process instead. + */ +int +rte_delay_us_override (unsigned us) +{ + vlib_main_t *vm; + + /* Don't bother intercepting for short delays */ + if (us < 10) + return 0; + + /* + * Only intercept if we are in a vlib process. + * If we are called from a vlib worker thread or the vlib main + * thread then do not intercept. (Must not be called from an + * independent pthread). + */ + if (os_get_cpu_number () == 0) + { + /* + * We're in the vlib main thread or a vlib process. Make sure + * the process is running and we're not still initializing. + */ + vm = vlib_get_main (); + if (vlib_in_process_context (vm)) + { + /* Only suspend for the admin_down_process */ + vlib_process_t *proc = vlib_get_current_process (vm); + if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) || + (proc->node_runtime.function != admin_up_down_process)) + return 0; + + f64 delay = 1e-6 * us; + vlib_process_suspend (vm, delay); + return 1; + } + } + return 0; // no override +} + +#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) +static void +rte_delay_us_override_cb (unsigned us) +{ + if (rte_delay_us_override (us) == 0) + rte_delay_us_block (us); +} +#endif + +static clib_error_t * dpdk_main_init (vlib_main_t * vm) +{ + clib_error_t * error = 0; + + if ((error = vlib_call_init_function (vm, dpdk_init))) + return error; + +#if DPDK +#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) + /* register custom delay function */ + rte_delay_us_callback_register (rte_delay_us_override_cb); +#endif +#endif + return error; +} + +VLIB_INIT_FUNCTION (dpdk_main_init); + diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c index e4695e1e..a252b846 100644 --- a/src/vpp/vnet/main.c +++ b/src/vpp/vnet/main.c @@ -21,62 +21,6 @@ #include -#if DPDK -#include - -/* - * Called by the dpdk driver's rte_delay_us() function. - * Return 0 to have the dpdk do a regular delay loop. - * Return 1 if to skip the delay loop because we are suspending - * the calling vlib process instead. - */ -int -rte_delay_us_override (unsigned us) -{ - vlib_main_t *vm; - - /* Don't bother intercepting for short delays */ - if (us < 10) - return 0; - - /* - * Only intercept if we are in a vlib process. - * If we are called from a vlib worker thread or the vlib main - * thread then do not intercept. (Must not be called from an - * independent pthread). - */ - if (os_get_cpu_number () == 0) - { - /* - * We're in the vlib main thread or a vlib process. Make sure - * the process is running and we're not still initializing. 
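Worth spelling out how the override takes effect: DPDK 16.11 and later let the application register its own microsecond-delay routine, so after dpdk_main_init() runs the control flow looks like this (a sketch of the call chain, not new code):

/* Call chain once rte_delay_us_callback_register() has run:
 *
 *   driver calls rte_delay_us (us)
 *     -> rte_delay_us_override_cb (us)
 *          -> rte_delay_us_override (us) returns 1  -> vlib process suspended
 *          -> otherwise                             -> rte_delay_us_block (us)
 *
 * Net effect: long settle delays inside admin_up_down_process suspend the
 * process instead of busy-waiting on the main core. */
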
- */ - vm = vlib_get_main (); - if (vlib_in_process_context (vm)) - { - /* Only suspend for the admin_down_process */ - vlib_process_t *proc = vlib_get_current_process (vm); - if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) || - (proc->node_runtime.function != admin_up_down_process)) - return 0; - - f64 delay = 1e-6 * us; - vlib_process_suspend (vm, delay); - return 1; - } - } - return 0; // no override -} - -#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) -static void -rte_delay_us_override_cb (unsigned us) -{ - if (rte_delay_us_override (us) == 0) - rte_delay_us_block (us); -} -#endif -#endif static void vpe_main_init (vlib_main_t * vm) @@ -89,12 +33,6 @@ vpe_main_init (vlib_main_t * vm) /* Turn off network stack components which we don't want */ vlib_mark_init_function_complete (vm, srp_init); -#if DPDK -#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) - /* register custom delay function */ - rte_delay_us_callback_register (rte_delay_us_override_cb); -#endif -#endif } /* -- cgit 1.2.3-korg From 597d3c4121b4dd557328c11debb42927a45d52fb Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 16 Jan 2017 21:36:28 +0100 Subject: dpdk: add 'show dpdk version' cli Change-Id: Iaecebae25ee4b8df8ca919992a0433e92e82e90c Signed-off-by: Damjan Marion --- src/vnet/devices/dpdk/cli.c | 20 ++++++++++++++++++++ src/vpp/app/version.c | 10 ---------- src/vpp/oam/oam.c | 4 ---- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c index 22bd4b4f..3bbace26 100644 --- a/src/vnet/devices/dpdk/cli.c +++ b/src/vnet/devices/dpdk/cli.c @@ -1279,6 +1279,26 @@ VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = { }; /* *INDENT-ON* */ +static clib_error_t * +show_dpdk_version_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ +#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c); + _("DPDK Version", "%s", rte_version ()); + _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str); +#undef _ + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_vpe_version_command, static) = { + .path = "show dpdk version", + .short_help = "show dpdk version information", + .function = show_dpdk_version_command_fn, +}; +/* *INDENT-ON* */ + clib_error_t * dpdk_cli_init (vlib_main_t * vm) { diff --git a/src/vpp/app/version.c b/src/vpp/app/version.c index 60844c98..0a2c7fd4 100644 --- a/src/vpp/app/version.c +++ b/src/vpp/app/version.c @@ -16,12 +16,6 @@ #include #include -#if DPDK > 0 -#include -#include -#include -#endif /* DPDK */ - static char *vpe_version_string = "vpp v" VPP_BUILD_VER " built by " VPP_BUILD_USER " on " VPP_BUILD_HOST " at " VPP_BUILD_DATE; @@ -56,10 +50,6 @@ show_vpe_version_command_fn (vlib_main_t * vm, _("Compile location", "%s", VPP_BUILD_TOPDIR); _("Compiler", "%s", vpe_compiler); _("Current PID", "%d", getpid ()); -#if DPDK > 0 - _("DPDK Version", "%s", rte_version ()); - _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str); -#endif #undef _ } else diff --git a/src/vpp/oam/oam.c b/src/vpp/oam/oam.c index 07e17b64..ef061207 100644 --- a/src/vpp/oam/oam.c +++ b/src/vpp/oam/oam.c @@ -14,10 +14,6 @@ */ #include -#if DPDK > 0 -#include -#endif - oam_main_t oam_main; static vlib_node_registration_t oam_node; -- cgit 1.2.3-korg From 8a6a3b2ddb2e2929c4697b31746b3f617886f157 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 17 Jan 2017 14:12:42 +0100 Subject: dpdk: remove duplicate code in buffers.c Change-Id: 
Idc17b4a32d40012556d5d8550942db0372ebf23d Signed-off-by: Damjan Marion --- src/vlib/buffer.c | 73 +++++++---------------- src/vlib/buffer_funcs.h | 48 +++++++++++++++ src/vnet/devices/dpdk/buffer.c | 131 +++-------------------------------------- 3 files changed, 78 insertions(+), 174 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 0b0e6054..ea4960e2 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -307,10 +307,10 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, #define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) /* Make sure we have at least given number of unaligned buffers. */ -static void -fill_unaligned (vlib_main_t * vm, - vlib_buffer_free_list_t * free_list, - uword n_unaligned_buffers) +void +vlib_buffer_free_list_fill_unaligned (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + uword n_unaligned_buffers) { word la = vec_len (free_list->aligned_buffers); word lu = vec_len (free_list->unaligned_buffers); @@ -333,8 +333,8 @@ fill_unaligned (vlib_main_t * vm, } /* After free aligned buffers may not contain even sized chunks. */ -static void -trim_aligned (vlib_buffer_free_list_t * f) +void +vlib_buffer_free_list_trim_aligned (vlib_buffer_free_list_t * f) { uword l, n_trim; @@ -361,15 +361,15 @@ trim_aligned (vlib_buffer_free_list_t * f) } } -static void -merge_free_lists (vlib_buffer_free_list_t * dst, - vlib_buffer_free_list_t * src) +void +vlib_buffer_merge_free_lists (vlib_buffer_free_list_t * dst, + vlib_buffer_free_list_t * src) { uword l; u32 *d; - trim_aligned (src); - trim_aligned (dst); + vlib_buffer_free_list_trim_aligned (src); + vlib_buffer_free_list_trim_aligned (dst); l = vec_len (src->aligned_buffers); if (l > 0) @@ -388,16 +388,6 @@ merge_free_lists (vlib_buffer_free_list_t * dst, } } -always_inline u32 -vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - - size = vlib_buffer_round_size (size); - uword *p = hash_get (bm->free_list_by_size, size); - return p ? p[0] : ~0; -} - /* Add buffer free list. */ static u32 vlib_buffer_create_free_list_helper (vlib_main_t * vm, @@ -537,8 +527,9 @@ vlib_buffer_delete_free_list_internal (vlib_main_t * vm, u32 free_list_index) merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); if (merge_index != ~0 && merge_index != free_list_index) { - merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool, - merge_index), f); + vlib_buffer_merge_free_lists (pool_elt_at_index + (bm->buffer_free_list_pool, merge_index), + f); } del_free_list (vm, f); @@ -567,7 +558,7 @@ fill_free_list (vlib_main_t * vm, u32 *bi; u32 n_remaining, n_alloc, n_this_chunk; - trim_aligned (fl); + vlib_buffer_free_list_trim_aligned (fl); /* Already have enough free buffers on free list? 
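(if so, the shortfall n computed just below is non-positive and nothing is allocated). The aligned/unaligned bookkeeping that the trim and fill helpers maintain around this point can be shown standalone: the aligned vector is kept a multiple of BUFFERS_PER_COPY and the remainder is parked in the unaligned vector. A minimal sketch with plain arrays instead of VPP vectors — hypothetical names, with CHUNK in the role of BUFFERS_PER_COPY:

```c
#include <stdio.h>
#include <string.h>

#define CHUNK 4 /* buffers per copy unit */
static unsigned aligned[64], unaligned[64];
static size_t n_aligned, n_unaligned;

static void
trim_aligned (void)
{
  /* fold any unaligned leftovers back into the aligned vector first */
  memcpy (aligned + n_aligned, unaligned, n_unaligned * sizeof (unsigned));
  n_aligned += n_unaligned;
  n_unaligned = 0;

  /* park the non-multiple-of-CHUNK tail in the unaligned vector */
  size_t n_trim = n_aligned % CHUNK;
  n_aligned -= n_trim;
  memcpy (unaligned, aligned + n_aligned, n_trim * sizeof (unsigned));
  n_unaligned = n_trim;
}

int
main (void)
{
  for (unsigned i = 0; i < 10; i++)
    aligned[n_aligned++] = i;  /* ten free buffer indices */
  trim_aligned ();
  /* prints aligned=8 unaligned=2: the tail of two spilled over */
  printf ("aligned=%zu unaligned=%zu\n", n_aligned, n_unaligned);
  return 0;
}
```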
*/ n = min_free_buffers - vec_len (fl->aligned_buffers); @@ -666,7 +657,8 @@ alloc_from_free_list (vlib_main_t * vm, else n_unaligned_end = copy_alignment (dst + n_alloc_buffers); - fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end); + vlib_buffer_free_list_fill_unaligned (vm, free_list, + n_unaligned_start + n_unaligned_end); u_len = vec_len (free_list->unaligned_buffers); u_src = free_list->unaligned_buffers + u_len - 1; @@ -779,29 +771,6 @@ vlib_buffer_alloc_from_free_list_internal (vlib_main_t * vm, return alloc_from_free_list (vm, f, buffers, n_buffers); } -always_inline void -add_buffer_to_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * f, - u32 buffer_index, u8 do_init) -{ - vlib_buffer_t *b; - b = vlib_get_buffer (vm, buffer_index); - if (PREDICT_TRUE (do_init)) - vlib_buffer_init_for_free_list (b, f); - vec_add1_aligned (f->aligned_buffers, buffer_index, - sizeof (vlib_copy_unit_t)); -} - -always_inline vlib_buffer_free_list_t * -buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - u32 i; - - *index = i = b->free_list_index; - return pool_elt_at_index (bm->buffer_free_list_pool, i); -} - void * vlib_set_buffer_free_callback (vlib_main_t * vm, void *fp) { @@ -845,7 +814,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, vlib_buffer_t *b0; b0 = vlib_get_buffer (vm, bi0); - fl = buffer_get_free_list (vm, b0, &fi); + fl = vlib_buffer_get_buffer_free_list (vm, b0, &fi); if (fl->buffers_added_to_freelist_function) vec_add1 (announce_list, fl); } @@ -926,7 +895,7 @@ again: fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0); fl1 = pool_elt_at_index (bm->buffer_free_list_pool, fi1); - add_buffer_to_free_list (vm, fl0, bi0, free0); + vlib_buffer_add_to_free_list (vm, fl0, bi0, free0); if (PREDICT_FALSE (fl0->buffers_added_to_freelist_function != 0)) { int i; @@ -946,7 +915,7 @@ again: } no_fl1: - add_buffer_to_free_list (vm, fl1, bi1, free1); + vlib_buffer_add_to_free_list (vm, fl1, bi1, free1); /* Possibly change current free list. */ if (fi0 != fi && fi1 != fi) @@ -1003,7 +972,7 @@ again: fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0); - add_buffer_to_free_list (vm, fl0, bi0, free0); + vlib_buffer_add_to_free_list (vm, fl0, bi0, free0); if (PREDICT_FALSE (fl0->buffers_added_to_freelist_function != 0)) { int i; diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 15d93c16..543a903c 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -350,6 +350,41 @@ vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes, char *fmt, ...); + +/* After free aligned buffers may not contain even sized chunks. */ +void vlib_buffer_free_list_trim_aligned (vlib_buffer_free_list_t * f); + +/* Merge two free lists */ +void vlib_buffer_merge_free_lists (vlib_buffer_free_list_t * dst, + vlib_buffer_free_list_t * src); + +/* Make sure we have at least given number of unaligned buffers. */ +void vlib_buffer_free_list_fill_unaligned (vlib_main_t * vm, + vlib_buffer_free_list_t * + free_list, + uword n_unaligned_buffers); + +always_inline u32 +vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + size = vlib_buffer_round_size (size); + uword *p = hash_get (bm->free_list_by_size, size); + return p ? 
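/* hit: index of the existing free list for this rounded size; miss: ~0 */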
p[0] : ~0; +} + +always_inline vlib_buffer_free_list_t * +vlib_buffer_get_buffer_free_list (vlib_main_t * vm, vlib_buffer_t * b, + u32 * index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + u32 i; + + *index = i = b->free_list_index; + return pool_elt_at_index (bm->buffer_free_list_pool, i); +} + always_inline vlib_buffer_free_list_t * vlib_buffer_get_free_list (vlib_main_t * vm, u32 free_list_index) { @@ -674,6 +709,19 @@ vlib_buffer_init_for_free_list (vlib_buffer_t * _dst, ASSERT (dst->b.total_length_not_including_first_buffer == 0); } +always_inline void +vlib_buffer_add_to_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * f, + u32 buffer_index, u8 do_init) +{ + vlib_buffer_t *b; + b = vlib_get_buffer (vm, buffer_index); + if (PREDICT_TRUE (do_init)) + vlib_buffer_init_for_free_list (b, f); + vec_add1_aligned (f->aligned_buffers, buffer_index, + sizeof (vlib_copy_unit_t)); +} + always_inline void vlib_buffer_init_two_for_free_list (vlib_buffer_t * _dst0, vlib_buffer_t * _dst1, diff --git a/src/vnet/devices/dpdk/buffer.c b/src/vnet/devices/dpdk/buffer.c index 214a9162..038f46d9 100644 --- a/src/vnet/devices/dpdk/buffer.c +++ b/src/vnet/devices/dpdk/buffer.c @@ -81,98 +81,6 @@ STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, #define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) -/* Make sure we have at least given number of unaligned buffers. */ -static void -fill_unaligned (vlib_main_t * vm, - vlib_buffer_free_list_t * free_list, - uword n_unaligned_buffers) -{ - word la = vec_len (free_list->aligned_buffers); - word lu = vec_len (free_list->unaligned_buffers); - - /* Aligned come in aligned copy-sized chunks. */ - ASSERT (la % BUFFERS_PER_COPY == 0); - - ASSERT (la >= n_unaligned_buffers); - - while (lu < n_unaligned_buffers) - { - /* Copy 4 buffers from end of aligned vector to unaligned vector. */ - vec_add (free_list->unaligned_buffers, - free_list->aligned_buffers + la - BUFFERS_PER_COPY, - BUFFERS_PER_COPY); - la -= BUFFERS_PER_COPY; - lu += BUFFERS_PER_COPY; - } - _vec_len (free_list->aligned_buffers) = la; -} - -/* After free aligned buffers may not contain even sized chunks. */ -static void -trim_aligned (vlib_buffer_free_list_t * f) -{ - uword l, n_trim; - - /* Add unaligned to aligned before trim. */ - l = vec_len (f->unaligned_buffers); - if (l > 0) - { - vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, l, - /* align */ sizeof (vlib_copy_unit_t)); - - _vec_len (f->unaligned_buffers) = 0; - } - - /* Remove unaligned buffers from end of aligned vector and save for next trim. */ - l = vec_len (f->aligned_buffers); - n_trim = l % BUFFERS_PER_COPY; - if (n_trim) - { - /* Trim aligned -> unaligned. */ - vec_add (f->unaligned_buffers, f->aligned_buffers + l - n_trim, n_trim); - - /* Remove from aligned. 
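The n_trim tail was copied into the unaligned vector just above, so shortening the aligned vector here drops no buffers.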
*/ - _vec_len (f->aligned_buffers) = l - n_trim; - } -} - -static void -merge_free_lists (vlib_buffer_free_list_t * dst, - vlib_buffer_free_list_t * src) -{ - uword l; - u32 *d; - - trim_aligned (src); - trim_aligned (dst); - - l = vec_len (src->aligned_buffers); - if (l > 0) - { - vec_add2_aligned (dst->aligned_buffers, d, l, - /* align */ sizeof (vlib_copy_unit_t)); - clib_memcpy (d, src->aligned_buffers, l * sizeof (d[0])); - vec_free (src->aligned_buffers); - } - - l = vec_len (src->unaligned_buffers); - if (l > 0) - { - vec_add (dst->unaligned_buffers, src->unaligned_buffers, l); - vec_free (src->unaligned_buffers); - } -} - -always_inline u32 -dpdk_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - - size = vlib_buffer_round_size (size); - uword *p = hash_get (bm->free_list_by_size, size); - return p ? p[0] : ~0; -} - static void del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) { @@ -212,11 +120,12 @@ dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) f = vlib_buffer_get_free_list (vm, free_list_index); - merge_index = dpdk_buffer_get_free_list_with_size (vm, f->n_data_bytes); + merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); if (merge_index != ~0 && merge_index != free_list_index) { - merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool, - merge_index), f); + vlib_buffer_merge_free_lists (pool_elt_at_index + (bm->buffer_free_list_pool, merge_index), + f); } del_free_list (vm, f); @@ -253,7 +162,7 @@ fill_free_list (vlib_main_t * vm, if (PREDICT_FALSE (rmp == 0)) return 0; - trim_aligned (fl); + vlib_buffer_free_list_trim_aligned (fl); /* Already have enough free buffers on free list? */ n = min_free_buffers - vec_len (fl->aligned_buffers); @@ -333,7 +242,8 @@ alloc_from_free_list (vlib_main_t * vm, else n_unaligned_end = copy_alignment (dst + n_alloc_buffers); - fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end); + vlib_buffer_free_list_fill_unaligned (vm, free_list, + n_unaligned_start + n_unaligned_end); u_len = vec_len (free_list->unaligned_buffers); u_src = free_list->unaligned_buffers + u_len - 1; @@ -442,29 +352,6 @@ dpdk_buffer_alloc_from_free_list (vlib_main_t * vm, return alloc_from_free_list (vm, f, buffers, n_buffers); } -always_inline void -add_buffer_to_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * f, - u32 buffer_index, u8 do_init) -{ - vlib_buffer_t *b; - b = vlib_get_buffer (vm, buffer_index); - if (PREDICT_TRUE (do_init)) - vlib_buffer_init_for_free_list (b, f); - vec_add1_aligned (f->aligned_buffers, buffer_index, - sizeof (vlib_copy_unit_t)); -} - -always_inline vlib_buffer_free_list_t * -buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - u32 i; - - *index = i = b->free_list_index; - return pool_elt_at_index (bm->buffer_free_list_pool, i); -} - static_always_inline void vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, u32 follow_buffer_next) @@ -491,14 +378,14 @@ vlib_buffer_free_inline (vlib_main_t * vm, b = vlib_get_buffer (vm, buffers[i]); - fl = buffer_get_free_list (vm, b, &fi); + fl = vlib_buffer_get_buffer_free_list (vm, b, &fi); /* The only current use of this callback: multicast recycle */ if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) { int j; - add_buffer_to_free_list + vlib_buffer_add_to_free_list (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); for (j = 0; j < vec_len 
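/* announce each free list at most once: skip it if already queued */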
(bm->announce_list); j++) -- cgit 1.2.3-korg From d8e478762919b5d40529d72edd3ff8a85fbe9800 Mon Sep 17 00:00:00 2001 From: Wojciech Dec Date: Tue, 17 Jan 2017 21:45:11 +0100 Subject: Fix crash on deleting activated vhost-user - VPP-603 Vhost-user pool getting freed prematurely Change-Id: I952821ec85efa68923d09a643c70b6b309ea2574 Signed-off-by: Wojciech Dec --- src/vnet/devices/virtio/vhost-user.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index ee41ee18..9a7c1dc0 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2429,9 +2429,6 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) // Disable and reset interface vhost_user_term_if (vui); - // Back to pool - pool_put (vum->vhost_user_interfaces, vui); - // Reset renumbered iface if (hwif->dev_instance < vec_len (vum->show_dev_instance_by_real_dev_instance)) @@ -2439,6 +2436,10 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) // Delete ethernet interface ethernet_delete_interface (vnm, vui->hw_if_index); + + // Back to pool + pool_put (vum->vhost_user_interfaces, vui); + return rv; } -- cgit 1.2.3-korg From 8f544964a3df144a441b136c2a01427eca731eea Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 18 Jan 2017 10:23:22 -0500 Subject: Fix coverity warnings, VPP-608 Change-Id: Ib0144ba3a9a09971d3946c932e8fed6d5c1ad278 Signed-off-by: Dave Barach --- src/plugins/snat/in2out.c | 8 ++++++-- src/plugins/snat/out2in.c | 8 ++++++-- src/vnet/devices/virtio/vhost-user.c | 8 ++++++-- src/vnet/unix/tapcli.c | 5 +++-- src/vpp/api/api_main.c | 10 +++++----- src/vppinfra/bihash_template.c | 11 ++++++----- 6 files changed, 32 insertions(+), 18 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index 76a6a12c..ba752cf0 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -1232,8 +1232,12 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) { /* No, assign next available worker (RR) */ - next_worker_index = sm->first_worker_index + - sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + next_worker_index = sm->first_worker_index; + if (vec_len (sm->workers)) + { + next_worker_index += + sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; + } /* add non-traslated packets worker lookup */ kv0.value = next_worker_index; diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c index f1329733..855e9efb 100644 --- a/src/plugins/snat/out2in.c +++ b/src/plugins/snat/out2in.c @@ -901,8 +901,12 @@ snat_out2in_worker_handoff_fn (vlib_main_t * vm, if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) { /* No, assign next available worker (RR) */ - next_worker_index = sm->first_worker_index + - sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + next_worker_index = sm->first_worker_index; + if (vec_len (sm->workers)) + { + next_worker_index += + sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; + } } else { diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 9a7c1dc0..ac142867 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2326,12 +2326,16 @@ vhost_user_process (vlib_main_t * vm, sizeof (sun.sun_path) - 1); /* Avoid hanging VPP if the other end does not accept */ - 
fcntl(sockfd, F_SETFL, O_NONBLOCK); + if (fcntl(sockfd, F_SETFL, O_NONBLOCK) < 0) + clib_unix_warning ("fcntl"); + if (connect (sockfd, (struct sockaddr *) &sun, sizeof (struct sockaddr_un)) == 0) { /* Set the socket to blocking as it was before */ - fcntl(sockfd, F_SETFL, 0); + if (fcntl(sockfd, F_SETFL, 0) < 0) + clib_unix_warning ("fcntl2"); + vui->sock_errno = 0; template.file_descriptor = sockfd; template.private_data = diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index 2d3082cb..e9dbf729 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -899,8 +899,9 @@ int vnet_tap_connect (vlib_main_t * vm, vnet_tap_connect_args_t *ap) /* ip4: mask defaults to /24 */ u32 mask = clib_host_to_net_u32 (0xFFFFFF00); + memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; - sin.sin_port = 0; + /* sin.sin_port = 0; */ sin.sin_addr.s_addr = ap->ip4_address->as_u32; memcpy (&ifr.ifr_ifru.ifru_addr, &sin, sizeof (sin)); @@ -1294,7 +1295,7 @@ tap_connect_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - u8 * intfc_name; + u8 * intfc_name = 0; unformat_input_t _line_input, *line_input = &_line_input; vnet_tap_connect_args_t _a, *ap= &_a; tapcli_main_t * tm = &tapcli_main; diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index db532061..fd6998f4 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -139,11 +139,11 @@ api_command_fn (vlib_main_t * vm, "%s error: %U\n", cmdp, format_api_error, vam, rv); - if (vam->regenerate_interface_table) - { - vam->regenerate_interface_table = 0; - api_sw_interface_dump (vam); - } + } + if (vam->regenerate_interface_table) + { + vam->regenerate_interface_table = 0; + api_sw_interface_dump (vam); } unformat_free (vam->input); return 0; diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c index 7c817a20..d8b97b5f 100644 --- a/src/vppinfra/bihash_template.c +++ b/src/vppinfra/bihash_template.c @@ -199,7 +199,7 @@ BV (split_and_rehash_linear) /* Find a free slot in the new linear scan bucket */ for (; j < new_length; j++) { - /* Old value in use? Forget it. */ + /* Old value not in use? Forget it. */ if (BV (clib_bihash_is_free) (&(old_values->kvp[i]))) goto doublebreak; @@ -212,11 +212,12 @@ BV (split_and_rehash_linear) j++; goto doublebreak; } - /* This should never happen... */ - clib_warning ("BUG: linear rehash failed!"); - BV (value_free) (h, new_values); - return 0; } + /* This should never happen... */ + clib_warning ("BUG: linear rehash failed!"); + BV (value_free) (h, new_values); + return 0; + doublebreak:; } return new_values; -- cgit 1.2.3-korg From cf751ec70df21affb19c77b2c51e3c231b8202ad Mon Sep 17 00:00:00 2001 From: Mohsin KAZMI Date: Wed, 18 Jan 2017 11:59:45 +0100 Subject: af_packet: multithreading support This patch adds multithreading support for af_packet interfaces. 
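The TX path below serializes transmitters with a per-interface spinlock (apif->lockp, one cache line each) taken with __sync_lock_test_and_set and released by storing 0. A standalone sketch of that lock shape using the same gcc/clang builtins — plain pthreads here rather than VPP worker threads:

```c
#include <pthread.h>
#include <stdio.h>

static volatile unsigned lockp; /* 0 = free, 1 = held */
static long counter;

static void *
worker (void *arg)
{
  (void) arg;
  for (int i = 0; i < 100000; i++)
    {
      while (__sync_lock_test_and_set (&lockp, 1)) /* spin until acquired */
        ;
      counter++;                    /* critical section: e.g. one TX frame */
      __sync_lock_release (&lockp); /* equivalent to storing 0 */
    }
  return 0;
}

int
main (void)
{
  pthread_t t[2];
  for (int i = 0; i < 2; i++)
    pthread_create (&t[i], 0, worker, 0);
  for (int i = 0; i < 2; i++)
    pthread_join (t[i], 0);
  printf ("%ld\n", counter); /* always 200000: no lost updates */
  return 0;
}
```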
Change-Id: Ief5d1117e7ffeaa59dbc2831e583d5d8e8d4fa7a Signed-off-by: Mohsin KAZMI --- src/vnet/devices/af_packet/af_packet.c | 55 ++++++++++++++++++++++++++++++++++ src/vnet/devices/af_packet/af_packet.h | 7 +++++ src/vnet/devices/af_packet/device.c | 9 ++++++ src/vnet/devices/af_packet/node.c | 26 +++++++++------- 4 files changed, 86 insertions(+), 11 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 91c3988b..e491ba47 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -171,6 +171,31 @@ error: return ret; } +static void +af_packet_worker_thread_enable () +{ + /* If worker threads are enabled, switch to polling mode */ + foreach_vlib_main (( + { + vlib_node_set_state (this_vlib_main, + af_packet_input_node.index, + VLIB_NODE_STATE_POLLING); + })); + +} + +static void +af_packet_worker_thread_disable () +{ + foreach_vlib_main (( + { + vlib_node_set_state (this_vlib_main, + af_packet_input_node.index, + VLIB_NODE_STATE_INTERRUPT); + })); + +} + int af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, u32 * sw_if_index) @@ -184,6 +209,7 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, u8 hw_addr[6]; clib_error_t *error; vnet_sw_interface_t *sw; + vlib_thread_main_t *tm = vlib_get_thread_main (); vnet_main_t *vnm = vnet_get_main (); uword *p; uword if_index; @@ -226,6 +252,13 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, apif->next_tx_frame = 0; apif->next_rx_frame = 0; + if (tm->n_vlib_mains > 1) + { + apif->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) apif->lockp, 0, CLIB_CACHE_LINE_BYTES); + } + { unix_file_t template = { 0 }; template.read_function = af_packet_fd_read_ready; @@ -273,6 +306,10 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, 0); if (sw_if_index) *sw_if_index = apif->sw_if_index; + + if (tm->n_vlib_mains > 1 && pool_elts (apm->interfaces) == 1) + af_packet_worker_thread_enable (); + return 0; error: @@ -286,6 +323,7 @@ int af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name) { vnet_main_t *vnm = vnet_get_main (); + vlib_thread_main_t *tm = vlib_get_thread_main (); af_packet_main_t *apm = &af_packet_main; af_packet_if_t *apif; uword *p; @@ -335,6 +373,8 @@ af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name) ethernet_delete_interface (vnm, apif->hw_if_index); pool_put (apm->interfaces, apif); + if (tm->n_vlib_mains > 1 && pool_elts (apm->interfaces) == 0) + af_packet_worker_thread_disable (); return 0; } @@ -344,9 +384,24 @@ af_packet_init (vlib_main_t * vm) { af_packet_main_t *apm = &af_packet_main; vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + uword *p; memset (apm, 0, sizeof (af_packet_main_t)); + apm->input_cpu_first_index = 0; + apm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? 
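/* a "workers" registration exists: its thread range is taken below; otherwise input stays on the main thread */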
(vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + apm->input_cpu_first_index = tr->first_index; + apm->input_cpu_count = tr->count; + } + mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword)); vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1, diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h index 19e2523d..e00e5cb4 100644 --- a/src/vnet/devices/af_packet/af_packet.h +++ b/src/vnet/devices/af_packet/af_packet.h @@ -20,6 +20,7 @@ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + volatile u32 *lockp; u8 *host_if_name; int fd; struct tpacket_req *rx_req; @@ -50,6 +51,12 @@ typedef struct /* hash of host interface names */ mhash_t if_index_by_host_if_name; + + /* first cpu index */ + u32 input_cpu_first_index; + + /* total cpu count */ + u32 input_cpu_count; } af_packet_main_t; af_packet_main_t af_packet_main; diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c index 1fb4000f..e3bf9bbc 100644 --- a/src/vnet/devices/af_packet/device.c +++ b/src/vnet/devices/af_packet/device.c @@ -92,6 +92,12 @@ af_packet_interface_tx (vlib_main_t * vm, struct tpacket2_hdr *tph; u32 frame_not_ready = 0; + if (PREDICT_FALSE (apif->lockp != 0)) + { + while (__sync_lock_test_and_set (apif->lockp, 1)) + ; + } + while (n_left > 0) { u32 len; @@ -152,6 +158,9 @@ af_packet_interface_tx (vlib_main_t * vm, } } + if (PREDICT_FALSE (apif->lockp != 0)) + *apif->lockp = 0; + if (PREDICT_FALSE (frame_not_ready)) vlib_error_count (vm, node->node_index, AF_PACKET_TX_ERROR_FRAME_NOT_READY, frame_not_ready); diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index 72004320..476ccca9 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -108,10 +108,9 @@ buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi) always_inline uword af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, u32 device_idx) + vlib_frame_t * frame, af_packet_if_t * apif) { af_packet_main_t *apm = &af_packet_main; - af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, device_idx); struct tpacket2_hdr *tph; u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; u32 block = 0; @@ -125,10 +124,10 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 frame_num = apif->rx_req->tp_frame_nr; u8 *block_start = apif->rx_ring + block * block_size; uword n_trace = vlib_get_trace_count (vm, node); + u32 cpu_index = os_get_cpu_number (); u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes; - int cpu_index = node->cpu_index; if (apif->per_interface_next_index != ~0) next_index = apif->per_interface_next_index; @@ -249,16 +248,18 @@ af_packet_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, { int i; u32 n_rx_packets = 0; - + u32 cpu_index = os_get_cpu_number (); af_packet_main_t *apm = &af_packet_main; + af_packet_if_t *apif; - /* *INDENT-OFF* */ - clib_bitmap_foreach (i, apm->pending_input_bitmap, - ({ - clib_bitmap_set (apm->pending_input_bitmap, i, 0); - n_rx_packets += af_packet_device_input_fn(vm, node, frame, i); - })); - /* *INDENT-ON* */ + for (i = 0; i < vec_len (apm->interfaces); i++) + { + apif = vec_elt_at_index (apm->interfaces, i); + if (apif->is_admin_up && + (i % apm->input_cpu_count) == + (cpu_index - apm->input_cpu_first_index)) + n_rx_packets += 
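/* interface i is statically assigned to this worker by the modulo test above */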
af_packet_device_input_fn (vm, node, frame, apif); + } return n_rx_packets; } @@ -270,6 +271,9 @@ VLIB_REGISTER_NODE (af_packet_input_node) = { .sibling_of = "device-input", .format_trace = format_af_packet_input_trace, .type = VLIB_NODE_TYPE_INPUT, + /** + * default state is INTERRUPT mode, switch to POLLING if worker threads are enabled + */ .state = VLIB_NODE_STATE_INTERRUPT, .n_errors = AF_PACKET_INPUT_N_ERROR, .error_strings = af_packet_input_error_strings, -- cgit 1.2.3-korg From 50132fa8c14dd74a77b760bc603f96d7027bb73a Mon Sep 17 00:00:00 2001 From: Pavel Kotucek Date: Mon, 23 Jan 2017 15:24:49 +0100 Subject: dpdk : incorrect rx filter being installed When mac address is set prior bringing interface up incorrect rx filter being installed into the e1000 mac. Change-Id: If59a2bf16f732e45221b3787d271307d369e54d3 Signed-off-by: Pavel Kotucek --- src/vnet/devices/dpdk/device.c | 10 +++++++++- src/vnet/devices/dpdk/dpdk.h | 3 +++ src/vnet/devices/dpdk/init.c | 8 ++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/dpdk/device.c b/src/vnet/devices/dpdk/device.c index 0deab6aa..cd32389c 100644 --- a/src/vnet/devices/dpdk/device.c +++ b/src/vnet/devices/dpdk/device.c @@ -60,6 +60,8 @@ dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address) } else { + vec_reset_length (xd->default_mac_address); + vec_add (xd->default_mac_address, address, sizeof (address)); return NULL; } } @@ -628,7 +630,13 @@ dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) f64 now = vlib_time_now (dm->vlib_main); if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) - rv = rte_eth_dev_start (xd->device_index); + { + rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + } if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) rte_eth_promiscuous_enable (xd->device_index); diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h index 066ec6fa..a91e87df 100644 --- a/src/vnet/devices/dpdk/dpdk.h +++ b/src/vnet/devices/dpdk/dpdk.h @@ -232,6 +232,9 @@ typedef struct struct rte_eth_xstat *last_cleared_xstats; f64 time_last_stats_update; dpdk_port_type_t port_type; + + /* mac address */ + u8 *default_mac_address; } dpdk_device_t; #define DPDK_STATS_POLL_INTERVAL (10.0) diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index 4c040d20..3fa656ea 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -125,6 +125,10 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) { int rv; rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); if (rv < 0) clib_warning ("rte_eth_dev_start %d returned %d", xd->device_index, rv); @@ -199,6 +203,10 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) { int rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); if (rv < 0) clib_warning ("rte_eth_dev_start %d returned %d", xd->device_index, rv); -- cgit 1.2.3-korg From d04b60bfa940e21ab4676a1cb3c15989748be40a Mon Sep 17 00:00:00 2001 From: Sergio Gonzalez Monroy Date: Fri, 20 Jan 2017 15:35:23 +0000 Subject: dpdk: rework 
cryptodev ipsec build and setup Build Cryptodev IPsec support by default when DPDK is enabled but only build hardware Cryptodev PMDs. To enable Cryptodev support, a new startup.conf option for dpdk has been introduced 'enable-cryptodev'. During VPP init, if Cryptodev support is not enabled or not enough cryptodev resources are available then default to OpenSSL ipsec implementation. Change-Id: I5aa7e0d5c2676bdb41d775ef40364536a081956d Signed-off-by: Sergio Gonzalez Monroy --- build-data/packages/dpdk.mk | 6 +- build-data/packages/vpp.mk | 4 +- build-data/platforms/vpp.mk | 2 +- dpdk/Makefile | 10 +- src/Makefile.am | 2 +- src/configure.ac | 4 +- src/vat/api_format.c | 31 --- src/vnet.am | 4 +- src/vnet/devices/dpdk/dpdk.h | 1 + src/vnet/devices/dpdk/format.c | 3 + src/vnet/devices/dpdk/init.c | 3 + src/vnet/devices/dpdk/ipsec/cli.c | 8 + src/vnet/devices/dpdk/ipsec/crypto_node.c | 25 +- .../devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md | 37 ++- src/vnet/devices/dpdk/ipsec/esp.h | 95 ++------ src/vnet/devices/dpdk/ipsec/esp_decrypt.c | 11 +- src/vnet/devices/dpdk/ipsec/esp_encrypt.c | 11 +- src/vnet/devices/dpdk/ipsec/ipsec.c | 257 +++++++++++++++------ src/vnet/devices/dpdk/ipsec/ipsec.h | 2 +- src/vnet/ipsec-gre/interface.c | 8 +- src/vnet/ipsec/ipsec.c | 51 ++-- src/vnet/ipsec/ipsec.h | 63 +++-- src/vnet/ipsec/ipsec_api.c | 43 +--- src/vnet/ipsec/ipsec_cli.c | 31 +-- src/vnet/ipsec/ipsec_if.c | 73 ++++-- src/vnet/ipsec/ipsec_if_in.c | 24 +- src/vnet/ipsec/ipsec_if_out.c | 22 +- src/vnet/ipsec/ipsec_input.c | 24 +- src/vnet/ipsec/ipsec_output.c | 20 -- 29 files changed, 452 insertions(+), 423 deletions(-) (limited to 'src/vnet/devices') diff --git a/build-data/packages/dpdk.mk b/build-data/packages/dpdk.mk index 6c136824..6938392c 100644 --- a/build-data/packages/dpdk.mk +++ b/build-data/packages/dpdk.mk @@ -22,9 +22,9 @@ DPDK_MAKE_ARGS = -C $(call find_source_fn,$(PACKAGE_SOURCE)) \ DPDK_TUNE=$(DPDK_TUNE) \ DPDK_DEBUG=$(DPDK_DEBUG) -DPDK_CRYPTO_PMD=$(strip $($(PLATFORM)_uses_dpdk_cryptodev)) -ifneq ($(DPDK_CRYPTO_PMD),) -DPDK_MAKE_ARGS += DPDK_CRYPTO_PMD=y +DPDK_CRYPTO_SW_PMD=$(strip $($(PLATFORM)_uses_dpdk_cryptodev_sw)) +ifneq ($(DPDK_CRYPTO_SW_PMD),) +DPDK_MAKE_ARGS += DPDK_CRYPTO_SW_PMD=y endif DPDK_MLX5_PMD=$(strip $($(PLATFORM)_uses_dpdk_mlx5_pmd)) diff --git a/build-data/packages/vpp.mk b/build-data/packages/vpp.mk index 81aeab69..64eb0d89 100644 --- a/build-data/packages/vpp.mk +++ b/build-data/packages/vpp.mk @@ -23,8 +23,8 @@ vpp_CPPFLAGS += $(call installed_includes_fn, dpdk)/dpdk vpp_LDFLAGS += $(call installed_libs_fn, dpdk) vpp_CPPFLAGS += -I/usr/include/dpdk endif -ifeq ($($(PLATFORM)_uses_dpdk_cryptodev),yes) -vpp_configure_args += --with-dpdk-crypto +ifeq ($($(PLATFORM)_uses_dpdk_cryptodev_sw),yes) +vpp_configure_args += --with-dpdk-crypto-sw endif ifeq ($($(PLATFORM)_uses_dpdk_mlx5_pmd),yes) vpp_configure_args += --with-dpdk-mlx5-pmd diff --git a/build-data/platforms/vpp.mk b/build-data/platforms/vpp.mk index dd6f9dc2..5b200587 100644 --- a/build-data/platforms/vpp.mk +++ b/build-data/platforms/vpp.mk @@ -44,7 +44,7 @@ vpp_configure_args_vpp = --with-dpdk vlib_configure_args_vpp = --with-pre-data=128 # DPDK configuration parameters -# vpp_uses_dpdk_cryptodev = yes +# vpp_uses_dpdk_cryptodev_sw = yes # vpp_uses_dpdk_mlx5_pmd = yes # vpp_uses_external_dpdk = yes # vpp_dpdk_inc_dir = /usr/include/dpdk diff --git a/dpdk/Makefile b/dpdk/Makefile index 586d2425..22e97878 100644 --- a/dpdk/Makefile +++ b/dpdk/Makefile @@ -21,13 +21,13 @@ DPDK_DOWNLOAD_DIR ?= $(HOME)/Downloads 
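# build-time defaults; each ?= below can be overridden on the make command line (e.g. make DPDK_DEBUG=y)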
DPDK_MARCH ?= native DPDK_TUNE ?= generic DPDK_DEBUG ?= n -DPDK_CRYPTO_PMD ?= n +DPDK_CRYPTO_SW_PMD ?= n DPDK_MLX5_PMD ?= n B := $(DPDK_BUILD_DIR) I := $(DPDK_INSTALL_DIR) DPDK_VERSION ?= 16.11 -PKG_SUFFIX ?= vpp1 +PKG_SUFFIX ?= vpp2 DPDK_BASE_URL ?= http://fast.dpdk.org/rel DPDK_TARBALL := dpdk-$(DPDK_VERSION).tar.xz DPDK_TAR_URL := $(DPDK_BASE_URL)/$(DPDK_TARBALL) @@ -121,9 +121,9 @@ $(B)/custom-config: $(B)/.patch.ok Makefile $(call set,RTE_LIBRTE_VMXNET3_DEBUG_INIT,$(DPDK_DEBUG)) $(call set,RTE_LIBRTE_PMD_BOND,y) $(call set,RTE_LIBRTE_IP_FRAG,y) - $(call set,RTE_LIBRTE_PMD_AESNI_MB,$(DPDK_CRYPTO_PMD)) - $(call set,RTE_LIBRTE_PMD_AESNI_GCM,$(DPDK_CRYPTO_PMD)) - $(call set,RTE_LIBRTE_PMD_QAT,$(DPDK_CRYPTO_PMD)) + $(call set,RTE_LIBRTE_PMD_QAT,y) + $(call set,RTE_LIBRTE_PMD_AESNI_MB,$(DPDK_CRYPTO_SW_PMD)) + $(call set,RTE_LIBRTE_PMD_AESNI_GCM,$(DPDK_CRYPTO_SW_PMD)) $(call set,RTE_LIBRTE_MLX5_PMD,$(DPDK_MLX5_PMD)) @# not needed $(call set,RTE_LIBRTE_TIMER,n) diff --git a/src/Makefile.am b/src/Makefile.am index 5e248972..239afeac 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -47,7 +47,7 @@ DPDK_LD_FLAGS = -Wl,--whole-archive,-ldpdk,--no-whole-archive else DPDK_LD_FLAGS = -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl endif -if WITH_DPDK_CRYPTO +if WITH_DPDK_CRYPTO_SW DPDK_LD_ADD = -L$(AESNI_MULTI_BUFFER_LIB_PATH) -lIPSec_MB endif if WITH_DPDK_MLX5_PMD diff --git a/src/configure.ac b/src/configure.ac index fbedabf0..49da6248 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -98,7 +98,7 @@ DISABLE_ARG(japi, [Disable Java API bindings]) # --with-X WITH_ARG(dpdk, [Use use DPDK]) -WITH_ARG(dpdk_crypto, [Use DPDK cryptodev]) +WITH_ARG(dpdk_crypto_sw,[Use DPDK cryptodev SW PMDs]) WITH_ARG(dpdk_mlx5_pmd, [Use DPDK with mlx5 PMD]) # --without-X @@ -132,7 +132,7 @@ AC_SUBST(APICLI, [-DVPP_API_TEST_BUILTIN=${n_with_apicli}]) AC_DEFINE_UNQUOTED(DPDK, [${n_with_dpdk}]) AC_DEFINE_UNQUOTED(DPDK_SHARED_LIB, [${n_enable_dpdk_shared}]) -AC_DEFINE_UNQUOTED(DPDK_CRYPTO, [${n_with_dpdk_crypto}]) +AC_DEFINE_UNQUOTED(DPDK_CRYPTO_SW, [${n_with_dpdk_crypto_sw}]) AC_DEFINE_UNQUOTED(WITH_LIBSSL, [${n_with_libssl}]) diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 4cfe4a58..6b8c5fb9 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -12199,11 +12199,7 @@ api_ipsec_sad_add_del_entry (vat_main_t * vam) if (unformat (i, "integ_alg %U", unformat_ipsec_integ_alg, &integ_alg)) { -#if DPDK_CRYPTO==1 - if (integ_alg < IPSEC_INTEG_ALG_NONE || -#else if (integ_alg < IPSEC_INTEG_ALG_SHA1_96 || -#endif integ_alg >= IPSEC_INTEG_N_ALG) { clib_warning ("unsupported integ-alg: '%U'", @@ -12221,33 +12217,6 @@ api_ipsec_sad_add_del_entry (vat_main_t * vam) } -#if DPDK_CRYPTO==1 - /*Special cases, aes-gcm-128 encryption */ - if (crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - if (integ_alg != IPSEC_INTEG_ALG_NONE - && integ_alg != IPSEC_INTEG_ALG_AES_GCM_128) - { - clib_warning - ("unsupported: aes-gcm-128 crypto-alg needs none as integ-alg"); - return -99; - } - else /*set integ-alg internally to aes-gcm-128 */ - integ_alg = IPSEC_INTEG_ALG_AES_GCM_128; - } - else if (integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) - { - clib_warning ("unsupported integ-alg: aes-gcm-128"); - return -99; - } - else if (integ_alg == IPSEC_INTEG_ALG_NONE) - { - clib_warning ("unsupported integ-alg: none"); - return -99; - } -#endif - - M (IPSEC_SAD_ADD_DEL_ENTRY, ipsec_sad_add_del_entry); mp->sad_id = ntohl (sad_id); diff --git a/src/vnet.am b/src/vnet.am index 28a1b19a..96cfa557 100644 --- a/src/vnet.am 
+++ b/src/vnet.am @@ -400,7 +400,7 @@ libvnet_la_SOURCES += \ API_FILES += vnet/ipsec/ipsec.api -if WITH_DPDK_CRYPTO +if WITH_DPDK libvnet_la_SOURCES += \ vnet/devices/dpdk/ipsec/esp_encrypt.c \ vnet/devices/dpdk/ipsec/esp_decrypt.c \ @@ -419,7 +419,7 @@ nobase_include_HEADERS += \ vnet/ipsec/ikev2.h \ vnet/ipsec/ikev2_priv.h \ vnet/ipsec/ipsec.api.h -if WITH_DPDK_CRYPTO +if WITH_DPDK nobase_include_HEADERS += \ vnet/devices/dpdk/ipsec/ipsec.h \ vnet/devices/dpdk/ipsec/esp.h diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h index a91e87df..1b54460e 100644 --- a/src/vnet/devices/dpdk/dpdk.h +++ b/src/vnet/devices/dpdk/dpdk.h @@ -338,6 +338,7 @@ typedef struct u8 *uio_driver_name; u8 no_multi_seg; u8 enable_tcp_udp_checksum; + u8 cryptodev; /* Required config parameters */ u8 coremask_set_manually; diff --git a/src/vnet/devices/dpdk/format.c b/src/vnet/devices/dpdk/format.c index ff7c7a5a..cc0d71af 100644 --- a/src/vnet/devices/dpdk/format.c +++ b/src/vnet/devices/dpdk/format.c @@ -684,6 +684,8 @@ format_dpdk_rte_mbuf (u8 * s, va_list * va) return s; } +/* FIXME is this function used? */ +#if 0 uword unformat_socket_mem (unformat_input_t * input, va_list * va) { @@ -710,6 +712,7 @@ unformat_socket_mem (unformat_input_t * input, va_list * va) done: return 1; } +#endif clib_error_t * unformat_rss_fn (unformat_input_t * input, uword * rss_fn) diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index 3fa656ea..01ef48cb 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -1054,6 +1054,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "no-multi-seg")) conf->no_multi_seg = 1; + else if (unformat (input, "enable-cryptodev")) + conf->cryptodev = 1; + else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input, &sub_input)) { diff --git a/src/vnet/devices/dpdk/ipsec/cli.c b/src/vnet/devices/dpdk/ipsec/cli.c index 3b634e03..93df4a64 100644 --- a/src/vnet/devices/dpdk/ipsec/cli.c +++ b/src/vnet/devices/dpdk/ipsec/cli.c @@ -14,15 +14,23 @@ */ #include +#include #include static void dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) { + dpdk_config_main_t *conf = &dpdk_config_main; dpdk_crypto_main_t *dcm = &dpdk_crypto_main; vlib_thread_main_t *tm = vlib_get_thread_main (); u32 i, skip_master; + if (!conf->cryptodev) + { + vlib_cli_output (vm, "DPDK Cryptodev support is disabled\n"); + return; + } + if (detail_display) vlib_cli_output (vm, "worker\t%10s\t%15s\tdir\tdev\tqp\n", "cipher", "auth"); diff --git a/src/vnet/devices/dpdk/ipsec/crypto_node.c b/src/vnet/devices/dpdk/ipsec/crypto_node.c index 7b32704e..e8fef235 100644 --- a/src/vnet/devices/dpdk/ipsec/crypto_node.c +++ b/src/vnet/devices/dpdk/ipsec/crypto_node.c @@ -22,6 +22,8 @@ #include #include +#include +#include #include #define foreach_dpdk_crypto_input_next \ @@ -183,24 +185,27 @@ dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, return n_deq; } +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (dpdk_crypto_input_node) = { - .function = dpdk_crypto_input_fn,.name = "dpdk-crypto-input",.format_trace = - format_dpdk_crypto_input_trace,.type = VLIB_NODE_TYPE_INPUT,.state = - VLIB_NODE_STATE_DISABLED,.n_errors = - DPDK_CRYPTO_INPUT_N_ERROR,.error_strings = - dpdk_crypto_input_error_strings,.n_next_nodes = - DPDK_CRYPTO_INPUT_N_NEXT,.next_nodes = + .function = dpdk_crypto_input_fn, + .name = "dpdk-crypto-input", + .format_trace = format_dpdk_crypto_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + .state = 
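/* registered disabled; dpdk-ipsec-process flips it to POLLING once cryptodev queues are configured */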
VLIB_NODE_STATE_DISABLED, + .n_errors = DPDK_CRYPTO_INPUT_N_ERROR, + .error_strings = dpdk_crypto_input_error_strings, + .n_next_nodes = DPDK_CRYPTO_INPUT_N_NEXT, + .next_nodes = { #define _(s,n) [DPDK_CRYPTO_INPUT_NEXT_##s] = n, foreach_dpdk_crypto_input_next #undef _ - } -,}; + }, +}; +/* *INDENT-ON* */ -#if DPDK_CRYPTO==1 VLIB_NODE_FUNCTION_MULTIARCH (dpdk_crypto_input_node, dpdk_crypto_input_fn) -#endif /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md b/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md index 8089696f..fed2fe0e 100644 --- a/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md +++ b/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md @@ -7,43 +7,55 @@ This document is meant to contain all related information about implementation a DPDK Cryptodev is an asynchronous crypto API that supports both Hardware and Software implementations (for more details refer to [DPDK Cryptography Device Library documentation](http://dpdk.org/doc/guides/prog_guide/cryptodev_lib.html)). -When DPDK Cryptodev support is enabled, the node graph is modified by adding and replacing some of the nodes. - -The following nodes are replaced: -* esp-encrypt -> dpdk-esp-encrypt -* esp-decrypt -> dpdk-esp-decrypt +When DPDK support is enabled and there are enough Cryptodev resources for all workers, the node graph is reconfigured by adding and changing default next nodes. The following nodes are added: * dpdk-crypto-input : polling input node, basically dequeuing from crypto devices. +* dpdk-esp-encrypt : internal node. +* dpdk-esp-decrypt : internal node. * dpdk-esp-encrypt-post : internal node. * dpdk-esp-decrypt-post : internal node. +Set new default next nodes: +* for esp encryption: esp-encrypt -> dpdk-esp-encrypt +* for esp decryption: esp-decrypt -> dpdk-esp-decrypt + ### How to enable VPP IPSec with DPDK Cryptodev support -To enable DPDK Cryptodev support (disabled by default), we need the following env option: +DPDK Cryptodev is supported in DPDK enabled VPP. +By default, only HW Cryptodev is supported but needs to be explicetly enabled with the following config option: + +``` +dpdk { + enable-cryptodev +} +``` + +To enable SW Cryptodev support (AESNI-MB-PMD and GCM-PMD), we need the following env option: - vpp_uses_dpdk_cryptodev=yes + vpp_uses_dpdk_cryptodev_sw=yes A couple of ways to achive this: * uncomment/add it in the platforms config (ie. build-data/platforms/vpp.mk) -* set the option when building vpp (ie. make vpp_uses_dpdk_cryptodev=yes build-release) +* set the option when building vpp (ie. make vpp_uses_dpdk_cryptodev_sw=yes build-release) + +When enabling SW Cryptodev support, it means that you need to pre-build the required crypto libraries needed by those SW Cryptodev PMDs. ### Crypto Resources allocation VPP allocates crypto resources based on a best effort approach: * first allocate Hardware crypto resources, then Software. -* if there are not enough crypto resources for all workers, all packets will be dropped if they reach ESP encrypt/decrypt nodes, displaying the warning: +* if there are not enough crypto resources for all workers, the graph node is not modifed, therefore the default VPP IPsec implementation based in OpenSSL is used. The following message is displayed: 0: dpdk_ipsec_init: not enough cryptodevs for ipsec ### Configuration example -No especial IPsec configuration is required. - -Once DPDK Cryptodev is enabled, the user just needs to provide cryptodevs in the startup.conf. 
+To enable DPDK Cryptodev the user just need to provide the startup.conf option +as mentioned previously. Example startup.conf: @@ -53,6 +65,7 @@ dpdk { num-mbufs 131072 dev 0000:81:00.0 dev 0000:81:00.1 + enable-cryptodev dev 0000:85:01.0 dev 0000:85:01.1 vdev cryptodev_aesni_mb_pmd,socket_id=1 diff --git a/src/vnet/devices/dpdk/ipsec/esp.h b/src/vnet/devices/dpdk/ipsec/esp.h index 7ef90c49..d414d679 100644 --- a/src/vnet/devices/dpdk/ipsec/esp.h +++ b/src/vnet/devices/dpdk/ipsec/esp.h @@ -97,60 +97,11 @@ dpdk_esp_init () } static_always_inline int -add_del_sa_sess (u32 sa_index, u8 is_add) -{ - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm; - u8 skip_master = vlib_num_workers () > 0; - - /* *INDENT-OFF* */ - vec_foreach (cwm, dcm->workers_main) - { - crypto_sa_session_t *sa_sess; - u8 is_outbound; - - if (skip_master) - { - skip_master = 0; - continue; - } - - for (is_outbound = 0; is_outbound < 2; is_outbound++) - { - if (is_add) - { - pool_get (cwm->sa_sess_d[is_outbound], sa_sess); - } - else - { - u8 dev_id; - - sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index); - dev_id = cwm->qp_data[sa_sess->qp_index].dev_id; - - if (!sa_sess->sess) - continue; - - if (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess)) - { - clib_warning("failed to free session"); - return -1; - } - memset(sa_sess, 0, sizeof(sa_sess[0])); - } - } - } - /* *INDENT-OFF* */ - - return 0; -} - -static_always_inline int -translate_crypto_algo(ipsec_crypto_alg_t crypto_algo, - struct rte_crypto_sym_xform *cipher_xform) +translate_crypto_algo (ipsec_crypto_alg_t crypto_algo, + struct rte_crypto_sym_xform *cipher_xform) { switch (crypto_algo) - { + { case IPSEC_CRYPTO_ALG_NONE: cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL; break; @@ -164,7 +115,7 @@ translate_crypto_algo(ipsec_crypto_alg_t crypto_algo, break; default: return -1; - } + } cipher_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; @@ -172,10 +123,11 @@ translate_crypto_algo(ipsec_crypto_alg_t crypto_algo, } static_always_inline int -translate_integ_algo(ipsec_integ_alg_t integ_alg, - struct rte_crypto_sym_xform *auth_xform, int use_esn) +translate_integ_algo (ipsec_integ_alg_t integ_alg, + struct rte_crypto_sym_xform *auth_xform, int use_esn) { - switch (integ_alg) { + switch (integ_alg) + { case IPSEC_INTEG_ALG_NONE: auth_xform->auth.algo = RTE_CRYPTO_AUTH_NULL; auth_xform->auth.digest_length = 0; @@ -203,11 +155,11 @@ translate_integ_algo(ipsec_integ_alg_t integ_alg, case IPSEC_INTEG_ALG_AES_GCM_128: auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM; auth_xform->auth.digest_length = 16; - auth_xform->auth.add_auth_data_length = use_esn? 12 : 8; + auth_xform->auth.add_auth_data_length = use_esn ? 
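/* AAD is SPI + 64-bit ESN (12 bytes) when extended sequence numbers are in use, else SPI + 32-bit SN (8 bytes) */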
12 : 8; break; default: return -1; - } + } auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; @@ -215,25 +167,26 @@ translate_integ_algo(ipsec_integ_alg_t integ_alg, } static_always_inline int -create_sym_sess(ipsec_sa_t *sa, crypto_sa_session_t *sa_sess, u8 is_outbound) +create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, + u8 is_outbound) { - u32 cpu_index = os_get_cpu_number(); - dpdk_crypto_main_t * dcm = &dpdk_crypto_main; + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - struct rte_crypto_sym_xform cipher_xform = {0}; - struct rte_crypto_sym_xform auth_xform = {0}; + struct rte_crypto_sym_xform cipher_xform = { 0 }; + struct rte_crypto_sym_xform auth_xform = { 0 }; struct rte_crypto_sym_xform *xfs; uword key = 0, *data; - crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *)&key; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) { sa->crypto_key_len -= 4; - clib_memcpy(&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4); + clib_memcpy (&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4); } else { - sa->salt = (u32) rand(); + sa->salt = (u32) rand (); } cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; @@ -244,11 +197,11 @@ create_sym_sess(ipsec_sa_t *sa, crypto_sa_session_t *sa_sess, u8 is_outbound) auth_xform.auth.key.data = sa->integ_key; auth_xform.auth.key.length = sa->integ_key_len; - if (translate_crypto_algo(sa->crypto_alg, &cipher_xform) < 0) + if (translate_crypto_algo (sa->crypto_alg, &cipher_xform) < 0) return -1; p_key->cipher_algo = cipher_xform.cipher.algo; - if (translate_integ_algo(sa->integ_alg, &auth_xform, sa->use_esn) < 0) + if (translate_integ_algo (sa->integ_alg, &auth_xform, sa->use_esn) < 0) return -1; p_key->auth_algo = auth_xform.auth.algo; @@ -269,17 +222,17 @@ create_sym_sess(ipsec_sa_t *sa, crypto_sa_session_t *sa_sess, u8 is_outbound) p_key->is_outbound = is_outbound; - data = hash_get(cwm->algo_qp_map, key); + data = hash_get (cwm->algo_qp_map, key); if (!data) return -1; sa_sess->sess = - rte_cryptodev_sym_session_create(cwm->qp_data[*data].dev_id, xfs); + rte_cryptodev_sym_session_create (cwm->qp_data[*data].dev_id, xfs); if (!sa_sess->sess) return -1; - sa_sess->qp_index = (u8)*data; + sa_sess->qp_index = (u8) * data; return 0; } diff --git a/src/vnet/devices/dpdk/ipsec/esp_decrypt.c b/src/vnet/devices/dpdk/ipsec/esp_decrypt.c index 89ab9f9b..53b2d122 100644 --- a/src/vnet/devices/dpdk/ipsec/esp_decrypt.c +++ b/src/vnet/devices/dpdk/ipsec/esp_decrypt.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #define foreach_esp_decrypt_next \ _(DROP, "error-drop") \ @@ -189,7 +191,14 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm, if (PREDICT_FALSE(!sa_sess->sess)) { int ret = create_sym_sess(sa0, sa_sess, 0); - ASSERT(ret == 0); + + if (PREDICT_FALSE (ret)) + { + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } } sess = sa_sess->sess; diff --git a/src/vnet/devices/dpdk/ipsec/esp_encrypt.c b/src/vnet/devices/dpdk/ipsec/esp_encrypt.c index 10bb4616..b6f00004 100644 --- a/src/vnet/devices/dpdk/ipsec/esp_encrypt.c +++ b/src/vnet/devices/dpdk/ipsec/esp_encrypt.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #define foreach_esp_encrypt_next \ _(DROP, "error-drop") \ @@ -179,7 +181,14 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (!sa_sess->sess)) { int ret = create_sym_sess (sa0, sa_sess, 1); - ASSERT (ret == 
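/* replaced below: session setup can now fail at runtime, so the packet is dropped rather than asserting */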
0); + + if (PREDICT_FALSE (ret)) + { + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } } qp_index = sa_sess->qp_index; diff --git a/src/vnet/devices/dpdk/ipsec/ipsec.c b/src/vnet/devices/dpdk/ipsec/ipsec.c index de253f02..05c17c99 100644 --- a/src/vnet/devices/dpdk/ipsec/ipsec.c +++ b/src/vnet/devices/dpdk/ipsec/ipsec.c @@ -15,24 +15,69 @@ #include #include #include +#include +#include + #include #include #include -#include -#define DPDK_CRYPTO_NB_OBJS 2048 +#define DPDK_CRYPTO_NB_SESS_OBJS 20000 #define DPDK_CRYPTO_CACHE_SIZE 512 #define DPDK_CRYPTO_PRIV_SIZE 128 -#define DPDK_CRYPTO_N_QUEUE_DESC 512 +#define DPDK_CRYPTO_N_QUEUE_DESC 1024 #define DPDK_CRYPTO_NB_COPS (1024 * 4) -/* - * return: - * -1: update failed - * 0: already exist - * 1: mapped - */ static int +add_del_sa_sess (u32 sa_index, u8 is_add) +{ + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm; + u8 skip_master = vlib_num_workers () > 0; + + /* *INDENT-OFF* */ + vec_foreach (cwm, dcm->workers_main) + { + crypto_sa_session_t *sa_sess; + u8 is_outbound; + + if (skip_master) + { + skip_master = 0; + continue; + } + + for (is_outbound = 0; is_outbound < 2; is_outbound++) + { + if (is_add) + { + pool_get (cwm->sa_sess_d[is_outbound], sa_sess); + } + else + { + u8 dev_id; + + sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index); + dev_id = cwm->qp_data[sa_sess->qp_index].dev_id; + + if (!sa_sess->sess) + continue; + + if (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess)) + { + clib_warning("failed to free session"); + return -1; + } + memset(sa_sess, 0, sizeof(sa_sess[0])); + } + } + } + /* *INDENT-OFF* */ + + return 0; +} + +static void update_qp_data (crypto_worker_main_t * cwm, u8 cdev_id, u16 qp_id, u8 is_outbound, u16 * idx) { @@ -45,7 +90,7 @@ update_qp_data (crypto_worker_main_t * cwm, if (qpd->dev_id == cdev_id && qpd->qp_id == qp_id && qpd->is_outbound == is_outbound) - return 0; + return; } /* *INDENT-ON* */ @@ -54,13 +99,10 @@ update_qp_data (crypto_worker_main_t * cwm, qpd->dev_id = cdev_id; qpd->qp_id = qp_id; qpd->is_outbound = is_outbound; - - return 1; } /* * return: - * -1: error * 0: already exist * 1: mapped */ @@ -70,7 +112,6 @@ add_mapping (crypto_worker_main_t * cwm, const struct rte_cryptodev_capabilities *cipher_cap, const struct rte_cryptodev_capabilities *auth_cap) { - int mapped; u16 qp_index; uword key = 0, data, *ret; crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; @@ -83,17 +124,12 @@ add_mapping (crypto_worker_main_t * cwm, if (ret) return 0; - mapped = update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index); - if (mapped < 0) - return -1; + update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index); data = (uword) qp_index; + hash_set (cwm->algo_qp_map, key, data); - ret = hash_set (cwm->algo_qp_map, key, data); - if (!ret) - rte_panic ("Failed to insert hash table\n"); - - return mapped; + return 1; } /* @@ -120,19 +156,13 @@ add_cdev_mapping (crypto_worker_main_t * cwm, for (j = dev_info->capabilities; j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; j++) { - int status = 0; - if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH) continue; if (check_algo_is_supported (j, NULL) != 0) continue; - status = add_mapping (cwm, cdev_id, qp, is_outbound, i, j); - if (status == 1) - mapped += 1; - if (status < 0) - return status; + mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, j); } } @@ -169,8 +199,33 @@ check_cryptodev_queues () } static clib_error_t * -dpdk_ipsec_init (vlib_main_t * vm) +dpdk_ipsec_check_support 
(ipsec_sa_t * sa) +{ + if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) + return clib_error_return (0, "unsupported integ-alg %U with " + "crypto-algo aes-gcm-128", + format_ipsec_integ_alg, sa->integ_alg); + sa->integ_alg = IPSEC_INTEG_ALG_AES_GCM_128; + } + else + { + if (sa->integ_alg == IPSEC_INTEG_ALG_NONE || + sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) + return clib_error_return (0, "unsupported integ-alg %U", + format_ipsec_integ_alg, sa->integ_alg); + } + + return 0; +} + +static uword +dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) { + dpdk_config_main_t *conf = &dpdk_config_main; + ipsec_main_t *im = &ipsec_main; dpdk_crypto_main_t *dcm = &dpdk_crypto_main; vlib_thread_main_t *tm = vlib_get_thread_main (); struct rte_cryptodev_config dev_conf; @@ -180,8 +235,19 @@ dpdk_ipsec_init (vlib_main_t * vm) i32 dev_id, ret; u32 i, skip_master; + if (!conf->cryptodev) + { + clib_warning ("DPDK Cryptodev support is disabled, " + "default to OpenSSL IPsec"); + return 0; + } + if (check_cryptodev_queues () < 0) - return clib_error_return (0, "not enough cryptodevs for ipsec"); + { + conf->cryptodev = 0; + clib_warning ("not enough Cryptodevs, default to OpenSSL IPsec"); + return 0; + } vec_alloc (dcm->workers_main, tm->n_vlib_mains); _vec_len (dcm->workers_main) = tm->n_vlib_mains; @@ -221,24 +287,17 @@ dpdk_ipsec_init (vlib_main_t * vm) { map = hash_create (0, sizeof (crypto_worker_qp_key_t)); if (!map) - return clib_error_return (0, "unable to create hash table " - "for worker %u", - vlib_mains[i]->cpu_index); + { + clib_warning ("unable to create hash table for worker %u", + vlib_mains[i]->cpu_index); + goto error; + } cwm->algo_qp_map = map; } for (is_outbound = 0; is_outbound < 2 && qp < max_nb_qp; is_outbound++) - { - int mapped = add_cdev_mapping (cwm, &cdev_info, - dev_id, qp, is_outbound); - if (mapped > 0) - qp++; - - if (mapped < 0) - return clib_error_return (0, - "too many queues for one worker"); - } + qp += add_cdev_mapping (cwm, &cdev_info, dev_id, qp, is_outbound); } if (qp == 0) @@ -246,12 +305,15 @@ dpdk_ipsec_init (vlib_main_t * vm) dev_conf.socket_id = rte_cryptodev_socket_id (dev_id); dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs; - dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_OBJS; + dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_SESS_OBJS; dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE; ret = rte_cryptodev_configure (dev_id, &dev_conf); if (ret < 0) - return clib_error_return (0, "cryptodev %u config error", dev_id); + { + clib_warning ("cryptodev %u config error", dev_id); + goto error; + } qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC; for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++) @@ -259,37 +321,64 @@ dpdk_ipsec_init (vlib_main_t * vm) ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf, dev_conf.socket_id); if (ret < 0) - return clib_error_return (0, "cryptodev %u qp %u setup error", - dev_id, qp); + { + clib_warning ("cryptodev %u qp %u setup error", dev_id, qp); + goto error; + } } - fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs, - DPDK_CRYPTO_NB_OBJS, DPDK_CRYPTO_CACHE_SIZE); - } + vec_validate_aligned (dcm->cop_pools, dev_conf.socket_id, + CLIB_CACHE_LINE_BYTES); - u32 socket_id = rte_socket_id (); + if (!vec_elt (dcm->cop_pools, dev_conf.socket_id)) + { + u8 *pool_name = format (0, "crypto_op_pool_socket%u%c", + dev_conf.socket_id, 0); + + rmp = rte_crypto_op_pool_create ((char *) pool_name, + 
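/* one crypto-op pool per NUMA socket, sized for the main thread plus all workers */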
RTE_CRYPTO_OP_TYPE_SYMMETRIC, + DPDK_CRYPTO_NB_COPS * + (1 + vlib_num_workers ()), + DPDK_CRYPTO_CACHE_SIZE, + DPDK_CRYPTO_PRIV_SIZE, + dev_conf.socket_id); + vec_free (pool_name); + + if (!rmp) + { + clib_warning ("failed to allocate mempool on socket %u", + dev_conf.socket_id); + goto error; + } + vec_elt (dcm->cop_pools, dev_conf.socket_id) = rmp; + } - vec_validate_aligned (dcm->cop_pools, socket_id, CLIB_CACHE_LINE_BYTES); + fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs, + DPDK_CRYPTO_NB_SESS_OBJS, DPDK_CRYPTO_CACHE_SIZE); + } - /* pool already exists, nothing to do */ - if (dcm->cop_pools[socket_id]) - return 0; + dpdk_esp_init (); - u8 *pool_name = format (0, "crypto_op_pool_socket%u%c", socket_id, 0); + /* Add new next node and set as default */ + vlib_node_t *node, *next_node; - rmp = rte_crypto_op_pool_create ((char *) pool_name, - RTE_CRYPTO_OP_TYPE_SYMMETRIC, - DPDK_CRYPTO_NB_COPS * - (1 + vlib_num_workers ()), - DPDK_CRYPTO_CACHE_SIZE, - DPDK_CRYPTO_PRIV_SIZE, socket_id); - vec_free (pool_name); + next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt"); + ASSERT (next_node); + node = vlib_get_node_by_name (vm, (u8 *) "ipsec-output-ip4"); + ASSERT (node); + im->esp_encrypt_node_index = next_node->index; + im->esp_encrypt_next_index = + vlib_node_add_next (vm, node->index, next_node->index); - if (!rmp) - return clib_error_return (0, "failed to allocate mempool on socket %u", - socket_id); - dcm->cop_pools[socket_id] = rmp; + next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-decrypt"); + ASSERT (next_node); + node = vlib_get_node_by_name (vm, (u8 *) "ipsec-input-ip4"); + ASSERT (node); + im->esp_decrypt_node_index = next_node->index; + im->esp_decrypt_next_index = + vlib_node_add_next (vm, node->index, next_node->index); - dpdk_esp_init (); + im->cb.check_support_cb = dpdk_ipsec_check_support; + im->cb.add_del_sa_sess_cb = add_del_sa_sess; if (vec_len (vlib_mains) == 0) vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index, @@ -299,10 +388,38 @@ dpdk_ipsec_init (vlib_main_t * vm) vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, VLIB_NODE_STATE_POLLING); + /* TODO cryptodev counters */ + + return 0; + +error: + ; + crypto_worker_main_t *cwm; + struct rte_mempool **mp; + /* *INDENT-OFF* */ + vec_foreach (cwm, dcm->workers_main) + hash_free (cwm->algo_qp_map); + + vec_foreach (mp, dcm->cop_pools) + { + if (mp) + rte_mempool_free (mp[0]); + } + /* *INDENT-ON* */ + vec_free (dcm->workers_main); + vec_free (dcm->cop_pools); + return 0; } -VLIB_MAIN_LOOP_ENTER_FUNCTION (dpdk_ipsec_init); +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_ipsec_process_node,static) = { + .function = dpdk_ipsec_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "dpdk-ipsec-process", + .process_log2_n_stack_bytes = 17, +}; +/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/devices/dpdk/ipsec/ipsec.h b/src/vnet/devices/dpdk/ipsec/ipsec.h index e6c7498c..3465b361 100644 --- a/src/vnet/devices/dpdk/ipsec/ipsec.h +++ b/src/vnet/devices/dpdk/ipsec/ipsec.h @@ -167,7 +167,7 @@ check_algo_is_supported (const struct rte_cryptodev_capabilities *cap, .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = RTE_CRYPTO_CIPHER_3DES_CBC,.name = "3DES-CBC"}, { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.auth = + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = RTE_CRYPTO_CIPHER_AES_GCM,.name = "AES-GCM"}, { .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = diff --git a/src/vnet/ipsec-gre/interface.c b/src/vnet/ipsec-gre/interface.c index 
56832ee1..3b6e4ac2 100644 --- a/src/vnet/ipsec-gre/interface.c +++ b/src/vnet/ipsec-gre/interface.c @@ -28,13 +28,7 @@ #include #include -#if DPDK_CRYPTO==1 -#include -#define ESP_NODE "dpdk-esp-encrypt" -#else #include -#define ESP_NODE "esp-encrypt" -#endif u8 * format_ipsec_gre_tunnel (u8 * s, va_list * args) @@ -193,7 +187,7 @@ vnet_ipsec_gre_add_del_tunnel (vnet_ipsec_gre_add_del_tunnel_args_t * a, hash_set (igm->tunnel_by_key, key, t - igm->tunnels); slot = vlib_node_add_named_next_with_slot - (vnm->vlib_main, hi->tx_node_index, ESP_NODE, + (vnm->vlib_main, hi->tx_node_index, "esp-encrypt", IPSEC_GRE_OUTPUT_NEXT_ESP_ENCRYPT); ASSERT (slot == IPSEC_GRE_OUTPUT_NEXT_ESP_ENCRYPT); diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c index ee85c402..cfe434ab 100644 --- a/src/vnet/ipsec/ipsec.c +++ b/src/vnet/ipsec/ipsec.c @@ -22,23 +22,7 @@ #include #include - -#if DPDK_CRYPTO==1 -#include -#define ESP_NODE "dpdk-esp-encrypt" -#else #include -#define ESP_NODE "esp-encrypt" -#endif - -#if DPDK_CRYPTO==0 -/* dummy function */ -static int -add_del_sa_sess (u32 sa_index, u8 is_add) -{ - return 0; -} -#endif u32 ipsec_get_sa_index_by_sa_id (u32 sa_id) @@ -449,7 +433,9 @@ ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add) return VNET_API_ERROR_SYSCALL_ERROR_1; /* sa used in policy */ } hash_unset (im->sa_index_by_sa_id, sa->id); - add_del_sa_sess (sa_index, is_add); + if (im->cb.add_del_sa_sess_cb && + im->cb.add_del_sa_sess_cb (sa_index, is_add) < 0) + return VNET_API_ERROR_SYSCALL_ERROR_1; pool_put (im->sad, sa); } else /* create new SA */ @@ -458,7 +444,8 @@ ipsec_add_del_sa (vlib_main_t * vm, ipsec_sa_t * new_sa, int is_add) clib_memcpy (sa, new_sa, sizeof (*sa)); sa_index = sa - im->sad; hash_set (im->sa_index_by_sa_id, sa->id, sa_index); - if (add_del_sa_sess (sa_index, is_add) < 0) + if (im->cb.add_del_sa_sess_cb && + im->cb.add_del_sa_sess_cb (sa_index, is_add) < 0) return VNET_API_ERROR_SYSCALL_ERROR_1; } return 0; @@ -497,7 +484,8 @@ ipsec_set_sa_key (vlib_main_t * vm, ipsec_sa_t * sa_update) if (sa->crypto_key_len + sa->integ_key_len > 0) { - if (add_del_sa_sess (sa_index, 0) < 0) + if (im->cb.add_del_sa_sess_cb && + im->cb.add_del_sa_sess_cb (sa_index, 0) < 0) return VNET_API_ERROR_SYSCALL_ERROR_1; } @@ -521,6 +509,19 @@ ipsec_rand_seed (void) RAND_seed ((const void *) &seed_data, sizeof (seed_data)); } +static clib_error_t * +ipsec_check_support (ipsec_sa_t * sa) +{ + if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + return clib_error_return (0, "unsupported aes-gcm-128 crypto-alg"); + if (sa->integ_alg == IPSEC_INTEG_ALG_NONE) + return clib_error_return (0, "unsupported none integ-alg"); + if (sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) + return clib_error_return (0, "unsupported aes-gcm-128 integ-alg"); + + return 0; +} + static clib_error_t * ipsec_init (vlib_main_t * vm) { @@ -547,14 +548,18 @@ ipsec_init (vlib_main_t * vm) ASSERT (node); im->error_drop_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) ESP_NODE); - + node = vlib_get_node_by_name (vm, (u8 *) "esp-encrypt"); ASSERT (node); im->esp_encrypt_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup"); + node = vlib_get_node_by_name (vm, (u8 *) "esp-decrypt"); ASSERT (node); - im->ip4_lookup_node_index = node->index; + im->esp_decrypt_node_index = node->index; + + im->esp_encrypt_next_index = IPSEC_OUTPUT_NEXT_ESP_ENCRYPT; + im->esp_decrypt_next_index = IPSEC_INPUT_NEXT_ESP_DECRYPT; + + im->cb.check_support_cb = ipsec_check_support; if ((error 
= vlib_call_init_function (vm, ipsec_cli_init))) return error; diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h index 32c7edfc..6726dba0 100644 --- a/src/vnet/ipsec/ipsec.h +++ b/src/vnet/ipsec/ipsec.h @@ -17,6 +17,33 @@ #define IPSEC_FLAG_IPSEC_GRE_TUNNEL (1 << 0) + +#define foreach_ipsec_output_next \ +_(DROP, "error-drop") \ +_(ESP_ENCRYPT, "esp-encrypt") + +#define _(v, s) IPSEC_OUTPUT_NEXT_##v, +typedef enum +{ + foreach_ipsec_output_next +#undef _ + IPSEC_OUTPUT_N_NEXT, +} ipsec_output_next_t; + + +#define foreach_ipsec_input_next \ +_(DROP, "error-drop") \ +_(ESP_DECRYPT, "esp-decrypt") + +#define _(v, s) IPSEC_INPUT_NEXT_##v, +typedef enum +{ + foreach_ipsec_input_next +#undef _ + IPSEC_INPUT_N_NEXT, +} ipsec_input_next_t; + + #define foreach_ipsec_policy_action \ _(0, BYPASS, "bypass") \ _(1, DISCARD, "discard") \ @@ -31,20 +58,12 @@ typedef enum IPSEC_POLICY_N_ACTION, } ipsec_policy_action_t; -#if DPDK_CRYPTO==1 #define foreach_ipsec_crypto_alg \ _(0, NONE, "none") \ _(1, AES_CBC_128, "aes-cbc-128") \ _(2, AES_CBC_192, "aes-cbc-192") \ _(3, AES_CBC_256, "aes-cbc-256") \ _(4, AES_GCM_128, "aes-gcm-128") -#else -#define foreach_ipsec_crypto_alg \ - _(0, NONE, "none") \ - _(1, AES_CBC_128, "aes-cbc-128") \ - _(2, AES_CBC_192, "aes-cbc-192") \ - _(3, AES_CBC_256, "aes-cbc-256") -#endif typedef enum { @@ -54,7 +73,6 @@ typedef enum IPSEC_CRYPTO_N_ALG, } ipsec_crypto_alg_t; -#if DPDK_CRYPTO==1 #define foreach_ipsec_integ_alg \ _(0, NONE, "none") \ _(1, MD5_96, "md5-96") /* RFC2403 */ \ @@ -63,17 +81,7 @@ typedef enum _(4, SHA_256_128, "sha-256-128") /* RFC4868 */ \ _(5, SHA_384_192, "sha-384-192") /* RFC4868 */ \ _(6, SHA_512_256, "sha-512-256") /* RFC4868 */ \ - _(7, AES_GCM_128, "aes-gcm-128") -#else -#define foreach_ipsec_integ_alg \ - _(0, NONE, "none") \ - _(1, MD5_96, "md5-96") /* RFC2403 */ \ - _(2, SHA1_96, "sha1-96") /* RFC2404 */ \ - _(3, SHA_256_96, "sha-256-96") /* draft-ietf-ipsec-ciph-sha-256-00 */ \ - _(4, SHA_256_128, "sha-256-128") /* RFC4868 */ \ - _(5, SHA_384_192, "sha-384-192") /* RFC4868 */ \ - _(6, SHA_512_256, "sha-512-256") /* RFC4868 */ -#endif + _(7, AES_GCM_128, "aes-gcm-128") /* RFC4106 */ typedef enum { @@ -223,6 +231,12 @@ typedef struct u32 hw_if_index; } ipsec_tunnel_if_t; +typedef struct +{ + i32 (*add_del_sa_sess_cb) (u32 sa_index, u8 is_add); + clib_error_t *(*check_support_cb) (ipsec_sa_t * sa); +} ipsec_main_callbacks_t; + typedef struct { /* pool of tunnel instances */ @@ -250,11 +264,16 @@ typedef struct uword *sa_index_by_sa_id; uword *ipsec_if_pool_index_by_key; - /* node indexes */ + /* node indeces */ u32 error_drop_node_index; - u32 ip4_lookup_node_index; u32 esp_encrypt_node_index; + u32 esp_decrypt_node_index; + /* next node indeces */ + u32 esp_encrypt_next_index; + u32 esp_decrypt_next_index; + /* callbacks */ + ipsec_main_callbacks_t cb; } ipsec_main_t; ipsec_main_t ipsec_main; diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index 9bcf63b4..30732266 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -177,6 +177,7 @@ static void vl_api_ipsec_sad_add_del_entry_t_handler vl_api_ipsec_sad_add_del_entry_reply_t *rmp; int rv; #if WITH_LIBSSL > 0 + ipsec_main_t *im = &ipsec_main; ipsec_sa_t sa; memset (&sa, 0, sizeof (sa)); @@ -204,11 +205,7 @@ static void vl_api_ipsec_sad_add_del_entry_t_handler sa.crypto_key_len = mp->crypto_key_length; clib_memcpy (&sa.crypto_key, mp->crypto_key, sizeof (sa.crypto_key)); /* check for unsupported integ-alg */ -#if DPDK_CRYPTO==1 if 
(mp->integrity_algorithm < IPSEC_INTEG_ALG_NONE || -#else - if (mp->integrity_algorithm < IPSEC_INTEG_ALG_SHA1_96 || -#endif mp->integrity_algorithm >= IPSEC_INTEG_N_ALG) { clib_warning ("unsupported integ-alg: '%U'", format_ipsec_integ_alg, @@ -217,35 +214,6 @@ static void vl_api_ipsec_sad_add_del_entry_t_handler goto out; } -#if DPDK_CRYPTO==1 - /*Special cases, aes-gcm-128 encryption */ - if (mp->crypto_algorithm == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - if (mp->integrity_algorithm != IPSEC_INTEG_ALG_NONE - && mp->integrity_algorithm != IPSEC_INTEG_ALG_AES_GCM_128) - { - clib_warning - ("unsupported: aes-gcm-128 crypto-alg needs none as integ-alg"); - rv = VNET_API_ERROR_UNIMPLEMENTED; - goto out; - } - else /*set integ-alg internally to aes-gcm-128 */ - mp->integrity_algorithm = IPSEC_INTEG_ALG_AES_GCM_128; - } - else if (mp->integrity_algorithm == IPSEC_INTEG_ALG_AES_GCM_128) - { - clib_warning ("unsupported integ-alg: aes-gcm-128"); - rv = VNET_API_ERROR_UNIMPLEMENTED; - goto out; - } - else if (mp->integrity_algorithm == IPSEC_INTEG_ALG_NONE) - { - clib_warning ("unsupported integ-alg: none"); - rv = VNET_API_ERROR_UNIMPLEMENTED; - goto out; - } -#endif - sa.integ_alg = mp->integrity_algorithm; sa.integ_key_len = mp->integrity_key_length; clib_memcpy (&sa.integ_key, mp->integrity_key, sizeof (sa.integ_key)); @@ -263,6 +231,15 @@ static void vl_api_ipsec_sad_add_del_entry_t_handler clib_memcpy (&sa.tunnel_dst_addr.ip4.data, mp->tunnel_dst_address, 4); } + ASSERT (im->cb.check_support_cb); + clib_error_t *err = im->cb.check_support_cb (&sa); + if (err) + { + clib_warning ("%s", err->what); + rv = VNET_API_ERROR_UNIMPLEMENTED; + goto out; + } + rv = ipsec_add_del_sa (vm, &sa, mp->is_add); #else rv = VNET_API_ERROR_UNIMPLEMENTED; diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c index 7ab85d4a..3c1e26f2 100644 --- a/src/vnet/ipsec/ipsec_cli.c +++ b/src/vnet/ipsec/ipsec_cli.c @@ -67,10 +67,12 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { + ipsec_main_t *im = &ipsec_main; unformat_input_t _line_input, *line_input = &_line_input; ipsec_sa_t sa; int is_add = ~0; u8 *ck = 0, *ik = 0; + clib_error_t *err = 0; memset (&sa, 0, sizeof (sa)); @@ -109,11 +111,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, else if (unformat (line_input, "integ-alg %U", unformat_ipsec_integ_alg, &sa.integ_alg)) { -#if DPDK_CRYPTO==1 - if (sa.integ_alg < IPSEC_INTEG_ALG_NONE || -#else if (sa.integ_alg < IPSEC_INTEG_ALG_SHA1_96 || -#endif sa.integ_alg >= IPSEC_INTEG_N_ALG) return clib_error_return (0, "unsupported integ-alg: '%U'", format_ipsec_integ_alg, sa.integ_alg); @@ -141,23 +139,6 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, format_unformat_error, line_input); } -#if DPDK_CRYPTO==1 - /*Special cases, aes-gcm-128 encryption */ - if (sa.crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - if (sa.integ_alg != IPSEC_INTEG_ALG_NONE - && sa.integ_alg != IPSEC_INTEG_ALG_AES_GCM_128) - return clib_error_return (0, - "unsupported: aes-gcm-128 crypto-alg needs none as integ-alg"); - else /*set integ-alg internally to aes-gcm-128 */ - sa.integ_alg = IPSEC_INTEG_ALG_AES_GCM_128; - } - else if (sa.integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) - return clib_error_return (0, "unsupported integ-alg: aes-gcm-128"); - else if (sa.integ_alg == IPSEC_INTEG_ALG_NONE) - return clib_error_return (0, "unsupported integ-alg: none"); -#endif - unformat_free (line_input); if (sa.crypto_key_len > sizeof (sa.crypto_key)) @@ -172,6 +153,14 @@ 
ipsec_sa_add_del_command_fn (vlib_main_t * vm, if (ik) strncpy ((char *) sa.integ_key, (char *) ik, sa.integ_key_len); + if (is_add) + { + ASSERT (im->cb.check_support_cb); + err = im->cb.check_support_cb (&sa); + if (err) + return err; + } + ipsec_add_del_sa (vm, &sa, is_add); return 0; diff --git a/src/vnet/ipsec/ipsec_if.c b/src/vnet/ipsec/ipsec_if.c index a8da046f..ca6b0092 100644 --- a/src/vnet/ipsec/ipsec_if.c +++ b/src/vnet/ipsec/ipsec_if.c @@ -20,20 +20,7 @@ #include #include -#if DPDK_CRYPTO==1 -#include -#else #include -#endif - -#if DPDK_CRYPTO==0 -/* dummy function */ -static int -add_del_sa_sess (u32 sa_index, u8 is_add) -{ - return 0; -} -#endif void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); @@ -52,6 +39,39 @@ dummy_interface_tx (vlib_main_t * vm, return frame->n_vectors; } +static clib_error_t * +ipsec_admin_up_down_function (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + ipsec_main_t *im = &ipsec_main; + clib_error_t *err = 0; + ipsec_tunnel_if_t *t; + vnet_hw_interface_t *hi; + ipsec_sa_t *sa; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + { + t = pool_elt_at_index (im->tunnel_interfaces, hi->hw_instance); + ASSERT (im->cb.check_support_cb); + sa = pool_elt_at_index (im->sad, t->input_sa_index); + err = im->cb.check_support_cb (sa); + if (err) + return err; + + sa = pool_elt_at_index (im->sad, t->output_sa_index); + err = im->cb.check_support_cb (sa); + if (err) + return err; + + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + } + else + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, 0 /* down */ ); + + return /* no error */ 0; +} + /* *INDENT-OFF* */ VNET_DEVICE_CLASS (ipsec_device_class, static) = { @@ -59,6 +79,7 @@ VNET_DEVICE_CLASS (ipsec_device_class, static) = .format_device_name = format_ipsec_name, .format_tx_trace = format_ipsec_if_output_trace, .tx_function = dummy_interface_tx, + .admin_up_down_function = ipsec_admin_up_down_function, }; /* *INDENT-ON* */ @@ -138,7 +159,9 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, args->remote_crypto_key_len); } - add_del_sa_sess (t->input_sa_index, args->is_add); + if (im->cb.add_del_sa_sess_cb && + im->cb.add_del_sa_sess_cb (t->input_sa_index, args->is_add) < 0) + return VNET_API_ERROR_SYSCALL_ERROR_1; pool_get (im->sad, sa); memset (sa, 0, sizeof (*sa)); @@ -165,7 +188,9 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, args->local_crypto_key_len); } - add_del_sa_sess (t->output_sa_index, args->is_add); + if (im->cb.add_del_sa_sess_cb && + im->cb.add_del_sa_sess_cb (t->output_sa_index, args->is_add) < 0) + return VNET_API_ERROR_SYSCALL_ERROR_1; hash_set (im->ipsec_if_pool_index_by_key, key, t - im->tunnel_interfaces); @@ -211,14 +236,16 @@ ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm, /* delete input and output SA */ sa = pool_elt_at_index (im->sad, t->input_sa_index); - if (add_del_sa_sess (t->input_sa_index, args->is_add) < 0) + if (im->cb.add_del_sa_sess_cb && + im->cb.add_del_sa_sess_cb (t->input_sa_index, args->is_add) < 0) return VNET_API_ERROR_SYSCALL_ERROR_1; pool_put (im->sad, sa); sa = pool_elt_at_index (im->sad, t->output_sa_index); - if (add_del_sa_sess (t->output_sa_index, args->is_add) < 0) + if (im->cb.add_del_sa_sess_cb && + im->cb.add_del_sa_sess_cb (t->output_sa_index, args->is_add) < 0) return VNET_API_ERROR_SYSCALL_ERROR_1; pool_put (im->sad, sa); @@ -310,7 +337,8 @@ ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index, sa->crypto_key_len = 
vec_len (key); clib_memcpy (sa->crypto_key, key, vec_len (key)); - if (add_del_sa_sess (t->input_sa_index, 0) < 0) + if (im->cb.add_del_sa_sess_cb && + im->cb.add_del_sa_sess_cb (t->output_sa_index, 0) < 0) return VNET_API_ERROR_SYSCALL_ERROR_1; } else if (type == IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG) @@ -320,7 +348,8 @@ ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index, sa->integ_key_len = vec_len (key); clib_memcpy (sa->integ_key, key, vec_len (key)); - if (add_del_sa_sess (t->output_sa_index, 0) < 0) + if (im->cb.add_del_sa_sess_cb && + im->cb.add_del_sa_sess_cb (t->output_sa_index, 0) < 0) return VNET_API_ERROR_SYSCALL_ERROR_1; } else if (type == IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO) @@ -330,7 +359,8 @@ ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index, sa->crypto_key_len = vec_len (key); clib_memcpy (sa->crypto_key, key, vec_len (key)); - if (add_del_sa_sess (t->input_sa_index, 0) < 0) + if (im->cb.add_del_sa_sess_cb && + im->cb.add_del_sa_sess_cb (t->input_sa_index, 0) < 0) return VNET_API_ERROR_SYSCALL_ERROR_1; } else if (type == IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG) @@ -340,7 +370,8 @@ ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index, sa->integ_key_len = vec_len (key); clib_memcpy (sa->integ_key, key, vec_len (key)); - if (add_del_sa_sess (t->output_sa_index, 0) < 0) + if (im->cb.add_del_sa_sess_cb && + im->cb.add_del_sa_sess_cb (t->input_sa_index, 0) < 0) return VNET_API_ERROR_SYSCALL_ERROR_1; } else diff --git a/src/vnet/ipsec/ipsec_if_in.c b/src/vnet/ipsec/ipsec_if_in.c index db75ab92..bd2a9f78 100644 --- a/src/vnet/ipsec/ipsec_if_in.c +++ b/src/vnet/ipsec/ipsec_if_in.c @@ -22,12 +22,6 @@ #include #include -#if DPDK_CRYPTO==1 -#define ESP_NODE "dpdk-esp-decrypt" -#else -#define ESP_NODE "esp-decrypt" -#endif - /* Statistics (not really errors) */ #define foreach_ipsec_if_input_error \ _(RX, "good packets received") @@ -46,12 +40,6 @@ typedef enum IPSEC_IF_INPUT_N_ERROR, } ipsec_if_input_error_t; -typedef enum -{ - IPSEC_IF_INPUT_NEXT_ESP_DECRYPT, - IPSEC_IF_INPUT_NEXT_DROP, - IPSEC_IF_INPUT_N_NEXT, -} ipsec_if_input_next_t; typedef struct { @@ -59,7 +47,6 @@ typedef struct u32 seq; } ipsec_if_input_trace_t; - u8 * format_ipsec_if_input_trace (u8 * s, va_list * args) { @@ -106,7 +93,7 @@ ipsec_if_input_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, ip0 = vlib_buffer_get_current (b0); esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); - next0 = IPSEC_IF_INPUT_NEXT_DROP; + next0 = IPSEC_INPUT_NEXT_DROP; u64 key = (u64) ip0->src_address.as_u32 << 32 | (u64) clib_net_to_host_u32 (esp0->spi); @@ -121,7 +108,7 @@ ipsec_if_input_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b0)->ipsec.flags = t->hw_if_index == ~0 ? 
IPSEC_FLAG_IPSEC_GRE_TUNNEL : 0; vlib_buffer_advance (b0, ip4_header_bytes (ip0)); - next0 = IPSEC_IF_INPUT_NEXT_ESP_DECRYPT; + next0 = im->esp_decrypt_next_index; } if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -156,12 +143,7 @@ VLIB_REGISTER_NODE (ipsec_if_input_node) = { .n_errors = ARRAY_LEN(ipsec_if_input_error_strings), .error_strings = ipsec_if_input_error_strings, - .n_next_nodes = IPSEC_IF_INPUT_N_NEXT, - - .next_nodes = { - [IPSEC_IF_INPUT_NEXT_ESP_DECRYPT] = ESP_NODE, - [IPSEC_IF_INPUT_NEXT_DROP] = "error-drop", - }, + .sibling_of = "ipsec-input-ip4", }; /* *INDENT-ON* */ diff --git a/src/vnet/ipsec/ipsec_if_out.c b/src/vnet/ipsec/ipsec_if_out.c index 8f062828..62ff67ac 100644 --- a/src/vnet/ipsec/ipsec_if_out.c +++ b/src/vnet/ipsec/ipsec_if_out.c @@ -21,12 +21,6 @@ #include -#if DPDK_CRYPTO==1 -#define ESP_NODE "dpdk-esp-encrypt" -#else -#define ESP_NODE "esp-encrypt" -#endif - /* Statistics (not really errors) */ #define foreach_ipsec_if_output_error \ _(TX, "good packets transmitted") @@ -45,12 +39,6 @@ typedef enum IPSEC_IF_OUTPUT_N_ERROR, } ipsec_if_output_error_t; -typedef enum -{ - IPSEC_IF_OUTPUT_NEXT_ESP_ENCRYPT, - IPSEC_IF_OUTPUT_NEXT_DROP, - IPSEC_IF_OUTPUT_N_NEXT, -} ipsec_if_output_next_t; typedef struct { @@ -58,7 +46,6 @@ typedef struct u32 seq; } ipsec_if_output_trace_t; - u8 * format_ipsec_if_output_trace (u8 * s, va_list * args) { @@ -106,7 +93,7 @@ ipsec_if_output_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); t0 = pool_elt_at_index (im->tunnel_interfaces, hi0->dev_instance); vnet_buffer (b0)->ipsec.sad_index = t0->output_sa_index; - next0 = IPSEC_IF_OUTPUT_NEXT_ESP_ENCRYPT; + next0 = im->esp_encrypt_next_index; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -142,12 +129,7 @@ VLIB_REGISTER_NODE (ipsec_if_output_node) = { .n_errors = ARRAY_LEN(ipsec_if_output_error_strings), .error_strings = ipsec_if_output_error_strings, - .n_next_nodes = IPSEC_IF_OUTPUT_N_NEXT, - - .next_nodes = { - [IPSEC_IF_OUTPUT_NEXT_ESP_ENCRYPT] = ESP_NODE, - [IPSEC_IF_OUTPUT_NEXT_DROP] = "error-drop", - }, + .sibling_of = "ipsec-output-ip4", }; /* *INDENT-ON* */ diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c index 4662c1a1..deaa7b7b 100644 --- a/src/vnet/ipsec/ipsec_input.c +++ b/src/vnet/ipsec/ipsec_input.c @@ -23,30 +23,10 @@ #include #include -#if DPDK_CRYPTO==1 -#define ESP_NODE "dpdk-esp-decrypt" -#else -#define ESP_NODE "esp-decrypt" -#endif - -#define foreach_ipsec_input_next \ -_(DROP, "error-drop") \ -_(ESP_DECRYPT, ESP_NODE) - -#define _(v, s) IPSEC_INPUT_NEXT_##v, -typedef enum -{ - foreach_ipsec_input_next -#undef _ - IPSEC_INPUT_N_NEXT, -} ipsec_input_next_t; - - #define foreach_ipsec_input_error \ _(RX_PKTS, "IPSEC pkts received") \ _(DECRYPTION_FAILED, "IPSEC decryption failed") - typedef enum { #define _(sym,str) IPSEC_INPUT_ERROR_##sym, @@ -262,7 +242,7 @@ ipsec_input_ip4_node_fn (vlib_main_t * vm, p0->counter.bytes += clib_net_to_host_u16 (ip0->length); vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; vnet_buffer (b0)->ipsec.flags = 0; - next0 = IPSEC_INPUT_NEXT_ESP_DECRYPT; + next0 = im->esp_decrypt_next_index; vlib_buffer_advance (b0, ip4_header_bytes (ip0)); goto trace0; } @@ -392,7 +372,7 @@ VLIB_NODE_FUNCTION_MULTIARCH (ipsec_input_ip4_node, ipsec_input_ip4_node_fn) p0->counter.bytes += header_size; vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; vnet_buffer (b0)->ipsec.flags = 0; - next0 = IPSEC_INPUT_NEXT_ESP_DECRYPT; + next0 = 
im->esp_decrypt_next_index; vlib_buffer_advance (b0, header_size); goto trace0; } diff --git a/src/vnet/ipsec/ipsec_output.c b/src/vnet/ipsec/ipsec_output.c index df93b5e4..1b8070d6 100644 --- a/src/vnet/ipsec/ipsec_output.c +++ b/src/vnet/ipsec/ipsec_output.c @@ -21,27 +21,8 @@ #include -#if DPDK_CRYPTO==1 -#define ESP_NODE "dpdk-esp-encrypt" -#else -#define ESP_NODE "esp-encrypt" -#endif - #if WITH_LIBSSL > 0 -#define foreach_ipsec_output_next \ -_(DROP, "error-drop") \ -_(ESP_ENCRYPT, ESP_NODE) - -#define _(v, s) IPSEC_OUTPUT_NEXT_##v, -typedef enum -{ - foreach_ipsec_output_next -#undef _ - IPSEC_OUTPUT_N_NEXT, -} ipsec_output_next_t; - - #define foreach_ipsec_output_error \ _(RX_PKTS, "IPSec pkts received") \ _(POLICY_DISCARD, "IPSec policy discard") \ _(POLICY_NO_MATCH, "IPSec policy (no match)") \ _(POLICY_PROTECT, "IPSec policy protect") \ _(POLICY_BYPASS, "IPSec policy bypass") \ _(ENCAPS_FAILED, "IPSec encapsulation failed") - typedef enum { #define _(sym,str) IPSEC_OUTPUT_ERROR_##sym, -- cgit 1.2.3-korg From ad623b1f71f0808916e513437246a0fb36174d5b Mon Sep 17 00:00:00 2001 From: Sergio Gonzalez Monroy Date: Wed, 1 Feb 2017 15:23:12 +0000 Subject: dpdk: fix crypto coverity warning Change-Id: I165b64fdc12dd2936df1958348e93b709ce0e784 Signed-off-by: Sergio Gonzalez Monroy --- src/vnet/devices/dpdk/ipsec/esp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/dpdk/ipsec/esp.h b/src/vnet/devices/dpdk/ipsec/esp.h index d414d679..d0b27618 100644 --- a/src/vnet/devices/dpdk/ipsec/esp.h +++ b/src/vnet/devices/dpdk/ipsec/esp.h @@ -186,7 +186,8 @@ create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, } else { - sa->salt = (u32) rand (); + u32 seed = (u32) clib_cpu_time_now (); + sa->salt = random_u32 (&seed); } cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; -- cgit 1.2.3-korg From 62411e7d82dc4562003651a53e723ea938be2ebd Mon Sep 17 00:00:00 2001 From: Steven Date: Fri, 3 Feb 2017 09:30:37 -0800 Subject: vhost-user: fix missing speculative enqueue unwind Running trex in a VM with a bad config, trex sent a bogus packet from the VM to the virtual interface. It caused a crash. Change-Id: I64d0197b444265553ab4c24f21e6a962e89cb587 Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index ac142867..9b8c1888 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -1677,6 +1677,10 @@ vhost_user_if_input (vlib_main_t * vm, if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len == 0)) { + /* Cancel speculation */ + to_next--; + n_left_to_next++; + /* * Checking if there are some left buffers. * If not, just rewind the used buffers and stop. 
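The unwind idiom added above generalizes to any VPP graph node that speculatively claims slots in the next frame before it knows whether a packet can actually be produced. The sketch below illustrates the convention in isolation; it is not the vhost-user code itself, and the per-node work function (do_rx_work) plus the usual input-node context (vm, node, next_index, buffers[], and an open next frame described by to_next / n_left_to_next) are assumed:

    /* Hedged sketch of speculative enqueue with unwind; not vhost-user code. */
    while (n_left > 0 && n_left_to_next > 0)
      {
        u32 bi0, next0 = next_index;

        /* Speculation: claim a slot in the next frame up front. */
        bi0 = buffers[0];
        to_next[0] = bi0;
        to_next += 1;
        n_left_to_next -= 1;

        if (PREDICT_FALSE (do_rx_work (vm, bi0, &next0) < 0))
          {
            /* Unwind the speculation before bailing out, otherwise the
             * frame would carry an uninitialized buffer index. */
            to_next -= 1;
            n_left_to_next += 1;
            break;
          }

        buffers += 1;
        n_left -= 1;
        vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                         to_next, n_left_to_next,
                                         bi0, next0);
      }

This is the pairing the fix restores: every to_next++ / n_left_to_next-- taken on the speculative path must be matched by a to_next-- / n_left_to_next++ on every early-exit path.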
-- cgit 1.2.3-korg From d0f673ee92121e13a88ad7002e0c860b2cfc5e4b Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 31 Jan 2017 17:29:33 +0100 Subject: dpdk: move to uio_pci_generic Change-Id: I3d8b7947ae6d721e9b514a59a7d2de49aed419b5 Signed-off-by: Damjan Marion --- build-root/deb/debian/vpp.service | 6 +++--- build-root/deb/debian/vpp.upstart | 2 +- build-root/rpm/vpp.spec | 2 +- src/vnet/devices/dpdk/init.c | 2 +- src/vpp/conf/startup.conf | 6 +++--- src/vpp/conf/startup.uiopcigeneric.conf | 18 ------------------ 6 files changed, 9 insertions(+), 27 deletions(-) delete mode 100644 src/vpp/conf/startup.uiopcigeneric.conf (limited to 'src/vnet/devices') diff --git a/build-root/deb/debian/vpp.service b/build-root/deb/debian/vpp.service index 40549856..aa1651c4 100644 --- a/build-root/deb/debian/vpp.service +++ b/build-root/deb/debian/vpp.service @@ -4,10 +4,10 @@ After=network.target [Service] Type=simple -ExecStartPre=-/bin/rm -f /dev/shm/db /dev/shm/global_vm /dev/shm/vpe-api -ExecStartPre=-/sbin/modprobe igb_uio +ExecStartPre=-/bin/rm -f /dev/shm/db /dev/shm/global_vm /dev/shm/vpe-api +ExecStartPre=-/sbin/modprobe uio_pci_generic ExecStart=/usr/bin/vpp -c /etc/vpp/startup.conf -ExecStopPost=/bin/rm -f /dev/shm/db /dev/shm/global_vm /dev/shm/vpe-api +ExecStopPost=/bin/rm -f /dev/shm/db /dev/shm/global_vm /dev/shm/vpe-api Restart=always [Install] diff --git a/build-root/deb/debian/vpp.upstart b/build-root/deb/debian/vpp.upstart index f5908783..62e1d278 100644 --- a/build-root/deb/debian/vpp.upstart +++ b/build-root/deb/debian/vpp.upstart @@ -8,7 +8,7 @@ respawn pre-start script rm -f /dev/shm/db /dev/shm/global_vm /dev/shm/vpe-api || true # should be there via dkms, but if not, start anyway - modprobe igb_uio || true + modprobe uio_pci_generic || true end script diff --git a/build-root/rpm/vpp.spec b/build-root/rpm/vpp.spec index 5575b5b1..95196e9b 100644 --- a/build-root/rpm/vpp.spec +++ b/build-root/rpm/vpp.spec @@ -118,7 +118,7 @@ mkdir -p -m755 %{buildroot}/usr/share/vpp/api mkdir -p -m755 %{buildroot}/etc/vpp mkdir -p -m755 %{buildroot}/etc/sysctl.d install -p -m 644 %{_mu_build_dir}/rpm/vpp.service %{buildroot}%{_unitdir} -install -p -m 644 %{_mu_build_dir}/../src/vpp/conf/startup.uiopcigeneric.conf %{buildroot}/etc/vpp/startup.conf +install -p -m 644 %{_mu_build_dir}/../src/vpp/conf/startup.conf %{buildroot}/etc/vpp/startup.conf install -p -m 644 %{_mu_build_dir}/../src/vpp/conf/80-vpp.conf %{buildroot}/etc/sysctl.d # # libraries diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index 01ef48cb..7249cc52 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -1165,7 +1165,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) } if (!conf->uio_driver_name) - conf->uio_driver_name = format (0, "igb_uio%c", 0); + conf->uio_driver_name = format (0, "uio_pci_generic%c", 0); /* * Use 1G huge pages if available. 
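For deployments, the practical consequence of this change is that uio_pci_generic is loaded and used by default, and igb_uio (which required the out-of-tree DPDK kernel module) becomes an explicit opt-in. A minimal illustration of overriding the new default follows; the PCI address and the vfio-pci choice are example values, not defaults, and the startup.conf hunk in the next diff documents the same inversion:

    # load the alternative kernel module first
    $ sudo modprobe vfio-pci

    # /etc/vpp/startup.conf -- illustrative fragment
    dpdk {
      uio-driver vfio-pci
      dev 0000:02:00.0
    }

With no uio-driver stanza at all, VPP now behaves as if "uio-driver uio_pci_generic" had been configured, which is what the init.c default above implements.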
diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf index bce00202..a100e3e6 100644 --- a/src/vpp/conf/startup.conf +++ b/src/vpp/conf/startup.conf @@ -80,9 +80,9 @@ dpdk { # num-rx-queues 2 # } - ## Change UIO driver used by VPP, Options are: uio_pci_generic, vfio-pci - ## and igb_uio (default) - # uio-driver uio_pci_generic + ## Change UIO driver used by VPP, Options are: igb_uio, vfio-pci + ## and uio_pci_generic (default) + # uio-driver vfio-pci ## Disable mutli-segment buffers, improves performance but ## disables Jumbo MTU support diff --git a/src/vpp/conf/startup.uiopcigeneric.conf b/src/vpp/conf/startup.uiopcigeneric.conf deleted file mode 100644 index 03a89dff..00000000 --- a/src/vpp/conf/startup.uiopcigeneric.conf +++ /dev/null @@ -1,18 +0,0 @@ - -unix { - nodaemon - log /tmp/vpp.log - full-coredump -} - -dpdk { - uio-driver uio_pci_generic -} - -api-trace { - on -} - -api-segment { - gid vpp -} -- cgit 1.2.3-korg From bd69a5f24c6e83e9101f203dd124864fb2877a17 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Sun, 5 Feb 2017 23:44:42 +0100 Subject: vlib: remove aligned/unaligned buffers scheme Change-Id: I4433eaed3f4e201edc329c4842cbbf74beb19a9a Signed-off-by: Damjan Marion --- src/vlib/buffer.c | 220 +++++------------------------------------ src/vlib/buffer.h | 13 +-- src/vlib/buffer_funcs.h | 53 +++------- src/vlib/threads.c | 3 +- src/vnet/devices/dpdk/buffer.c | 131 +++--------------------- src/vnet/replication.c | 23 +---- 6 files changed, 57 insertions(+), 386 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index ea4960e2..95b4344f 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -304,63 +304,6 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, } } -#define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) - -/* Make sure we have at least given number of unaligned buffers. */ -void -vlib_buffer_free_list_fill_unaligned (vlib_main_t * vm, - vlib_buffer_free_list_t * free_list, - uword n_unaligned_buffers) -{ - word la = vec_len (free_list->aligned_buffers); - word lu = vec_len (free_list->unaligned_buffers); - - /* Aligned come in aligned copy-sized chunks. */ - ASSERT (la % BUFFERS_PER_COPY == 0); - - ASSERT (la >= n_unaligned_buffers); - - while (lu < n_unaligned_buffers) - { - /* Copy 4 buffers from end of aligned vector to unaligned vector. */ - vec_add (free_list->unaligned_buffers, - free_list->aligned_buffers + la - BUFFERS_PER_COPY, - BUFFERS_PER_COPY); - la -= BUFFERS_PER_COPY; - lu += BUFFERS_PER_COPY; - } - _vec_len (free_list->aligned_buffers) = la; -} - -/* After free aligned buffers may not contain even sized chunks. */ -void -vlib_buffer_free_list_trim_aligned (vlib_buffer_free_list_t * f) -{ - uword l, n_trim; - - /* Add unaligned to aligned before trim. */ - l = vec_len (f->unaligned_buffers); - if (l > 0) - { - vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, l, - /* align */ sizeof (vlib_copy_unit_t)); - - _vec_len (f->unaligned_buffers) = 0; - } - - /* Remove unaligned buffers from end of aligned vector and save for next trim. */ - l = vec_len (f->aligned_buffers); - n_trim = l % BUFFERS_PER_COPY; - if (n_trim) - { - /* Trim aligned -> unaligned. */ - vec_add (f->unaligned_buffers, f->aligned_buffers + l - n_trim, n_trim); - - /* Remove from aligned. 
*/ - _vec_len (f->aligned_buffers) = l - n_trim; - } -} - void vlib_buffer_merge_free_lists (vlib_buffer_free_list_t * dst, vlib_buffer_free_list_t * src) @@ -368,23 +311,12 @@ vlib_buffer_merge_free_lists (vlib_buffer_free_list_t * dst, uword l; u32 *d; - vlib_buffer_free_list_trim_aligned (src); - vlib_buffer_free_list_trim_aligned (dst); - - l = vec_len (src->aligned_buffers); - if (l > 0) - { - vec_add2_aligned (dst->aligned_buffers, d, l, - /* align */ sizeof (vlib_copy_unit_t)); - clib_memcpy (d, src->aligned_buffers, l * sizeof (d[0])); - vec_free (src->aligned_buffers); - } - - l = vec_len (src->unaligned_buffers); + l = vec_len (src->buffers); if (l > 0) { - vec_add (dst->unaligned_buffers, src->unaligned_buffers, l); - vec_free (src->unaligned_buffers); + vec_add2_aligned (dst->buffers, d, l, CLIB_CACHE_LINE_BYTES); + clib_memcpy (d, src->buffers, l * sizeof (d[0])); + vec_free (src->buffers); } } @@ -447,8 +379,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, ASSERT (f - bm->buffer_free_list_pool == wf - wbm->buffer_free_list_pool); wf[0] = f[0]; - wf->aligned_buffers = 0; - wf->unaligned_buffers = 0; + wf->buffers = 0; wf->n_alloc = 0; } @@ -505,8 +436,7 @@ del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) vm->os_physmem_free (f->buffer_memory_allocated[i]); vec_free (f->name); vec_free (f->buffer_memory_allocated); - vec_free (f->unaligned_buffers); - vec_free (f->aligned_buffers); + vec_free (f->buffers); } /* Add buffer free list. */ @@ -522,8 +452,7 @@ vlib_buffer_delete_free_list_internal (vlib_main_t * vm, u32 free_list_index) f = vlib_buffer_get_free_list (vm, free_list_index); - ASSERT (vec_len (f->unaligned_buffers) + vec_len (f->aligned_buffers) == - f->n_alloc); + ASSERT (vec_len (f->buffers) == f->n_alloc); merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); if (merge_index != ~0 && merge_index != free_list_index) { @@ -558,15 +487,13 @@ fill_free_list (vlib_main_t * vm, u32 *bi; u32 n_remaining, n_alloc, n_this_chunk; - vlib_buffer_free_list_trim_aligned (fl); - /* Already have enough free buffers on free list? */ - n = min_free_buffers - vec_len (fl->aligned_buffers); + n = min_free_buffers - vec_len (fl->buffers); if (n <= 0) return min_free_buffers; /* Always allocate round number of buffers. */ - n = round_pow2 (n, BUFFERS_PER_COPY); + n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32)); /* Always allocate new buffers in reasonably large sized chunks. 
*/ n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); @@ -594,8 +521,7 @@ fill_free_list (vlib_main_t * vm, n_remaining -= n_this_chunk; b = buffers; - vec_add2_aligned (fl->aligned_buffers, bi, n_this_chunk, - sizeof (vlib_copy_unit_t)); + vec_add2_aligned (fl->buffers, bi, n_this_chunk, CLIB_CACHE_LINE_BYTES); for (i = 0; i < n_this_chunk; i++) { bi[i] = vlib_get_buffer_index (vm, b); @@ -621,121 +547,28 @@ fill_free_list (vlib_main_t * vm, return n_alloc; } -always_inline uword -copy_alignment (u32 * x) -{ - return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; -} - - static u32 alloc_from_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * free_list, u32 * alloc_buffers, u32 n_alloc_buffers) { - u32 *dst, *u_src; - uword u_len, n_left; - uword n_unaligned_start, n_unaligned_end, n_filled; + u32 *dst, *src; + uword len; + uword n_filled; - n_left = n_alloc_buffers; dst = alloc_buffers; - n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst)) - & (BUFFERS_PER_COPY - 1)); n_filled = fill_free_list (vm, free_list, n_alloc_buffers); if (n_filled == 0) return 0; - n_left = n_filled < n_left ? n_filled : n_left; - n_alloc_buffers = n_left; - - if (n_unaligned_start >= n_left) - { - n_unaligned_start = n_left; - n_unaligned_end = 0; - } - else - n_unaligned_end = copy_alignment (dst + n_alloc_buffers); - - vlib_buffer_free_list_fill_unaligned (vm, free_list, - n_unaligned_start + n_unaligned_end); - - u_len = vec_len (free_list->unaligned_buffers); - u_src = free_list->unaligned_buffers + u_len - 1; + len = vec_len (free_list->buffers); + ASSERT (len >= n_alloc_buffers); - if (n_unaligned_start) - { - uword n_copy = n_unaligned_start; - if (n_copy > n_left) - n_copy = n_left; - n_left -= n_copy; - - while (n_copy > 0) - { - *dst++ = *u_src--; - n_copy--; - u_len--; - } - - /* Now dst should be aligned. */ - if (n_left > 0) - ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0); - } - - /* Aligned copy. */ - { - vlib_copy_unit_t *d, *s; - uword n_copy; - - if (vec_len (free_list->aligned_buffers) < - ((n_left / BUFFERS_PER_COPY) * BUFFERS_PER_COPY)) - abort (); - - n_copy = n_left / BUFFERS_PER_COPY; - n_left = n_left % BUFFERS_PER_COPY; - - /* Remove buffers from aligned free list. */ - _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY; - - s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers); - d = (vlib_copy_unit_t *) dst; - - /* Fast path loop. */ - while (n_copy >= 4) - { - d[0] = s[0]; - d[1] = s[1]; - d[2] = s[2]; - d[3] = s[3]; - n_copy -= 4; - s += 4; - d += 4; - } - - while (n_copy >= 1) - { - d[0] = s[0]; - n_copy -= 1; - s += 1; - d += 1; - } - - dst = (void *) d; - } - - /* Unaligned copy. */ - ASSERT (n_unaligned_end == n_left); - while (n_left > 0) - { - *dst++ = *u_src--; - n_left--; - u_len--; - } + src = free_list->buffers + len - n_alloc_buffers; + clib_memcpy (dst, src, n_alloc_buffers * sizeof (u32)); - if (!free_list->unaligned_buffers) - ASSERT (u_len == 0); - else - _vec_len (free_list->unaligned_buffers) = u_len; + _vec_len (free_list->buffers) -= n_alloc_buffers; /* Verify that buffers are known free. 
*/ vlib_buffer_validate_alloc_free (vm, alloc_buffers, @@ -831,8 +664,7 @@ again: vlib_buffer_validate_alloc_free (vm, b, n_left, VLIB_BUFFER_KNOWN_ALLOCATED); - vec_add2_aligned (fl->aligned_buffers, f, n_left, - /* align */ sizeof (vlib_copy_unit_t)); + vec_add2_aligned (fl->buffers, f, n_left, CLIB_CACHE_LINE_BYTES); n = next_to_free[i_next_to_free]; while (n_left >= 4) @@ -890,7 +722,7 @@ again: f -= 2; n -= free_next0 + free_next1; - _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers; + _vec_len (fl->buffers) = f - fl->buffers; fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0); fl1 = pool_elt_at_index (bm->buffer_free_list_pool, fi1); @@ -924,8 +756,7 @@ again: fl = pool_elt_at_index (bm->buffer_free_list_pool, fi); } - vec_add2_aligned (fl->aligned_buffers, f, n_left, - /* align */ sizeof (vlib_copy_unit_t)); + vec_add2_aligned (fl->buffers, f, n_left, CLIB_CACHE_LINE_BYTES); } while (n_left >= 1) @@ -968,7 +799,7 @@ again: f -= 1; n -= free_next0; - _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers; + _vec_len (fl->buffers) = f - fl->buffers; fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0); @@ -986,8 +817,7 @@ again: fi = fi0; fl = pool_elt_at_index (bm->buffer_free_list_pool, fi); - vec_add2_aligned (fl->aligned_buffers, f, n_left, - /* align */ sizeof (vlib_copy_unit_t)); + vec_add2_aligned (fl->buffers, f, n_left, CLIB_CACHE_LINE_BYTES); } if (follow_buffer_next && ((n_left = n - next_to_free[i_next_to_free]) > 0)) @@ -997,7 +827,7 @@ again: goto again; } - _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers; + _vec_len (fl->buffers) = f - fl->buffers; if (vec_len (announce_list)) { @@ -1239,7 +1069,7 @@ format_vlib_buffer_free_list (u8 * s, va_list * va) "#Alloc", "#Free"); size = sizeof (vlib_buffer_t) + f->n_data_bytes; - n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers); + n_free = vec_len (f->buffers); bytes_alloc = size * f->n_alloc; bytes_free = size * n_free; diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index d270c08a..fffb50c8 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -50,12 +50,6 @@ #define VLIB_BUFFER_DATA_SIZE (2048) #define VLIB_BUFFER_PRE_DATA_SIZE __PRE_DATA_SIZE -#if defined (CLIB_HAVE_VEC128) || defined (__aarch64__) -typedef u8x16 vlib_copy_unit_t; -#else -typedef u64 vlib_copy_unit_t; -#endif - /** \file vlib buffer structure definition and a few select access methods. This structure and the buffer allocation @@ -262,11 +256,8 @@ typedef struct vlib_buffer_free_list_t /* Total number of buffers allocated from this free list. */ u32 n_alloc; - /* Vector of free buffers. Each element is a byte offset into I/O heap. - Aligned vectors always has naturally aligned vlib_copy_unit_t sized chunks - of buffer indices. Unaligned vector has any left over. This is meant to - speed up copy routines. */ - u32 *aligned_buffers, *unaligned_buffers; + /* Vector of free buffers. Each element is a byte offset into I/O heap. */ + u32 *buffers; /* Memory chunks allocated for this free list recorded here so they can be freed when free list diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 543a903c..fd051de5 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -350,10 +350,6 @@ vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes, char *fmt, ...); - -/* After free aligned buffers may not contain even sized chunks. 
*/ -void vlib_buffer_free_list_trim_aligned (vlib_buffer_free_list_t * f); - /* Merge two free lists */ void vlib_buffer_merge_free_lists (vlib_buffer_free_list_t * dst, vlib_buffer_free_list_t * src); @@ -664,23 +660,14 @@ unserialize_vlib_buffer_n_bytes (serialize_main_t * m) return n; } -typedef union -{ - vlib_buffer_t b; - vlib_copy_unit_t i[sizeof (vlib_buffer_t) / sizeof (vlib_copy_unit_t)]; -} -vlib_buffer_union_t; - /* Set a buffer quickly into "uninitialized" state. We want this to be extremely cheap and arrange for all fields that need to be initialized to be in the first 128 bits of the buffer. */ always_inline void -vlib_buffer_init_for_free_list (vlib_buffer_t * _dst, +vlib_buffer_init_for_free_list (vlib_buffer_t * dst, vlib_buffer_free_list_t * fl) { - vlib_buffer_union_t *dst = (vlib_buffer_union_t *) _dst; - vlib_buffer_union_t *src = - (vlib_buffer_union_t *) & fl->buffer_init_template; + vlib_buffer_t *src = &fl->buffer_init_template; /* Make sure vlib_buffer_t is cacheline aligned and sized */ ASSERT (STRUCT_OFFSET_OF (vlib_buffer_t, cacheline0) == 0); @@ -692,21 +679,14 @@ vlib_buffer_init_for_free_list (vlib_buffer_t * _dst, /* Make sure buffer template is sane. */ ASSERT (fl->index == fl->buffer_init_template.free_list_index); - /* Copy template from src->current_data thru src->free_list_index */ - dst->i[0] = src->i[0]; - if (1 * sizeof (dst->i[0]) < 16) - dst->i[1] = src->i[1]; - if (2 * sizeof (dst->i[0]) < 16) - dst->i[2] = src->i[2]; - /* Make sure it really worked. */ -#define _(f) ASSERT (dst->b.f == src->b.f) +#define _(f) dst->f = src->f _(current_data); _(current_length); _(flags); _(free_list_index); #undef _ - ASSERT (dst->b.total_length_not_including_first_buffer == 0); + ASSERT (dst->total_length_not_including_first_buffer == 0); } always_inline void @@ -718,39 +698,28 @@ vlib_buffer_add_to_free_list (vlib_main_t * vm, b = vlib_get_buffer (vm, buffer_index); if (PREDICT_TRUE (do_init)) vlib_buffer_init_for_free_list (b, f); - vec_add1_aligned (f->aligned_buffers, buffer_index, - sizeof (vlib_copy_unit_t)); + vec_add1_aligned (f->buffers, buffer_index, CLIB_CACHE_LINE_BYTES); } always_inline void -vlib_buffer_init_two_for_free_list (vlib_buffer_t * _dst0, - vlib_buffer_t * _dst1, +vlib_buffer_init_two_for_free_list (vlib_buffer_t * dst0, + vlib_buffer_t * dst1, vlib_buffer_free_list_t * fl) { - vlib_buffer_union_t *dst0 = (vlib_buffer_union_t *) _dst0; - vlib_buffer_union_t *dst1 = (vlib_buffer_union_t *) _dst1; - vlib_buffer_union_t *src = - (vlib_buffer_union_t *) & fl->buffer_init_template; + vlib_buffer_t *src = &fl->buffer_init_template; /* Make sure buffer template is sane. */ ASSERT (fl->index == fl->buffer_init_template.free_list_index); - /* Copy template from src->current_data thru src->free_list_index */ - dst0->i[0] = dst1->i[0] = src->i[0]; - if (1 * sizeof (dst0->i[0]) < 16) - dst0->i[1] = dst1->i[1] = src->i[1]; - if (2 * sizeof (dst0->i[0]) < 16) - dst0->i[2] = dst1->i[2] = src->i[2]; - /* Make sure it really worked. 
*/ -#define _(f) ASSERT (dst0->b.f == src->b.f && dst1->b.f == src->b.f) +#define _(f) dst0->f = src->f; dst1->f = src->f _(current_data); _(current_length); _(flags); _(free_list_index); #undef _ - ASSERT (dst0->b.total_length_not_including_first_buffer == 0); - ASSERT (dst1->b.total_length_not_including_first_buffer == 0); + ASSERT (dst0->total_length_not_including_first_buffer == 0); + ASSERT (dst1->total_length_not_including_first_buffer == 0); } #if CLIB_DEBUG > 0 diff --git a/src/vlib/threads.c b/src/vlib/threads.c index b3bbd30e..e3ea3c9c 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -708,8 +708,7 @@ start_workers (vlib_main_t * vm) == fl_clone - bm_clone->buffer_free_list_pool); fl_clone[0] = fl_orig[0]; - fl_clone->aligned_buffers = 0; - fl_clone->unaligned_buffers = 0; + fl_clone->buffers = 0; fl_clone->n_alloc = 0; })); /* *INDENT-ON* */ diff --git a/src/vnet/devices/dpdk/buffer.c b/src/vnet/devices/dpdk/buffer.c index 038f46d9..43ceb91e 100644 --- a/src/vnet/devices/dpdk/buffer.c +++ b/src/vnet/devices/dpdk/buffer.c @@ -79,8 +79,6 @@ STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); -#define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) - static void del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) { @@ -88,23 +86,15 @@ del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) struct rte_mbuf *mb; vlib_buffer_t *b; - for (i = 0; i < vec_len (f->unaligned_buffers); i++) + for (i = 0; i < vec_len (f->buffers); i++) { - b = vlib_get_buffer (vm, f->unaligned_buffers[i]); - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - for (i = 0; i < vec_len (f->aligned_buffers); i++) - { - b = vlib_get_buffer (vm, f->aligned_buffers[i]); + b = vlib_get_buffer (vm, f->buffers[i]); mb = rte_mbuf_from_vlib_buffer (b); ASSERT (rte_mbuf_refcnt_read (mb) == 1); rte_pktmbuf_free (mb); } vec_free (f->name); - vec_free (f->unaligned_buffers); - vec_free (f->aligned_buffers); + vec_free (f->buffers); } /* Add buffer free list. */ @@ -162,15 +152,13 @@ fill_free_list (vlib_main_t * vm, if (PREDICT_FALSE (rmp == 0)) return 0; - vlib_buffer_free_list_trim_aligned (fl); - /* Already have enough free buffers on free list? */ - n = min_free_buffers - vec_len (fl->aligned_buffers); + n = min_free_buffers - vec_len (fl->buffers); if (n <= 0) return min_free_buffers; /* Always allocate round number of buffers. */ - n = round_pow2 (n, BUFFERS_PER_COPY); + n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32)); /* Always allocate new buffers in reasonably large sized chunks. 
*/ n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); @@ -192,7 +180,7 @@ fill_free_list (vlib_main_t * vm, b = vlib_buffer_from_rte_mbuf (mb); bi = vlib_get_buffer_index (vm, b); - vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t)); + vec_add1_aligned (fl->buffers, bi, CLIB_CACHE_LINE_BYTES); n_alloc++; n_remaining--; @@ -207,120 +195,27 @@ fill_free_list (vlib_main_t * vm, return n; } -always_inline uword -copy_alignment (u32 * x) -{ - return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; -} - static u32 alloc_from_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * free_list, u32 * alloc_buffers, u32 n_alloc_buffers) { - u32 *dst, *u_src; - uword u_len, n_left; - uword n_unaligned_start, n_unaligned_end, n_filled; + u32 *dst, *src; + uword len, n_filled; - n_left = n_alloc_buffers; dst = alloc_buffers; - n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst)) - & (BUFFERS_PER_COPY - 1)); n_filled = fill_free_list (vm, free_list, n_alloc_buffers); if (n_filled == 0) return 0; - n_left = n_filled < n_left ? n_filled : n_left; - n_alloc_buffers = n_left; - - if (n_unaligned_start >= n_left) - { - n_unaligned_start = n_left; - n_unaligned_end = 0; - } - else - n_unaligned_end = copy_alignment (dst + n_alloc_buffers); - - vlib_buffer_free_list_fill_unaligned (vm, free_list, - n_unaligned_start + n_unaligned_end); - - u_len = vec_len (free_list->unaligned_buffers); - u_src = free_list->unaligned_buffers + u_len - 1; - - if (n_unaligned_start) - { - uword n_copy = n_unaligned_start; - if (n_copy > n_left) - n_copy = n_left; - n_left -= n_copy; - - while (n_copy > 0) - { - *dst++ = *u_src--; - n_copy--; - u_len--; - } - - /* Now dst should be aligned. */ - if (n_left > 0) - ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0); - } - - /* Aligned copy. */ - { - vlib_copy_unit_t *d, *s; - uword n_copy; - - if (vec_len (free_list->aligned_buffers) < - ((n_left / BUFFERS_PER_COPY) * BUFFERS_PER_COPY)) - abort (); - - n_copy = n_left / BUFFERS_PER_COPY; - n_left = n_left % BUFFERS_PER_COPY; - - /* Remove buffers from aligned free list. */ - _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY; - - s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers); - d = (vlib_copy_unit_t *) dst; + len = vec_len (free_list->buffers); + ASSERT (len >= n_alloc_buffers); - /* Fast path loop. */ - while (n_copy >= 4) - { - d[0] = s[0]; - d[1] = s[1]; - d[2] = s[2]; - d[3] = s[3]; - n_copy -= 4; - s += 4; - d += 4; - } - - while (n_copy >= 1) - { - d[0] = s[0]; - n_copy -= 1; - s += 1; - d += 1; - } - - dst = (void *) d; - } - - /* Unaligned copy. */ - ASSERT (n_unaligned_end == n_left); - while (n_left > 0) - { - *dst++ = *u_src--; - n_left--; - u_len--; - } + src = free_list->buffers + len - n_alloc_buffers; + clib_memcpy (dst, src, n_alloc_buffers * sizeof (u32)); - if (!free_list->unaligned_buffers) - ASSERT (u_len == 0); - else - _vec_len (free_list->unaligned_buffers) = u_len; + _vec_len (free_list->buffers) -= n_alloc_buffers; return n_alloc_buffers; } diff --git a/src/vnet/replication.c b/src/vnet/replication.c index 561c86cd..02755195 100644 --- a/src/vnet/replication.c +++ b/src/vnet/replication.c @@ -168,32 +168,20 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl) * Note: this could be sped up if the node index were stuffed into * the freelist itself. 
*/ - if (vec_len (fl->aligned_buffers) > 0) + if (vec_len (fl->buffers) > 0) { - bi0 = fl->aligned_buffers[0]; - b0 = vlib_get_buffer (vm, bi0); - ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->recycle_count); - feature_node_index = ctx->recycle_node_index; - } - else if (vec_len (fl->unaligned_buffers) > 0) - { - bi0 = fl->unaligned_buffers[0]; + bi0 = fl->buffers[0]; b0 = vlib_get_buffer (vm, bi0); ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->recycle_count); feature_node_index = ctx->recycle_node_index; } - /* aligned, unaligned buffers */ + /* buffers */ for (i = 0; i < 2; i++) { if (i == 0) { - from = fl->aligned_buffers; - n_left_from = vec_len (from); - } - else - { - from = fl->unaligned_buffers; + from = fl->buffers; n_left_from = vec_len (from); } @@ -245,8 +233,7 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl) } } - vec_reset_length (fl->aligned_buffers); - vec_reset_length (fl->unaligned_buffers); + vec_reset_length (fl->buffers); if (f) { -- cgit 1.2.3-korg From 0b49e2beee343b7f78aea69c64fc919ad0ac397c Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 9 Feb 2017 21:49:06 +0100 Subject: vhost-user: fix crash when descriptor points to unknown region This happens only when compiled for older microarchitectures, where the BSF instruction is used instead of TZCNT. BSF provides undefined result if operand is 0. Change-Id: I7a13350786a533428168595097ef01a560fde53b Signed-off-by: Damjan Marion --- src/vnet/devices/virtio/vhost-user.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 9b8c1888..c627dec2 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -237,7 +237,8 @@ map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint) r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88); r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800)); - i = __builtin_ctzll (_mm_movemask_epi8 (r)); + i = __builtin_ctzll (_mm_movemask_epi8 (r) | + (1 << VHOST_MEMORY_MAX_NREGIONS)); if (i < vui->nregions) { -- cgit 1.2.3-korg From 0578cd1c6c03319187147874ac93b714a6e893b8 Mon Sep 17 00:00:00 2001 From: Marek Gradzki Date: Mon, 13 Feb 2017 14:19:51 +0100 Subject: Fix is_server flag in vhost dump (VPP-562) Change-Id: I5b308eb39ae770d58d1498d7fafa49b236b3f534 Signed-off-by: Marek Gradzki --- src/vnet/devices/virtio/vhost-user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index c627dec2..315daa77 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2793,6 +2793,7 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz; vuid->features = vui->features; vuid->num_regions = vui->nregions; + vuid->is_server = vui->unix_server_index != ~0; vuid->sock_errno = vui->sock_errno; strncpy ((char *) vuid->sock_filename, (char *) vui->sock_filename, ARRAY_LEN (vuid->sock_filename) - 1); -- cgit 1.2.3-korg From 33e2dc078b4cc7eb32a829180bd8a58ec5aafc90 Mon Sep 17 00:00:00 2001 From: Billy McFall Date: Fri, 10 Feb 2017 14:57:24 -0500 Subject: VPP-279: Document changes for vnet/vnet/devices Add doxygen documentation for pcap tx trace CLI command. 
In the process of adding the documentation, made the following changes to the way the command worked: * If there is an error with any of the attributes, the whole command fails. The existing behavior was to apply attribute by attribute, then bail if there was an issue, with partial apply. * Move the 'on' processing to the end. The existing behavior was to process the 'on' as it was encountered on the commandline. That meant that any attributes after the 'on' in the commandline were saved and displayed, but not really being used in the packet trace. * Enhanced the 'status' to show all the configured attributes. NOTE: The packet capture has some weird behavior with regards to how many packets are written to file and if the file is appended or overwritten. VPP-634 written to document the issue. Change-Id: Iab241228b125385052de242865afd9515fa2524f Signed-off-by: Billy McFall --- src/vnet/devices/dpdk/cli.c | 235 ++++++++++++++++++++++++++++++++---------- src/vnet/devices/dpdk/dir.dox | 27 +++++ 2 files changed, 210 insertions(+), 52 deletions(-) create mode 100644 src/vnet/devices/dpdk/dir.dox (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c index 3bbace26..d133cfd9 100644 --- a/src/vnet/devices/dpdk/cli.c +++ b/src/vnet/devices/dpdk/cli.c @@ -25,43 +25,49 @@ #include "dpdk_priv.h" +/** + * @file + * @brief CLI for DPDK Abstraction Layer and pcap Tx Trace. + * + * This file contains the source code for CLI for DPDK + * Abstraction Layer and pcap Tx Trace. + */ + static clib_error_t * pcap_trace_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { +#define PCAP_DEF_PKT_TO_CAPTURE (100) + + unformat_input_t _line_input, *line_input = &_line_input; dpdk_main_t *dm = &dpdk_main; u8 *filename; - u32 max; - int matched = 0; + u8 *chroot_filename = 0; + u32 max = 0; + int enabled = 0; + int errorFlag = 0; clib_error_t *error = 0; - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "on")) + if (unformat (line_input, "on")) { if (dm->tx_pcap_enable == 0) { - if (dm->pcap_filename == 0) - dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0); - - memset (&dm->pcap_main, 0, sizeof (dm->pcap_main)); - dm->pcap_main.file_name = (char *) dm->pcap_filename; - dm->pcap_main.n_packets_to_capture = 100; - if (dm->pcap_pkts_to_capture) - dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture; - - dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; - dm->tx_pcap_enable = 1; - matched = 1; - vlib_cli_output (vm, "pcap tx capture on..."); + enabled = 1; } else { vlib_cli_output (vm, "pcap tx capture already on..."); + errorFlag = 1; + break; } - matched = 1; } - else if (unformat (input, "off")) + else if (unformat (line_input, "off")) { if (dm->tx_pcap_enable) { @@ -77,81 +83,206 @@ pcap_trace_command_fn (vlib_main_t * vm, else vlib_cli_output (vm, "saved to %s...", dm->pcap_filename); } + + dm->tx_pcap_enable = 0; } else { vlib_cli_output (vm, "pcap tx capture already off..."); + errorFlag = 1; + break; } - - dm->tx_pcap_enable = 0; - matched = 1; } - else if (unformat (input, "max %d", &max)) + else if (unformat (line_input, "max %d", &max)) { - dm->pcap_pkts_to_capture = max; - matched = 1; + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, + "can't change max value while pcap tx capture active..."); + errorFlag = 1; + break; + } } - - else if (unformat (input, "intfc %U", + else if (unformat (line_input, "intfc %U", unformat_vnet_sw_interface, dm->vnet_main, &dm->pcap_sw_if_index)) - matched = 1; - else if (unformat (input, "intfc any")) + ; + + else if (unformat (line_input, "intfc any")) { dm->pcap_sw_if_index = 0; - matched = 1; } - else if (unformat (input, "file %s", &filename)) + else if (unformat (line_input, "file %s", &filename)) { - u8 *chroot_filename; + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, + "can't change file while pcap tx capture active..."); + errorFlag = 1; + break; + } + /* Brain-police user path input */ if (strstr ((char *) filename, "..") || index ((char *) filename, '/')) { vlib_cli_output (vm, "illegal characters in filename '%s'", filename); - continue; + vlib_cli_output (vm, + "Hint: Only filename, do not enter directory structure."); + vec_free (filename); + errorFlag = 1; + break; } chroot_filename = format (0, "/tmp/%s%c", filename, 0); vec_free (filename); - - if (dm->pcap_filename) - vec_free (dm->pcap_filename); - vec_add1 (filename, 0); - dm->pcap_filename = chroot_filename; - matched = 1; } - else if (unformat (input, "status")) + else if (unformat (line_input, "status")) { + if (dm->pcap_sw_if_index == 0) + { + vlib_cli_output (vm, "max is %d for any interface to file %s", + dm-> + pcap_pkts_to_capture ? dm->pcap_pkts_to_capture + : PCAP_DEF_PKT_TO_CAPTURE, + dm-> + pcap_filename ? dm->pcap_filename : (u8 *) + "/tmp/vpe.pcap"); + } + else + { + vlib_cli_output (vm, "max is %d for interface %U to file %s", + dm-> + pcap_pkts_to_capture ? dm->pcap_pkts_to_capture + : PCAP_DEF_PKT_TO_CAPTURE, + format_vnet_sw_if_index_name, dm->vnet_main, + dm->pcap_sw_if_index, + dm-> + pcap_filename ? 
dm->pcap_filename : (u8 *) + "/tmp/vpe.pcap"); + } + if (dm->tx_pcap_enable == 0) { vlib_cli_output (vm, "pcap tx capture is off..."); - continue; } - - vlib_cli_output (vm, "pcap tx capture: %d of %d pkts...", - dm->pcap_main.n_packets_captured, - dm->pcap_main.n_packets_to_capture); - matched = 1; + else + { + vlib_cli_output (vm, "pcap tx capture is on: %d of %d pkts...", + dm->pcap_main.n_packets_captured, + dm->pcap_main.n_packets_to_capture); + } + break; } else - break; + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + errorFlag = 1; + break; + } } + unformat_free (line_input); - if (matched == 0) - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - return 0; + if (errorFlag == 0) + { + /* Since no error, save configured values. */ + if (chroot_filename) + { + if (dm->pcap_filename) + vec_free (dm->pcap_filename); + vec_add1 (chroot_filename, 0); + dm->pcap_filename = chroot_filename; + } + + if (max) + dm->pcap_pkts_to_capture = max; + + + if (enabled) + { + if (dm->pcap_filename == 0) + dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0); + + memset (&dm->pcap_main, 0, sizeof (dm->pcap_main)); + dm->pcap_main.file_name = (char *) dm->pcap_filename; + dm->pcap_main.n_packets_to_capture = PCAP_DEF_PKT_TO_CAPTURE; + if (dm->pcap_pkts_to_capture) + dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture; + + dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; + dm->tx_pcap_enable = 1; + vlib_cli_output (vm, "pcap tx capture on..."); + } + } + else if (chroot_filename) + vec_free (chroot_filename); + + + return error; } +/*? + * This command is used to start or stop a packet capture, or show + * the status of packet capture. + * + * This command has the following optional parameters: + * + * - on|off - Used to start or stop a packet capture. + * + * - max - Depth of local buffer. Once 'nn' number + * of packets have been received, buffer is flushed to file. Once another + * 'nn' number of packets have been received, buffer is flushed + * to file, overwriting previous write. If not entered, value defaults + * to 100. Can only be updated if packet capture is off. + * + * - intfc |any - Used to specify a given interface, + * or use 'any' to run packet capture on all interfaces. + * 'any' is the default if not provided. Settings from a previous + * packet capture are preserved, so 'any' can be used to reset + * the interface setting. + * + * - file - Used to specify the output filename. The file will + * be placed in the '/tmp' directory, so only the filename is + * supported. Directory should not be entered. If file already exists, file + * will be overwritten. If no filename is provided, '/tmp/vpe.pcap' + * will be used. Can only be updated if packet capture is off. + * + * - status - Displays the current status and configured attributes + * associated with a packet capture. If packet capture is in progress, + * 'status' also will return the number of packets currently in + * the local buffer. All additional attributes entered on command line + * with 'status' will be ingnored and not applied. + * + * @cliexpar + * Example of how to display the status of a tx packet capture when off: + * @cliexstart{pcap tx trace status} + * max is 100, for any interface to file /tmp/vpe.pcap + * pcap tx capture is off... + * @cliexend + * Example of how to start a tx packet capture: + * @cliexstart{pcap tx trace on max 35 intfc GigabitEthernet0/8/0 file vppTest.pcap} + * pcap tx capture on... 
+ * @cliexend + * Example of how to display the status of a tx packet capture in progress: + * @cliexstart{pcap tx trace status} + * max is 35, for interface GigabitEthernet0/8/0 to file /tmp/vppTest.pcap + * pcap tx capture is on: 20 of 35 pkts... + * @cliexend + * Example of how to stop a tx packet capture: + * @cliexstart{vppctl pcap tx trace off} + * captured 21 pkts... + * saved to /tmp/vppTest.pcap... + * @cliexend +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (pcap_trace_command, static) = { .path = "pcap tx trace", .short_help = - "pcap tx trace on off max intfc file status", + "pcap tx trace [on|off] [max ] [intfc |any] [file ] [status]", .function = pcap_trace_command_fn, }; /* *INDENT-ON* */ diff --git a/src/vnet/devices/dpdk/dir.dox b/src/vnet/devices/dpdk/dir.dox new file mode 100644 index 00000000..43e36753 --- /dev/null +++ b/src/vnet/devices/dpdk/dir.dox @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Doxygen directory documentation */ + +/** +@dir +@brief DPDK Abstraction Layer. + +This directory contains the source code for the DPDK abstraction layer. + +*/ +/*? %%clicmd:group_label DPDK and pcap tx %% ?*/ +/*? %%syscfg:group_label DPDK and pcap tx %% ?*/ -- cgit 1.2.3-korg From 5d81f452df82576fecb6b665b56e3e1a2636e647 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 16 Feb 2017 20:16:06 +0100 Subject: dpdk: quad loop and prefetch in fill_free_list Change-Id: I19ec3b769b6512f7408044751393d9faf10d01d5 Signed-off-by: Damjan Marion --- src/vnet/devices/dpdk/buffer.c | 86 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 15 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/dpdk/buffer.c b/src/vnet/devices/dpdk/buffer.c index 43ceb91e..007093e4 100644 --- a/src/vnet/devices/dpdk/buffer.c +++ b/src/vnet/devices/dpdk/buffer.c @@ -140,13 +140,12 @@ fill_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * fl, uword min_free_buffers) { dpdk_main_t *dm = &dpdk_main; - vlib_buffer_t *b; + vlib_buffer_t *b0, *b1, *b2, *b3; int n, i; - u32 bi; - u32 n_remaining = 0, n_alloc = 0; + u32 bi0, bi1, bi2, bi3; unsigned socket_id = rte_socket_id (); struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id]; - struct rte_mbuf *mb; + struct rte_mbuf *mb0, *mb1, *mb2, *mb3; /* Too early? 
*/ if (PREDICT_FALSE (rmp == 0)) @@ -170,24 +169,81 @@ fill_free_list (vlib_main_t * vm, _vec_len (vm->mbuf_alloc_list) = n; - for (i = 0; i < n; i++) + i = 0; + + while (i < (n - 7)) + { + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 4]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 5]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 6]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 7]), STORE); + + mb0 = vm->mbuf_alloc_list[i]; + mb1 = vm->mbuf_alloc_list[i + 1]; + mb2 = vm->mbuf_alloc_list[i + 2]; + mb3 = vm->mbuf_alloc_list[i + 3]; + + ASSERT (rte_mbuf_refcnt_read (mb0) == 0); + ASSERT (rte_mbuf_refcnt_read (mb1) == 0); + ASSERT (rte_mbuf_refcnt_read (mb2) == 0); + ASSERT (rte_mbuf_refcnt_read (mb3) == 0); + + rte_mbuf_refcnt_set (mb0, 1); + rte_mbuf_refcnt_set (mb1, 1); + rte_mbuf_refcnt_set (mb2, 1); + rte_mbuf_refcnt_set (mb3, 1); + + b0 = vlib_buffer_from_rte_mbuf (mb0); + b1 = vlib_buffer_from_rte_mbuf (mb1); + b2 = vlib_buffer_from_rte_mbuf (mb2); + b3 = vlib_buffer_from_rte_mbuf (mb3); + + bi0 = vlib_get_buffer_index (vm, b0); + bi1 = vlib_get_buffer_index (vm, b1); + bi2 = vlib_get_buffer_index (vm, b2); + bi3 = vlib_get_buffer_index (vm, b3); + + vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi1, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi2, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi3, CLIB_CACHE_LINE_BYTES); + + vlib_buffer_init_for_free_list (b0, fl); + vlib_buffer_init_for_free_list (b1, fl); + vlib_buffer_init_for_free_list (b2, fl); + vlib_buffer_init_for_free_list (b3, fl); + + if (fl->buffer_init_function) + { + fl->buffer_init_function (vm, fl, &bi0, 1); + fl->buffer_init_function (vm, fl, &bi1, 1); + fl->buffer_init_function (vm, fl, &bi2, 1); + fl->buffer_init_function (vm, fl, &bi3, 1); + } + i += 4; + } + + while (i < n) { - mb = vm->mbuf_alloc_list[i]; + mb0 = vm->mbuf_alloc_list[i]; - ASSERT (rte_mbuf_refcnt_read (mb) == 0); - rte_mbuf_refcnt_set (mb, 1); + ASSERT (rte_mbuf_refcnt_read (mb0) == 0); + rte_mbuf_refcnt_set (mb0, 1); - b = vlib_buffer_from_rte_mbuf (mb); - bi = vlib_get_buffer_index (vm, b); + b0 = vlib_buffer_from_rte_mbuf (mb0); + bi0 = vlib_get_buffer_index (vm, b0); - vec_add1_aligned (fl->buffers, bi, CLIB_CACHE_LINE_BYTES); - n_alloc++; - n_remaining--; + vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); - vlib_buffer_init_for_free_list (b, fl); + vlib_buffer_init_for_free_list (b0, fl); if (fl->buffer_init_function) - fl->buffer_init_function (vm, fl, &bi, 1); + fl->buffer_init_function (vm, fl, &bi0, 1); + i++; } fl->n_alloc += n; -- cgit 1.2.3-korg
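The quad loop in the patch above follows a common vector-processing shape: prefetch the buffer headers four iterations ahead with store intent, initialize four buffers per pass, then finish the remainder one item at a time in a scalar tail. A stand-alone sketch of that shape in plain C; init_one() and the int item type are hypothetical stand-ins for the vlib/DPDK calls:

#include <stddef.h>

static inline void
init_one (int *p)
{
  *p = 1;
}

void
init_all (int **items, size_t n)
{
  size_t i = 0;

  /* Main loop: four items per pass, prefetching four slots ahead so the
     headers are already in cache by the time they are written. */
  while (i + 7 < n)
    {
      __builtin_prefetch (items[i + 4], 1 /* rw = write */);
      __builtin_prefetch (items[i + 5], 1);
      __builtin_prefetch (items[i + 6], 1);
      __builtin_prefetch (items[i + 7], 1);

      init_one (items[i]);
      init_one (items[i + 1]);
      init_one (items[i + 2]);
      init_one (items[i + 3]);
      i += 4;
    }

  /* Scalar tail for whatever the quad loop left over. */
  while (i < n)
    init_one (items[i++]);
}

Prefetching items i+4 through i+7 while initializing i through i+3 gives each store roughly one loop iteration of memory latency to hide, and the loop bound keeps every prefetch index inside the array.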
From cb33dc2d7a566d571c86b950b4aa92dd7ae01c3c Mon Sep 17 00:00:00 2001 From: Radu Nicolau Date: Thu, 16 Feb 2017 16:49:46 +0000 Subject: Implemented IKEv2 initiator features: - IKE_SA_INIT and IKE_AUTH initial exchanges - Delete IKE SA - Rekey and delete Child SA - Child SAs lifetime policy To set up one VPP instance as the initiator use the following CLI commands (or API equivalents): ikev2 profile set responder ikev2 profile set ike-crypto-alg ike-integ-alg ike-dh ikev2 profile set esp-crypto-alg esp-integ-alg esp-dh ikev2 profile set sa-lifetime and finally ikev2 initiate sa-init to initiate the IKE_SA_INIT exchange Child SA re-keying process: 1. Child SA expires 2. A new Child SA is created using the Child SA rekey exchange 3. For a set time both SAs are alive 4. After the set time interval expires, the old SA is deleted Any additional settings will not be carried over (i.e. settings of the ipsec interface associated with the Child SA) CLI API additions: ikev2 profile set responder ikev2 profile set ike-crypto-alg ike-integ-alg ike-dh ikev2 profile set esp-crypto-alg esp-integ-alg esp-dh ikev2 profile set sa-lifetime ikev2 initiate sa-init ikev2 initiate del-child-sa ikev2 initiate del-sa ikev2 initiate rekey-child-sa Sample configurations: Responder: ikev2 profile add pr1 ikev2 profile set pr1 auth shared-key-mic string Vpp123 ikev2 profile set pr1 id local fqdn vpp.home.responder ikev2 profile set pr1 id remote fqdn vpp.home.initiator ikev2 profile set pr1 traffic-selector remote ip-range 192.168.125.0 - 192.168.125.255 port-range 0 - 65535 protocol 0 ikev2 profile set pr1 traffic-selector local ip-range 192.168.124.0 - 192.168.124.255 port-range 0 - 65535 protocol 0 Initiator: ikev2 profile add pr1 ikev2 profile set pr1 auth shared-key-mic string Vpp123 ikev2 profile set pr1 id local fqdn vpp.home.initiator ikev2 profile set pr1 id remote fqdn vpp.home.responder ikev2 profile set pr1 traffic-selector local ip-range 192.168.125.0 - 192.168.125.255 port-range 0 - 65535 protocol 0 ikev2 profile set pr1 traffic-selector remote ip-range 192.168.124.0 - 192.168.124.255 port-range 0 - 65535 protocol 0 ikev2 profile set pr1 responder TenGigabitEthernet3/0/1 192.168.40.20 ikev2 profile set pr1 ike-crypto-alg aes-cbc 192 ike-integ-alg sha1-96 ike-dh modp-2048 ikev2 profile set pr1 esp-crypto-alg aes-cbc 192 esp-integ-alg sha1-96 esp-dh ecp-256 ikev2 profile set pr1 sa-lifetime 3600 10 5 0 Change-Id: I1db9084dc787129ea61298223fb7585a6f7eaf9e Signed-off-by: Radu Nicolau --- src/vat/api_format.c | 354 +++++++ src/vnet/devices/dpdk/ipsec/esp_decrypt.c | 2 + src/vnet/devices/dpdk/ipsec/esp_encrypt.c | 2 + src/vnet/ipsec/esp_decrypt.c | 2 + src/vnet/ipsec/esp_encrypt.c | 2 + src/vnet/ipsec/ikev2.c | 1552 ++++++++++++++++++++++++++--- src/vnet/ipsec/ikev2.h | 25 + src/vnet/ipsec/ikev2_cli.c | 125 ++- src/vnet/ipsec/ikev2_crypto.c | 149 ++- src/vnet/ipsec/ikev2_payload.c | 19 +- src/vnet/ipsec/ikev2_priv.h | 63 +- src/vnet/ipsec/ipsec.api | 228 +++++ src/vnet/ipsec/ipsec.h | 3 + src/vnet/ipsec/ipsec_api.c | 198 +++- 14 files changed, 2536 insertions(+), 188 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 98b03c67..3a40a553 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -3843,6 +3843,14 @@ _(ikev2_profile_set_auth_reply) \ _(ikev2_profile_set_id_reply) \ _(ikev2_profile_set_ts_reply) \ _(ikev2_set_local_key_reply) \ +_(ikev2_set_responder_reply) \ +_(ikev2_set_ike_transforms_reply) \ +_(ikev2_set_esp_transforms_reply) \ +_(ikev2_set_sa_lifetime_reply) \ +_(ikev2_initiate_sa_init_reply) \ +_(ikev2_initiate_del_ike_sa_reply) \ +_(ikev2_initiate_del_child_sa_reply) \ +_(ikev2_initiate_rekey_child_sa_reply) \ _(delete_loopback_reply) \ _(bd_ip_mac_add_del_reply) \ _(map_del_domain_reply) \ @@ -4076,6 +4084,14 @@ _(IKEV2_PROFILE_SET_AUTH_REPLY, ikev2_profile_set_auth_reply) \ _(IKEV2_PROFILE_SET_ID_REPLY, ikev2_profile_set_id_reply) \ _(IKEV2_PROFILE_SET_TS_REPLY, ikev2_profile_set_ts_reply) \ _(IKEV2_SET_LOCAL_KEY_REPLY, ikev2_set_local_key_reply) \ +_(IKEV2_SET_RESPONDER_REPLY, ikev2_set_responder_reply) \ +_(IKEV2_SET_IKE_TRANSFORMS_REPLY, ikev2_set_ike_transforms_reply) \ +_(IKEV2_SET_ESP_TRANSFORMS_REPLY, ikev2_set_esp_transforms_reply) \
+_(IKEV2_SET_SA_LIFETIME_REPLY, ikev2_set_sa_lifetime_reply) \ +_(IKEV2_INITIATE_SA_INIT_REPLY, ikev2_initiate_sa_init_reply) \ +_(IKEV2_INITIATE_DEL_IKE_SA_REPLY, ikev2_initiate_del_ike_sa_reply) \ +_(IKEV2_INITIATE_DEL_CHILD_SA_REPLY, ikev2_initiate_del_child_sa_reply) \ +_(IKEV2_INITIATE_REKEY_CHILD_SA_REPLY, ikev2_initiate_rekey_child_sa_reply) \ _(DELETE_LOOPBACK_REPLY, delete_loopback_reply) \ _(BD_IP_MAC_ADD_DEL_REPLY, bd_ip_mac_add_del_reply) \ _(DHCP_COMPL_EVENT, dhcp_compl_event) \ @@ -12738,6 +12754,336 @@ api_ikev2_set_local_key (vat_main_t * vam) return ret; } +static int +api_ikev2_set_responder (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_ikev2_set_responder_t *mp; + int ret; + u8 *name = 0; + u32 sw_if_index = ~0; + ip4_address_t address; + + const char *valid_chars = "a-zA-Z0-9_"; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (i, "%U interface %d address %U", unformat_token, valid_chars, + &name, &sw_if_index, unformat_ip4_address, &address)) + vec_add1 (name, 0); + else + { + errmsg ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (!vec_len (name)) + { + errmsg ("profile name must be specified"); + return -99; + } + + if (vec_len (name) > 64) + { + errmsg ("profile name too long"); + return -99; + } + + M (IKEV2_SET_RESPONDER, mp); + + clib_memcpy (mp->name, name, vec_len (name)); + vec_free (name); + + mp->sw_if_index = sw_if_index; + clib_memcpy (mp->address, &address, sizeof (address)); + + S (mp); + W (ret); + return ret; +} + +static int +api_ikev2_set_ike_transforms (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_ikev2_set_ike_transforms_t *mp; + int ret; + u8 *name = 0; + u32 crypto_alg, crypto_key_size, integ_alg, dh_group; + + const char *valid_chars = "a-zA-Z0-9_"; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U %d %d %d %d", unformat_token, valid_chars, &name, + &crypto_alg, &crypto_key_size, &integ_alg, &dh_group)) + vec_add1 (name, 0); + else + { + errmsg ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (!vec_len (name)) + { + errmsg ("profile name must be specified"); + return -99; + } + + if (vec_len (name) > 64) + { + errmsg ("profile name too long"); + return -99; + } + + M (IKEV2_SET_IKE_TRANSFORMS, mp); + + clib_memcpy (mp->name, name, vec_len (name)); + vec_free (name); + mp->crypto_alg = crypto_alg; + mp->crypto_key_size = crypto_key_size; + mp->integ_alg = integ_alg; + mp->dh_group = dh_group; + + S (mp); + W (ret); + return ret; +} + + +static int +api_ikev2_set_esp_transforms (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_ikev2_set_esp_transforms_t *mp; + int ret; + u8 *name = 0; + u32 crypto_alg, crypto_key_size, integ_alg, dh_group; + + const char *valid_chars = "a-zA-Z0-9_"; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U %d %d %d %d", unformat_token, valid_chars, &name, + &crypto_alg, &crypto_key_size, &integ_alg, &dh_group)) + vec_add1 (name, 0); + else + { + errmsg ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (!vec_len (name)) + { + errmsg ("profile name must be specified"); + return -99; + } + + if (vec_len (name) > 64) + { + errmsg ("profile name too long"); + return -99; + } + + M (IKEV2_SET_ESP_TRANSFORMS, mp); + + clib_memcpy (mp->name, name, vec_len (name)); + vec_free (name); + mp->crypto_alg = crypto_alg; + mp->crypto_key_size = crypto_key_size; + mp->integ_alg = 
integ_alg; + mp->dh_group = dh_group; + + S (mp); + W (ret); + return ret; +} + +static int +api_ikev2_set_sa_lifetime (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_ikev2_set_sa_lifetime_t *mp; + int ret; + u8 *name = 0; + u64 lifetime, lifetime_maxdata; + u32 lifetime_jitter, handover; + + const char *valid_chars = "a-zA-Z0-9_"; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U %lu %u %u %lu", unformat_token, valid_chars, &name, + &lifetime, &lifetime_jitter, &handover, + &lifetime_maxdata)) + vec_add1 (name, 0); + else + { + errmsg ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (!vec_len (name)) + { + errmsg ("profile name must be specified"); + return -99; + } + + if (vec_len (name) > 64) + { + errmsg ("profile name too long"); + return -99; + } + + M (IKEV2_SET_SA_LIFETIME, mp); + + clib_memcpy (mp->name, name, vec_len (name)); + vec_free (name); + mp->lifetime = lifetime; + mp->lifetime_jitter = lifetime_jitter; + mp->handover = handover; + mp->lifetime_maxdata = lifetime_maxdata; + + S (mp); + W (ret); + return ret; +} + +static int +api_ikev2_initiate_sa_init (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_ikev2_initiate_sa_init_t *mp; + int ret; + u8 *name = 0; + + const char *valid_chars = "a-zA-Z0-9_"; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", unformat_token, valid_chars, &name)) + vec_add1 (name, 0); + else + { + errmsg ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (!vec_len (name)) + { + errmsg ("profile name must be specified"); + return -99; + } + + if (vec_len (name) > 64) + { + errmsg ("profile name too long"); + return -99; + } + + M (IKEV2_INITIATE_SA_INIT, mp); + + clib_memcpy (mp->name, name, vec_len (name)); + vec_free (name); + + S (mp); + W (ret); + return ret; +} + +static int +api_ikev2_initiate_del_ike_sa (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_ikev2_initiate_del_ike_sa_t *mp; + int ret; + u64 ispi; + + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%lx", &ispi)) + ; + else + { + errmsg ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + M (IKEV2_INITIATE_DEL_IKE_SA, mp); + + mp->ispi = ispi; + + S (mp); + W (ret); + return ret; +} + +static int +api_ikev2_initiate_del_child_sa (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_ikev2_initiate_del_child_sa_t *mp; + int ret; + u32 ispi; + + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%x", &ispi)) + ; + else + { + errmsg ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + M (IKEV2_INITIATE_DEL_CHILD_SA, mp); + + mp->ispi = ispi; + + S (mp); + W (ret); + return ret; +} + +static int +api_ikev2_initiate_rekey_child_sa (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_ikev2_initiate_rekey_child_sa_t *mp; + int ret; + u32 ispi; + + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%x", &ispi)) + ; + else + { + errmsg ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + M (IKEV2_INITIATE_REKEY_CHILD_SA, mp); + + mp->ispi = ispi; + + S (mp); + W (ret); + return ret; +} + /* * MAP */ @@ -17914,6 +18260,14 @@ _(ikev2_profile_set_ts, "name protocol \n" \ "start_port end_port start_addr end_addr \n" \ "(local|remote)") \ _(ikev2_set_local_key, "file ") \ +_(ikev2_set_responder, " interface address ") \ 
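Given the unformat strings in the handlers above, a vpp_api_test session driving the initiator side could look like the following; the interface index and address are illustrative, and the numeric transform IDs follow the IANA IKEv2 registry (12 = ENCR_AES_CBC, 2 = AUTH_HMAC_SHA1_96, 14 = 2048-bit MODP, 19 = 256-bit ECP):

ikev2_set_responder pr1 interface 5 address 192.168.40.20
ikev2_set_ike_transforms pr1 12 192 2 14
ikev2_set_esp_transforms pr1 12 192 2 19
ikev2_set_sa_lifetime pr1 3600 10 5 0
ikev2_initiate_sa_init pr1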
+_(ikev2_set_ike_transforms, " ") \ +_(ikev2_set_esp_transforms, " ") \ +_(ikev2_set_sa_lifetime, " ") \ +_(ikev2_initiate_sa_init, "") \ +_(ikev2_initiate_del_ike_sa, "") \ +_(ikev2_initiate_del_child_sa, "") \ +_(ikev2_initiate_rekey_child_sa, "") \ _(delete_loopback,"sw_if_index ") \ _(bd_ip_mac_add_del, "bd_id [del]") \ _(map_add_domain, \ diff --git a/src/vnet/devices/dpdk/ipsec/esp_decrypt.c b/src/vnet/devices/dpdk/ipsec/esp_decrypt.c index 53b2d122..76007609 100644 --- a/src/vnet/devices/dpdk/ipsec/esp_decrypt.c +++ b/src/vnet/devices/dpdk/ipsec/esp_decrypt.c @@ -174,6 +174,8 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm, } } + sa0->total_data_size += b0->current_length; + if (PREDICT_FALSE(sa0->integ_alg == IPSEC_INTEG_ALG_NONE) || PREDICT_FALSE(sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE)) { diff --git a/src/vnet/devices/dpdk/ipsec/esp_encrypt.c b/src/vnet/devices/dpdk/ipsec/esp_encrypt.c index b6f00004..6eb1afc9 100644 --- a/src/vnet/devices/dpdk/ipsec/esp_encrypt.c +++ b/src/vnet/devices/dpdk/ipsec/esp_encrypt.c @@ -177,6 +177,8 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm, goto trace; } + sa0->total_data_size += b0->current_length; + sa_sess = pool_elt_at_index (cwm->sa_sess_d[1], sa_index0); if (PREDICT_FALSE (!sa_sess->sess)) { diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c index e69cd851..7289b260 100644 --- a/src/vnet/ipsec/esp_decrypt.c +++ b/src/vnet/ipsec/esp_decrypt.c @@ -190,6 +190,8 @@ esp_decrypt_node_fn (vlib_main_t * vm, } } + sa0->total_data_size += i_b0->current_length; + if (PREDICT_TRUE (sa0->integ_alg != IPSEC_INTEG_ALG_NONE)) { u8 sig[64]; diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c index 7b7f9b9c..44ae2297 100644 --- a/src/vnet/ipsec/esp_encrypt.c +++ b/src/vnet/ipsec/esp_encrypt.c @@ -182,6 +182,8 @@ esp_encrypt_node_fn (vlib_main_t * vm, goto trace; } + sa0->total_data_size += i_b0->current_length; + /* grab free buffer */ last_empty_buffer = vec_len (empty_buffers) - 1; o_bi0 = empty_buffers[last_empty_buffer]; diff --git a/src/vnet/ipsec/ikev2.c b/src/vnet/ipsec/ikev2.c index 5a6c3674..09209334 100644 --- a/src/vnet/ipsec/ikev2.c +++ b/src/vnet/ipsec/ikev2.c @@ -21,6 +21,7 @@ #include #include #include +#include static int ikev2_delete_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa, @@ -202,13 +203,15 @@ ikev2_sa_get_td_for_type (ikev2_sa_proposal_t * p, } ikev2_child_sa_t * -ikev2_sa_get_child (ikev2_sa_t * sa, u32 spi, ikev2_protocol_id_t prot_id) +ikev2_sa_get_child (ikev2_sa_t * sa, u32 spi, ikev2_protocol_id_t prot_id, + int by_initiator) { ikev2_child_sa_t *c; vec_foreach (c, sa->childs) { - if (c->i_proposals[0].spi == spi - && c->i_proposals[0].protocol_id == prot_id) + ikev2_sa_proposal_t *proposal = + by_initiator ? 
&c->i_proposals[0] : &c->r_proposals[0]; + if (proposal && proposal->spi == spi && proposal->protocol_id == prot_id) return c; } @@ -271,6 +274,7 @@ ikev2_sa_free_all_vec (ikev2_sa_t * sa) vec_free (sa->i_nonce); vec_free (sa->i_dh_data); vec_free (sa->dh_shared_key); + vec_free (sa->dh_private_key); ikev2_sa_free_proposal_vector (&sa->r_proposals); ikev2_sa_free_proposal_vector (&sa->i_proposals); @@ -341,15 +345,84 @@ ikev2_generate_sa_init_data (ikev2_sa_t * sa) return; } - /* generate rspi */ - RAND_bytes ((u8 *) & sa->rspi, 8); + if (sa->is_initiator) + { + /* generate ispi */ + RAND_bytes ((u8 *) & sa->ispi, 8); + + /* generate nonce */ + sa->i_nonce = vec_new (u8, IKEV2_NONCE_SIZE); + RAND_bytes ((u8 *) sa->i_nonce, IKEV2_NONCE_SIZE); + } + else + { + /* generate rspi */ + RAND_bytes ((u8 *) & sa->rspi, 8); - /* generate nonce */ - sa->r_nonce = vec_new (u8, IKEV2_NONCE_SIZE); - RAND_bytes ((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE); + /* generate nonce */ + sa->r_nonce = vec_new (u8, IKEV2_NONCE_SIZE); + RAND_bytes ((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE); + } /* generate dh keys */ ikev2_generate_dh (sa, t); + +} + +static void +ikev2_complete_sa_data (ikev2_sa_t * sa, ikev2_sa_t * sai) +{ + ikev2_sa_transform_t *t = 0, *t2; + ikev2_main_t *km = &ikev2_main; + + + /* move some data to the new SA */ +#define _(A) ({void* __tmp__ = (A); (A) = 0; __tmp__;}) + sa->i_nonce = _(sai->i_nonce); + sa->i_dh_data = _(sai->i_dh_data); + sa->dh_private_key = _(sai->dh_private_key); + sa->iaddr.as_u32 = sai->iaddr.as_u32; + sa->raddr.as_u32 = sai->raddr.as_u32; + sa->is_initiator = sai->is_initiator; + sa->profile = sai->profile; + sa->i_id.type = sai->i_id.type; + sa->i_id.data = _(sai->i_id.data); + sa->i_auth.method = sai->i_auth.method; + sa->i_auth.hex = sai->i_auth.hex; + sa->i_auth.data = _(sai->i_auth.data); + sa->i_auth.key = _(sai->i_auth.key); + sa->last_sa_init_req_packet_data = _(sai->last_sa_init_req_packet_data); + sa->childs = _(sai->childs); +#undef _ + + + if (sa->dh_group == IKEV2_TRANSFORM_DH_TYPE_NONE) + { + return; + } + + /* check if received DH group is on our list of supported groups */ + vec_foreach (t2, km->supported_transforms) + { + if (t2->type == IKEV2_TRANSFORM_TYPE_DH && sa->dh_group == t2->dh_type) + { + t = t2; + break; + } + } + + if (!t) + { + clib_warning ("unknown dh data group %u (data len %u)", sa->dh_group, + vec_len (sa->i_dh_data)); + sa->dh_group = IKEV2_TRANSFORM_DH_TYPE_NONE; + return; + } + + + /* generate dh keys */ + ikev2_complete_dh (sa, t); + } static void @@ -551,6 +624,86 @@ ikev2_process_sa_init_req (vlib_main_t * vm, ikev2_sa_t * sa, ikev2_set_state (sa, IKEV2_STATE_SA_INIT); } +static void +ikev2_process_sa_init_resp (vlib_main_t * vm, ikev2_sa_t * sa, + ike_header_t * ike) +{ + int p = 0; + u32 len = clib_net_to_host_u32 (ike->length); + u8 payload = ike->nextpayload; + + clib_warning ("ispi %lx rspi %lx nextpayload %x version %x " + "exchange %x flags %x msgid %x length %u", + clib_net_to_host_u64 (ike->ispi), + clib_net_to_host_u64 (ike->rspi), + payload, ike->version, + ike->exchange, ike->flags, + clib_net_to_host_u32 (ike->msgid), len); + + sa->ispi = clib_net_to_host_u64 (ike->ispi); + sa->rspi = clib_net_to_host_u64 (ike->rspi); + + /* store whole IKE payload - needed for PSK auth */ + vec_free (sa->last_sa_init_res_packet_data); + vec_add (sa->last_sa_init_res_packet_data, ike, len); + + while (p < len && payload != IKEV2_PAYLOAD_NONE) + { + ike_payload_header_t *ikep = (ike_payload_header_t *) & ike->payload[p]; + u32 plen =
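The `_()` macro defined in ikev2_complete_sa_data above is a pointer move: the GCC statement expression hands back the old pointer and nulls the source field, so the subsequent cleanup of the temporary half-open SA cannot free vectors that the permanent SA now owns. A reduced sketch of the same idiom; the struct and field are illustrative:

/* Requires GCC/Clang statement expressions, as the original code does. */
#define MOVE_PTR(p) ({ void *tmp__ = (p); (p) = 0; tmp__; })

struct sa
{
  unsigned char *nonce;
};

void
move_nonce (struct sa *dst, struct sa *src)
{
  dst->nonce = MOVE_PTR (src->nonce); /* src->nonce is NULL afterwards */
}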
clib_net_to_host_u16 (ikep->length); + + if (plen < sizeof (ike_payload_header_t)) + return; + + if (payload == IKEV2_PAYLOAD_SA) + { + ikev2_sa_free_proposal_vector (&sa->r_proposals); + sa->r_proposals = ikev2_parse_sa_payload (ikep); + if (sa->r_proposals) + { + ikev2_set_state (sa, IKEV2_STATE_SA_INIT); + ike->msgid = + clib_host_to_net_u32 (clib_net_to_host_u32 (ike->msgid) + 1); + } + } + else if (payload == IKEV2_PAYLOAD_KE) + { + ike_ke_payload_header_t *ke = (ike_ke_payload_header_t *) ikep; + sa->dh_group = clib_net_to_host_u16 (ke->dh_group); + vec_free (sa->r_dh_data); + vec_add (sa->r_dh_data, ke->payload, plen - sizeof (*ke)); + } + else if (payload == IKEV2_PAYLOAD_NONCE) + { + vec_free (sa->r_nonce); + vec_add (sa->r_nonce, ikep->payload, plen - sizeof (*ikep)); + } + else if (payload == IKEV2_PAYLOAD_NOTIFY) + { + ikev2_notify_t *n = ikev2_parse_notify_payload (ikep); + vec_free (n); + } + else if (payload == IKEV2_PAYLOAD_VENDOR) + { + ikev2_parse_vendor_payload (ikep); + } + else + { + clib_warning ("unknown payload %u flags %x length %u", payload, + ikep->flags, plen); + if (ikep->flags & IKEV2_PAYLOAD_FLAG_CRITICAL) + { + ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE); + sa->unsupported_cp = payload; + return; + } + } + + payload = ikep->nextpayload; + p += plen; + } +} + static u8 * ikev2_decrypt_sk_payload (ikev2_sa_t * sa, ike_header_t * ike, u8 * payload) { @@ -599,8 +752,9 @@ ikev2_decrypt_sk_payload (ikev2_sa_t * sa, ike_header_t * ike, u8 * payload) return 0; } - hmac = ikev2_calc_integr (tr_integ, sa->sk_ai, (u8 *) ike, - len - tr_integ->key_trunc); + hmac = + ikev2_calc_integr (tr_integ, sa->is_initiator ? sa->sk_ar : sa->sk_ai, + (u8 *) ike, len - tr_integ->key_trunc); plen = plen - sizeof (*ikep) - tr_integ->key_trunc; @@ -683,9 +837,16 @@ ikev2_process_auth_req (vlib_main_t * vm, ikev2_sa_t * sa, ike_header_t * ike) goto cleanup_and_exit; } - /* create 1st child SA */ - ikev2_sa_free_all_child_sa (&sa->childs); - vec_add2 (sa->childs, first_child_sa, 1); + /* select or create 1st child SA */ + if (sa->is_initiator) + { + first_child_sa = &sa->childs[0]; + } + else + { + ikev2_sa_free_all_child_sa (&sa->childs); + vec_add2 (sa->childs, first_child_sa, 1); + } /* process encrypted payload */ @@ -701,27 +862,54 @@ ikev2_process_auth_req (vlib_main_t * vm, ikev2_sa_t * sa, ike_header_t * ike) if (payload == IKEV2_PAYLOAD_SA) /* 33 */ { clib_warning ("received payload SA, len %u", plen - sizeof (*ikep)); - ikev2_sa_free_proposal_vector (&first_child_sa->i_proposals); - first_child_sa->i_proposals = ikev2_parse_sa_payload (ikep); + if (sa->is_initiator) + { + ikev2_sa_free_proposal_vector (&first_child_sa->r_proposals); + first_child_sa->r_proposals = ikev2_parse_sa_payload (ikep); + } + else + { + ikev2_sa_free_proposal_vector (&first_child_sa->i_proposals); + first_child_sa->i_proposals = ikev2_parse_sa_payload (ikep); + } } - else if (payload == IKEV2_PAYLOAD_IDI) /* 35 */ + else if (payload == IKEV2_PAYLOAD_IDI || payload == IKEV2_PAYLOAD_IDR) /* 35, 36 */ { ike_id_payload_header_t *id = (ike_id_payload_header_t *) ikep; - sa->i_id.type = id->id_type; - vec_free (sa->i_id.data); - vec_add (sa->i_id.data, id->payload, plen - sizeof (*id)); + if (sa->is_initiator) + { + sa->r_id.type = id->id_type; + vec_free (sa->r_id.data); + vec_add (sa->r_id.data, id->payload, plen - sizeof (*id)); + } + else + { + sa->i_id.type = id->id_type; + vec_free (sa->i_id.data); + vec_add (sa->i_id.data, id->payload, plen - sizeof (*id)); + } - clib_warning ("received payload 
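ikev2_process_sa_init_resp above shares one parsing shape with the existing request handlers: every IKEv2 payload header names the type of the payload that follows, so the walker carries that type forward and advances by the encoded length. A minimal sketch of the walk; types are simplified and a real parser must also bounds-check against the remaining message length:

#include <arpa/inet.h>
#include <stdint.h>

typedef struct __attribute__ ((packed))
{
  uint8_t nextpayload;   /* type of the NEXT payload in the chain */
  uint8_t flags;         /* bit 7: critical */
  uint16_t length;       /* network order, includes this header */
} ike_payload_hdr_t;

void
walk_payloads (const uint8_t * body, uint32_t len, uint8_t first_type)
{
  uint32_t p = 0;
  uint8_t payload = first_type; /* from the IKE header's nextpayload */

  while (p + sizeof (ike_payload_hdr_t) <= len && payload != 0 /* NONE */ )
    {
      const ike_payload_hdr_t *h = (const ike_payload_hdr_t *) (body + p);
      uint16_t plen = ntohs (h->length);

      if (plen < sizeof (*h) || p + plen > len)
        return; /* malformed payload, stop */

      /* ... dispatch on `payload` (SA, KE, NONCE, NOTIFY, ...) ... */

      payload = h->nextpayload; /* chain to the next payload type */
      p += plen;
    }
}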
IDi, len %u id_type %u", + clib_warning ("received payload %s, len %u id_type %u", + (payload == IKEV2_PAYLOAD_IDI ? "IDi" : "IDr"), plen - sizeof (*id), id->id_type); } else if (payload == IKEV2_PAYLOAD_AUTH) /* 39 */ { ike_auth_payload_header_t *a = (ike_auth_payload_header_t *) ikep; - sa->i_auth.method = a->auth_method; - vec_free (sa->i_auth.data); - vec_add (sa->i_auth.data, a->payload, plen - sizeof (*a)); + if (sa->is_initiator) + { + sa->r_auth.method = a->auth_method; + vec_free (sa->r_auth.data); + vec_add (sa->r_auth.data, a->payload, plen - sizeof (*a)); + } + else + { + sa->i_auth.method = a->auth_method; + vec_free (sa->i_auth.data); + vec_add (sa->i_auth.data, a->payload, plen - sizeof (*a)); + } clib_warning ("received payload AUTH, len %u auth_type %u", plen - sizeof (*a), a->auth_method); @@ -857,7 +1045,7 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm, ikev2_sa_t * sa, u8 payload = ike->nextpayload; u8 *plaintext = 0; u8 rekeying = 0; - u8 i_nonce[IKEV2_NONCE_SIZE]; + u8 nonce[IKEV2_NONCE_SIZE]; ike_payload_header_t *ikep; u32 plen; @@ -912,7 +1100,7 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm, ikev2_sa_t * sa, } else if (payload == IKEV2_PAYLOAD_NONCE) { - clib_memcpy (i_nonce, ikep->payload, plen - sizeof (*ikep)); + clib_memcpy (nonce, ikep->payload, plen - sizeof (*ikep)); } else if (payload == IKEV2_PAYLOAD_TSI) { @@ -939,10 +1127,29 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm, ikev2_sa_t * sa, p += plen; } - if (rekeying) + if (sa->is_initiator && proposal->protocol_id == IKEV2_PROTOCOL_ESP) + { + ikev2_rekey_t *rekey = &sa->rekey[0]; + rekey->protocol_id = proposal->protocol_id; + rekey->i_proposal = + ikev2_select_proposal (proposal, IKEV2_PROTOCOL_ESP); + rekey->i_proposal->spi = rekey->spi; + rekey->r_proposal = proposal; + rekey->tsi = tsi; + rekey->tsr = tsr; + /* update Nr */ + vec_free (sa->r_nonce); + vec_add (sa->r_nonce, nonce, IKEV2_NONCE_SIZE); + child_sa = ikev2_sa_get_child (sa, rekey->ispi, IKEV2_PROTOCOL_ESP, 1); + if (child_sa) + { + child_sa->rekey_retries = 0; + } + } + else if (rekeying) { ikev2_rekey_t *rekey; - child_sa = ikev2_sa_get_child (sa, n->spi, n->protocol_id); + child_sa = ikev2_sa_get_child (sa, n->spi, n->protocol_id, 1); if (!child_sa) { clib_warning ("child SA spi %lx not found", n->spi); @@ -958,7 +1165,7 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm, ikev2_sa_t * sa, rekey->tsr = tsr; /* update Ni */ vec_free (sa->i_nonce); - vec_add (sa->i_nonce, i_nonce, IKEV2_NONCE_SIZE); + vec_add (sa->i_nonce, nonce, IKEV2_NONCE_SIZE); /* generate new Nr */ vec_free (sa->r_nonce); sa->r_nonce = vec_new (u8, IKEV2_NONCE_SIZE); @@ -1030,20 +1237,34 @@ ikev2_sa_match_ts (ikev2_sa_t * sa) { ikev2_main_t *km = &ikev2_main; ikev2_profile_t *p; - ikev2_ts_t *ts, *tsi = 0, *tsr = 0; + ikev2_ts_t *ts, *p_tsi, *p_tsr, *tsi = 0, *tsr = 0; + ikev2_id_t *id; /* *INDENT-OFF* */ pool_foreach (p, km->profiles, ({ + if (sa->is_initiator) + { + p_tsi = &p->loc_ts; + p_tsr = &p->rem_ts; + id = &sa->r_id; + } + else + { + p_tsi = &p->rem_ts; + p_tsr = &p->loc_ts; + id = &sa->i_id; + } + /* check id */ - if (p->rem_id.type != sa->i_id.type || - vec_len(p->rem_id.data) != vec_len(sa->i_id.data) || - memcmp(p->rem_id.data, sa->i_id.data, vec_len(p->rem_id.data))) + if (p->rem_id.type != id->type || + vec_len(p->rem_id.data) != vec_len(id->data) || + memcmp(p->rem_id.data, id->data, vec_len(p->rem_id.data))) continue; vec_foreach(ts, sa->childs[0].tsi) { - if (ikev2_ts_cmp(&p->rem_ts, ts)) + if (ikev2_ts_cmp(p_tsi, ts)) { 
tsi = vec_dup(ts); break; @@ -1052,7 +1273,7 @@ ikev2_sa_match_ts (ikev2_sa_t * sa) vec_foreach(ts, sa->childs[0].tsr) { - if (ikev2_ts_cmp(&p->loc_ts, ts)) + if (ikev2_ts_cmp(p_tsr, ts)) { tsr = vec_dup(ts); break; @@ -1100,18 +1321,32 @@ ikev2_sa_auth (ikev2_sa_t * sa) } key_pad = format (0, "%s", IKEV2_KEY_PAD); - authmsg = ikev2_sa_generate_authmsg (sa, 0); + authmsg = ikev2_sa_generate_authmsg (sa, sa->is_initiator); + + ikev2_id_t *sa_id; + ikev2_auth_t *sa_auth; + + if (sa->is_initiator) + { + sa_id = &sa->r_id; + sa_auth = &sa->r_auth; + } + else + { + sa_id = &sa->i_id; + sa_auth = &sa->i_auth; + } /* *INDENT-OFF* */ pool_foreach (p, km->profiles, ({ /* check id */ - if (p->rem_id.type != sa->i_id.type || - vec_len(p->rem_id.data) != vec_len(sa->i_id.data) || - memcmp(p->rem_id.data, sa->i_id.data, vec_len(p->rem_id.data))) + if (p->rem_id.type != sa_id->type || + vec_len(p->rem_id.data) != vec_len(sa_id->data) || + memcmp(p->rem_id.data, sa_id->data, vec_len(p->rem_id.data))) continue; - if (sa->i_auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC) + if (sa_auth->method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC) { if (!p->auth.data || p->auth.method != IKEV2_AUTH_METHOD_SHARED_KEY_MIC) @@ -1120,7 +1355,7 @@ ikev2_sa_auth (ikev2_sa_t * sa) psk = ikev2_calc_prf(tr_prf, p->auth.data, key_pad); auth = ikev2_calc_prf(tr_prf, psk, authmsg); - if (!memcmp(auth, sa->i_auth.data, vec_len(sa->i_auth.data))) + if (!memcmp(auth, sa_auth->data, vec_len(sa_auth->data))) { ikev2_set_state(sa, IKEV2_STATE_AUTHENTICATED); vec_free(auth); @@ -1129,12 +1364,12 @@ ikev2_sa_auth (ikev2_sa_t * sa) } } - else if (sa->i_auth.method == IKEV2_AUTH_METHOD_RSA_SIG) + else if (sa_auth->method == IKEV2_AUTH_METHOD_RSA_SIG) { if (p->auth.method != IKEV2_AUTH_METHOD_RSA_SIG) continue; - if (ikev2_verify_sign(p->auth.key, sa->i_auth.data, authmsg) == 1) + if (ikev2_verify_sign(p->auth.key, sa_auth->data, authmsg) == 1) { ikev2_set_state(sa, IKEV2_STATE_AUTHENTICATED); sel_p = p; @@ -1151,28 +1386,32 @@ ikev2_sa_auth (ikev2_sa_t * sa) if (sa->state == IKEV2_STATE_AUTHENTICATED) { - vec_free (sa->r_id.data); - sa->r_id.data = vec_dup (sel_p->loc_id.data); - sa->r_id.type = sel_p->loc_id.type; - - /* generate our auth data */ - authmsg = ikev2_sa_generate_authmsg (sa, 1); - if (sel_p->auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC) + if (!sa->is_initiator) { - sa->r_auth.data = ikev2_calc_prf (tr_prf, psk, authmsg); - sa->r_auth.method = IKEV2_AUTH_METHOD_SHARED_KEY_MIC; - } - else if (sel_p->auth.method == IKEV2_AUTH_METHOD_RSA_SIG) - { - sa->r_auth.data = ikev2_calc_sign (km->pkey, authmsg); - sa->r_auth.method = IKEV2_AUTH_METHOD_RSA_SIG; - } - vec_free (authmsg); + vec_free (sa->r_id.data); + sa->r_id.data = vec_dup (sel_p->loc_id.data); + sa->r_id.type = sel_p->loc_id.type; + + /* generate our auth data */ + authmsg = ikev2_sa_generate_authmsg (sa, 1); + if (sel_p->auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC) + { + sa->r_auth.data = ikev2_calc_prf (tr_prf, psk, authmsg); + sa->r_auth.method = IKEV2_AUTH_METHOD_SHARED_KEY_MIC; + } + else if (sel_p->auth.method == IKEV2_AUTH_METHOD_RSA_SIG) + { + sa->r_auth.data = ikev2_calc_sign (km->pkey, authmsg); + sa->r_auth.method = IKEV2_AUTH_METHOD_RSA_SIG; + } + vec_free (authmsg); - /* select transforms for 1st child sa */ - ikev2_sa_free_proposal_vector (&sa->childs[0].r_proposals); - sa->childs[0].r_proposals = - ikev2_select_proposal (sa->childs[0].i_proposals, IKEV2_PROTOCOL_ESP); + /* select transforms for 1st child sa */ + ikev2_sa_free_proposal_vector 
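The shared-key branch above computes the RFC 7296 section 2.15 value AUTH = prf(prf(Shared Secret, "Key Pad for IKEv2"), <SignedOctets>), with ikev2_calc_prf wrapping the negotiated PRF. A direct sketch of that computation with OpenSSL, assuming HMAC-SHA1 was the negotiated PRF; buffer handling is simplified:

#include <openssl/evp.h>
#include <openssl/hmac.h>
#include <string.h>

#define KEY_PAD "Key Pad for IKEv2" /* pad string per RFC 7296 */

unsigned int
psk_auth (const unsigned char *secret, size_t secret_len,
          const unsigned char *signed_octets, size_t octets_len,
          unsigned char out[20])
{
  unsigned char psk[20];
  unsigned int len = 0;

  /* psk = prf(Shared Secret, "Key Pad for IKEv2") */
  HMAC (EVP_sha1 (), secret, (int) secret_len,
        (const unsigned char *) KEY_PAD, strlen (KEY_PAD), psk, &len);

  /* AUTH = prf(psk, <SignedOctets>) */
  HMAC (EVP_sha1 (), psk, (int) len, signed_octets, octets_len, out, &len);
  return len;
}

Both peers run the same computation over their view of the signed octets, which is why the code above only needs a memcmp to verify the received AUTH payload.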
(&sa->childs[0].r_proposals); + sa->childs[0].r_proposals = + ikev2_select_proposal (sa->childs[0].i_proposals, + IKEV2_PROTOCOL_ESP); + } } else { @@ -1182,12 +1421,58 @@ ikev2_sa_auth (ikev2_sa_t * sa) vec_free (key_pad); } + +static void +ikev2_sa_auth_init (ikev2_sa_t * sa) +{ + ikev2_main_t *km = &ikev2_main; + u8 *authmsg, *key_pad, *psk = 0, *auth = 0; + ikev2_sa_transform_t *tr_prf; + + tr_prf = + ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF); + + /* only shared key and rsa signature */ + if (!(sa->i_auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC || + sa->i_auth.method == IKEV2_AUTH_METHOD_RSA_SIG)) + { + clib_warning ("unsupported authentication method %u", + sa->i_auth.method); + ikev2_set_state (sa, IKEV2_STATE_AUTH_FAILED); + return; + } + + key_pad = format (0, "%s", IKEV2_KEY_PAD); + authmsg = ikev2_sa_generate_authmsg (sa, 0); + psk = ikev2_calc_prf (tr_prf, sa->i_auth.data, key_pad); + auth = ikev2_calc_prf (tr_prf, psk, authmsg); + + + if (sa->i_auth.method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC) + { + sa->i_auth.data = ikev2_calc_prf (tr_prf, psk, authmsg); + sa->i_auth.method = IKEV2_AUTH_METHOD_SHARED_KEY_MIC; + } + else if (sa->i_auth.method == IKEV2_AUTH_METHOD_RSA_SIG) + { + sa->i_auth.data = ikev2_calc_sign (km->pkey, authmsg); + sa->i_auth.method = IKEV2_AUTH_METHOD_RSA_SIG; + } + + vec_free (psk); + vec_free (key_pad); + vec_free (auth); + vec_free (authmsg); +} + + static int ikev2_create_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa, ikev2_child_sa_t * child) { ipsec_add_del_tunnel_args_t a; ikev2_sa_transform_t *tr; + ikev2_sa_proposal_t *proposals; u8 encr_type = 0; if (!child->r_proposals) @@ -1198,21 +1483,31 @@ ikev2_create_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa, memset (&a, 0, sizeof (a)); a.is_add = 1; - a.local_ip.as_u32 = sa->raddr.as_u32; - a.remote_ip.as_u32 = sa->iaddr.as_u32; - a.local_spi = child->i_proposals[0].spi; - a.remote_spi = child->r_proposals[0].spi; + if (sa->is_initiator) + { + a.local_ip.as_u32 = sa->iaddr.as_u32; + a.remote_ip.as_u32 = sa->raddr.as_u32; + proposals = child->i_proposals; + a.local_spi = child->r_proposals[0].spi; + a.remote_spi = child->i_proposals[0].spi; + } + else + { + a.local_ip.as_u32 = sa->raddr.as_u32; + a.remote_ip.as_u32 = sa->iaddr.as_u32; + proposals = child->r_proposals; + a.local_spi = child->i_proposals[0].spi; + a.remote_spi = child->r_proposals[0].spi; + } a.anti_replay = 1; - tr = - ikev2_sa_get_td_for_type (child->r_proposals, IKEV2_TRANSFORM_TYPE_ESN); + tr = ikev2_sa_get_td_for_type (proposals, IKEV2_TRANSFORM_TYPE_ESN); if (tr) a.esn = tr->esn_type; else a.esn = 0; - tr = - ikev2_sa_get_td_for_type (child->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR); + tr = ikev2_sa_get_td_for_type (proposals, IKEV2_TRANSFORM_TYPE_ENCR); if (tr) { if (tr->encr_type == IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC && tr->key_len) @@ -1246,8 +1541,7 @@ ikev2_create_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa, return 1; } - tr = - ikev2_sa_get_td_for_type (child->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG); + tr = ikev2_sa_get_td_for_type (proposals, IKEV2_TRANSFORM_TYPE_INTEG); if (tr) { if (tr->integ_type != IKEV2_TRANSFORM_INTEG_TYPE_AUTH_HMAC_SHA1_96) @@ -1264,17 +1558,44 @@ ikev2_create_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa, ikev2_calc_child_keys (sa, child); + u8 *loc_ckey, *rem_ckey, *loc_ikey, *rem_ikey; + if (sa->is_initiator) + { + loc_ikey = child->sk_ai; + rem_ikey = child->sk_ar; + loc_ckey = child->sk_ei; + rem_ckey = child->sk_er; + } + else + { + loc_ikey 
= child->sk_ar; + rem_ikey = child->sk_ai; + loc_ckey = child->sk_er; + rem_ckey = child->sk_ei; + } + a.integ_alg = IPSEC_INTEG_ALG_SHA1_96; - a.local_integ_key_len = vec_len (child->sk_ar); - clib_memcpy (a.local_integ_key, child->sk_ar, a.local_integ_key_len); - a.remote_integ_key_len = vec_len (child->sk_ai); - clib_memcpy (a.remote_integ_key, child->sk_ai, a.remote_integ_key_len); + a.local_integ_key_len = vec_len (loc_ikey); + clib_memcpy (a.local_integ_key, loc_ikey, a.local_integ_key_len); + a.remote_integ_key_len = vec_len (rem_ikey); + clib_memcpy (a.remote_integ_key, rem_ikey, a.remote_integ_key_len); a.crypto_alg = encr_type; - a.local_crypto_key_len = vec_len (child->sk_er); - clib_memcpy (a.local_crypto_key, child->sk_er, a.local_crypto_key_len); - a.remote_crypto_key_len = vec_len (child->sk_ei); - clib_memcpy (a.remote_crypto_key, child->sk_ei, a.remote_crypto_key_len); + a.local_crypto_key_len = vec_len (loc_ckey); + clib_memcpy (a.local_crypto_key, loc_ckey, a.local_crypto_key_len); + a.remote_crypto_key_len = vec_len (rem_ckey); + clib_memcpy (a.remote_crypto_key, rem_ckey, a.remote_crypto_key_len); + + if (sa->profile && sa->profile->lifetime) + { + child->time_to_expiration = vlib_time_now (vnm->vlib_main) + + sa->profile->lifetime; + if (sa->profile->lifetime_jitter) + { + child->time_to_expiration += + 1 + (rand () % sa->profile->lifetime_jitter); + } + } ipsec_add_del_tunnel_if (&a); @@ -1287,21 +1608,35 @@ ikev2_delete_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa, { ipsec_add_del_tunnel_args_t a; - if (!vec_len (child->r_proposals)) - return 0; + if (sa->is_initiator) + { + if (!vec_len (child->i_proposals)) + return 0; + + a.is_add = 0; + a.local_ip.as_u32 = sa->iaddr.as_u32; + a.remote_ip.as_u32 = sa->raddr.as_u32; + a.local_spi = child->r_proposals[0].spi; + a.remote_spi = child->i_proposals[0].spi; + } + else + { + if (!vec_len (child->r_proposals)) + return 0; - a.is_add = 0; - a.local_ip.as_u32 = sa->raddr.as_u32; - a.remote_ip.as_u32 = sa->iaddr.as_u32; - a.local_spi = child->i_proposals[0].spi; - a.remote_spi = child->r_proposals[0].spi; + a.is_add = 0; + a.local_ip.as_u32 = sa->raddr.as_u32; + a.remote_ip.as_u32 = sa->iaddr.as_u32; + a.local_spi = child->i_proposals[0].spi; + a.remote_spi = child->r_proposals[0].spi; + } ipsec_add_del_tunnel_if (&a); return 0; } static u32 -ikev2_generate_resp (ikev2_sa_t * sa, ike_header_t * ike) +ikev2_generate_message (ikev2_sa_t * sa, ike_header_t * ike, void *user) { v8 *integ = 0; ike_payload_header_t *ph; @@ -1404,6 +1739,14 @@ ikev2_generate_resp (ikev2_sa_t * sa, ike_header_t * ike) data); vec_free (data); } + else if (sa->state == IKEV2_STATE_SA_INIT) + { + ikev2_payload_add_id (chain, &sa->i_id, IKEV2_PAYLOAD_IDI); + ikev2_payload_add_auth (chain, &sa->i_auth); + ikev2_payload_add_sa (chain, sa->childs[0].i_proposals); + ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI); + ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR); + } else { ikev2_set_state (sa, IKEV2_STATE_DELETED); @@ -1415,10 +1758,13 @@ ikev2_generate_resp (ikev2_sa_t * sa, ike_header_t * ike) /* if pending delete */ if (sa->del) { - /* The response to a request that deletes the IKE SA is an empty - INFORMATIONAL response. */ if (sa->del[0].protocol_id == IKEV2_PROTOCOL_IKE) { + if (sa->is_initiator) + ikev2_payload_add_delete (chain, sa->del); + + /* The response to a request that deletes the IKE SA is an empty + INFORMATIONAL response. 
*/ ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE); } /* The response to a request that deletes ESP or AH SAs will contain @@ -1452,39 +1798,73 @@ ikev2_generate_resp (ikev2_sa_t * sa, ike_header_t * ike) } else if (ike->exchange == IKEV2_EXCHANGE_CREATE_CHILD_SA) { - if (sa->rekey) + if (sa->is_initiator) { - ikev2_payload_add_sa (chain, sa->rekey[0].r_proposal); - ikev2_payload_add_nonce (chain, sa->r_nonce); - ikev2_payload_add_ts (chain, sa->rekey[0].tsi, IKEV2_PAYLOAD_TSI); - ikev2_payload_add_ts (chain, sa->rekey[0].tsr, IKEV2_PAYLOAD_TSR); - vec_del1 (sa->rekey, 0); - } - else if (sa->unsupported_cp) - { - u8 *data = vec_new (u8, 1); - data[0] = sa->unsupported_cp; - ikev2_payload_add_notify (chain, - IKEV2_NOTIFY_MSG_UNSUPPORTED_CRITICAL_PAYLOAD, - data); + ikev2_sa_proposal_t *proposals = (ikev2_sa_proposal_t *) user; + ikev2_notify_t notify; + u8 *data = vec_new (u8, 4); + memset (¬ify, 0, sizeof (notify)); + notify.protocol_id = IKEV2_PROTOCOL_ESP; + notify.spi = sa->childs[0].i_proposals->spi; + *(u32 *) data = clib_host_to_net_u32 (notify.spi); + + ikev2_payload_add_sa (chain, proposals); + ikev2_payload_add_nonce (chain, sa->i_nonce); + ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI); + ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR); + ikev2_payload_add_notify_2 (chain, IKEV2_NOTIFY_MSG_REKEY_SA, data, + ¬ify); + vec_free (data); - sa->unsupported_cp = 0; } else { - ikev2_payload_add_notify (chain, IKEV2_NOTIFY_MSG_NO_ADDITIONAL_SAS, - 0); + if (sa->rekey) + { + ikev2_payload_add_sa (chain, sa->rekey[0].r_proposal); + ikev2_payload_add_nonce (chain, sa->r_nonce); + ikev2_payload_add_ts (chain, sa->rekey[0].tsi, + IKEV2_PAYLOAD_TSI); + ikev2_payload_add_ts (chain, sa->rekey[0].tsr, + IKEV2_PAYLOAD_TSR); + vec_del1 (sa->rekey, 0); + } + else if (sa->unsupported_cp) + { + u8 *data = vec_new (u8, 1); + + data[0] = sa->unsupported_cp; + ikev2_payload_add_notify (chain, + IKEV2_NOTIFY_MSG_UNSUPPORTED_CRITICAL_PAYLOAD, + data); + vec_free (data); + sa->unsupported_cp = 0; + } + else + { + ikev2_payload_add_notify (chain, + IKEV2_NOTIFY_MSG_NO_ADDITIONAL_SAS, + 0); + } } } /* IKEv2 header */ ike->version = IKE_VERSION_2; - ike->flags = IKEV2_HDR_FLAG_RESPONSE; ike->nextpayload = IKEV2_PAYLOAD_SK; tlen = sizeof (*ike); - - + if (sa->is_initiator) + { + ike->flags = IKEV2_HDR_FLAG_INITIATOR; + sa->last_init_msg_id = clib_net_to_host_u32 (ike->msgid); + } + else + { + ike->flags = IKEV2_HDR_FLAG_RESPONSE; + } + + if (ike->exchange == IKEV2_EXCHANGE_SA_INIT) { tlen += vec_len (chain->data); @@ -1518,8 +1898,9 @@ ikev2_generate_resp (ikev2_sa_t * sa, ike_header_t * ike) ike->length = clib_host_to_net_u32 (tlen); /* calc integrity data for whole packet except hash itself */ - integ = ikev2_calc_integr (tr_integ, sa->sk_ar, (u8 *) ike, - tlen - tr_integ->key_trunc); + integ = + ikev2_calc_integr (tr_integ, sa->is_initiator ? 
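In ikev2_generate_message above, direction now decides the header bits and the integrity key in one place: an initiator-built request sets the I flag and tracks its own message ID, a responder reply sets the R flag, and the trailing MAC is keyed with SK_ai for initiator-sent traffic versus SK_ar for responder-sent traffic. A sketch of the flag choice; the bit positions follow the RFC 7296 section 3.1 layout, and the IKEV2_HDR_FLAG_* constants are assumed to match:

#include <stdint.h>

#define HDR_FLAG_INITIATOR (1 << 3) /* I: sender is the original initiator */
#define HDR_FLAG_RESPONSE  (1 << 5) /* R: this message answers a request */

static inline uint8_t
ike_hdr_flags (int is_initiator_request)
{
  return is_initiator_request ? HDR_FLAG_INITIATOR : HDR_FLAG_RESPONSE;
}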
sa->sk_ai : sa->sk_ar, + (u8 *) ike, tlen - tr_integ->key_trunc); clib_memcpy (ike->payload + tlen - tr_integ->key_trunc - sizeof (*ike), integ, tr_integ->key_trunc); @@ -1708,43 +2089,81 @@ ikev2_node_fn (vlib_main_t * vm, sa0 = &sa; memset (sa0, 0, sizeof (*sa0)); - if (ike0->rspi == 0) + if (ike0->flags & IKEV2_HDR_FLAG_INITIATOR) { - sa0->raddr.as_u32 = ip40->dst_address.as_u32; - sa0->iaddr.as_u32 = ip40->src_address.as_u32; - - r = ikev2_retransmit_sa_init (ike0, sa0->iaddr, sa0->raddr); - if (r == 1) + if (ike0->rspi == 0) { - vlib_node_increment_counter (vm, ikev2_node.index, - IKEV2_ERROR_IKE_SA_INIT_RETRANSMIT, - 1); - len = clib_net_to_host_u32 (ike0->length); - goto dispatch0; - } - else if (r == -1) - { - vlib_node_increment_counter (vm, ikev2_node.index, - IKEV2_ERROR_IKE_SA_INIT_IGNORE, - 1); - goto dispatch0; - } + sa0->raddr.as_u32 = ip40->dst_address.as_u32; + sa0->iaddr.as_u32 = ip40->src_address.as_u32; + + r = ikev2_retransmit_sa_init (ike0, sa0->iaddr, + sa0->raddr); + if (r == 1) + { + vlib_node_increment_counter (vm, ikev2_node.index, + IKEV2_ERROR_IKE_SA_INIT_RETRANSMIT, + 1); + len = clib_net_to_host_u32 (ike0->length); + goto dispatch0; + } + else if (r == -1) + { + vlib_node_increment_counter (vm, ikev2_node.index, + IKEV2_ERROR_IKE_SA_INIT_IGNORE, + 1); + goto dispatch0; + } - ikev2_process_sa_init_req (vm, sa0, ike0); + ikev2_process_sa_init_req (vm, sa0, ike0); - if (sa0->state == IKEV2_STATE_SA_INIT) - { - ikev2_sa_free_proposal_vector (&sa0->r_proposals); - sa0->r_proposals = - ikev2_select_proposal (sa0->i_proposals, - IKEV2_PROTOCOL_IKE); - ikev2_generate_sa_init_data (sa0); + if (sa0->state == IKEV2_STATE_SA_INIT) + { + ikev2_sa_free_proposal_vector (&sa0->r_proposals); + sa0->r_proposals = + ikev2_select_proposal (sa0->i_proposals, + IKEV2_PROTOCOL_IKE); + ikev2_generate_sa_init_data (sa0); + } + + if (sa0->state == IKEV2_STATE_SA_INIT + || sa0->state == IKEV2_STATE_NOTIFY_AND_DELETE) + { + len = ikev2_generate_message (sa0, ike0, 0); + } + + if (sa0->state == IKEV2_STATE_SA_INIT) + { + /* add SA to the pool */ + pool_get (km->per_thread_data[cpu_index].sas, sa0); + clib_memcpy (sa0, &sa, sizeof (*sa0)); + hash_set (km->per_thread_data[cpu_index].sa_by_rspi, + sa0->rspi, + sa0 - km->per_thread_data[cpu_index].sas); + } + else + { + ikev2_sa_free_all_vec (sa0); + } } + } + else + { + ikev2_process_sa_init_resp (vm, sa0, ike0); - if (sa0->state == IKEV2_STATE_SA_INIT || - sa0->state == IKEV2_STATE_NOTIFY_AND_DELETE) + if (sa0->state == IKEV2_STATE_SA_INIT) { - len = ikev2_generate_resp (sa0, ike0); + ike0->exchange = IKEV2_EXCHANGE_IKE_AUTH; + uword *p = hash_get (km->sa_by_ispi, ike0->ispi); + if (p) + { + ikev2_sa_t *sai = + pool_elt_at_index (km->sais, p[0]); + + ikev2_complete_sa_data (sa0, sai); + ikev2_calc_keys (sa0); + ikev2_sa_auth_init (sa0); + len = ikev2_generate_message (sa0, ike0, 0); + } } if (sa0->state == IKEV2_STATE_SA_INIT) @@ -1799,7 +2218,23 @@ ikev2_node_fn (vlib_main_t * vm, ikev2_create_tunnel_interface (km->vnet_main, sa0, &sa0->childs[0]); } - len = ikev2_generate_resp (sa0, ike0); + + if (sa0->is_initiator) + { + uword *p = hash_get (km->sa_by_ispi, ike0->ispi); + if (p) + { + ikev2_sa_t *sai = + pool_elt_at_index (km->sais, p[0]); + hash_unset (km->sa_by_ispi, sai->ispi); + ikev2_sa_free_all_vec (sai); + pool_put (km->sais, sai); + } + } + else + { + len = ikev2_generate_message (sa0, ike0, 0); + } } } else if (ike0->exchange == IKEV2_EXCHANGE_INFORMATIONAL) @@ -1839,22 +2274,32 @@ ikev2_node_fn (vlib_main_t * vm, { 
ikev2_child_sa_t *ch_sa; ch_sa = ikev2_sa_get_child (sa0, d->spi, - d->protocol_id); + d->protocol_id, + !sa0->is_initiator); if (ch_sa) { ikev2_delete_tunnel_interface (km->vnet_main, sa0, ch_sa); - vec_add2 (resp, tmp, 1); - tmp->protocol_id = d->protocol_id; - tmp->spi = ch_sa->r_proposals[0].spi; + if (!sa0->is_initiator) + { + vec_add2 (resp, tmp, 1); + tmp->protocol_id = d->protocol_id; + tmp->spi = ch_sa->r_proposals[0].spi; + } ikev2_sa_del_child_sa (sa0, ch_sa); } } - vec_free (sa0->del); - sa0->del = resp; + if (!sa0->is_initiator) + { + vec_free (sa0->del); + sa0->del = resp; + } } } - len = ikev2_generate_resp (sa0, ike0); + if (!sa0->is_initiator) + { + len = ikev2_generate_message (sa0, ike0, 0); + } } } else if (ike0->exchange == IKEV2_EXCHANGE_CREATE_CHILD_SA) @@ -1898,7 +2343,14 @@ ikev2_node_fn (vlib_main_t * vm, ikev2_create_tunnel_interface (km->vnet_main, sa0, child); } - len = ikev2_generate_resp (sa0, ike0); + if (sa0->is_initiator) + { + vec_del1 (sa0->rekey, 0); + } + else + { + len = ikev2_generate_message (sa0, ike0, 0); + } } } } @@ -1915,8 +2367,16 @@ ikev2_node_fn (vlib_main_t * vm, if (len) { next0 = IKEV2_NEXT_IP4_LOOKUP; - ip40->dst_address.as_u32 = sa0->iaddr.as_u32; - ip40->src_address.as_u32 = sa0->raddr.as_u32; + if (sa0->is_initiator) + { + ip40->dst_address.as_u32 = sa0->raddr.as_u32; + ip40->src_address.as_u32 = sa0->iaddr.as_u32; + } + else + { + ip40->dst_address.as_u32 = sa0->iaddr.as_u32; + ip40->src_address.as_u32 = sa0->raddr.as_u32; + } udp0->length = clib_host_to_net_u16 (len + sizeof (udp_header_t)); udp0->checksum = 0; @@ -1979,6 +2439,126 @@ VLIB_REGISTER_NODE (ikev2_node,static) = { /* *INDENT-ON* */ +static clib_error_t * +ikev2_set_initiator_proposals (vlib_main_t * vm, ikev2_sa_t * sa, + ikev2_transforms_set * ts, + ikev2_sa_proposal_t ** proposals, int is_ike) +{ + clib_error_t *r; + ikev2_main_t *km = &ikev2_main; + ikev2_sa_proposal_t *proposal; + vec_add2 (*proposals, proposal, 1); + ikev2_sa_transform_t *td; + int error; + + /* Encryption */ + error = 1; + vec_foreach (td, km->supported_transforms) + { + if (td->type == IKEV2_TRANSFORM_TYPE_ENCR + && td->encr_type == IKEV2_TRANSFORM_ENCR_TYPE_AES_CBC + && td->key_len == ts->crypto_key_size / 8) + { + u16 attr[2]; + attr[0] = clib_host_to_net_u16 (14 | (1 << 15)); + attr[1] = clib_host_to_net_u16 (td->key_len << 3); + vec_add (td->attrs, (u8 *) attr, 4); + vec_add1 (proposal->transforms, *td); + td->attrs = 0; + + error = 0; + break; + } + } + if (error) + { + r = clib_error_return (0, "Unsupported algorithm"); + return r; + } + + /* Integrity */ + error = 1; + vec_foreach (td, km->supported_transforms) + { + if (td->type == IKEV2_TRANSFORM_TYPE_INTEG + && td->integ_type == IKEV2_TRANSFORM_INTEG_TYPE_AUTH_HMAC_SHA1_96) + { + vec_add1 (proposal->transforms, *td); + error = 0; + break; + } + } + if (error) + { + r = clib_error_return (0, "Unsupported algorithm"); + return r; + } + + /* PRF */ + if (is_ike) + { + error = 1; + vec_foreach (td, km->supported_transforms) + { + if (td->type == IKEV2_TRANSFORM_TYPE_PRF + && td->prf_type == IKEV2_TRANSFORM_PRF_TYPE_PRF_HMAC_SHA1) + { + vec_add1 (proposal->transforms, *td); + error = 0; + break; + } + } + if (error) + { + r = clib_error_return (0, "Unsupported algorithm"); + return r; + } + } + + /* DH */ + error = 1; + vec_foreach (td, km->supported_transforms) + { + if (td->type == IKEV2_TRANSFORM_TYPE_DH && td->dh_type == ts->dh_type) + { + vec_add1 (proposal->transforms, *td); + if (is_ike) + { + sa->dh_group = td->dh_type; + } + error = 
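The two u16s built for the encryption transform in ikev2_set_initiator_proposals above form an IKEv2 transform attribute: RFC 7296 section 3.3.5 defines attribute type 14 as Key Length, the top bit selects the short type/value (TV) encoding, and the value carries the key size in bits, hence the key_len << 3. A worked sketch:

#include <arpa/inet.h>
#include <stdint.h>

void
encode_key_length_attr (uint8_t key_len_bytes, uint16_t attr[2])
{
  attr[0] = htons (14 | (1 << 15));                  /* 0x800e: TV, type 14 */
  attr[1] = htons ((uint16_t) (key_len_bytes << 3)); /* bytes -> bits */
}

/* For the AES-CBC-192 case above: key_len_bytes = 24 gives
   attr = { 0x800e, 0x00c0 }, i.e. Key Length = 192 bits. */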
0; + break; + } + } + if (error) + { + r = clib_error_return (0, "Unsupported algorithm"); + return r; + } + + if (!is_ike) + { + error = 1; + vec_foreach (td, km->supported_transforms) + { + if (td->type == IKEV2_TRANSFORM_TYPE_ESN) + { + vec_add1 (proposal->transforms, *td); + error = 0; + break; + } + } + if (error) + { + r = clib_error_return (0, "Unsupported algorithm"); + return r; + } + } + + + return 0; +} + static ikev2_profile_t * ikev2_profile_index_by_name (u8 * name) { @@ -1992,6 +2572,64 @@ ikev2_profile_index_by_name (u8 * name) return pool_elt_at_index (km->profiles, p[0]); } + +static void +ikev2_send_ike (vlib_main_t * vm, ip4_address_t * src, ip4_address_t * dst, + u32 bi0, u32 len) +{ + ip4_header_t *ip40; + udp_header_t *udp0; + vlib_buffer_t *b0; + vlib_frame_t *f; + u32 *to_next; + + b0 = vlib_get_buffer (vm, bi0); + vlib_buffer_advance (b0, -sizeof (udp_header_t)); + udp0 = vlib_buffer_get_current (b0); + vlib_buffer_advance (b0, -sizeof (ip4_header_t)); + ip40 = vlib_buffer_get_current (b0); + + + ip40->ip_version_and_header_length = 0x45; + ip40->tos = 0; + ip40->fragment_id = 0; + ip40->flags_and_fragment_offset = 0; + ip40->ttl = 0xff; + ip40->protocol = IP_PROTOCOL_UDP; + ip40->dst_address.as_u32 = dst->as_u32; + ip40->src_address.as_u32 = src->as_u32; + udp0->dst_port = clib_host_to_net_u16 (500); + udp0->src_port = clib_host_to_net_u16 (500); + udp0->length = clib_host_to_net_u16 (len + sizeof (udp_header_t)); + udp0->checksum = 0; + b0->current_length = len + sizeof (ip4_header_t) + sizeof (udp_header_t); + ip40->length = clib_host_to_net_u16 (b0->current_length); + ip40->checksum = ip4_header_checksum (ip40); + + + /* send the request */ + f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); + to_next = vlib_frame_vector_args (f); + to_next[0] = bi0; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, ip4_lookup_node.index, f); + +} + +static u32 +ikev2_get_new_ike_header_buff (vlib_main_t * vm, ike_header_t ** ike) +{ + u32 bi0; + if (vlib_buffer_alloc (vm, &bi0, 1) != 1) + { + *ike = 0; + return 0; + } + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + *ike = vlib_buffer_get_current (b0); + return bi0; +} + clib_error_t * ikev2_set_local_key (vlib_main_t * vm, u8 * file) { @@ -2018,6 +2656,7 @@ ikev2_add_del_profile (vlib_main_t * vm, u8 * name, int is_add) pool_get (km->profiles, p); memset (p, 0, sizeof (*p)); p->name = vec_dup (name); + p->responder.sw_if_index = ~0; uword index = p - km->profiles; mhash_set_mem (&km->profile_index_by_name, name, &index, 0); } @@ -2144,6 +2783,470 @@ ikev2_set_profile_ts (vlib_main_t * vm, u8 * name, u8 protocol_id, } +clib_error_t * +ikev2_set_profile_responder (vlib_main_t * vm, u8 * name, + u32 sw_if_index, ip4_address_t ip4) +{ + ikev2_profile_t *p; + clib_error_t *r; + + p = ikev2_profile_index_by_name (name); + + if (!p) + { + r = clib_error_return (0, "unknown profile %v", name); + return r; + } + + p->responder.sw_if_index = sw_if_index; + p->responder.ip4 = ip4; + + return 0; +} + +clib_error_t * +ikev2_set_profile_ike_transforms (vlib_main_t * vm, u8 * name, + ikev2_transform_encr_type_t crypto_alg, + ikev2_transform_integ_type_t integ_alg, + ikev2_transform_dh_type_t dh_type, + u32 crypto_key_size) +{ + ikev2_profile_t *p; + clib_error_t *r; + + p = ikev2_profile_index_by_name (name); + + if (!p) + { + r = clib_error_return (0, "unknown profile %v", name); + return r; + } + + p->ike_ts.crypto_alg = crypto_alg; + p->ike_ts.integ_alg = integ_alg; + p->ike_ts.dh_type = dh_type; + p->ike_ts.crypto_key_size = 
crypto_key_size; + return 0; +} + +clib_error_t * +ikev2_set_profile_esp_transforms (vlib_main_t * vm, u8 * name, + ikev2_transform_encr_type_t crypto_alg, + ikev2_transform_integ_type_t integ_alg, + ikev2_transform_dh_type_t dh_type, + u32 crypto_key_size) +{ + ikev2_profile_t *p; + clib_error_t *r; + + p = ikev2_profile_index_by_name (name); + + if (!p) + { + r = clib_error_return (0, "unknown profile %v", name); + return r; + } + + p->esp_ts.crypto_alg = crypto_alg; + p->esp_ts.integ_alg = integ_alg; + p->esp_ts.dh_type = dh_type; + p->esp_ts.crypto_key_size = crypto_key_size; + return 0; +} + +clib_error_t * +ikev2_set_profile_sa_lifetime (vlib_main_t * vm, u8 * name, + u64 lifetime, u32 jitter, u32 handover, + u64 maxdata) +{ + ikev2_profile_t *p; + clib_error_t *r; + + p = ikev2_profile_index_by_name (name); + + if (!p) + { + r = clib_error_return (0, "unknown profile %v", name); + return r; + } + + p->lifetime = lifetime; + p->lifetime_jitter = jitter; + p->handover = handover; + p->lifetime_maxdata = maxdata; + return 0; +} + +clib_error_t * +ikev2_initiate_sa_init (vlib_main_t * vm, u8 * name) +{ + ikev2_profile_t *p; + clib_error_t *r; + ip4_main_t *im = &ip4_main; + ikev2_main_t *km = &ikev2_main; + + p = ikev2_profile_index_by_name (name); + + if (!p) + { + r = clib_error_return (0, "unknown profile %v", name); + return r; + } + + if (p->responder.sw_if_index == ~0 || p->responder.ip4.data_u32 == 0) + { + r = clib_error_return (0, "responder not set for profile %v", name); + return r; + } + + + /* Create the Initiator Request */ + { + ike_header_t *ike0; + u32 bi0 = 0; + ip_lookup_main_t *lm = &im->lookup_main; + u32 if_add_index0; + int len = sizeof (ike_header_t); + + /* Get own iface IP */ + if_add_index0 = + lm->if_address_pool_index_by_sw_if_index[p->responder.sw_if_index]; + ip_interface_address_t *if_add = + pool_elt_at_index (lm->if_address_pool, if_add_index0); + ip4_address_t *if_ip = ip_interface_address_get_address (lm, if_add); + + bi0 = ikev2_get_new_ike_header_buff (vm, &ike0); + + /* Prepare the SA and the IKE payload */ + ikev2_sa_t sa; + memset (&sa, 0, sizeof (ikev2_sa_t)); + ikev2_payload_chain_t *chain = 0; + ikev2_payload_new_chain (chain); + + /* Build the IKE proposal payload */ + ikev2_sa_proposal_t *proposals = 0; + ikev2_set_initiator_proposals (vm, &sa, &p->ike_ts, &proposals, 1); + proposals[0].proposal_num = 1; + proposals[0].protocol_id = IKEV2_PROTOCOL_IKE; + + /* Add and then cleanup proposal data */ + ikev2_payload_add_sa (chain, proposals); + ikev2_sa_free_proposal_vector (&proposals); + + sa.is_initiator = 1; + sa.profile = p; + sa.state = IKEV2_STATE_SA_INIT; + ikev2_generate_sa_init_data (&sa); + ikev2_payload_add_ke (chain, sa.dh_group, sa.i_dh_data); + ikev2_payload_add_nonce (chain, sa.i_nonce); + + /* Build the child SA proposal */ + vec_resize (sa.childs, 1); + ikev2_set_initiator_proposals (vm, &sa, &p->esp_ts, + &sa.childs[0].i_proposals, 0); + sa.childs[0].i_proposals[0].proposal_num = 1; + sa.childs[0].i_proposals[0].protocol_id = IKEV2_PROTOCOL_ESP; + RAND_bytes ((u8 *) & sa.childs[0].i_proposals[0].spi, + sizeof (sa.childs[0].i_proposals[0].spi)); + + + + /* Add NAT detection notification messages (mandatory) */ + u8 nat_detection_source[8 + 8 + 4 + 2]; + u8 *nat_detection_sha1 = vec_new (u8, 20); + + u64 tmpspi = clib_host_to_net_u64 (sa.ispi); + clib_memcpy (&nat_detection_source[0], &tmpspi, sizeof (tmpspi)); + tmpspi = clib_host_to_net_u64 (sa.rspi); + clib_memcpy (&nat_detection_source[8], &tmpspi, sizeof (tmpspi)); + u16 
tmpport = clib_host_to_net_u16 (500); + clib_memcpy (&nat_detection_source[8 + 8 + 4], &tmpport, + sizeof (tmpport)); + u32 tmpip = clib_host_to_net_u32 (if_ip->as_u32); + clib_memcpy (&nat_detection_source[8 + 8], &tmpip, sizeof (tmpip)); + SHA1 (nat_detection_source, sizeof (nat_detection_source), + nat_detection_sha1); + ikev2_payload_add_notify (chain, IKEV2_NOTIFY_MSG_NAT_DETECTION_SOURCE_IP, + nat_detection_sha1); + tmpip = clib_host_to_net_u32 (p->responder.ip4.as_u32); + clib_memcpy (&nat_detection_source[8 + 8], &tmpip, sizeof (tmpip)); + SHA1 (nat_detection_source, sizeof (nat_detection_source), + nat_detection_sha1); + ikev2_payload_add_notify (chain, + IKEV2_NOTIFY_MSG_NAT_DETECTION_DESTINATION_IP, + nat_detection_sha1); + vec_free (nat_detection_sha1); + + u8 *sig_hash_algo = vec_new (u8, 8); + u64 tmpsig = clib_host_to_net_u64 (0x0001000200030004); + clib_memcpy (sig_hash_algo, &tmpsig, sizeof (tmpsig)); + ikev2_payload_add_notify (chain, + IKEV2_NOTIFY_MSG_SIGNATURE_HASH_ALGORITHMS, + sig_hash_algo); + vec_free (sig_hash_algo); + + + /* Buffer update and boilerplate */ + len += vec_len (chain->data); + ike0->nextpayload = chain->first_payload_type; + ike0->length = clib_host_to_net_u32 (len); + clib_memcpy (ike0->payload, chain->data, vec_len (chain->data)); + ikev2_payload_destroy_chain (chain); + + ike0->version = IKE_VERSION_2; + ike0->flags = IKEV2_HDR_FLAG_INITIATOR; + ike0->exchange = IKEV2_EXCHANGE_SA_INIT; + ike0->ispi = sa.ispi; + + /* store whole IKE payload - needed for PSK auth */ + vec_free (sa.last_sa_init_req_packet_data); + vec_add (sa.last_sa_init_req_packet_data, ike0, len); + + /* add data to the SA then add it to the pool */ + sa.iaddr.as_u32 = if_ip->as_u32; + sa.raddr.as_u32 = p->responder.ip4.as_u32; + sa.i_id.type = p->loc_id.type; + sa.i_id.data = vec_dup (p->loc_id.data); + sa.i_auth.method = p->auth.method; + sa.i_auth.hex = p->auth.hex; + sa.i_auth.data = vec_dup (p->auth.data); + sa.i_auth.key = vec_dup (p->auth.key); + vec_add (sa.childs[0].tsi, &p->loc_ts, 1); + vec_add (sa.childs[0].tsr, &p->rem_ts, 1); + + /* add SA to the pool */ + ikev2_sa_t *sa0 = 0; + pool_get (km->sais, sa0); + clib_memcpy (sa0, &sa, sizeof (*sa0)); + hash_set (km->sa_by_ispi, sa0->ispi, sa0 - km->sais); + + ikev2_send_ike (vm, if_ip, &p->responder.ip4, bi0, len); + + } + + return 0; +} + +static void +ikev2_delete_child_sa_internal (vlib_main_t * vm, ikev2_sa_t * sa, + ikev2_child_sa_t * csa) +{ + /* Create the Initiator notification for child SA removal */ + ikev2_main_t *km = &ikev2_main; + ike_header_t *ike0; + u32 bi0 = 0; + int len; + + bi0 = ikev2_get_new_ike_header_buff (vm, &ike0); + + + ike0->exchange = IKEV2_EXCHANGE_INFORMATIONAL; + ike0->ispi = clib_host_to_net_u64 (sa->ispi); + ike0->rspi = clib_host_to_net_u64 (sa->rspi); + vec_resize (sa->del, 1); + sa->del->protocol_id = IKEV2_PROTOCOL_ESP; + sa->del->spi = csa->i_proposals->spi; + ike0->msgid = clib_host_to_net_u32 (sa->last_init_msg_id + 1); + sa->last_init_msg_id = clib_net_to_host_u32 (ike0->msgid); + len = ikev2_generate_message (sa, ike0, 0); + + ikev2_send_ike (vm, &sa->iaddr, &sa->raddr, bi0, len); + + /* delete local child SA */ + ikev2_delete_tunnel_interface (km->vnet_main, sa, csa); + ikev2_sa_del_child_sa (sa, csa); +} + +clib_error_t * +ikev2_initiate_delete_child_sa (vlib_main_t * vm, u32 ispi) +{ + clib_error_t *r; + ikev2_main_t *km = &ikev2_main; + ikev2_main_per_thread_data_t *tkm; + ikev2_sa_t *fsa = 0; + ikev2_child_sa_t *fchild = 0; + + /* Search for the child SA */ + vec_foreach
(tkm, km->per_thread_data) + { + ikev2_sa_t *sa; + if (fchild) + break; + /* *INDENT-OFF* */ + pool_foreach (sa, tkm->sas, ({ + fchild = ikev2_sa_get_child(sa, ispi, IKEV2_PROTOCOL_ESP, 1); + if (fchild) + { + fsa = sa; + break; + } + })); + /* *INDENT-ON* */ + } + + if (!fchild || !fsa) + { + r = clib_error_return (0, "Child SA not found"); + return r; + } + else + { + ikev2_delete_child_sa_internal (vm, fsa, fchild); + } + + return 0; +} + +clib_error_t * +ikev2_initiate_delete_ike_sa (vlib_main_t * vm, u64 ispi) +{ + clib_error_t *r; + ikev2_main_t *km = &ikev2_main; + ikev2_main_per_thread_data_t *tkm; + ikev2_sa_t *fsa = 0; + ikev2_main_per_thread_data_t *ftkm = 0; + + /* Search for the IKE SA */ + vec_foreach (tkm, km->per_thread_data) + { + ikev2_sa_t *sa; + if (fsa) + break; + /* *INDENT-OFF* */ + pool_foreach (sa, tkm->sas, ({ + if (sa->ispi == ispi) + { + fsa = sa; + ftkm = tkm; + break; + } + })); + /* *INDENT-ON* */ + } + + if (!fsa) + { + r = clib_error_return (0, "IKE SA not found"); + return r; + } + + + /* Create the Initiator notification for IKE SA removal */ + { + ike_header_t *ike0; + u32 bi0 = 0; + int len; + + bi0 = ikev2_get_new_ike_header_buff (vm, &ike0); + + + ike0->exchange = IKEV2_EXCHANGE_INFORMATIONAL; + ike0->ispi = clib_host_to_net_u64 (fsa->ispi); + ike0->rspi = clib_host_to_net_u64 (fsa->rspi); + vec_resize (fsa->del, 1); + fsa->del->protocol_id = IKEV2_PROTOCOL_IKE; + fsa->del->spi = ispi; + ike0->msgid = clib_host_to_net_u32 (fsa->last_init_msg_id + 1); + fsa->last_init_msg_id = clib_net_to_host_u32 (ike0->msgid); + len = ikev2_generate_message (fsa, ike0, 0); + + ikev2_send_ike (vm, &fsa->iaddr, &fsa->raddr, bi0, len); + } + + + /* delete local SA */ + ikev2_child_sa_t *c; + vec_foreach (c, fsa->childs) + { + ikev2_delete_tunnel_interface (km->vnet_main, fsa, c); + ikev2_sa_del_child_sa (fsa, c); + } + ikev2_sa_free_all_vec (fsa); + uword *p = hash_get (ftkm->sa_by_rspi, fsa->rspi); + if (p) + { + hash_unset (ftkm->sa_by_rspi, fsa->rspi); + pool_put (ftkm->sas, fsa); + } + + + return 0; +} + +static void +ikev2_rekey_child_sa_internal (vlib_main_t * vm, ikev2_sa_t * sa, + ikev2_child_sa_t * csa) +{ + /* Create the Initiator request for create child SA */ + ike_header_t *ike0; + u32 bi0 = 0; + int len; + + + bi0 = ikev2_get_new_ike_header_buff (vm, &ike0); + + + ike0->version = IKE_VERSION_2; + ike0->flags = IKEV2_HDR_FLAG_INITIATOR; + ike0->exchange = IKEV2_EXCHANGE_CREATE_CHILD_SA; + ike0->ispi = clib_host_to_net_u64 (sa->ispi); + ike0->rspi = clib_host_to_net_u64 (sa->rspi); + ike0->msgid = clib_host_to_net_u32 (sa->last_init_msg_id + 1); + sa->last_init_msg_id = clib_net_to_host_u32 (ike0->msgid); + + ikev2_rekey_t *rekey; + vec_add2 (sa->rekey, rekey, 1); + ikev2_sa_proposal_t *proposals = vec_dup (csa->i_proposals); + + /*need new ispi */ + RAND_bytes ((u8 *) & proposals[0].spi, sizeof (proposals[0].spi)); + rekey->spi = proposals[0].spi; + rekey->ispi = csa->i_proposals->spi; + len = ikev2_generate_message (sa, ike0, proposals); + ikev2_send_ike (vm, &sa->iaddr, &sa->raddr, bi0, len); + vec_free (proposals); +} + +clib_error_t * +ikev2_initiate_rekey_child_sa (vlib_main_t * vm, u32 ispi) +{ + clib_error_t *r; + ikev2_main_t *km = &ikev2_main; + ikev2_main_per_thread_data_t *tkm; + ikev2_sa_t *fsa = 0; + ikev2_child_sa_t *fchild = 0; + + /* Search for the child SA */ + vec_foreach (tkm, km->per_thread_data) + { + ikev2_sa_t *sa; + if (fchild) + break; + /* *INDENT-OFF* */ + pool_foreach (sa, tkm->sas, ({ + fchild = ikev2_sa_get_child(sa, ispi, 
IKEV2_PROTOCOL_ESP, 1); + if (fchild) + { + fsa = sa; + break; + } + })); + /* *INDENT-ON* */ + } + + if (!fchild || !fsa) + { + r = clib_error_return (0, "Child SA not found"); + return r; + } + else + { + ikev2_rekey_child_sa_internal (vm, fsa, fchild); + } + + return 0; +} + clib_error_t * ikev2_init (vlib_main_t * vm) { @@ -2167,6 +3270,9 @@ ikev2_init (vlib_main_t * vm) hash_create (0, sizeof (uword)); } + km->sa_by_ispi = hash_create (0, sizeof (uword)); + + if ((error = vlib_call_init_function (vm, ikev2_cli_init))) return error; @@ -2176,6 +3282,146 @@ } + +static u8 +ikev2_mngr_process_child_sa (ikev2_sa_t * sa, ikev2_child_sa_t * csa) +{ + ikev2_main_t *km = &ikev2_main; + vlib_main_t *vm = km->vlib_main; + f64 now = vlib_time_now (vm); + u8 res = 0; + + if (sa->is_initiator && sa->profile && csa->time_to_expiration + && now > csa->time_to_expiration) + { + if (!csa->is_expired || csa->rekey_retries > 0) + { + ikev2_rekey_child_sa_internal (vm, sa, csa); + csa->time_to_expiration = now + sa->profile->handover; + csa->is_expired = 1; + if (csa->rekey_retries == 0) + { + csa->rekey_retries = 5; + } + else if (csa->rekey_retries > 0) + { + csa->rekey_retries--; + clib_warning ("Rekeying Child SA 0x%x, retries left %d", + csa->i_proposals->spi, csa->rekey_retries); + if (csa->rekey_retries == 0) + { + csa->rekey_retries = -1; + } + } + res |= 1; + } + else + { + csa->time_to_expiration = 0; + ikev2_delete_child_sa_internal (vm, sa, csa); + res |= 1; + } + } + + return res; +} + +static void +ikev2_mngr_process_ipsec_sa (ipsec_sa_t * ipsec_sa) +{ + ikev2_main_t *km = &ikev2_main; + vlib_main_t *vm = km->vlib_main; + ikev2_main_per_thread_data_t *tkm; + ikev2_sa_t *fsa = 0; + ikev2_child_sa_t *fchild = 0; + f64 now = vlib_time_now (vm); + + /* Search for the SA and child SA */ + vec_foreach (tkm, km->per_thread_data) + { + ikev2_sa_t *sa; + if (fchild) + break; + /* *INDENT-OFF* */ + pool_foreach (sa, tkm->sas, ({ + fchild = ikev2_sa_get_child(sa, ipsec_sa->spi, IKEV2_PROTOCOL_ESP, 1); + if (fchild) + { + fsa = sa; + break; + } + })); + /* *INDENT-ON* */ + } + + if (fchild && fsa && fsa->profile && fsa->profile->lifetime_maxdata) + { + if (!fchild->is_expired + && ipsec_sa->total_data_size > fsa->profile->lifetime_maxdata) + { + fchild->time_to_expiration = now; + } + } +} + +static vlib_node_registration_t ikev2_mngr_process_node; + +static uword +ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + ikev2_main_t *km = &ikev2_main; + ipsec_main_t *im = &ipsec_main; + + while (1) + { + u8 req_sent = 0; + vlib_process_wait_for_event_or_clock (vm, 1); + vlib_process_get_events (vm, NULL); + + /* process ike child sas */ + ikev2_main_per_thread_data_t *tkm; + vec_foreach (tkm, km->per_thread_data) + { + ikev2_sa_t *sa; + /* *INDENT-OFF* */ + pool_foreach (sa, tkm->sas, ({ + ikev2_child_sa_t *c; + vec_foreach (c, sa->childs) + { + req_sent |= ikev2_mngr_process_child_sa(sa, c); + } + })); + /* *INDENT-ON* */ + } + + /* process ipsec sas */ + ipsec_sa_t *sa; + /* *INDENT-OFF* */ + pool_foreach (sa, im->sad, ({ + ikev2_mngr_process_ipsec_sa(sa); + })); + /* *INDENT-ON* */ + + if (req_sent) + { + vlib_process_wait_for_event_or_clock (vm, 5); + vlib_process_get_events (vm, NULL); + req_sent = 0; + } + + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ikev2_mngr_process_node, static) = { + .function = ikev2_mngr_process_fn, + .type = VLIB_NODE_TYPE_PROCESS, + .name = + "ikev2-manager-process", +}; + +/* *INDENT-ON* */
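/*
 * Usage sketch: the initiator machinery added above is driven from the
 * debug CLI introduced in ikev2_cli.c later in this patch. The session
 * below is a hypothetical example; the profile name, interface, addresses,
 * shared key and algorithm choices are illustrative only:
 *
 *   ikev2 profile add pr1
 *   ikev2 profile set pr1 auth shared-key-mic string Vpp123
 *   ikev2 profile set pr1 id local fqdn vpp.home
 *   ikev2 profile set pr1 id remote fqdn roadwarrior.example.com
 *   ikev2 profile set pr1 traffic-selector local ip-range 192.168.3.0 - 192.168.3.255 port-range 0 - 65535 protocol 0
 *   ikev2 profile set pr1 traffic-selector remote ip-range 192.168.5.0 - 192.168.5.255 port-range 0 - 65535 protocol 0
 *   ikev2 profile set pr1 responder GigabitEthernet0/8/0 192.168.10.2
 *   ikev2 profile set pr1 ike-crypto-alg aes-cbc 256 ike-integ-alg sha1-96 ike-dh modp-2048
 *   ikev2 profile set pr1 esp-crypto-alg aes-cbc 256 esp-integ-alg sha1-96 esp-dh ecp-256
 *   ikev2 profile set pr1 sa-lifetime 3600 10 5 0
 *   ikev2 initiate sa-init pr1
 *
 * Once the SA is up, "ikev2 initiate rekey-child-sa <ispi>", "ikev2
 * initiate del-child-sa <ispi>" and "ikev2 initiate del-sa <ispi>"
 * exercise the rekey and delete paths above, and the manager process
 * performs the same rekeying automatically when a child SA reaches its
 * configured lifetime.
 */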
/* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ipsec/ikev2.h b/src/vnet/ipsec/ikev2.h index 723fdde8..84a8be53 100644 --- a/src/vnet/ipsec/ikev2.h +++ b/src/vnet/ipsec/ikev2.h @@ -371,6 +371,31 @@ clib_error_t *ikev2_set_profile_ts (vlib_main_t * vm, u8 * name, u8 protocol_id, u16 start_port, u16 end_port, ip4_address_t start_addr, ip4_address_t end_addr, int is_local); +clib_error_t *ikev2_set_profile_responder (vlib_main_t * vm, u8 * name, + u32 sw_if_index, + ip4_address_t ip4); +clib_error_t *ikev2_set_profile_ike_transforms (vlib_main_t * vm, u8 * name, + ikev2_transform_encr_type_t + crypto_alg, + ikev2_transform_integ_type_t + integ_alg, + ikev2_transform_dh_type_t + dh_type, u32 crypto_key_size); +clib_error_t *ikev2_set_profile_esp_transforms (vlib_main_t * vm, u8 * name, + ikev2_transform_encr_type_t + crypto_alg, + ikev2_transform_integ_type_t + integ_alg, + ikev2_transform_dh_type_t + dh_type, u32 crypto_key_size); +clib_error_t *ikev2_set_profile_sa_lifetime (vlib_main_t * vm, u8 * name, + u64 lifetime, u32 jitter, + u32 handover, u64 maxdata); +clib_error_t *ikev2_initiate_sa_init (vlib_main_t * vm, u8 * name); +clib_error_t *ikev2_initiate_delete_child_sa (vlib_main_t * vm, u32 ispi); +clib_error_t *ikev2_initiate_delete_ike_sa (vlib_main_t * vm, u64 ispi); +clib_error_t *ikev2_initiate_rekey_child_sa (vlib_main_t * vm, u32 ispi); + /* ikev2_format.c */ u8 *format_ikev2_auth_method (u8 * s, va_list * args); u8 *format_ikev2_id_type (u8 * s, va_list * args); diff --git a/src/vnet/ipsec/ikev2_cli.c b/src/vnet/ipsec/ikev2_cli.c index 1369c187..5c88d8d4 100644 --- a/src/vnet/ipsec/ikev2_cli.c +++ b/src/vnet/ipsec/ikev2_cli.c @@ -173,14 +173,21 @@ ikev2_profile_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { + vnet_main_t *vnm = vnet_get_main (); unformat_input_t _line_input, *line_input = &_line_input; u8 *name = 0; clib_error_t *r = 0; u32 id_type; u8 *data = 0; u32 tmp1, tmp2, tmp3; + u64 tmp4, tmp5; ip4_address_t ip4; ip4_address_t end_addr; + u32 responder_sw_if_index = (u32) ~ 0; + ip4_address_t responder_ip4; + ikev2_transform_encr_type_t crypto_alg; + ikev2_transform_integ_type_t integ_alg; + ikev2_transform_dh_type_t dh_type; const char *valid_chars = "a-zA-Z0-9_"; @@ -308,6 +315,53 @@ ikev2_profile_add_del_command_fn (vlib_main_t * vm, ip4, end_addr, /*remote */ 0); goto done; } + else if (unformat (line_input, "set %U responder %U %U", + unformat_token, valid_chars, &name, + unformat_vnet_sw_interface, vnm, + &responder_sw_if_index, unformat_ip4_address, + &responder_ip4)) + { + r = + ikev2_set_profile_responder (vm, name, responder_sw_if_index, + responder_ip4); + goto done; + } + else + if (unformat + (line_input, + "set %U ike-crypto-alg %U %u ike-integ-alg %U ike-dh %U", + unformat_token, valid_chars, &name, + unformat_ikev2_transform_encr_type, &crypto_alg, &tmp1, + unformat_ikev2_transform_integ_type, &integ_alg, + unformat_ikev2_transform_dh_type, &dh_type)) + { + r = + ikev2_set_profile_ike_transforms (vm, name, crypto_alg, integ_alg, + dh_type, tmp1); + goto done; + } + else + if (unformat + (line_input, + "set %U esp-crypto-alg %U %u esp-integ-alg %U esp-dh %U", + unformat_token, valid_chars, &name, + unformat_ikev2_transform_encr_type, &crypto_alg, &tmp1, + unformat_ikev2_transform_integ_type, &integ_alg, + unformat_ikev2_transform_dh_type, &dh_type)) + { + r = + ikev2_set_profile_esp_transforms (vm, name, crypto_alg, integ_alg, + dh_type, tmp1); + goto done; + } + else if (unformat 
(line_input, "set %U sa-lifetime %lu %u %u %lu", + unformat_token, valid_chars, &name, + &tmp4, &tmp1, &tmp2, &tmp5)) + { + r = + ikev2_set_profile_sa_lifetime (vm, name, tmp4, tmp1, tmp2, tmp5); + goto done; + } else break; } @@ -332,7 +386,11 @@ VLIB_CLI_COMMAND (ikev2_profile_add_del_command, static) = { "ikev2 profile set id \n" "ikev2 profile set traffic-selector ip-range " " - port-range - " - "protocol ", + "protocol \n" + "ikev2 profile set responder \n" + "ikev2 profile set ike-crypto-alg ike-integ-alg ike-dh \n" + "ikev2 profile set esp-crypto-alg esp-integ-alg esp-dh \n" + "ikev2 profile set sa-lifetime ", .function = ikev2_profile_add_del_command_fn, }; /* *INDENT-ON* */ @@ -462,6 +520,71 @@ VLIB_CLI_COMMAND (set_ikev2_local_key_command, static) = { }; /* *INDENT-ON* */ + +static clib_error_t * +ikev2_initiate_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *r = 0; + u8 *name = 0; + u32 tmp1; + u64 tmp2; + + const char *valid_chars = "a-zA-Z0-9_"; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "sa-init %U", unformat_token, valid_chars, &name)) + { + r = ikev2_initiate_sa_init (vm, name); + goto done; + } + else if (unformat (line_input, "del-child-sa %x", &tmp1)) + { + r = ikev2_initiate_delete_child_sa (vm, tmp1); + goto done; + } + else if (unformat (line_input, "del-sa %lx", &tmp2)) + { + r = ikev2_initiate_delete_ike_sa (vm, tmp2); + goto done; + } + else if (unformat (line_input, "rekey-child-sa %x", &tmp1)) + { + r = ikev2_initiate_rekey_child_sa (vm, tmp1); + goto done; + } + else + break; + } + + r = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + +done: + vec_free (name); + unformat_free (line_input); + return r; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ikev2_initiate_command, static) = { + .path = "ikev2 initiate", + .short_help = + "ikev2 initiate sa-init \n" + "ikev2 initiate del-child-sa \n" + "ikev2 initiate del-sa \n" + "ikev2 initiate rekey-child-sa \n", + .function = ikev2_initiate_command_fn, +}; +/* *INDENT-ON* */ + + clib_error_t * ikev2_cli_init (vlib_main_t * vm) { diff --git a/src/vnet/ipsec/ikev2_crypto.c b/src/vnet/ipsec/ikev2_crypto.c index 32927629..c201d3eb 100644 --- a/src/vnet/ipsec/ikev2_crypto.c +++ b/src/vnet/ipsec/ikev2_crypto.c @@ -343,6 +343,7 @@ ikev2_decrypt_data (ikev2_sa_t * sa, u8 * data, int len) v8 *r; int out_len = 0, block_size; ikev2_sa_transform_t *tr_encr; + u8 *key = sa->is_initiator ? sa->sk_er : sa->sk_ei; tr_encr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR); @@ -357,7 +358,7 @@ ikev2_decrypt_data (ikev2_sa_t * sa, u8 * data, int len) EVP_CIPHER_CTX_init (&ctx); r = vec_new (u8, len - block_size); - EVP_DecryptInit_ex (&ctx, tr_encr->cipher, NULL, sa->sk_ei, data); + EVP_DecryptInit_ex (&ctx, tr_encr->cipher, NULL, key, data); EVP_DecryptUpdate (&ctx, r, &out_len, data + block_size, len - block_size); EVP_DecryptFinal_ex (&ctx, r + out_len, &out_len); @@ -375,6 +376,7 @@ ikev2_encrypt_data (ikev2_sa_t * sa, v8 * src, u8 * dst) int out_len; int bs; ikev2_sa_transform_t *tr_encr; + u8 *key = sa->is_initiator ? 
sa->sk_ei : sa->sk_er; tr_encr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR); @@ -385,7 +387,7 @@ ikev2_encrypt_data (ikev2_sa_t * sa, v8 * src, u8 * dst) EVP_CIPHER_CTX_init (&ctx); - EVP_EncryptInit_ex (&ctx, tr_encr->cipher, NULL, sa->sk_er, dst /* dst */ ); + EVP_EncryptInit_ex (&ctx, tr_encr->cipher, NULL, key, dst /* dst */ ); EVP_EncryptUpdate (&ctx, dst + bs, &out_len, src, vec_len (src)); EVP_CIPHER_CTX_cleanup (&ctx); @@ -407,16 +409,29 @@ ikev2_generate_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t) BN_hex2bn (&dh->g, t->dh_g); DH_generate_key (dh); - sa->r_dh_data = vec_new (u8, t->key_len); - r = BN_bn2bin (dh->pub_key, sa->r_dh_data); - ASSERT (r == t->key_len); + if (sa->is_initiator) + { + sa->i_dh_data = vec_new (u8, t->key_len); + r = BN_bn2bin (dh->pub_key, sa->i_dh_data); + ASSERT (r == t->key_len); - BIGNUM *ex; - sa->dh_shared_key = vec_new (u8, t->key_len); - ex = BN_bin2bn (sa->i_dh_data, vec_len (sa->i_dh_data), NULL); - r = DH_compute_key (sa->dh_shared_key, ex, dh); - ASSERT (r == t->key_len); - BN_clear_free (ex); + sa->dh_private_key = vec_new (u8, t->key_len); + r = BN_bn2bin (dh->priv_key, sa->dh_private_key); + ASSERT (r == t->key_len); + + } + else + { + sa->r_dh_data = vec_new (u8, t->key_len); + r = BN_bn2bin (dh->pub_key, sa->r_dh_data); + ASSERT (r == t->key_len); + BIGNUM *ex; + sa->dh_shared_key = vec_new (u8, t->key_len); + ex = BN_bin2bn (sa->i_dh_data, vec_len (sa->i_dh_data), NULL); + r = DH_compute_key (sa->dh_shared_key, ex, dh); + ASSERT (r == t->key_len); + BN_clear_free (ex); + } DH_free (dh); } else if (t->dh_group == IKEV2_DH_GROUP_ECP) @@ -439,21 +454,113 @@ ikev2_generate_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t) len = t->key_len / 2; EC_POINT_get_affine_coordinates_GFp (group, r_point, x, y, bn_ctx); - sa->r_dh_data = vec_new (u8, t->key_len); - x_off = len - BN_num_bytes (x); - memset (sa->r_dh_data, 0, x_off); - BN_bn2bin (x, sa->r_dh_data + x_off); - y_off = t->key_len - BN_num_bytes (y); - memset (sa->r_dh_data + len, 0, y_off - len); - BN_bn2bin (y, sa->r_dh_data + y_off); + + if (sa->is_initiator) + { + sa->i_dh_data = vec_new (u8, t->key_len); + x_off = len - BN_num_bytes (x); + memset (sa->i_dh_data, 0, x_off); + BN_bn2bin (x, sa->i_dh_data + x_off); + y_off = t->key_len - BN_num_bytes (y); + memset (sa->i_dh_data + len, 0, y_off - len); + BN_bn2bin (y, sa->i_dh_data + y_off); + + const BIGNUM *prv = EC_KEY_get0_private_key (ec); + sa->dh_private_key = vec_new (u8, BN_num_bytes (prv)); + r = BN_bn2bin (prv, sa->dh_private_key); + ASSERT (r == BN_num_bytes (prv)); + } + else + { + sa->r_dh_data = vec_new (u8, t->key_len); + x_off = len - BN_num_bytes (x); + memset (sa->r_dh_data, 0, x_off); + BN_bn2bin (x, sa->r_dh_data + x_off); + y_off = t->key_len - BN_num_bytes (y); + memset (sa->r_dh_data + len, 0, y_off - len); + BN_bn2bin (y, sa->r_dh_data + y_off); + + x = BN_bin2bn (sa->i_dh_data, len, x); + y = BN_bin2bn (sa->i_dh_data + len, len, y); + EC_POINT_set_affine_coordinates_GFp (group, i_point, x, y, bn_ctx); + sa->dh_shared_key = vec_new (u8, t->key_len); + EC_POINT_mul (group, shared_point, NULL, i_point, + EC_KEY_get0_private_key (ec), NULL); + EC_POINT_get_affine_coordinates_GFp (group, shared_point, x, y, + bn_ctx); + x_off = len - BN_num_bytes (x); + memset (sa->dh_shared_key, 0, x_off); + BN_bn2bin (x, sa->dh_shared_key + x_off); + y_off = t->key_len - BN_num_bytes (y); + memset (sa->dh_shared_key + len, 0, y_off - len); + BN_bn2bin (y, sa->dh_shared_key + y_off); + } + + EC_KEY_free (ec); 
+ BN_free (x); + BN_free (y); + BN_CTX_free (bn_ctx); + EC_POINT_free (i_point); + EC_POINT_free (shared_point); + } +} + +void +ikev2_complete_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t) +{ + int r; + + if (t->dh_group == IKEV2_DH_GROUP_MODP) + { + DH *dh = DH_new (); + BN_hex2bn (&dh->p, t->dh_p); + BN_hex2bn (&dh->g, t->dh_g); + dh->priv_key = + BN_bin2bn (sa->dh_private_key, vec_len (sa->dh_private_key), NULL); + + BIGNUM *ex; + sa->dh_shared_key = vec_new (u8, t->key_len); + ex = BN_bin2bn (sa->r_dh_data, vec_len (sa->r_dh_data), NULL); + r = DH_compute_key (sa->dh_shared_key, ex, dh); + ASSERT (r == t->key_len); + BN_clear_free (ex); + DH_free (dh); + } + else if (t->dh_group == IKEV2_DH_GROUP_ECP) + { + EC_KEY *ec = EC_KEY_new_by_curve_name (t->nid); + ASSERT (ec); + + const EC_GROUP *group = EC_KEY_get0_group (ec); + BIGNUM *x = NULL, *y = NULL; + BN_CTX *bn_ctx = BN_CTX_new (); + u16 x_off, y_off, len; + BIGNUM *prv; + + prv = + BN_bin2bn (sa->dh_private_key, vec_len (sa->dh_private_key), NULL); + EC_KEY_set_private_key (ec, prv); + + x = BN_new (); + y = BN_new (); + len = t->key_len / 2; + + x = BN_bin2bn (sa->r_dh_data, len, x); + y = BN_bin2bn (sa->r_dh_data + len, len, y); + EC_POINT *r_point = EC_POINT_new (group); + EC_POINT_set_affine_coordinates_GFp (group, r_point, x, y, bn_ctx); + EC_KEY_set_public_key (ec, r_point); + + EC_POINT *i_point = EC_POINT_new (group); + EC_POINT *shared_point = EC_POINT_new (group); x = BN_bin2bn (sa->i_dh_data, len, x); y = BN_bin2bn (sa->i_dh_data + len, len, y); EC_POINT_set_affine_coordinates_GFp (group, i_point, x, y, bn_ctx); - sa->dh_shared_key = vec_new (u8, t->key_len); - EC_POINT_mul (group, shared_point, NULL, i_point, + EC_POINT_mul (group, shared_point, NULL, r_point, EC_KEY_get0_private_key (ec), NULL); EC_POINT_get_affine_coordinates_GFp (group, shared_point, x, y, bn_ctx); + sa->dh_shared_key = vec_new (u8, t->key_len); x_off = len - BN_num_bytes (x); memset (sa->dh_shared_key, 0, x_off); BN_bn2bin (x, sa->dh_shared_key + x_off); @@ -464,8 +571,10 @@ ikev2_generate_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t) EC_KEY_free (ec); BN_free (x); BN_free (y); + BN_free (prv); BN_CTX_free (bn_ctx); EC_POINT_free (i_point); + EC_POINT_free (r_point); EC_POINT_free (shared_point); } } diff --git a/src/vnet/ipsec/ikev2_payload.c b/src/vnet/ipsec/ikev2_payload.c index dd14812b..34595380 100644 --- a/src/vnet/ipsec/ikev2_payload.c +++ b/src/vnet/ipsec/ikev2_payload.c @@ -132,14 +132,29 @@ ikev2_payload_add_data (ikev2_payload_chain_t * c, u8 * data) void ikev2_payload_add_notify (ikev2_payload_chain_t * c, u16 msg_type, u8 * data) +{ + ikev2_payload_add_notify_2(c, msg_type, data, 0); +} + +void +ikev2_payload_add_notify_2 (ikev2_payload_chain_t * c, u16 msg_type, + u8 * data, ikev2_notify_t * notify) { ike_notify_payload_header_t *n; n = (ike_notify_payload_header_t *) ikev2_payload_add_hdr (c, - IKEV2_PAYLOAD_NOTIFY, - sizeof (*n)); + IKEV2_PAYLOAD_NOTIFY, + sizeof (*n)); n->msg_type = clib_host_to_net_u16 (msg_type); + if (notify) + { + n->protocol_id = notify->protocol_id; + if (notify->spi) + { + n->spi_size = 4; + } + } ikev2_payload_add_data (c, data); } diff --git a/src/vnet/ipsec/ikev2_priv.h b/src/vnet/ipsec/ikev2_priv.h index 9f67ad2a..5a3dc520 100644 --- a/src/vnet/ipsec/ikev2_priv.h +++ b/src/vnet/ipsec/ikev2_priv.h @@ -107,6 +107,21 @@ typedef struct ip4_address_t end_addr; } ikev2_ts_t; +typedef struct +{ + u32 sw_if_index; + ip4_address_t ip4; +} ikev2_responder_t; + +typedef struct +{ + ikev2_transform_encr_type_t 
crypto_alg; + ikev2_transform_integ_type_t integ_alg; + ikev2_transform_dh_type_t dh_type; + u32 crypto_key_size; +} ikev2_transforms_set; + + typedef struct { ikev2_id_type_t type:8; @@ -128,6 +143,11 @@ typedef struct u8 *sk_ar; u8 *sk_ei; u8 *sk_er; + + /* lifetime data */ + f64 time_to_expiration; + u8 is_expired; + i8 rekey_retries; } ikev2_child_sa_t; typedef struct @@ -140,6 +160,7 @@ typedef struct { u8 protocol_id; u32 spi; + u32 ispi; ikev2_sa_proposal_t *i_proposal; ikev2_sa_proposal_t *r_proposal; ikev2_ts_t *tsi; @@ -154,6 +175,24 @@ typedef struct u8 *data; } ikev2_notify_t; +typedef struct +{ + u8 *name; + u8 is_enabled; + + ikev2_auth_t auth; + ikev2_id_t loc_id; + ikev2_id_t rem_id; + ikev2_ts_t loc_ts; + ikev2_ts_t rem_ts; + ikev2_responder_t responder; + ikev2_transforms_set ike_ts; + ikev2_transforms_set esp_ts; + u64 lifetime; + u64 lifetime_maxdata; + u32 lifetime_jitter; + u32 handover; +} ikev2_profile_t; typedef struct { @@ -170,6 +209,7 @@ typedef struct /* DH data */ u16 dh_group; u8 *dh_shared_key; + u8 *dh_private_key; u8 *i_dh_data; u8 *r_dh_data; @@ -208,20 +248,13 @@ typedef struct u32 last_msg_id; u8 *last_res_packet_data; + u8 is_initiator; + u32 last_init_msg_id; + ikev2_profile_t *profile; + ikev2_child_sa_t *childs; } ikev2_sa_t; -typedef struct -{ - u8 *name; - u8 is_enabled; - - ikev2_auth_t auth; - ikev2_id_t loc_id; - ikev2_id_t rem_id; - ikev2_ts_t loc_ts; - ikev2_ts_t rem_ts; -} ikev2_profile_t; typedef struct { @@ -250,6 +283,11 @@ typedef struct vlib_main_t *vlib_main; vnet_main_t *vnet_main; + /* pool of IKEv2 Security Associations created in initiator mode */ + ikev2_sa_t *sais; + /* hash */ + uword *sa_by_ispi; + ikev2_main_per_thread_data_t *per_thread_data; } ikev2_main_t; @@ -269,6 +307,7 @@ v8 *ikev2_calc_integr (ikev2_sa_transform_t * tr, v8 * key, u8 * data, v8 *ikev2_decrypt_data (ikev2_sa_t * sa, u8 * data, int len); int ikev2_encrypt_data (ikev2_sa_t * sa, v8 * src, u8 * dst); void ikev2_generate_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t); +void ikev2_complete_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t); int ikev2_verify_sign (EVP_PKEY * pkey, u8 * sigbuf, u8 * data); u8 *ikev2_calc_sign (EVP_PKEY * pkey, u8 * data); EVP_PKEY *ikev2_load_cert_file (u8 * file); @@ -291,6 +330,8 @@ typedef struct void ikev2_payload_add_notify (ikev2_payload_chain_t * c, u16 msg_type, u8 * data); +void ikev2_payload_add_notify_2 (ikev2_payload_chain_t * c, u16 msg_type, + u8 * data, ikev2_notify_t * notify); void ikev2_payload_add_sa (ikev2_payload_chain_t * c, ikev2_sa_proposal_t * proposals); void ikev2_payload_add_ke (ikev2_payload_chain_t * c, u16 dh_group, diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api index 178bb757..ef090f84 100644 --- a/src/vnet/ipsec/ipsec.api +++ b/src/vnet/ipsec/ipsec.api @@ -394,6 +394,234 @@ define ikev2_set_local_key_reply i32 retval; }; +/** \brief IKEv2: Set IKEv2 responder interface and IP address + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + + @param name - IKEv2 profile name + @param sw_if_index - interface index + @param address - interface address +*/ +define ikev2_set_responder +{ + u32 client_index; + u32 context; + + u8 name[64]; + u32 sw_if_index; + u8 address[4]; +}; + +/** \brief Reply for IKEv2: Set IKEv2 responder interface and IP address + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define ikev2_set_responder_reply +{ + u32 context; + i32 retval; 
+}; + + +/** \brief IKEv2: Set IKEv2 IKE transforms in SA_INIT proposal (RFC 7296) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + + @param name - IKEv2 profile name + @param crypto_alg - encryption algorithm + @param crypto_key_size - encryption key size + @param integ_alg - integrity algorithm + @param dh_group - Diffie-Hellman group + +*/ +define ikev2_set_ike_transforms +{ + u32 client_index; + u32 context; + + u8 name[64]; + u32 crypto_alg; + u32 crypto_key_size; + u32 integ_alg; + u32 dh_group; +}; + +/** \brief Reply for IKEv2: Set IKEv2 IKE transforms + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define ikev2_set_ike_transforms_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IKEv2: Set IKEv2 ESP transforms in SA_INIT proposal (RFC 7296) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + + @param name - IKEv2 profile name + @param crypto_alg - encryption algorithm + @param crypto_key_size - encryption key size + @param integ_alg - integrity algorithm + @param dh_group - Diffie-Hellman group + +*/ +define ikev2_set_esp_transforms +{ + u32 client_index; + u32 context; + + u8 name[64]; + u32 crypto_alg; + u32 crypto_key_size; + u32 integ_alg; + u32 dh_group; +}; + +/** \brief Reply for IKEv2: Set IKEv2 ESP transforms + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define ikev2_set_esp_transforms_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IKEv2: Set Child SA lifetime, limited by time and/or data + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + + @param name - IKEv2 profile name + @param lifetime - SA maximum lifetime in seconds (0 to disable) + @param lifetime_jitter - Jitter added to prevent simultaneous rekeying + @param handover - Handover time + @param lifetime_maxdata - SA maximum lifetime in bytes (0 to disable) + +*/ +define ikev2_set_sa_lifetime +{ + u32 client_index; + u32 context; + + u8 name[64]; + u64 lifetime; + u32 lifetime_jitter; + u32 handover; + u64 lifetime_maxdata; +}; + +/** \brief Reply for IKEv2: Set Child SA lifetime + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define ikev2_set_sa_lifetime_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IKEv2: Initiate the SA_INIT exchange + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + + @param name - IKEv2 profile name + +*/ +define ikev2_initiate_sa_init +{ + u32 client_index; + u32 context; + + u8 name[64]; +}; + +/** \brief Reply for IKEv2: Initiate the SA_INIT exchange + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define ikev2_initiate_sa_init_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IKEv2: Initiate the delete IKE SA exchange + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + + @param ispi - IKE SA initiator SPI + +*/ +define ikev2_initiate_del_ike_sa +{ + u32 client_index; + u32 context; + + u64 ispi; +}; + +/** \brief Reply for IKEv2: Initiate the delete IKE SA exchange + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/
+define ikev2_initiate_del_ike_sa_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IKEv2: Initiate the delete Child SA exchange + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + + @param ispi - Child SA initiator SPI + +*/ +define ikev2_initiate_del_child_sa +{ + u32 client_index; + u32 context; + + u32 ispi; +}; + +/** \brief Reply for IKEv2: Initiate the delete Child SA exchange + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define ikev2_initiate_del_child_sa_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IKEv2: Initiate the rekey Child SA exchange + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + + @param ispi - Child SA initiator SPI + +*/ +define ikev2_initiate_rekey_child_sa +{ + u32 client_index; + u32 context; + + u32 ispi; +}; + +/** \brief Reply for IKEv2: Initiate the rekey Child SA exchange + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define ikev2_initiate_rekey_child_sa_reply +{ + u32 context; + i32 retval; +}; + /** \brief Dump ipsec policy database data @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h index 6726dba0..58f0f145 100644 --- a/src/vnet/ipsec/ipsec.h +++ b/src/vnet/ipsec/ipsec.h @@ -127,6 +127,9 @@ typedef struct u32 last_seq; u32 last_seq_hi; u64 replay_window; + + /*lifetime data */ + u64 total_data_size; } ipsec_sa_t; typedef struct diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index 30732266..49b475cf 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -58,7 +58,15 @@ _(IKEV2_PROFILE_ADD_DEL, ikev2_profile_add_del) \ _(IKEV2_PROFILE_SET_AUTH, ikev2_profile_set_auth) \ _(IKEV2_PROFILE_SET_ID, ikev2_profile_set_id) \ _(IKEV2_PROFILE_SET_TS, ikev2_profile_set_ts) \ -_(IKEV2_SET_LOCAL_KEY, ikev2_set_local_key) +_(IKEV2_SET_LOCAL_KEY, ikev2_set_local_key) \ +_(IKEV2_SET_RESPONDER, ikev2_set_responder) \ +_(IKEV2_SET_IKE_TRANSFORMS, ikev2_set_ike_transforms) \ +_(IKEV2_SET_ESP_TRANSFORMS, ikev2_set_esp_transforms) \ +_(IKEV2_SET_SA_LIFETIME, ikev2_set_sa_lifetime) \ +_(IKEV2_INITIATE_SA_INIT, ikev2_initiate_sa_init) \ +_(IKEV2_INITIATE_DEL_IKE_SA, ikev2_initiate_del_ike_sa) \ +_(IKEV2_INITIATE_DEL_CHILD_SA, ikev2_initiate_del_child_sa) \ +_(IKEV2_INITIATE_REKEY_CHILD_SA, ikev2_initiate_rekey_child_sa) static void vl_api_ipsec_spd_add_del_t_handler (vl_api_ipsec_spd_add_del_t * mp) @@ -461,6 +469,194 @@ vl_api_ikev2_set_local_key_t_handler (vl_api_ikev2_set_local_key_t * mp) REPLY_MACRO (VL_API_IKEV2_SET_LOCAL_KEY_REPLY); } +static void +vl_api_ikev2_set_responder_t_handler (vl_api_ikev2_set_responder_t * mp) +{ + vl_api_ikev2_set_responder_reply_t *rmp; + int rv = 0; + +#if WITH_LIBSSL > 0 + vlib_main_t *vm = vlib_get_main (); + clib_error_t *error; + + u8 *tmp = format (0, "%s", mp->name); + ip4_address_t ip4; + clib_memcpy (&ip4, mp->address, sizeof (ip4)); + + error = ikev2_set_profile_responder (vm, tmp, mp->sw_if_index, ip4); + vec_free (tmp); + if (error) + rv = VNET_API_ERROR_UNSPECIFIED; +#else + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif + + REPLY_MACRO (VL_API_IKEV2_SET_RESPONDER_REPLY); +} + +static void +vl_api_ikev2_set_ike_transforms_t_handler (vl_api_ikev2_set_ike_transforms_t * + mp) +{ + 
vl_api_ikev2_set_ike_transforms_reply_t *rmp; + int rv = 0; + +#if WITH_LIBSSL > 0 + vlib_main_t *vm = vlib_get_main (); + clib_error_t *error; + + u8 *tmp = format (0, "%s", mp->name); + + error = + ikev2_set_profile_ike_transforms (vm, tmp, mp->crypto_alg, mp->integ_alg, + mp->dh_group, mp->crypto_key_size); + vec_free (tmp); + if (error) + rv = VNET_API_ERROR_UNSPECIFIED; +#else + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif + + REPLY_MACRO (VL_API_IKEV2_SET_IKE_TRANSFORMS_REPLY); +} + +static void +vl_api_ikev2_set_esp_transforms_t_handler (vl_api_ikev2_set_esp_transforms_t * + mp) +{ + vl_api_ikev2_set_esp_transforms_reply_t *rmp; + int rv = 0; + +#if WITH_LIBSSL > 0 + vlib_main_t *vm = vlib_get_main (); + clib_error_t *error; + + u8 *tmp = format (0, "%s", mp->name); + + error = + ikev2_set_profile_esp_transforms (vm, tmp, mp->crypto_alg, mp->integ_alg, + mp->dh_group, mp->crypto_key_size); + vec_free (tmp); + if (error) + rv = VNET_API_ERROR_UNSPECIFIED; +#else + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif + + REPLY_MACRO (VL_API_IKEV2_SET_ESP_TRANSFORMS_REPLY); +} + +static void +vl_api_ikev2_set_sa_lifetime_t_handler (vl_api_ikev2_set_sa_lifetime_t * mp) +{ + vl_api_ikev2_set_sa_lifetime_reply_t *rmp; + int rv = 0; + +#if WITH_LIBSSL > 0 + vlib_main_t *vm = vlib_get_main (); + clib_error_t *error; + + u8 *tmp = format (0, "%s", mp->name); + + error = + ikev2_set_profile_sa_lifetime (vm, tmp, mp->lifetime, mp->lifetime_jitter, + mp->handover, mp->lifetime_maxdata); + vec_free (tmp); + if (error) + rv = VNET_API_ERROR_UNSPECIFIED; +#else + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif + + REPLY_MACRO (VL_API_IKEV2_SET_SA_LIFETIME_REPLY); +} + +static void +vl_api_ikev2_initiate_sa_init_t_handler (vl_api_ikev2_initiate_sa_init_t * mp) +{ + vl_api_ikev2_initiate_sa_init_reply_t *rmp; + int rv = 0; + +#if WITH_LIBSSL > 0 + vlib_main_t *vm = vlib_get_main (); + clib_error_t *error; + + u8 *tmp = format (0, "%s", mp->name); + + error = ikev2_initiate_sa_init (vm, tmp); + vec_free (tmp); + if (error) + rv = VNET_API_ERROR_UNSPECIFIED; +#else + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif + + REPLY_MACRO (VL_API_IKEV2_INITIATE_SA_INIT_REPLY); +} + +static void +vl_api_ikev2_initiate_del_ike_sa_t_handler (vl_api_ikev2_initiate_del_ike_sa_t + * mp) +{ + vl_api_ikev2_initiate_del_ike_sa_reply_t *rmp; + int rv = 0; + +#if WITH_LIBSSL > 0 + vlib_main_t *vm = vlib_get_main (); + clib_error_t *error; + + error = ikev2_initiate_delete_ike_sa (vm, mp->ispi); + if (error) + rv = VNET_API_ERROR_UNSPECIFIED; +#else + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif + + REPLY_MACRO (VL_API_IKEV2_INITIATE_DEL_IKE_SA_REPLY); +} + +static void + vl_api_ikev2_initiate_del_child_sa_t_handler + (vl_api_ikev2_initiate_del_child_sa_t * mp) +{ + vl_api_ikev2_initiate_del_child_sa_reply_t *rmp; + int rv = 0; + +#if WITH_LIBSSL > 0 + vlib_main_t *vm = vlib_get_main (); + clib_error_t *error; + + error = ikev2_initiate_delete_child_sa (vm, mp->ispi); + if (error) + rv = VNET_API_ERROR_UNSPECIFIED; +#else + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif + + REPLY_MACRO (VL_API_IKEV2_INITIATE_DEL_CHILD_SA_REPLY); +} + +static void + vl_api_ikev2_initiate_rekey_child_sa_t_handler + (vl_api_ikev2_initiate_rekey_child_sa_t * mp) +{ + vl_api_ikev2_initiate_rekey_child_sa_reply_t *rmp; + int rv = 0; + +#if WITH_LIBSSL > 0 + vlib_main_t *vm = vlib_get_main (); + clib_error_t *error; + + error = ikev2_initiate_rekey_child_sa (vm, mp->ispi); + if (error) + rv = VNET_API_ERROR_UNSPECIFIED; +#else + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif + 
+ REPLY_MACRO (VL_API_IKEV2_INITIATE_REKEY_CHILD_SA_REPLY); +} + /* * ipsec_api_hookup * Add vpe's API message handlers to the table. -- cgit 1.2.3-korg From a9a20e7f69f4a91a4d5267ab5ce14125bdc7d6c6 Mon Sep 17 00:00:00 2001 From: Billy McFall Date: Wed, 15 Feb 2017 11:39:12 -0500 Subject: VPP-635: CLI Memory leak with invalid parameter In the CLI parsing, below is a common pattern: /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) return 0; while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat (line_input, "x")) x = 1; : else return clib_error_return (0, "unknown input `%U'", format_unformat_error, line_input); } unformat_free (line_input); The 'else' returns if an unknown string is encountered. There is a memory leak because 'unformat_free(line_input)' is not called. There is a large number of instances of this pattern. Replaced the previous pattern with: /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) return 0; while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat (line_input, "x")) x = 1; : else { error = clib_error_return (0, "unknown input `%U'", format_unformat_error, line_input); goto done; } } /* ...Remaining code... */ done: unformat_free (line_input); return error; } In multiple files, 'unformat_free (line_input);' was never called at all, so there was a memory leak whether an invalid string was entered or not. Also, there were multiple instances where: error = clib_error_return (0, "unknown input `%U'", format_unformat_error, line_input); used 'input' as the last parameter instead of 'line_input'. The result is that the error output did not contain the offending input, just an empty string. Fixed all of those as well. There are a lot of files, and this was very mind-numbing work, so the fix was kept to a single pattern to avoid mistakes.
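For reference, the corrected pattern written out as one complete handler looks like this. This is a minimal sketch, not code from any of the files below: the command function name and its single "enable" keyword are illustrative.

    static clib_error_t *
    example_command_fn (vlib_main_t * vm, unformat_input_t * input,
                        vlib_cli_command_t * cmd)
    {
      unformat_input_t _line_input, *line_input = &_line_input;
      clib_error_t *error = 0;
      u8 enable = 0;

      /* Get a line of input. */
      if (!unformat_user (input, unformat_line_input, line_input))
        return 0;

      while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
        {
          if (unformat (line_input, "enable"))
            enable = 1;
          else
            {
              /* No early return: record the error and jump to the single
                 cleanup site so line_input is always freed. The error
                 references line_input, not input, so the offending token
                 is actually printed. */
              error = clib_error_return (0, "unknown input `%U'",
                                         format_unformat_error, line_input);
              goto done;
            }
        }

      /* ... act on 'enable' here ... */

    done:
      unformat_free (line_input);
      return error;
    }

Every exit taken after unformat_user() has succeeded now funnels through 'done', so the line_input buffer is freed exactly once on both the success and error paths.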
Change-Id: I8902f0c32a47dd7fb3bb3471a89818571702f1d2 Signed-off-by: Billy McFall Signed-off-by: Dave Barach --- build-root/emacs-lisp/tunnel-c-skel.el | 19 ++- src/plugins/ila/ila.c | 25 ++- src/plugins/lb/cli.c | 99 ++++++----- src/plugins/sixrd/sixrd.c | 42 +++-- src/plugins/snat/snat.c | 139 +++++++++++----- src/vlib/threads_cli.c | 79 ++++++--- src/vlib/trace.c | 13 +- src/vlib/unix/cli.c | 22 ++- src/vnet/devices/af_packet/cli.c | 56 +++++-- src/vnet/devices/dpdk/cli.c | 290 +++++++++++++++++++++++---------- src/vnet/devices/dpdk/ipsec/cli.c | 15 +- src/vnet/devices/netmap/cli.c | 54 ++++-- src/vnet/devices/virtio/vhost-user.c | 62 +++++-- src/vnet/gre/interface.c | 35 ++-- src/vnet/ip/ip4_source_check.c | 6 +- src/vnet/ip/ip4_test.c | 15 +- src/vnet/ip/ip6_neighbor.c | 27 ++- src/vnet/ip/lookup.c | 34 ++-- src/vnet/ipsec-gre/interface.c | 34 ++-- src/vnet/ipsec/ipsec_cli.c | 177 +++++++++++++------- src/vnet/l2/l2_patch.c | 26 ++- src/vnet/l2/l2_xcrw.c | 34 +++- src/vnet/l2tp/l2tp.c | 39 +++-- src/vnet/lisp-cp/lisp_cli.c | 139 ++++++++++++---- src/vnet/lisp-gpe/interface.c | 58 +++++-- src/vnet/lisp-gpe/lisp_gpe.c | 13 +- src/vnet/map/map.c | 186 +++++++++++++++------ src/vnet/mpls/mpls.c | 2 + src/vnet/mpls/mpls_tunnel.c | 19 ++- src/vnet/pg/cli.c | 39 +++-- src/vnet/policer/node_funcs.c | 19 ++- src/vnet/policer/policer.c | 13 +- src/vnet/unix/tapcli.c | 57 +++++-- src/vnet/vxlan-gpe/vxlan_gpe.c | 62 +++++-- src/vnet/vxlan/vxlan.c | 81 ++++++--- src/vpp/app/l2t.c | 9 +- src/vpp/app/vpe_cli.c | 24 ++- 37 files changed, 1487 insertions(+), 576 deletions(-) (limited to 'src/vnet/devices') diff --git a/build-root/emacs-lisp/tunnel-c-skel.el b/build-root/emacs-lisp/tunnel-c-skel.el index aa260e53..a1b1757d 100644 --- a/build-root/emacs-lisp/tunnel-c-skel.el +++ b/build-root/emacs-lisp/tunnel-c-skel.el @@ -288,6 +288,7 @@ static clib_error_t * vlib_cli_command_t * cmd) { unformat_input_t _line_input, * line_input = &_line_input; + clib_error_t *error = 0; ip4_address_t src, dst; u8 is_add = 1; u8 src_set = 0; @@ -322,13 +323,19 @@ static clib_error_t * { encap_fib_index = fib_index_from_fib_id (tmp); if (encap_fib_index == ~0) - return clib_error_return (0, \"nonexistent encap fib id %d\", tmp); + { + unformat_free (line_input); + return clib_error_return (0, \"nonexistent encap fib id %d\", tmp); + } } else if (unformat (line_input, \"decap-vrf-id %d\", &tmp)) { decap_fib_index = fib_index_from_fib_id (tmp); if (decap_fib_index == ~0) - return clib_error_return (0, \"nonexistent decap fib id %d\", tmp); + { + unformat_free (line_input); + return clib_error_return (0, \"nonexistent decap fib id %d\", tmp); + } } else if (unformat (line_input, \"decap-next %U\", unformat_decap_next, &decap_next_index)) @@ -346,8 +353,12 @@ static clib_error_t * * in the " ENCAP_STACK " header */ else - return clib_error_return (0, \"parse error: '%U'\", - format_unformat_error, line_input); + { + error = clib_error_return (0, \"parse error: '%U'\", + format_unformat_error, line_input); + unformat_free (line_input); + return error; + } } unformat_free (line_input); diff --git a/src/plugins/ila/ila.c b/src/plugins/ila/ila.c index e0f3907f..52c7ea55 100644 --- a/src/plugins/ila/ila.c +++ b/src/plugins/ila/ila.c @@ -949,6 +949,7 @@ ila_entry_command_fn (vlib_main_t * vm, ila_add_del_entry_args_t args = { 0 }; u8 next_hop_set = 0; int ret; + clib_error_t *error = 0; args.type = ILA_TYPE_IID; args.csum_mode = ILA_CSUM_MODE_NO_ACTION; @@ -986,19 +987,29 @@ ila_entry_command_fn (vlib_main_t * vm, else if 
(unformat (line_input, "del")) args.is_del = 1; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (!next_hop_set) - return clib_error_return (0, "Specified a next hop"); + { + error = clib_error_return (0, "Specified a next hop"); + goto done; + } if ((ret = ila_add_del_entry (&args))) - return clib_error_return (0, "ila_add_del_entry returned error %d", ret); + { + error = clib_error_return (0, "ila_add_del_entry returned error %d", ret); + goto done; + } - return NULL; +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (ila_entry_command, static) = diff --git a/src/plugins/lb/cli.c b/src/plugins/lb/cli.c index b59c6426..6452a875 100644 --- a/src/plugins/lb/cli.c +++ b/src/plugins/lb/cli.c @@ -28,13 +28,16 @@ lb_vip_command_fn (vlib_main_t * vm, int ret; u32 gre4 = 0; lb_vip_type_t type; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; - if (!unformat(line_input, "%U", unformat_ip46_prefix, &prefix, &plen, IP46_TYPE_ANY, &plen)) - return clib_error_return (0, "invalid vip prefix: '%U'", - format_unformat_error, line_input); + if (!unformat(line_input, "%U", unformat_ip46_prefix, &prefix, &plen, IP46_TYPE_ANY, &plen)) { + error = clib_error_return (0, "invalid vip prefix: '%U'", + format_unformat_error, line_input); + goto done; + } while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { @@ -46,13 +49,13 @@ lb_vip_command_fn (vlib_main_t * vm, gre4 = 1; else if (unformat(line_input, "encap gre6")) gre4 = 0; - else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + else { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (ip46_prefix_is_ip4(&prefix, plen)) { type = (gre4)?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6; @@ -65,17 +68,25 @@ lb_vip_command_fn (vlib_main_t * vm, u32 index; if (!del) { if ((ret = lb_vip_add(&prefix, plen, type, new_len, &index))) { - return clib_error_return (0, "lb_vip_add error %d", ret); + error = clib_error_return (0, "lb_vip_add error %d", ret); + goto done; } else { vlib_cli_output(vm, "lb_vip_add ok %d", index); } } else { - if ((ret = lb_vip_find_index(&prefix, plen, &index))) - return clib_error_return (0, "lb_vip_find_index error %d", ret); - else if ((ret = lb_vip_del(index))) - return clib_error_return (0, "lb_vip_del error %d", ret); + if ((ret = lb_vip_find_index(&prefix, plen, &index))) { + error = clib_error_return (0, "lb_vip_find_index error %d", ret); + goto done; + } else if ((ret = lb_vip_del(index))) { + error = clib_error_return (0, "lb_vip_del error %d", ret); + goto done; + } } - return NULL; + +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (lb_vip_command, static) = @@ -96,16 +107,21 @@ lb_as_command_fn (vlib_main_t * vm, u32 vip_index; u8 del = 0; int ret; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; - if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY)) - return clib_error_return (0, "invalid as address: '%U'", - format_unformat_error, line_input); + if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY)) { + error = clib_error_return (0, "invalid as address: '%U'", + 
format_unformat_error, line_input); + goto done; + } - if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index))) - return clib_error_return (0, "lb_vip_find_index error %d", ret); + if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index))) { + error = clib_error_return (0, "lb_vip_find_index error %d", ret); + goto done; + } while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { @@ -114,15 +130,15 @@ lb_as_command_fn (vlib_main_t * vm, } else if (unformat(line_input, "del")) { del = 1; } else { - vec_free(as_array); - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; } } if (!vec_len(as_array)) { - vec_free(as_array); - return clib_error_return (0, "No AS address provided"); + error = clib_error_return (0, "No AS address provided"); + goto done; } lb_garbage_collection(); @@ -130,18 +146,21 @@ lb_as_command_fn (vlib_main_t * vm, if (del) { if ((ret = lb_vip_del_ass(vip_index, as_array, vec_len(as_array)))) { - vec_free(as_array); - return clib_error_return (0, "lb_vip_del_ass error %d", ret); + error = clib_error_return (0, "lb_vip_del_ass error %d", ret); + goto done; } } else { if ((ret = lb_vip_add_ass(vip_index, as_array, vec_len(as_array)))) { - vec_free(as_array); - return clib_error_return (0, "lb_vip_add_ass error %d", ret); + error = clib_error_return (0, "lb_vip_add_ass error %d", ret); + goto done; } } +done: + unformat_free (line_input); vec_free(as_array); - return 0; + + return error; } VLIB_CLI_COMMAND (lb_as_command, static) = @@ -163,6 +182,7 @@ lb_conf_command_fn (vlib_main_t * vm, u32 per_cpu_sticky_buckets_log2 = 0; u32 flow_timeout = lbm->flow_timeout; int ret; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -181,19 +201,24 @@ lb_conf_command_fn (vlib_main_t * vm, per_cpu_sticky_buckets = 1 << per_cpu_sticky_buckets_log2; } else if (unformat(line_input, "timeout %d", &flow_timeout)) ; - else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + else { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - lb_garbage_collection(); - if ((ret = lb_conf(&ip4, &ip6, per_cpu_sticky_buckets, flow_timeout))) - return clib_error_return (0, "lb_conf error %d", ret); + if ((ret = lb_conf(&ip4, &ip6, per_cpu_sticky_buckets, flow_timeout))) { + error = clib_error_return (0, "lb_conf error %d", ret); + goto done; + } - return NULL; +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (lb_conf_command, static) = diff --git a/src/plugins/sixrd/sixrd.c b/src/plugins/sixrd/sixrd.c index 71fc181f..67a9a3ad 100644 --- a/src/plugins/sixrd/sixrd.c +++ b/src/plugins/sixrd/sixrd.c @@ -192,6 +192,7 @@ sixrd_add_domain_command_fn (vlib_main_t *vm, u32 num_m_args = 0; /* Optional arguments */ u32 mtu = 0; + clib_error_t *error = 0; /* Get a line of input. 
*/ if (!unformat_user(input, unformat_line_input, line_input)) @@ -205,19 +206,25 @@ sixrd_add_domain_command_fn (vlib_main_t *vm, num_m_args++; else if (unformat(line_input, "mtu %d", &mtu)) num_m_args++; - else - return clib_error_return(0, "unknown input `%U'", - format_unformat_error, input); + else { + error = clib_error_return(0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free(line_input); - if (num_m_args < 3) - return clib_error_return(0, "mandatory argument(s) missing"); + if (num_m_args < 3) { + error = clib_error_return(0, "mandatory argument(s) missing"); + goto done; + } sixrd_create_domain(&ip6_prefix, ip6_prefix_len, &ip4_prefix, ip4_prefix_len, &ip4_src, &sixrd_domain_index, mtu); - return 0; +done: + unformat_free (line_input); + + return error; } static clib_error_t * @@ -228,6 +235,7 @@ sixrd_del_domain_command_fn (vlib_main_t *vm, unformat_input_t _line_input, *line_input = &_line_input; u32 num_m_args = 0; u32 sixrd_domain_index; + clib_error_t *error = 0; /* Get a line of input. */ if (! unformat_user(input, unformat_line_input, line_input)) @@ -236,18 +244,24 @@ sixrd_del_domain_command_fn (vlib_main_t *vm, while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { if (unformat(line_input, "index %d", &sixrd_domain_index)) num_m_args++; - else - return clib_error_return(0, "unknown input `%U'", - format_unformat_error, input); + else { + error = clib_error_return(0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free(line_input); - if (num_m_args != 1) - return clib_error_return(0, "mandatory argument(s) missing"); + if (num_m_args != 1) { + error = clib_error_return(0, "mandatory argument(s) missing"); + goto done; + } sixrd_delete_domain(sixrd_domain_index); - return 0; +done: + unformat_free (line_input); + + return error; } static u8 * diff --git a/src/plugins/snat/snat.c b/src/plugins/snat/snat.c index 73854a7a..8c2bacdb 100644 --- a/src/plugins/snat/snat.c +++ b/src/plugins/snat/snat.c @@ -1705,6 +1705,7 @@ add_address_command_fn (vlib_main_t * vm, int i, count; int is_add = 1; int rv = 0; + clib_error_t *error = 0; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -1721,19 +1722,27 @@ add_address_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) is_add = 0; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (sm->static_mapping_only) - return clib_error_return (0, "static mapping only mode"); + { + error = clib_error_return (0, "static mapping only mode"); + goto done; + } start_host_order = clib_host_to_net_u32 (start_addr.as_u32); end_host_order = clib_host_to_net_u32 (end_addr.as_u32); if (end_host_order < start_host_order) - return clib_error_return (0, "end address less than start address"); + { + error = clib_error_return (0, "end address less than start address"); + goto done; + } count = (end_host_order - start_host_order) + 1; @@ -1755,11 +1764,11 @@ add_address_command_fn (vlib_main_t * vm, switch (rv) { case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "S-NAT address not exist."); - break; + error = clib_error_return (0, "S-NAT address not exist."); + goto done; case VNET_API_ERROR_UNSPECIFIED: - return clib_error_return (0, "S-NAT address used in static mapping."); - break; + error = clib_error_return (0, "S-NAT address used in static mapping."); + goto done; default: break; } @@ -1767,7 +1776,10 @@ add_address_command_fn (vlib_main_t * vm, increment_v4_address (&this_addr); } - return 0; +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (add_address_command, static) = { @@ -1807,10 +1819,12 @@ snat_feature_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) is_del = 1; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (vec_len (inside_sw_if_indices)) { @@ -1830,6 +1844,8 @@ snat_feature_command_fn (vlib_main_t * vm, } } +done: + unformat_free (line_input); vec_free (inside_sw_if_indices); vec_free (outside_sw_if_indices); @@ -1923,13 +1939,18 @@ add_static_mapping_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) is_add = 0; else - return clib_error_return (0, "unknown input: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (!addr_only && !proto_set) - return clib_error_return (0, "missing protocol"); + { + error = clib_error_return (0, "missing protocol"); + goto done; + } rv = snat_add_static_mapping(l_addr, e_addr, (u16) l_port, (u16) e_port, vrf_id, addr_only, sw_if_index, proto, is_add); @@ -1937,22 +1958,27 @@ add_static_mapping_command_fn (vlib_main_t * vm, switch (rv) { case VNET_API_ERROR_INVALID_VALUE: - return clib_error_return (0, "External port already in use."); - break; + error = clib_error_return (0, "External port already in use."); + goto done; case VNET_API_ERROR_NO_SUCH_ENTRY: if (is_add) - return clib_error_return (0, "External addres must be allocated."); + error = clib_error_return (0, "External addres must be allocated."); else - return clib_error_return (0, "Mapping not exist."); - break; + error = clib_error_return (0, "Mapping not exist."); + goto done; case VNET_API_ERROR_NO_SUCH_FIB: - return clib_error_return (0, "No such VRF id."); + 
error = clib_error_return (0, "No such VRF id."); + goto done; case VNET_API_ERROR_VALUE_EXIST: - return clib_error_return (0, "Mapping already exist."); + error = clib_error_return (0, "Mapping already exist."); + goto done; default: break; } +done: + unformat_free (line_input); + return error; } @@ -1985,6 +2011,7 @@ set_workers_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; uword *bitmap = 0; int rv = 0; + clib_error_t *error = 0; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -1995,13 +2022,18 @@ set_workers_command_fn (vlib_main_t * vm, if (unformat (line_input, "%U", unformat_bitmap_list, &bitmap)) ; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (bitmap == 0) - return clib_error_return (0, "List of workers must be specified."); + { + error = clib_error_return (0, "List of workers must be specified."); + goto done; + } rv = snat_set_workers(bitmap); @@ -2010,17 +2042,20 @@ set_workers_command_fn (vlib_main_t * vm, switch (rv) { case VNET_API_ERROR_INVALID_WORKER: - return clib_error_return (0, "Invalid worker(s)."); - break; + error = clib_error_return (0, "Invalid worker(s)."); + goto done; case VNET_API_ERROR_FEATURE_DISABLED: - return clib_error_return (0, + error = clib_error_return (0, "Supported only if 2 or more workes available."); - break; + goto done; default: break; } - return 0; +done: + unformat_free (line_input); + + return error; } /*? @@ -2047,6 +2082,7 @@ snat_ipfix_logging_enable_disable_command_fn (vlib_main_t * vm, u32 src_port = 0; u8 enable = 1; int rv = 0; + clib_error_t *error = 0; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -2061,17 +2097,25 @@ snat_ipfix_logging_enable_disable_command_fn (vlib_main_t * vm, else if (unformat (line_input, "disable")) enable = 0; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); rv = snat_ipfix_logging_enable_disable (enable, domain_id, (u16) src_port); if (rv) - return clib_error_return (0, "ipfix logging enable failed"); + { + error = clib_error_return (0, "ipfix logging enable failed"); + goto done; + } - return 0; +done: + unformat_free (line_input); + + return error; } /*? @@ -2604,6 +2648,7 @@ snat_add_interface_address_command_fn (vlib_main_t * vm, u32 sw_if_index; int rv; int is_del = 0; + clib_error_t *error = 0; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -2617,8 +2662,11 @@ snat_add_interface_address_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) is_del = 1; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } rv = snat_add_interface_address (sm, sw_if_index, is_del); @@ -2629,10 +2677,15 @@ snat_add_interface_address_command_fn (vlib_main_t * vm, break; default: - return clib_error_return (0, "snat_add_interface_address returned %d", - rv); + error = clib_error_return (0, "snat_add_interface_address returned %d", + rv); + goto done; } - return 0; + +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (snat_add_interface_address_command, static) = { diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index 54cc1aed..36f8109e 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -163,21 +163,31 @@ trace_frame_queue (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "index %u", &index)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (enable > 1) - return clib_error_return (0, "expecting on or off"); + { + error = clib_error_return (0, "expecting on or off"); + goto done; + } if (vec_len (tm->frame_queue_mains) == 0) - return clib_error_return (0, "no worker handoffs exist"); + { + error = clib_error_return (0, "no worker handoffs exist"); + goto done; + } if (index > vec_len (tm->frame_queue_mains) - 1) - return clib_error_return (0, - "expecting valid worker handoff queue index"); + { + error = clib_error_return (0, + "expecting valid worker handoff queue index"); + goto done; + } fqm = vec_elt_at_index (tm->frame_queue_mains, index); @@ -185,7 +195,7 @@ trace_frame_queue (vlib_main_t * vm, unformat_input_t * input, if (num_fq == 0) { vlib_cli_output (vm, "No frame queues exist\n"); - return error; + goto done; } // Allocate storage for trace if necessary @@ -204,6 +214,10 @@ trace_frame_queue (vlib_main_t * vm, unformat_input_t * input, memset (fqh, 0, sizeof (*fqh)); fqm->vlib_frame_queues[fqix]->trace = enable; } + +done: + unformat_free (line_input); + return error; } @@ -432,28 +446,33 @@ test_frame_queue_nelts (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "index %u", &index)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (index > vec_len (tm->frame_queue_mains) - 1) - return clib_error_return (0, - "expecting valid worker handoff queue index"); + { + error = clib_error_return (0, + "expecting valid worker handoff queue index"); + goto done; + } fqm = vec_elt_at_index (tm->frame_queue_mains, index); if ((nelts != 4) && (nelts != 8) && (nelts != 16) && (nelts != 32)) { - return clib_error_return (0, "expecting 4,8,16,32"); + error = clib_error_return (0, "expecting 4,8,16,32"); + goto done; } num_fq = vec_len (fqm->vlib_frame_queues); if (num_fq == 0) { vlib_cli_output (vm, "No frame queues exist\n"); - return error; + goto done; } for (fqix = 0; fqix < num_fq; fqix++) @@ -461,6 +480,9 @@ 
test_frame_queue_nelts (vlib_main_t * vm, unformat_input_t * input, fqm->vlib_frame_queues[fqix]->nelts = nelts; } +done: + unformat_free (line_input); + return error; } @@ -499,15 +521,19 @@ test_frame_queue_threshold (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "index %u", &index)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (index > vec_len (tm->frame_queue_mains) - 1) - return clib_error_return (0, - "expecting valid worker handoff queue index"); + { + error = clib_error_return (0, + "expecting valid worker handoff queue index"); + goto done; + } fqm = vec_elt_at_index (tm->frame_queue_mains, index); @@ -515,7 +541,7 @@ test_frame_queue_threshold (vlib_main_t * vm, unformat_input_t * input, if (threshold == ~(u32) 0) { vlib_cli_output (vm, "expecting threshold value\n"); - return error; + goto done; } if (threshold == 0) @@ -525,7 +551,7 @@ test_frame_queue_threshold (vlib_main_t * vm, unformat_input_t * input, if (num_fq == 0) { vlib_cli_output (vm, "No frame queues exist\n"); - return error; + goto done; } for (fqix = 0; fqix < num_fq; fqix++) @@ -533,6 +559,9 @@ test_frame_queue_threshold (vlib_main_t * vm, unformat_input_t * input, fqm->vlib_frame_queues[fqix]->vector_threshold = threshold; } +done: + unformat_free (line_input); + return error; } diff --git a/src/vlib/trace.c b/src/vlib/trace.c index dcdb837f..6d487ae1 100644 --- a/src/vlib/trace.c +++ b/src/vlib/trace.c @@ -372,6 +372,7 @@ cli_add_trace_buffer (vlib_main_t * vm, vlib_trace_node_t *tn; u32 node_index, add; u8 verbose = 0; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -384,8 +385,11 @@ cli_add_trace_buffer (vlib_main_t * vm, else if (unformat (line_input, "verbose")) verbose = 1; else - return clib_error_create ("expected NODE COUNT, got `%U'", - format_unformat_error, line_input); + { + error = clib_error_create ("expected NODE COUNT, got `%U'", + format_unformat_error, line_input); + goto done; + } } /* *INDENT-OFF* */ @@ -403,7 +407,10 @@ cli_add_trace_buffer (vlib_main_t * vm, })); /* *INDENT-ON* */ - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index 69fca6ec..88e2453c 100644 --- a/src/vlib/unix/cli.c +++ b/src/vlib/unix/cli.c @@ -2835,6 +2835,7 @@ unix_cli_set_terminal_pager (vlib_main_t * vm, unix_cli_main_t *cm = &unix_cli_main; unix_cli_file_t *cf; unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -2852,13 +2853,17 @@ unix_cli_set_terminal_pager (vlib_main_t * vm, "Pager limit set to %u lines; note, this is global.\n", um->cli_pager_buffer_limit); else - return clib_error_return (0, "unknown parameter: `%U`", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown parameter: `%U`", + format_unformat_error, line_input); + goto done; + } } +done: unformat_free (line_input); - return 0; + return error; } /*? 
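Every hunk in this series applies the same transformation: unformat_line_input allocates a buffer inside line_input, so any early return that skips unformat_free (line_input) leaks it. The rewrite collects the error into a local clib_error_t and routes every exit through a single done: label. A minimal sketch of the target shape follows; example_command_fn and the "foo" keyword are hypothetical placeholders, not code from this tree:

static clib_error_t *
example_command_fn (vlib_main_t * vm, unformat_input_t * input,
                    vlib_cli_command_t * cmd)
{
  unformat_input_t _line_input, *line_input = &_line_input;
  clib_error_t *error = 0;

  /* unformat_line_input allocates a buffer inside line_input */
  if (!unformat_user (input, unformat_line_input, line_input))
    return 0;

  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (line_input, "foo"))
        ;
      else
        {
          /* returning here directly would leak line_input's buffer */
          error = clib_error_return (0, "unknown input '%U'",
                                     format_unformat_error, line_input);
          goto done;
        }
    }

done:
  /* single exit point: the line input is freed on every path */
  unformat_free (line_input);

  return error;
}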
@@ -2886,6 +2891,7 @@ unix_cli_set_terminal_history (vlib_main_t * vm, unix_cli_file_t *cf; unformat_input_t _line_input, *line_input = &_line_input; u32 limit; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -2901,8 +2907,11 @@ unix_cli_set_terminal_history (vlib_main_t * vm, else if (unformat (line_input, "limit %u", &cf->history_limit)) ; else - return clib_error_return (0, "unknown parameter: `%U`", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown parameter: `%U`", + format_unformat_error, line_input); + goto done; + } /* If we reduced history size, or turned it off, purge the history */ limit = cf->has_history ? cf->history_limit : 0; @@ -2914,9 +2923,10 @@ unix_cli_set_terminal_history (vlib_main_t * vm, } } +done: unformat_free (line_input); - return 0; + return error; } /*? diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c index 6baa26e1..d4aa7016 100644 --- a/src/vnet/devices/af_packet/cli.c +++ b/src/vnet/devices/af_packet/cli.c @@ -49,6 +49,7 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, u8 *hw_addr_ptr = 0; u32 sw_if_index; int r; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -63,29 +64,47 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr)) hw_addr_ptr = hwaddr; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (host_if_name == NULL) - return clib_error_return (0, "missing host interface name"); + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } r = af_packet_create_if (vm, host_if_name, hw_addr_ptr, &sw_if_index); - vec_free (host_if_name); if (r == VNET_API_ERROR_SYSCALL_ERROR_1) - return clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + { + error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + goto done; + } if (r == VNET_API_ERROR_INVALID_INTERFACE) - return clib_error_return (0, "Invalid interface name"); + { + error = clib_error_return (0, "Invalid interface name"); + goto done; + } if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) - return clib_error_return (0, "Interface elready exists"); + { + error = clib_error_return (0, "Interface already exists"); + goto done; + } vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index); - return 0; + +done: + vec_free (host_if_name); + unformat_free (line_input); + + return error; } /*? @@ -124,6 +143,7 @@ af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, { unformat_input_t _line_input, *line_input = &_line_input; u8 *host_if_name = NULL; + clib_error_t *error = NULL; /* Get a line of input.
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -134,18 +154,26 @@ af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, if (unformat (line_input, "name %s", &host_if_name)) ; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (host_if_name == NULL) - return clib_error_return (0, "missing host interface name"); + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } af_packet_delete_if (vm, host_if_name); + +done: vec_free (host_if_name); + unformat_free (line_input); - return 0; + return error; } /*? diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c index d133cfd9..1fc665ac 100644 --- a/src/vnet/devices/dpdk/cli.c +++ b/src/vnet/devices/dpdk/cli.c @@ -398,7 +398,7 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, u32 hw_if_index = (u32) ~ 0; u32 nb_rx_desc = (u32) ~ 0; u32 nb_tx_desc = (u32) ~ 0; - clib_error_t *rv; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -414,25 +414,37 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "rx %d", &nb_rx_desc)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - return clib_error_return (0, "number of descriptors can be set only for " - "physical devices"); + { + error = + clib_error_return (0, + "number of descriptors can be set only for " + "physical devices"); + goto done; + } if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) && (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc)) - return clib_error_return (0, "nothing changed"); + { + error = clib_error_return (0, "nothing changed"); + goto done; + } if (nb_rx_desc != (u32) ~ 0) xd->nb_rx_desc = nb_rx_desc; @@ -440,9 +452,12 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, if (nb_tx_desc != (u32) ~ 0) xd->nb_tx_desc = nb_tx_desc; - rv = dpdk_port_setup (dm, xd); + error = dpdk_port_setup (dm, xd); + +done: + unformat_free (line_input); - return rv; + return error; } /* *INDENT-OFF* */ @@ -523,6 +538,7 @@ set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, u32 queue = (u32) 0; u32 cpu = (u32) ~ 0; int i; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -538,18 +554,25 @@ set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "thread %d", &cpu)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); + { + error = clib_error_return (0, "please specify 
valid interface name"); + goto done; + } if (cpu < dm->input_cpu_first_index || cpu >= (dm->input_cpu_first_index + dm->input_cpu_count)) - return clib_error_return (0, "please specify valid thread id"); + { + error = clib_error_return (0, "please specify valid thread id"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -563,7 +586,7 @@ set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, queue == dq->queue_id) { if (cpu == i) /* nothing to do */ - return 0; + goto done; vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]); vec_add2(dm->devices_by_cpu[cpu], dq, 1); @@ -586,13 +609,18 @@ set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, VLIB_NODE_STATE_POLLING); - return 0; + goto done; } } /* *INDENT-ON* */ } - return clib_error_return (0, "not found"); + error = clib_error_return (0, "not found"); + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -653,6 +681,7 @@ set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, u32 hw_if_index = (u32) ~ 0; u32 cpu = (u32) ~ 0; int i; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -666,18 +695,22 @@ set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "thread %d", &cpu)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) return clib_error_return (0, "please specify valid interface name"); if (cpu < dm->hqos_cpu_first_index || cpu >= (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) - return clib_error_return (0, "please specify valid thread id"); + { + error = clib_error_return (0, "please specify valid thread id"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -689,7 +722,7 @@ set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index) { if (cpu == i) /* nothing to do */ - return 0; + goto done; vec_del1 (dm->devices_by_hqos_cpu[i], dq - dm->devices_by_hqos_cpu[i]); @@ -703,12 +736,17 @@ set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, vec_sort_with_function (dm->devices_by_hqos_cpu[cpu], dpdk_device_queue_sort); - return 0; + goto done; } } } - return clib_error_return (0, "not found"); + error = clib_error_return (0, "not found"); + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -732,6 +770,7 @@ set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, u32 pipe_id = (u32) ~ 0; u32 profile_id = (u32) ~ 0; int rv; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -749,14 +788,18 @@ set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "profile %d", &profile_id)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid 
interface name"); + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -765,9 +808,15 @@ set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, profile_id); if (rv) - return clib_error_return (0, "pipe configuration failed"); + { + error = clib_error_return (0, "pipe configuration failed"); + goto done; + } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -797,6 +846,7 @@ set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, .tc_period = 10, }; int rv; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -829,23 +879,33 @@ set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "period %d", &p.tc_period)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p); if (rv) - return clib_error_return (0, "subport configuration failed"); + { + error = clib_error_return (0, "subport configuration failed"); + goto done; + } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -872,6 +932,7 @@ set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, u32 queue = (u32) ~ 0; u32 entry = (u32) ~ 0; u32 val, i; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -889,20 +950,33 @@ set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "queue %d", &queue)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } if (entry >= 64) - return clib_error_return (0, "invalid entry"); + { + error = clib_error_return (0, "invalid entry"); + goto done; + } if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) - return clib_error_return (0, "invalid traffic class"); + { + error = clib_error_return (0, "invalid traffic class"); + goto done; + } if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) - return clib_error_return (0, "invalid traffic class"); + { + error = clib_error_return (0, "invalid traffic class"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -911,7 +985,10 @@ set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); /* Should never happen, shut up Coverity warning */ if (p == 0) - return clib_error_return 
(0, "no worker registrations?"); + { + error = clib_error_return (0, "no worker registrations?"); + goto done; + } vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; int worker_thread_first = tr->first_index; @@ -921,7 +998,10 @@ set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, for (i = 0; i < worker_thread_count; i++) xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -939,6 +1019,7 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, unformat_input_t _line_input, *line_input = &_line_input; vlib_thread_main_t *tm = vlib_get_thread_main (); dpdk_main_t *dm = &dpdk_main; + clib_error_t *error = NULL; /* Device specific data */ struct rte_eth_dev_info dev_info; @@ -984,15 +1065,19 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "mask %llx", &mask)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - /* Get interface */ if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -1019,7 +1104,7 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, if (devconf->hqos_enabled == 0) { vlib_cli_output (vm, "HQoS disabled for this interface"); - return 0; + goto done; } n_subports_per_port = devconf->hqos.port.n_subports_per_port; @@ -1028,27 +1113,39 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, /* Validate packet field configuration: id, offset and mask */ if (id >= 3) - return clib_error_return (0, "invalid packet field id"); + { + error = clib_error_return (0, "invalid packet field id"); + goto done; + } switch (id) { case 0: if (dpdk_hqos_validate_mask (mask, n_subports_per_port) != 0) - return clib_error_return (0, "invalid subport ID mask " - "(n_subports_per_port = %u)", - n_subports_per_port); + { + error = clib_error_return (0, "invalid subport ID mask " + "(n_subports_per_port = %u)", + n_subports_per_port); + goto done; + } break; case 1: if (dpdk_hqos_validate_mask (mask, n_pipes_per_subport) != 0) - return clib_error_return (0, "invalid pipe ID mask " - "(n_pipes_per_subport = %u)", - n_pipes_per_subport); + { + error = clib_error_return (0, "invalid pipe ID mask " + "(n_pipes_per_subport = %u)", + n_pipes_per_subport); + goto done; + } break; case 2: default: if (dpdk_hqos_validate_mask (mask, tctbl_size) != 0) - return clib_error_return (0, "invalid TC table index mask " - "(TC table size = %u)", tctbl_size); + { + error = clib_error_return (0, "invalid TC table index mask " + "(TC table size = %u)", tctbl_size); + goto done; + } } /* Propagate packet field configuration to all workers */ @@ -1075,7 +1172,10 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, __builtin_ctzll (mask); } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -1106,6 +1206,7 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, dpdk_device_config_t *devconf = 0; vlib_thread_registration_t *tr; uword *p = 0; + clib_error_t *error = NULL; 
if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -1117,14 +1218,18 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, &hw_if_index)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify interface name!!"); + { + error = clib_error_return (0, "please specify interface name!!"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -1151,7 +1256,7 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, if (devconf->hqos_enabled == 0) { vlib_cli_output (vm, "HQoS disabled for this interface"); - return 0; + goto done; } /* Detect the set of worker threads */ @@ -1159,7 +1264,10 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, /* Should never happen, shut up Coverity warning */ if (p == 0) - return clib_error_return (0, "no worker registrations?"); + { + error = clib_error_return (0, "no worker registrations?"); + goto done; + } tr = (vlib_thread_registration_t *) p[0]; @@ -1284,7 +1392,10 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, } #endif - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -1315,6 +1426,7 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, u32 qindex; struct rte_sched_queue_stats stats; u16 qlen; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -1339,14 +1451,18 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify interface name!!"); + { + error = clib_error_return (0, "please specify interface name!!"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -1373,7 +1489,7 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, if (devconf->hqos_enabled == 0) { vlib_cli_output (vm, "HQoS disabled for this interface"); - return 0; + goto done; } /* @@ -1386,7 +1502,10 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, if (rte_sched_queue_read_stats (xd->hqos_ht->hqos, qindex, &stats, &qlen) != 0) - return clib_error_return (0, "failed to read stats"); + { + error = clib_error_return (0, "failed to read stats"); + goto done; + } vlib_cli_output (vm, "%=24s%=16s", "Stats Parameter", "Value"); vlib_cli_output (vm, "%=24s%=16d", "Packets", stats.n_pkts); @@ -1399,7 +1518,10 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/devices/dpdk/ipsec/cli.c b/src/vnet/devices/dpdk/ipsec/cli.c index 93df4a64..f9d3a5d0 100644 --- a/src/vnet/devices/dpdk/ipsec/cli.c +++ b/src/vnet/devices/dpdk/ipsec/cli.c @@ -111,6 +111,7 @@ lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * 
input, { unformat_input_t _line_input, *line_input = &_line_input; u16 detail = 0; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -120,15 +121,19 @@ lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input, if (unformat (line_input, "verbose")) detail = 1; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - dpdk_ipsec_show_mapping (vm, detail); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/devices/netmap/cli.c b/src/vnet/devices/netmap/cli.c index 6157f27c..71363294 100644 --- a/src/vnet/devices/netmap/cli.c +++ b/src/vnet/devices/netmap/cli.c @@ -37,6 +37,7 @@ netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, u8 is_pipe = 0; u8 is_master = 0; u32 sw_if_index = ~0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -57,30 +58,48 @@ netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "slave")) is_master = 0; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (host_if_name == NULL) - return clib_error_return (0, "missing host interface name"); + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } r = netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master, &sw_if_index); if (r == VNET_API_ERROR_SYSCALL_ERROR_1) - return clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + { + error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + goto done; + } if (r == VNET_API_ERROR_INVALID_INTERFACE) - return clib_error_return (0, "Invalid interface name"); + { + error = clib_error_return (0, "Invalid interface name"); + goto done; + } if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) - return clib_error_return (0, "Interface already exists"); + { + error = clib_error_return (0, "Interface already exists"); + goto done; + } vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index); - return 0; + +done: + unformat_free (line_input); + + return error; } /*? @@ -144,6 +163,7 @@ netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, { unformat_input_t _line_input, *line_input = &_line_input; u8 *host_if_name = NULL; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -154,17 +174,25 @@ netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, if (unformat (line_input, "name %s", &host_if_name)) ; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (host_if_name == NULL) - return clib_error_return (0, "missing host interface name"); + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } netmap_delete_if (vm, host_if_name); - return 0; +done: + unformat_free (line_input); + + return error; } /*? 
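The af_packet and netmap hunks above also relocate vec_free (host_if_name) under the done: label (and the vhost-user hunk below does the same for sock_filename), so the parsed name vector is released on error paths that previously leaked it along with the line input. A sketched epilogue, reusing host_if_name from the hunks above; vec_free on a still-NULL vector is a no-op in this vector library, so running it even when parsing failed before "name %s" ever matched is safe:

done:
  /* consolidated cleanup: both allocations freed on every exit path */
  vec_free (host_if_name);
  unformat_free (line_input);

  return error;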
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 315daa77..c43f6e67 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2682,6 +2682,7 @@ vhost_user_connect_command_fn (vlib_main_t * vm, u32 custom_dev_instance = ~0; u8 hwaddr[6]; u8 *hw = NULL; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -2704,10 +2705,12 @@ vhost_user_connect_command_fn (vlib_main_t * vm, renumber = 1; } else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); vnet_main_t *vnm = vnet_get_main (); @@ -2716,14 +2719,18 @@ vhost_user_connect_command_fn (vlib_main_t * vm, is_server, &sw_if_index, feature_mask, renumber, custom_dev_instance, hw))) { - vec_free (sock_filename); - return clib_error_return (0, "vhost_user_create_if returned %d", rv); + error = clib_error_return (0, "vhost_user_create_if returned %d", rv); + goto done; } - vec_free (sock_filename); vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index); - return 0; + +done: + vec_free (sock_filename); + unformat_free (line_input); + + return error; } clib_error_t * @@ -2734,6 +2741,7 @@ vhost_user_delete_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; u32 sw_if_index = ~0; vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -2751,15 +2759,25 @@ vhost_user_delete_command_fn (vlib_main_t * vm, vnet_get_sup_hw_interface (vnm, sw_if_index); if (hwif == NULL || vhost_user_dev_class.index != hwif->dev_class_index) - return clib_error_return (0, "Not a vhost interface"); + { + error = clib_error_return (0, "Not a vhost interface"); + goto done; + } } else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); + vhost_user_delete_if (vnm, vm, sw_if_index); - return 0; + +done: + unformat_free (line_input); + + return error; } int @@ -3286,6 +3304,7 @@ vhost_thread_command_fn (vlib_main_t * vm, u32 sw_if_index; u8 del = 0; int rv; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -3295,9 +3314,9 @@ vhost_thread_command_fn (vlib_main_t * vm, (line_input, "%U %d", unformat_vnet_sw_interface, vnet_get_main (), &sw_if_index, &worker_thread_index)) { - unformat_free (line_input); - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; } if (unformat (line_input, "del")) @@ -3305,9 +3324,16 @@ vhost_thread_command_fn (vlib_main_t * vm, if ((rv = vhost_user_thread_placement (sw_if_index, worker_thread_index, del))) - return clib_error_return (0, "vhost_user_thread_placement returned %d", - rv); - return 0; + { + error = clib_error_return (0, "vhost_user_thread_placement returned %d", + rv); + goto done; + } + +done: + unformat_free (line_input); + + return error; } diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c index d624587d..d4476ac4 100644 --- a/src/vnet/gre/interface.c +++ b/src/vnet/gre/interface.c @@ -491,6 +491,7 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, u32 num_m_args = 0; u8 is_add = 1; u32 sw_if_index; + clib_error_t *error = NULL; /* Get a line of input. */ if (! unformat_user (input, unformat_line_input, line_input)) @@ -508,16 +509,24 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "teb")) teb = 1; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args < 2) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } if (memcmp (&src, &dst, sizeof(src)) == 0) - return clib_error_return (0, "src and dst are identical"); + { + error = clib_error_return (0, "src and dst are identical"); + goto done; + } memset (a, 0, sizeof (*a)); a->outer_fib_id = outer_fib_id; @@ -536,15 +545,21 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index); break; case VNET_API_ERROR_INVALID_VALUE: - return clib_error_return (0, "GRE tunnel already exists..."); + error = clib_error_return (0, "GRE tunnel already exists..."); + goto done; case VNET_API_ERROR_NO_SUCH_FIB: - return clib_error_return (0, "outer fib ID %d doesn't exist\n", - outer_fib_id); + error = clib_error_return (0, "outer fib ID %d doesn't exist\n", + outer_fib_id); + goto done; default: - return clib_error_return (0, "vnet_gre_add_del_tunnel returned %d", rv); + error = clib_error_return (0, "vnet_gre_add_del_tunnel returned %d", rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = { diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c index d461cc88..3af32f2e 100644 --- a/src/vnet/ip/ip4_source_check.c +++ b/src/vnet/ip/ip4_source_check.c @@ -399,6 +399,8 @@ set_ip_source_check (vlib_main_t * vm, vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, is_del == 0, &config, sizeof (config)); done: + unformat_free (line_input); + return error; } @@ -531,7 +533,9 @@ ip_source_check_accept (vlib_main_t * vm, } done: - return (error); + unformat_free (line_input); + + return error; } /*? 
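A second fix rides along in many of these hunks: the parse-error message is now formatted against line_input rather than the outer input (format_unformat_error, input becomes format_unformat_error, line_input). format_unformat_error reports the text and position of the unformat stream it is handed, and the failed token lives in the sub-parsed line, not in the outer input that unformat_line_input has already consumed. The recurring before/after fragment:

  /* before: reports a position in the wrong, already-consumed stream */
  error = clib_error_return (0, "unknown input '%U'",
                             format_unformat_error, input);

  /* after: points at the token that actually failed to parse */
  error = clib_error_return (0, "unknown input '%U'",
                             format_unformat_error, line_input);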
diff --git a/src/vnet/ip/ip4_test.c b/src/vnet/ip/ip4_test.c index 45d17113..73dabfdc 100644 --- a/src/vnet/ip/ip4_test.c +++ b/src/vnet/ip/ip4_test.c @@ -143,8 +143,11 @@ thrash (vlib_main_t * vm, else if (unformat (line_input, "verbose")) verbose = 1; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } } @@ -178,7 +181,7 @@ thrash (vlib_main_t * vm, if (p == 0) { vlib_cli_output (vm, "Couldn't map fib id %d to fib index\n", table_id); - return 0; + goto done; } table_index = p[0]; @@ -294,7 +297,11 @@ thrash (vlib_main_t * vm, pool_free (tm->route_pool); } - return 0; + +done: + unformat_free (line_input); + + return error; } /*? diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 7229591e..6b53137f 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -2923,7 +2923,10 @@ ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input, else if (unformat (line_input, "ra-lifetime")) { if (!unformat (line_input, "%d", &ra_lifetime)) - return (error = unformat_parse_error (line_input)); + { + error = unformat_parse_error (line_input); + goto done; + } use_lifetime = 1; break; } @@ -2931,13 +2934,19 @@ ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input, { if (!unformat (line_input, "%d %d", &ra_initial_count, &ra_initial_interval)) - return (error = unformat_parse_error (line_input)); + { + error = unformat_parse_error (line_input); + goto done; + } break; } else if (unformat (line_input, "ra-interval")) { if (!unformat (line_input, "%d", &ra_max_interval)) - return (error = unformat_parse_error (line_input)); + { + error = unformat_parse_error (line_input); + goto done; + } if (!unformat (line_input, "%d", &ra_min_interval)) ra_min_interval = 0; @@ -2949,7 +2958,10 @@ ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input, break; } else - return (unformat_parse_error (line_input)); + { + error = unformat_parse_error (line_input); + goto done; + } } if (add_radv_info) @@ -3006,7 +3018,10 @@ ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input, else if (unformat (line_input, "no-onlink")) no_onlink = 1; else - return (unformat_parse_error (line_input)); + { + error = unformat_parse_error (line_input); + goto done; + } } ip6_neighbor_ra_prefix (vm, sw_if_index, @@ -3018,9 +3033,9 @@ ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input, off_link, no_autoconfig, no_onlink, is_no); } +done: unformat_free (line_input); -done: return error; } diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 0ef0e7a6..807b87b6 100644 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -568,8 +568,6 @@ vnet_ip_route_cmd (vlib_main_t * vm, } } - unformat_free (line_input); - if (vec_len (prefixs) == 0) { error = @@ -704,6 +702,7 @@ done: vec_free (dpos); vec_free (prefixs); vec_free (rpaths); + unformat_free (line_input); return error; } @@ -872,8 +871,6 @@ vnet_ip_mroute_cmd (vlib_main_t * vm, } } - unformat_free (line_input); - if (~0 == table_id) { /* @@ -970,6 +967,8 @@ vnet_ip_mroute_cmd (vlib_main_t * vm, (scount * gcount) / (timet[1] - timet[0])); done: + unformat_free (line_input); + return error; } @@ -1149,24 +1148,37 @@ probe_neighbor_address (vlib_main_t * vm, is_ip4 = 0; } else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input '%U'", + 
format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (sw_if_index == ~0) - return clib_error_return (0, "Interface required, not set."); + { + error = clib_error_return (0, "Interface required, not set."); + goto done; + } if (address_set == 0) - return clib_error_return (0, "ip address required, not set."); + { + error = clib_error_return (0, "ip address required, not set."); + goto done; + } if (address_set > 1) - return clib_error_return (0, "Multiple ip addresses not supported."); + { + error = clib_error_return (0, "Multiple ip addresses not supported."); + goto done; + } if (is_ip4) error = ip4_probe_neighbor_wait (vm, &a4, sw_if_index, retry_count); else error = ip6_probe_neighbor_wait (vm, &a6, sw_if_index, retry_count); +done: + unformat_free (line_input); + return error; } diff --git a/src/vnet/ipsec-gre/interface.c b/src/vnet/ipsec-gre/interface.c index 3b6e4ac2..0772ce73 100644 --- a/src/vnet/ipsec-gre/interface.c +++ b/src/vnet/ipsec-gre/interface.c @@ -232,6 +232,7 @@ create_ipsec_gre_tunnel_command_fn (vlib_main_t * vm, vnet_ipsec_gre_add_del_tunnel_args_t _a, *a = &_a; int rv; u32 sw_if_index; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -250,16 +251,24 @@ create_ipsec_gre_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "remote-sa %d", &rsa)) num_m_args++; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args < 4) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } if (memcmp (&src, &dst, sizeof (src)) == 0) - return clib_error_return (0, "src and dst are identical"); + { + error = clib_error_return (0, "src and dst are identical"); + goto done; + } memset (a, 0, sizeof (*a)); a->is_add = is_add; @@ -277,14 +286,19 @@ create_ipsec_gre_tunnel_command_fn (vlib_main_t * vm, vnet_get_main (), sw_if_index); break; case VNET_API_ERROR_INVALID_VALUE: - return clib_error_return (0, "GRE tunnel already exists..."); + error = clib_error_return (0, "GRE tunnel already exists..."); + goto done; default: - return clib_error_return (0, - "vnet_ipsec_gre_add_del_tunnel returned %d", - rv); + error = clib_error_return (0, + "vnet_ipsec_gre_add_del_tunnel returned %d", + rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c index 3c1e26f2..0e034402 100644 --- a/src/vnet/ipsec/ipsec_cli.c +++ b/src/vnet/ipsec/ipsec_cli.c @@ -32,6 +32,7 @@ set_interface_spd_command_fn (vlib_main_t * vm, u32 sw_if_index = (u32) ~ 0; u32 spd_id; int is_add = 1; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -43,14 +44,18 @@ set_interface_spd_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) is_add = 0; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - - unformat_free (line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } ipsec_set_interface_spd (vm, sw_if_index, spd_id, is_add); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -72,7 
+77,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, ipsec_sa_t sa; int is_add = ~0; u8 *ck = 0, *ik = 0; - clib_error_t *err = 0; + clib_error_t *error = NULL; memset (&sa, 0, sizeof (sa)); @@ -90,8 +95,11 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, else if (unformat (line_input, "esp")) sa.protocol = IPSEC_PROTOCOL_ESP; else if (unformat (line_input, "ah")) - //sa.protocol = IPSEC_PROTOCOL_AH; - return clib_error_return (0, "unsupported security protocol 'AH'"); + { + //sa.protocol = IPSEC_PROTOCOL_AH; + error = clib_error_return (0, "unsupported security protocol 'AH'"); + goto done; + } else if (unformat (line_input, "crypto-key %U", unformat_hex_string, &ck)) sa.crypto_key_len = vec_len (ck); @@ -102,8 +110,12 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, { if (sa.crypto_alg < IPSEC_CRYPTO_ALG_AES_CBC_128 || sa.crypto_alg >= IPSEC_CRYPTO_N_ALG) - return clib_error_return (0, "unsupported crypto-alg: '%U'", - format_ipsec_crypto_alg, sa.crypto_alg); + { + error = clib_error_return (0, "unsupported crypto-alg: '%U'", + format_ipsec_crypto_alg, + sa.crypto_alg); + goto done; + } } else if (unformat (line_input, "integ-key %U", unformat_hex_string, &ik)) @@ -113,8 +125,12 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, { if (sa.integ_alg < IPSEC_INTEG_ALG_SHA1_96 || sa.integ_alg >= IPSEC_INTEG_N_ALG) - return clib_error_return (0, "unsupported integ-alg: '%U'", - format_ipsec_integ_alg, sa.integ_alg); + { + error = clib_error_return (0, "unsupported integ-alg: '%U'", + format_ipsec_integ_alg, + sa.integ_alg); + goto done; + } } else if (unformat (line_input, "tunnel-src %U", unformat_ip4_address, &sa.tunnel_src_addr.ip4)) @@ -135,12 +151,13 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, sa.is_tunnel_ip6 = 1; } else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (sa.crypto_key_len > sizeof (sa.crypto_key)) sa.crypto_key_len = sizeof (sa.crypto_key); @@ -156,14 +173,17 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, if (is_add) { ASSERT (im->cb.check_support_cb); - err = im->cb.check_support_cb (&sa); - if (err) - return err; + error = im->cb.check_support_cb (&sa); + if (error) + goto done; } ipsec_add_del_sa (vm, &sa, is_add); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -183,6 +203,7 @@ ipsec_spd_add_del_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; u32 spd_id = ~0; int is_add = ~0; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -196,18 +217,25 @@ ipsec_spd_add_del_command_fn (vlib_main_t * vm, else if (unformat (line_input, "%u", &spd_id)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (spd_id == ~0) - return clib_error_return (0, "please specify SPD ID"); + { + error = clib_error_return (0, "please specify SPD ID"); + goto done; + } ipsec_add_del_spd (vm, spd_id, is_add); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -230,6 +258,7 @@ ipsec_policy_add_del_command_fn (vlib_main_t * vm, int is_add = 0; int is_ip_any = 1; u32 tmp, tmp2; + clib_error_t *error = NULL; memset (&p, 0, sizeof (p)); 
p.lport.stop = p.rport.stop = ~0; @@ -262,7 +291,10 @@ ipsec_policy_add_del_command_fn (vlib_main_t * vm, &p.policy)) { if (p.policy == IPSEC_POLICY_ACTION_RESOLVE) - return clib_error_return (0, "unsupported action: 'resolve'"); + { + error = clib_error_return (0, "unsupported action: 'resolve'"); + goto done; + } } else if (unformat (line_input, "sa %u", &p.sa_id)) ; @@ -300,19 +332,24 @@ ipsec_policy_add_del_command_fn (vlib_main_t * vm, p.rport.stop = tmp2; } else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - ipsec_add_del_policy (vm, &p, is_add); if (is_ip_any) { p.is_ipv6 = 1; ipsec_add_del_policy (vm, &p, is_add); } - return 0; + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -332,6 +369,7 @@ set_ipsec_sa_key_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; ipsec_sa_t sa; u8 *ck = 0, *ik = 0; + clib_error_t *error = NULL; memset (&sa, 0, sizeof (sa)); @@ -349,12 +387,13 @@ set_ipsec_sa_key_command_fn (vlib_main_t * vm, if (unformat (line_input, "integ-key %U", unformat_hex_string, &ik)) sa.integ_key_len = vec_len (ik); else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (sa.crypto_key_len > sizeof (sa.crypto_key)) sa.crypto_key_len = sizeof (sa.crypto_key); @@ -369,7 +408,10 @@ set_ipsec_sa_key_command_fn (vlib_main_t * vm, ipsec_set_sa_key (vm, &sa); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -649,6 +691,7 @@ create_ipsec_tunnel_command_fn (vlib_main_t * vm, ipsec_add_del_tunnel_args_t a; int rv; u32 num_m_args = 0; + clib_error_t *error = NULL; memset (&a, 0, sizeof (a)); a.is_add = 1; @@ -673,13 +716,18 @@ create_ipsec_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) a.is_add = 0; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args < 4) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } rv = ipsec_add_del_tunnel_if (&a); @@ -689,16 +737,21 @@ create_ipsec_tunnel_command_fn (vlib_main_t * vm, break; case VNET_API_ERROR_INVALID_VALUE: if (a.is_add) - return clib_error_return (0, - "IPSec tunnel interface already exists..."); + error = clib_error_return (0, + "IPSec tunnel interface already exists..."); else - return clib_error_return (0, "IPSec tunnel interface not exists..."); + error = clib_error_return (0, "IPSec tunnel interface does not exist..."); + goto done; default: - return clib_error_return (0, "ipsec_register_interface returned %d", - rv); + error = clib_error_return (0, "ipsec_register_interface returned %d", + rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -720,6 +773,7 @@ set_interface_key_command_fn (vlib_main_t * vm, u32 hw_if_index = (u32) ~ 0; u32 alg; u8 *key = 0; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -748,25 +802,38 @@
set_interface_key_command_fn (vlib_main_t * vm, else if (unformat (line_input, "%U", unformat_hex_string, &key)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (type == IPSEC_IF_SET_KEY_TYPE_NONE) - return clib_error_return (0, "unknown key type"); + { + error = clib_error_return (0, "unknown key type"); + goto done; + } if (alg > 0 && vec_len (key) == 0) - return clib_error_return (0, "key is not specified"); + { + error = clib_error_return (0, "key is not specified"); + goto done; + } if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "interface not specified"); + { + error = clib_error_return (0, "interface not specified"); + goto done; + } ipsec_set_interface_key (im->vnet_main, hw_if_index, type, alg, key); + +done: vec_free (key); + unformat_free (line_input); - return 0; + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c index 5e4691f4..ff3d2f3a 100644 --- a/src/vnet/l2/l2_patch.c +++ b/src/vnet/l2/l2_patch.c @@ -315,6 +315,7 @@ test_patch_command_fn (vlib_main_t * vm, int rx_set = 0; int tx_set = 0; int is_add = 1; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -335,10 +336,16 @@ test_patch_command_fn (vlib_main_t * vm, } if (rx_set == 0) - return clib_error_return (0, "rx interface not set"); + { + error = clib_error_return (0, "rx interface not set"); + goto done; + } if (tx_set == 0) - return clib_error_return (0, "tx interface not set"); + { + error = clib_error_return (0, "tx interface not set"); + goto done; + } rv = vnet_l2_patch_add_del (rx_sw_if_index, tx_sw_if_index, is_add); @@ -348,17 +355,24 @@ test_patch_command_fn (vlib_main_t * vm, break; case VNET_API_ERROR_INVALID_SW_IF_INDEX: - return clib_error_return (0, "rx interface not a physical port"); + error = clib_error_return (0, "rx interface not a physical port"); + goto done; case VNET_API_ERROR_INVALID_SW_IF_INDEX_2: - return clib_error_return (0, "tx interface not a physical port"); + error = clib_error_return (0, "tx interface not a physical port"); + goto done; default: - return clib_error_return + error = clib_error_return (0, "WARNING: vnet_l2_patch_add_del returned %d", rv); + goto done; } - return 0; + +done: + unformat_free (line_input); + + return error; } /*? 
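In the switch (rv) blocks that map VNET_API_ERROR_* codes to messages, each return clib_error_return (...) was followed by an unreachable break; the rewrite replaces the pair with an assignment plus goto done, removing the dead statements while fixing the leak. A condensed sketch of the converted shape, reusing the l2_patch case from the hunk above:

  switch (rv)
    {
    case 0:
      break;

    case VNET_API_ERROR_INVALID_SW_IF_INDEX:
      /* was: return clib_error_return (...); break;  (break unreachable) */
      error = clib_error_return (0, "rx interface not a physical port");
      goto done;

    default:
      error = clib_error_return
        (0, "WARNING: vnet_l2_patch_add_del returned %d", rv);
      goto done;
    }

done:
  unformat_free (line_input);

  return error;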
diff --git a/src/vnet/l2/l2_xcrw.c b/src/vnet/l2/l2_xcrw.c index 70610a85..d08a5d8f 100644 --- a/src/vnet/l2/l2_xcrw.c +++ b/src/vnet/l2/l2_xcrw.c @@ -409,6 +409,7 @@ set_l2_xcrw_command_fn (vlib_main_t * vm, u8 *rw = 0; vnet_main_t *vnm = vnet_get_main (); int rv; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) @@ -416,8 +417,11 @@ set_l2_xcrw_command_fn (vlib_main_t * vm, if (!unformat (line_input, "%U", unformat_vnet_sw_interface, vnm, &l2_sw_if_index)) - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { @@ -436,7 +440,10 @@ set_l2_xcrw_command_fn (vlib_main_t * vm, } if (next_node_index == ~0) - return clib_error_return (0, "next node not specified"); + { + error = clib_error_return (0, "next node not specified"); + goto done; + } if (tx_fib_id != ~0) { @@ -448,7 +455,11 @@ set_l2_xcrw_command_fn (vlib_main_t * vm, p = hash_get (ip4_main.fib_index_by_table_id, tx_fib_id); if (p == 0) - return clib_error_return (0, "nonexistent tx_fib_id %d", tx_fib_id); + { + error = + clib_error_return (0, "nonexistent tx_fib_id %d", tx_fib_id); + goto done; + } tx_fib_index = p[0]; } @@ -463,16 +474,21 @@ set_l2_xcrw_command_fn (vlib_main_t * vm, break; case VNET_API_ERROR_INVALID_SW_IF_INDEX: - return clib_error_return (0, "%U not cross-connected", - format_vnet_sw_if_index_name, - vnm, l2_sw_if_index); + error = clib_error_return (0, "%U not cross-connected", + format_vnet_sw_if_index_name, + vnm, l2_sw_if_index); + goto done; + default: - return clib_error_return (0, "vnet_configure_l2_xcrw returned %d", rv); + error = clib_error_return (0, "vnet_configure_l2_xcrw returned %d", rv); + goto done; } +done: vec_free (rw); + unformat_free (line_input); - return 0; + return error; } /*? diff --git a/src/vnet/l2tp/l2tp.c b/src/vnet/l2tp/l2tp.c index a4531dab..2d323397 100644 --- a/src/vnet/l2tp/l2tp.c +++ b/src/vnet/l2tp/l2tp.c @@ -427,6 +427,7 @@ create_l2tpv3_tunnel_command_fn (vlib_main_t * vm, u32 sw_if_index; u32 encap_fib_id = ~0; u32 encap_fib_index = ~0; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -455,18 +456,22 @@ create_l2tpv3_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "l2-sublayer-present")) l2_sublayer_present = 1; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (encap_fib_id != ~0) { uword *p; ip6_main_t *im = &ip6_main; if (!(p = hash_get (im->fib_index_by_table_id, encap_fib_id))) - return clib_error_return (0, "No fib with id %d", encap_fib_id); + { + error = clib_error_return (0, "No fib with id %d", encap_fib_id); + goto done; + } encap_fib_index = p[0]; } else @@ -475,9 +480,15 @@ create_l2tpv3_tunnel_command_fn (vlib_main_t * vm, } if (our_address_set == 0) - return clib_error_return (0, "our address not specified"); + { + error = clib_error_return (0, "our address not specified"); + goto done; + } if (client_address_set == 0) - return clib_error_return (0, "client address not specified"); + { + error = clib_error_return (0, "client address not specified"); + goto done; + } rv = create_l2tpv3_ipv6_tunnel (lm, &client_address, &our_address, local_session_id, remote_session_id, @@ -491,16 +502,22 @@ create_l2tpv3_tunnel_command_fn (vlib_main_t * vm, vnet_get_main (), sw_if_index); break; case VNET_API_ERROR_INVALID_VALUE: - return clib_error_return (0, "session already exists..."); + error = clib_error_return (0, "session already exists..."); + goto done; case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "session does not exist..."); + error = clib_error_return (0, "session does not exist..."); + goto done; default: - return clib_error_return (0, "l2tp_session_add_del returned %d", rv); + error = clib_error_return (0, "l2tp_session_add_del returned %d", rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/lisp-cp/lisp_cli.c b/src/vnet/lisp-cp/lisp_cli.c index 25d11c61..05df9fb6 100644 --- a/src/vnet/lisp-cp/lisp_cli.c +++ b/src/vnet/lisp-cp/lisp_cli.c @@ -25,6 +25,7 @@ lisp_show_adjacencies_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "%s %40s\n", "leid", "reid"); unformat_input_t _line_input, *line_input = &_line_input; u32 vni = ~0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -38,14 +39,14 @@ lisp_show_adjacencies_command_fn (vlib_main_t * vm, { vlib_cli_output (vm, "parse error: '%U'", format_unformat_error, line_input); - return 0; + goto done; } } if (~0 == vni) { vlib_cli_output (vm, "error: no vni specified!"); - return 0; + goto done; } adjs = vnet_lisp_adjacencies_get_by_vni (vni); @@ -57,7 +58,10 @@ lisp_show_adjacencies_command_fn (vlib_main_t * vm, } vec_free (adjs); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -77,6 +81,7 @@ lisp_add_del_map_server_command_fn (vlib_main_t * vm, u8 is_add = 1, ip_set = 0; ip_address_t ip; unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -94,14 +99,14 @@ lisp_add_del_map_server_command_fn (vlib_main_t * vm, { vlib_cli_output (vm, "parse error: '%U'", format_unformat_error, line_input); - return 0; + goto done; } } if (!ip_set) { vlib_cli_output (vm, "map-server ip address not set!"); - return 0; + goto done; } rv = vnet_lisp_add_del_map_server (&ip, is_add); @@ -109,7 +114,10 @@ lisp_add_del_map_server_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "failed to %s map-server!", is_add ? "add" : "delete"); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -191,7 +199,7 @@ lisp_add_del_local_eid_command_fn (vlib_main_t * vm, unformat_input_t * input, if (key && (0 == key_id)) { vlib_cli_output (vm, "invalid key_id!"); - return 0; + goto done;; } gid_address_copy (&a->eid, &eid); @@ -213,6 +221,8 @@ done: vec_free (locator_set_name); gid_address_free (&a->eid); vec_free (a->key); + unformat_free (line_input); + return error; } @@ -233,6 +243,7 @@ lisp_eid_table_map_command_fn (vlib_main_t * vm, u8 is_add = 1, is_l2 = 0; u32 vni = 0, dp_id = 0; unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -250,11 +261,16 @@ lisp_eid_table_map_command_fn (vlib_main_t * vm, is_l2 = 1; else { - return unformat_parse_error (line_input); + error = unformat_parse_error (line_input); + goto done; } } vnet_lisp_eid_table_map (vni, dp_id, is_l2, is_add); - return 0; + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -479,7 +495,7 @@ lisp_add_del_adjacency_command_fn (vlib_main_t * vm, unformat_input_t * input, != ip_prefix_version (leid_ippref))) { clib_warning ("remote and local EIDs are of different types!"); - return error; + goto done; } memset (a, 0, sizeof (a[0])); @@ -512,6 +528,7 @@ lisp_map_request_mode_command_fn (vlib_main_t * vm, { unformat_input_t _i, *i = &_i; map_request_mode_t mr_mode = _MR_MODE_MAX; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, i)) @@ -533,12 +550,15 @@ lisp_map_request_mode_command_fn (vlib_main_t * vm, if (_MR_MODE_MAX == mr_mode) { clib_warning ("No LISP map request mode entered!"); - return 0; + goto done; } vnet_lisp_set_map_request_mode (mr_mode); + done: - return 0; + unformat_free (i); + + return error; } /* *INDENT-OFF* */ @@ -630,7 +650,10 @@ lisp_pitr_set_locator_set_command_fn (vlib_main_t * vm, else if (unformat (line_input, "disable")) is_add = 0; else - return clib_error_return (0, "parse error"); + { + error = clib_error_return (0, "parse error"); + goto done; + } } if (!locator_name_set) @@ -648,6 +671,8 @@ lisp_pitr_set_locator_set_command_fn (vlib_main_t * vm, done: if (locator_set_name) vec_free (locator_set_name); + unformat_free (line_input); + return error; } @@ -771,6 +796,7 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm, gid_address_t eid; u8 print_all = 1; u8 filter = 0; + clib_error_t *error = NULL; memset (&eid, 0, sizeof (eid)); @@ -787,8 +813,11 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm, else if (unformat (line_input, "remote")) filter = 2; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } vlib_cli_output (vm, "%-35s%-20s%-30s%-20s%-s", @@ -818,7 +847,7 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm, { mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &eid); if ((u32) ~ 0 == mi) - return 0; + goto done; mapit = pool_elt_at_index (lcm->mapping_pool, mi); locator_set_t *ls = pool_elt_at_index (lcm->locator_set_pool, @@ -827,14 +856,17 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm, if (filter && !((1 == filter && ls->local) || (2 == filter && !ls->local))) { - return 0; + goto done; } vlib_cli_output (vm, "%U,", format_eid_entry, lcm->vnet_main, lcm, mapit, ls); } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -853,6 +885,7 @@ lisp_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t * input, unformat_input_t _line_input, *line_input = &_line_input; u8 is_enabled = 0; u8 is_set = 0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -869,16 +902,24 @@ lisp_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t * input, is_set = 1; else { - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; } } if (!is_set) - return clib_error_return (0, "state not set"); + { + error = clib_error_return (0, "state not set"); + goto done; + } vnet_lisp_enable_disable (is_enabled); - return 0; + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -897,6 +938,7 @@ lisp_map_register_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; u8 is_enabled = 0; u8 is_set = 0; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -915,18 +957,22 @@ lisp_map_register_enable_disable_command_fn (vlib_main_t * vm, { vlib_cli_output (vm, "parse error: '%U'", format_unformat_error, line_input); - return 0; + goto done; } } if (!is_set) { vlib_cli_output (vm, "state not set!"); - return 0; + goto done; } vnet_lisp_map_register_enable_disable (is_enabled); - return 0; + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -945,6 +991,7 @@ lisp_rloc_probe_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; u8 is_enabled = 0; u8 is_set = 0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -963,18 +1010,22 @@ lisp_rloc_probe_enable_disable_command_fn (vlib_main_t * vm, { vlib_cli_output (vm, "parse error: '%U'", format_unformat_error, line_input); - return 0; + goto done; } } if (!is_set) { vlib_cli_output (vm, "state not set!"); - return 0; + goto done; } vnet_lisp_rloc_probe_enable_disable (is_enabled); - return 0; + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -1022,6 +1073,7 @@ lisp_show_eid_table_map_command_fn (vlib_main_t * vm, lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); uword *vni_table = 0; u8 is_l2 = 0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -1040,14 +1092,17 @@ lisp_show_eid_table_map_command_fn (vlib_main_t * vm, is_l2 = 0; } else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } if (!vni_table) { vlib_cli_output (vm, "Error: expected l2|l3 param!\n"); - return 0; + goto done; } vlib_cli_output (vm, "%=10s%=10s", "VNI", is_l2 ? "BD" : "VRF"); @@ -1059,7 +1114,10 @@ lisp_show_eid_table_map_command_fn (vlib_main_t * vm, })); /* *INDENT-ON* */ - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -1131,6 +1189,8 @@ done: vec_free (locators); if (locator_set_name) vec_free (locator_set_name); + unformat_free (line_input); + return error; } @@ -1205,6 +1265,8 @@ lisp_add_del_locator_in_set_command_fn (vlib_main_t * vm, done: vec_free (locators); vec_free (locator_set_name); + unformat_free (line_input); + return error; } @@ -1322,6 +1384,8 @@ lisp_add_del_map_resolver_command_fn (vlib_main_t * vm, } done: + unformat_free (line_input); + return error; } @@ -1372,11 +1436,11 @@ lisp_add_del_mreq_itr_rlocs_command_fn (vlib_main_t * vm, is_add ? 
"add" : "delete"); } +done: vec_free (locator_set_name); + unformat_free (line_input); -done: return error; - } /* *INDENT-OFF* */ @@ -1438,7 +1502,10 @@ lisp_use_petr_set_locator_set_command_fn (vlib_main_t * vm, else if (unformat (line_input, "disable")) is_add = 0; else - return clib_error_return (0, "parse error"); + { + error = clib_error_return (0, "parse error"); + goto done; + } } if (!ip_set) @@ -1454,6 +1521,8 @@ lisp_use_petr_set_locator_set_command_fn (vlib_main_t * vm, } done: + unformat_free (line_input); + return error; } diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index 2142e095..19ac22e7 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -794,6 +794,7 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, u32 table_id, vni, bd_id; u8 vni_is_set = 0, vrf_is_set = 0, bd_index_is_set = 0; u8 nsh_iface = 0; + clib_error_t *error = NULL; if (vnet_lisp_gpe_enable_disable_status () == 0) { @@ -828,8 +829,9 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, } else { - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; } } @@ -839,7 +841,8 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, { if (~0 == lisp_gpe_add_nsh_iface (&lisp_gpe_main)) { - return clib_error_return (0, "NSH interface not created"); + error = clib_error_return (0, "NSH interface not created"); + goto done; } } else @@ -850,21 +853,34 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, } if (vrf_is_set && bd_index_is_set) - return clib_error_return (0, - "Cannot set both vrf and brdige domain index!"); + { + error = clib_error_return + (0, "Cannot set both vrf and brdige domain index!"); + goto done; + } if (!vni_is_set) - return clib_error_return (0, "vni must be set!"); + { + error = clib_error_return (0, "vni must be set!"); + goto done; + } if (!vrf_is_set && !bd_index_is_set) - return clib_error_return (0, "vrf or bridge domain index must be set!"); + { + error = + clib_error_return (0, "vrf or bridge domain index must be set!"); + goto done; + } if (bd_index_is_set) { if (is_add) { if (~0 == lisp_gpe_tenant_l2_iface_add_or_lock (vni, bd_id)) - return clib_error_return (0, "L2 interface not created"); + { + error = clib_error_return (0, "L2 interface not created"); + goto done; + } } else lisp_gpe_tenant_l2_iface_unlock (vni); @@ -874,13 +890,35 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, if (is_add) { if (~0 == lisp_gpe_tenant_l3_iface_add_or_lock (vni, table_id)) - return clib_error_return (0, "L3 interface not created"); + { + error = clib_error_return (0, "L3 interface not created"); + goto done; + } } else lisp_gpe_tenant_l3_iface_unlock (vni); } - return (NULL); + if (nsh_iface) + { + if (is_add) + { + if (~0 == lisp_gpe_add_nsh_iface (&lisp_gpe_main)) + { + error = clib_error_return (0, "NSH interface not created"); + goto done; + } + else + { + lisp_gpe_del_nsh_iface (&lisp_gpe_main); + } + } + } + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/lisp-gpe/lisp_gpe.c b/src/vnet/lisp-gpe/lisp_gpe.c index 1f8afdae..f2fbcbd5 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.c +++ b/src/vnet/lisp-gpe/lisp_gpe.c @@ -218,6 +218,7 @@ lisp_gpe_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t _line_input, 
*line_input = &_line_input; u8 is_en = 1; vnet_lisp_gpe_enable_disable_args_t _a, *a = &_a; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -231,12 +232,18 @@ lisp_gpe_enable_disable_command_fn (vlib_main_t * vm, is_en = 0; else { - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; } } a->is_en = is_en; - return vnet_lisp_gpe_enable_disable (a); + error = vnet_lisp_gpe_enable_disable (a); + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c index aeec6a94..a2d28118 100644 --- a/src/vnet/map/map.c +++ b/src/vnet/map/map.c @@ -465,6 +465,8 @@ map_security_check_command_fn (vlib_main_t * vm, { unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; + clib_error_t *error = NULL; + /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -476,11 +478,17 @@ map_security_check_command_fn (vlib_main_t * vm, else if (unformat (line_input, "on")) mm->sec_check = true; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + + return error; } static clib_error_t * @@ -490,6 +498,8 @@ map_security_check_frag_command_fn (vlib_main_t * vm, { unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; + clib_error_t *error = NULL; + /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -501,11 +511,17 @@ map_security_check_frag_command_fn (vlib_main_t * vm, else if (unformat (line_input, "on")) mm->sec_check_frag = true; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + + return error; } static clib_error_t * @@ -523,6 +539,7 @@ map_add_domain_command_fn (vlib_main_t * vm, u32 mtu = 0; u8 flags = 0; ip6_src_len = 128; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -559,20 +576,28 @@ map_add_domain_command_fn (vlib_main_t * vm, else if (unformat (line_input, "map-t")) flags |= MAP_DOMAIN_TRANSLATION; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args < 3) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } map_create_domain (&ip4_prefix, ip4_prefix_len, &ip6_prefix, ip6_prefix_len, &ip6_src, ip6_src_len, ea_bits_len, psid_offset, psid_length, &map_domain_index, mtu, flags); - return 0; +done: + unformat_free (line_input); + + return error; } static clib_error_t * @@ -582,6 +607,7 @@ map_del_domain_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; u32 num_m_args = 0; u32 map_domain_index; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -592,17 +618,25 @@ map_del_domain_command_fn (vlib_main_t * vm, if (unformat (line_input, "index %d", &map_domain_index)) num_m_args++; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args != 1) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } map_delete_domain (map_domain_index); - return 0; +done: + unformat_free (line_input); + + return error; } static clib_error_t * @@ -613,6 +647,7 @@ map_add_rule_command_fn (vlib_main_t * vm, ip6_address_t tep; u32 num_m_args = 0; u32 psid = 0, map_domain_index; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -628,19 +663,29 @@ map_add_rule_command_fn (vlib_main_t * vm, if (unformat (line_input, "ip6-dst %U", unformat_ip6_address, &tep)) num_m_args++; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args != 3) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } if (map_add_del_psid (map_domain_index, psid, &tep, 1) != 0) { - return clib_error_return (0, "Failing to add Mapping Rule"); + error = clib_error_return (0, "Failing to add Mapping Rule"); + goto done; } - return 0; + +done: + unformat_free (line_input); + + return error; } #if MAP_SKIP_IP6_LOOKUP @@ -653,6 +698,7 @@ map_pre_resolve_command_fn (vlib_main_t * vm, ip4_address_t ip4nh; ip6_address_t ip6nh; map_main_t *mm = &map_main; + clib_error_t *error = NULL; memset (&ip4nh, 0, sizeof (ip4nh)); memset (&ip6nh, 0, sizeof (ip6nh)); @@ -669,14 +715,19 @@ map_pre_resolve_command_fn (vlib_main_t * vm, if (unformat (line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh)) mm->preresolve_ip6 = ip6nh; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); map_pre_resolve (&ip4nh, &ip6nh); - return 0; +done: + unformat_free (line_input); + + return error; } #endif @@ -688,6 +739,7 @@ map_icmp_relay_source_address_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; ip4_address_t icmp_src_address; map_main_t *mm = &map_main; + clib_error_t *error = NULL; mm->icmp4_src_address.as_u32 = 0; @@ -701,12 +753,17 @@ map_icmp_relay_source_address_command_fn (vlib_main_t * vm, (line_input, "%U", unformat_ip4_address, &icmp_src_address)) mm->icmp4_src_address = icmp_src_address; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + return error; } static clib_error_t * @@ -717,6 +774,7 @@ map_icmp_unreachables_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; int num_m_args = 0; + clib_error_t *error = NULL; /* 
Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -730,16 +788,21 @@ map_icmp_unreachables_command_fn (vlib_main_t * vm, else if (unformat (line_input, "off")) mm->icmp6_enabled = false; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args != 1) - return clib_error_return (0, "mandatory argument(s) missing"); + error = clib_error_return (0, "mandatory argument(s) missing"); - return 0; +done: + unformat_free (line_input); + + return error; } static clib_error_t * @@ -748,6 +811,7 @@ map_fragment_command_fn (vlib_main_t * vm, { unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -760,12 +824,17 @@ map_fragment_command_fn (vlib_main_t * vm, else if (unformat (line_input, "outer")) mm->frag_inner = false; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + return error; } static clib_error_t * @@ -775,6 +844,7 @@ map_fragment_df_command_fn (vlib_main_t * vm, { unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -787,12 +857,17 @@ map_fragment_df_command_fn (vlib_main_t * vm, else if (unformat (line_input, "off")) mm->frag_ignore_df = false; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + return error; } static clib_error_t * @@ -803,6 +878,7 @@ map_traffic_class_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; u32 tc = 0; + clib_error_t *error = NULL; mm->tc_copy = false; @@ -817,12 +893,17 @@ map_traffic_class_command_fn (vlib_main_t * vm, else if (unformat (line_input, "%x", &tc)) mm->tc = tc & 0xff; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + return error; } static u8 * @@ -922,6 +1003,7 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input, map_domain_t *d; bool counters = false; u32 map_domain_index = ~0; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -934,10 +1016,12 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "index %d", &map_domain_index)) ; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (pool_elts (mm->domains) == 0) vlib_cli_output (vm, "No MAP domains are configured..."); @@ -952,15 +1036,19 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input, { if (pool_is_free_index (mm->domains, map_domain_index)) { - return clib_error_return (0, "MAP domain does not exists %d", - map_domain_index); + error = clib_error_return (0, "MAP domain does not exists %d", + map_domain_index); + goto done; } d = pool_elt_at_index (mm->domains, map_domain_index); vlib_cli_output (vm, "%U", format_map_domain, d, counters); } - return 0; +done: + unformat_free (line_input); + + return error; } static clib_error_t * diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 0e610e17..7ae4aa00 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -470,6 +470,8 @@ vnet_mpls_local_label (vlib_main_t * vm, } done: + unformat_free (line_input); + return error; } diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c index 8d1e30a3..e488271d 100644 --- a/src/vnet/mpls/mpls_tunnel.c +++ b/src/vnet/mpls/mpls_tunnel.c @@ -535,6 +535,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, fib_route_path_t rpath, *rpaths = NULL; mpls_label_t out_label = MPLS_LABEL_INVALID, *labels = NULL; u32 sw_if_index; + clib_error_t *error = NULL; memset(&rpath, 0, sizeof(rpath)); @@ -595,8 +596,11 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "l2-only")) l2_only = 1; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } if (is_del) @@ -606,17 +610,22 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, else { if (0 == vec_len(labels)) - return clib_error_return (0, "No Output Labels '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "No Output Labels '%U'", + format_unformat_error, line_input); + goto done; + } vec_add1(rpaths, rpath); vnet_mpls_tunnel_add(rpaths, labels, l2_only, &sw_if_index); } +done: vec_free(labels); vec_free(rpaths); + unformat_free (line_input); - return (NULL); + return error; } /*? 
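Every hunk in this change applies the same discipline: a parse error or validation failure records a clib_error_t and jumps to a single done: label that frees the line input. A minimal standalone sketch of the resulting shape (the command function name and the "value" parameter are hypothetical; the plumbing calls are the VPP APIs used throughout the hunks above):

static clib_error_t *
example_command_fn (vlib_main_t * vm, unformat_input_t * input,
                    vlib_cli_command_t * cmd)
{
  unformat_input_t _line_input, *line_input = &_line_input;
  u32 value = ~0;
  clib_error_t *error = NULL;

  /* Nothing is allocated until unformat_user succeeds, so an early
   * return here leaks nothing. */
  if (!unformat_user (input, unformat_line_input, line_input))
    return 0;

  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (line_input, "value %u", &value))
        ;
      else
        {
          /* Record the error and fall through to the common cleanup
           * instead of returning (which would leak line_input). */
          error = clib_error_return (0, "unknown input `%U'",
                                     format_unformat_error, line_input);
          goto done;
        }
    }

  if (value == ~0)
    {
      error = clib_error_return (0, "value not specified");
      goto done;
    }

done:
  unformat_free (line_input);
  return error;
}

The single cleanup point is what guarantees unformat_free runs on every exit path; the early returns being removed throughout this patch are exactly the paths that used to skip it.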
diff --git a/src/vnet/pg/cli.c b/src/vnet/pg/cli.c index f5896b43..3c249a7b 100644 --- a/src/vnet/pg/cli.c +++ b/src/vnet/pg/cli.c @@ -547,21 +547,30 @@ pg_capture_cmd_fn (vlib_main_t * vm, else { error = clib_error_create ("unknown input `%U'", - format_unformat_error, input); - return error; + format_unformat_error, line_input); + goto done; } } if (!hi) - return clib_error_return (0, "Please specify interface name"); + { + error = clib_error_return (0, "Please specify interface name"); + goto done; + } if (hi->dev_class_index != pg_dev_class.index) - return clib_error_return (0, "Please specify packet-generator interface"); + { + error = + clib_error_return (0, "Please specify packet-generator interface"); + goto done; + } if (!pcap_file_name && is_disable == 0) - return clib_error_return (0, "Please specify pcap file name"); + { + error = clib_error_return (0, "Please specify pcap file name"); + goto done; + } - unformat_free (line_input); pg_capture_args_t _a, *a = &_a; @@ -572,6 +581,10 @@ pg_capture_cmd_fn (vlib_main_t * vm, a->count = count; error = pg_capture (a); + +done: + unformat_free (line_input); + return error; } @@ -590,6 +603,7 @@ create_pg_if_cmd_fn (vlib_main_t * vm, pg_main_t *pg = &pg_main; unformat_input_t _line_input, *line_input = &_line_input; u32 if_id; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -600,14 +614,19 @@ create_pg_if_cmd_fn (vlib_main_t * vm, ; else - return clib_error_create ("unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_create ("unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + pg_interface_add_or_get (pg, if_id); + +done: unformat_free (line_input); - pg_interface_add_or_get (pg, if_id); - return 0; + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/policer/node_funcs.c b/src/vnet/policer/node_funcs.c index 1f4997ff..457dd09f 100644 --- a/src/vnet/policer/node_funcs.c +++ b/src/vnet/policer/node_funcs.c @@ -447,6 +447,7 @@ test_policer_command_fn (vlib_main_t * vm, int rx_set = 0; int is_add = 1; int is_show = 0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -468,7 +469,10 @@ test_policer_command_fn (vlib_main_t * vm, } if (rx_set == 0) - return clib_error_return (0, "interface not set"); + { + error = clib_error_return (0, "interface not set"); + goto done; + } if (is_show) { @@ -477,12 +481,13 @@ test_policer_command_fn (vlib_main_t * vm, policer = pool_elt_at_index (pm->policers, pi); vlib_cli_output (vm, "%U", format_policer_instance, policer); - return 0; + goto done; } if (is_add && config_name == 0) { - return clib_error_return (0, "policer config name required"); + error = clib_error_return (0, "policer config name required"); + goto done; } rv = test_policer_add_del (rx_sw_if_index, config_name, is_add); @@ -493,11 +498,15 @@ test_policer_command_fn (vlib_main_t * vm, break; default: - return clib_error_return + error = clib_error_return (0, "WARNING: vnet_vnet_policer_add_del returned %d", rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/policer/policer.c b/src/vnet/policer/policer.c index 290a6af5..cd754e29 100644 --- a/src/vnet/policer/policer.c +++ b/src/vnet/policer/policer.c @@ -413,6 +413,7 @@ configure_policer_command_fn (vlib_main_t * vm, u8 is_add = 1; u8 *name = 0; u32 pi; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -433,13 +434,19 @@ configure_policer_command_fn (vlib_main_t * vm, foreach_config_param #undef _ else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + error = policer_add_del (vm, name, &c, &pi, is_add); + +done: unformat_free (line_input); - return policer_add_del (vm, name, &c, &pi, is_add); + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index 48e81b50..25c930c6 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -1308,6 +1308,7 @@ tap_connect_command_fn (vlib_main_t * vm, int ip6_address_set = 0; u32 ip4_mask_width = 0; u32 ip6_mask_width = 0; + clib_error_t *error = NULL; if (tm->is_disabled) return clib_error_return (0, "device disabled..."); @@ -1336,12 +1337,18 @@ tap_connect_command_fn (vlib_main_t * vm, else if (unformat (line_input, "%s", &intfc_name)) ; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } if (intfc_name == 0) - return clib_error_return (0, "interface name must be specified"); + { + error = clib_error_return (0, "interface name must be specified"); + goto done; + } memset (ap, 0, sizeof (*ap)); @@ -1367,48 +1374,64 @@ tap_connect_command_fn (vlib_main_t * vm, switch (rv) { case VNET_API_ERROR_SYSCALL_ERROR_1: - return clib_error_return (0, "Couldn't open /dev/net/tun"); + error = clib_error_return (0, "Couldn't open /dev/net/tun"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_2: - return clib_error_return (0, "Error setting flags on '%s'", intfc_name); - + error = clib_error_return (0, "Error setting flags on '%s'", intfc_name); + goto done; + case VNET_API_ERROR_SYSCALL_ERROR_3: - return clib_error_return (0, "Couldn't open provisioning socket"); + error = clib_error_return (0, "Couldn't open provisioning socket"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_4: - return clib_error_return (0, "Couldn't get if_index"); + error = clib_error_return (0, "Couldn't get if_index"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_5: - return clib_error_return (0, "Couldn't bind provisioning socket"); + error = clib_error_return (0, "Couldn't bind provisioning socket"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_6: - return clib_error_return (0, "Couldn't set device non-blocking flag"); + error = clib_error_return (0, "Couldn't set device non-blocking flag"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_7: - return clib_error_return (0, "Couldn't set device MTU"); + error = clib_error_return (0, "Couldn't set device MTU"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_8: - return clib_error_return (0, "Couldn't get interface flags"); + error = clib_error_return (0, "Couldn't get interface flags"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_9: - return clib_error_return (0, "Couldn't set intfc admin state up"); + error = clib_error_return (0, "Couldn't set intfc admin state up"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_10: - return clib_error_return (0, "Couldn't set intfc address/mask"); + error = clib_error_return (0, "Couldn't set intfc address/mask"); + goto done; case VNET_API_ERROR_INVALID_REGISTRATION: - return clib_error_return (0, "Invalid registration"); + error = clib_error_return (0, "Invalid 
registration"); + goto done; case 0: break; default: - return clib_error_return (0, "Unknown error: %d", rv); + error = clib_error_return (0, "Unknown error: %d", rv); + goto done; } vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index); - return 0; + +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (tap_connect_command, static) = { diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c index b97510c4..2cba596f 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.c +++ b/src/vnet/vxlan-gpe/vxlan_gpe.c @@ -454,6 +454,7 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, u32 tmp; vnet_vxlan_gpe_add_del_tunnel_args_t _a, * a = &_a; u32 sw_if_index; + clib_error_t *error = NULL; /* Get a line of input. */ if (! unformat_user (input, unformat_line_input, line_input)) @@ -494,7 +495,10 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, encap_fib_index = ip4_fib_index_from_table_id (tmp); if (encap_fib_index == ~0) - return clib_error_return (0, "nonexistent encap fib id %d", tmp); + { + error = clib_error_return (0, "nonexistent encap fib id %d", tmp); + goto done; + } } else if (unformat (line_input, "decap-vrf-id %d", &tmp)) { @@ -504,7 +508,10 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, decap_fib_index = ip4_fib_index_from_table_id (tmp); if (decap_fib_index == ~0) - return clib_error_return (0, "nonexistent decap fib id %d", tmp); + { + error = clib_error_return (0, "nonexistent decap fib id %d", tmp); + goto done; + } } else if (unformat (line_input, "vni %d", &vni)) vni_set = 1; @@ -517,27 +524,43 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, else if (unformat(line_input, "next-nsh")) protocol = VXLAN_GPE_PROTOCOL_NSH; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (local_set == 0) - return clib_error_return (0, "tunnel local address not specified"); + { + error = clib_error_return (0, "tunnel local address not specified"); + goto done; + } if (remote_set == 0) - return clib_error_return (0, "tunnel remote address not specified"); + { + error = clib_error_return (0, "tunnel remote address not specified"); + goto done; + } if (ipv4_set && ipv6_set) - return clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + { + error = clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + goto done; + } if ((ipv4_set && memcmp(&local.ip4, &remote.ip4, sizeof(local.ip4)) == 0) || (ipv6_set && memcmp(&local.ip6, &remote.ip6, sizeof(local.ip6)) == 0)) - return clib_error_return (0, "src and dst addresses are identical"); + { + error = clib_error_return (0, "src and dst addresses are identical"); + goto done; + } if (vni_set == 0) - return clib_error_return (0, "vni not specified"); + { + error = clib_error_return (0, "vni not specified"); + goto done; + } memset (a, 0, sizeof (*a)); @@ -558,20 +581,27 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index); break; case VNET_API_ERROR_INVALID_DECAP_NEXT: - return clib_error_return (0, "invalid decap-next..."); + error = clib_error_return (0, "invalid decap-next..."); + goto done; case VNET_API_ERROR_TUNNEL_EXIST: - return clib_error_return (0, "tunnel already exists..."); + error = clib_error_return (0, "tunnel already exists..."); + goto done; 
case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "tunnel does not exist..."); + error = clib_error_return (0, "tunnel does not exist..."); + goto done; default: - return clib_error_return + error = clib_error_return (0, "vnet_vxlan_gpe_add_del_tunnel returned %d", rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = { diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c index 849fc25d..eedc16f8 100644 --- a/src/vnet/vxlan/vxlan.c +++ b/src/vnet/vxlan/vxlan.c @@ -657,6 +657,7 @@ vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, int rv; vnet_vxlan_add_del_tunnel_args_t _a, * a = &_a; u32 tunnel_sw_if_index; + clib_error_t *error = NULL; /* Cant "universally zero init" (={0}) due to GCC bug 53119 */ memset(&src, 0, sizeof src); @@ -715,7 +716,10 @@ vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, { encap_fib_index = fib_table_find (fib_ip_proto (ipv6_set), tmp); if (encap_fib_index == ~0) - return clib_error_return (0, "nonexistent encap-vrf-id %d", tmp); + { + error = clib_error_return (0, "nonexistent encap-vrf-id %d", tmp); + goto done; + } } else if (unformat (line_input, "decap-next %U", unformat_decap_next, &decap_next_index, ipv4_set)) @@ -723,41 +727,72 @@ vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "vni %d", &vni)) { if (vni >> 24) - return clib_error_return (0, "vni %d out of range", vni); + { + error = clib_error_return (0, "vni %d out of range", vni); + goto done; + } } else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (src_set == 0) - return clib_error_return (0, "tunnel src address not specified"); + { + error = clib_error_return (0, "tunnel src address not specified"); + goto done; + } if (dst_set == 0) - return clib_error_return (0, "tunnel dst address not specified"); + { + error = clib_error_return (0, "tunnel dst address not specified"); + goto done; + } if (grp_set && !ip46_address_is_multicast(&dst)) - return clib_error_return (0, "tunnel group address not multicast"); + { + error = clib_error_return (0, "tunnel group address not multicast"); + goto done; + } if (grp_set == 0 && ip46_address_is_multicast(&dst)) - return clib_error_return (0, "dst address must be unicast"); + { + error = clib_error_return (0, "dst address must be unicast"); + goto done; + } if (grp_set && mcast_sw_if_index == ~0) - return clib_error_return (0, "tunnel nonexistent multicast device"); + { + error = clib_error_return (0, "tunnel nonexistent multicast device"); + goto done; + } if (ipv4_set && ipv6_set) - return clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + { + error = clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + goto done; + } if (ip46_address_cmp(&src, &dst) == 0) - return clib_error_return (0, "src and dst addresses are identical"); + { + error = clib_error_return (0, "src and dst addresses are identical"); + goto done; + } if (decap_next_index == ~0) - return clib_error_return (0, "next node not found"); + { + error = clib_error_return (0, "next node not found"); + goto done; + } if (vni == 0) - return clib_error_return (0, "vni not specified"); + { + error = clib_error_return (0, "vni not specified"); + goto done; + } memset (a, 0, sizeof (*a)); @@ -779,17 +814,23 @@ 
vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, break; case VNET_API_ERROR_TUNNEL_EXIST: - return clib_error_return (0, "tunnel already exists..."); + error = clib_error_return (0, "tunnel already exists..."); + goto done; case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "tunnel does not exist..."); + error = clib_error_return (0, "tunnel does not exist..."); + goto done; default: - return clib_error_return + error = clib_error_return (0, "vnet_vxlan_add_del_tunnel returned %d", rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } /*? @@ -912,6 +953,8 @@ set_ip_vxlan_bypass (u32 is_ip6, vnet_int_vxlan_bypass_mode (sw_if_index, is_ip6, is_enable); done: + unformat_free (line_input); + return error; } diff --git a/src/vpp/app/l2t.c b/src/vpp/app/l2t.c index 45dd2807..e1eda155 100644 --- a/src/vpp/app/l2t.c +++ b/src/vpp/app/l2t.c @@ -254,6 +254,7 @@ l2tp_session_add_command_fn (vlib_main_t * vm, u32 local_session_id = 1, remote_session_id = 1; int our_address_set = 0, client_address_set = 0; int l2_sublayer_present = 0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -290,8 +291,12 @@ l2tp_session_add_command_fn (vlib_main_t * vm, else if (unformat (line_input, "l2-sublayer-present")) l2_sublayer_present = 1; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + unformat_free (line_input); + return error; + } } unformat_free (line_input); diff --git a/src/vpp/app/vpe_cli.c b/src/vpp/app/vpe_cli.c index a26bf71f..94bdc84c 100644 --- a/src/vpp/app/vpe_cli.c +++ b/src/vpp/app/vpe_cli.c @@ -36,6 +36,7 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, mac_addr_t *mac_addrs = 0; u32 sw_if_index; u32 i; + clib_error_t *error = NULL; next_hops = NULL; rpaths = NULL; @@ -49,7 +50,11 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, if (!unformat (line_input, "%U %U", unformat_ip4_address, &prefix.fp_addr.ip4, unformat_vnet_sw_interface, vnm, &sw_if_index)) - goto barf; + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { @@ -67,13 +72,18 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, } else { - barf: - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; } } + if (vec_len (mac_addrs) == 0 || vec_len (mac_addrs) != vec_len (next_hops)) - goto barf; + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } /* Create / delete special interface route /32's */ @@ -100,10 +110,12 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, &prefix, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, rpaths); +done: vec_free (mac_addrs); vec_free (next_hops); + unformat_free (line_input); - return 0; + return error; } /* *INDENT-OFF* */ -- cgit 1.2.3-korg From 04197eea69c77a0145dc54d56fc767a03897c90e Mon Sep 17 00:00:00 2001 From: Billy McFall Date: Wed, 22 Feb 2017 14:13:42 -0500 Subject: VPP-279: Document changes for vnet/vnet/devices Add doxygen documentation for dpdk CLI commands. 
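The documentation blocks added below all follow one convention: a '/*? ... ?*/' comment placed directly above the VLIB_CLI_COMMAND registration it documents, using the @cliexpar/@cliexcmd/@cliexstart/@cliexend doxygen aliases to embed captured CLI examples (@cliexcmd for a bare command, @cliexstart/@cliexend for a command plus its output). A hypothetical sketch of the shape, with a made-up command name, path, and output:

/*?
 * This command displays the widget table.
 *
 * @cliexpar
 * Example of how to display the widget table:
 * @cliexstart{show example widgets}
 * 2 widgets configured
 * @cliexend
?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_example_widgets_command, static) = {
  .path = "show example widgets",
  .short_help = "show example widgets",
  .function = show_example_widgets_fn,
};
/* *INDENT-ON* */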
Outside of adding documentation to the CLI commands, modified the CLI code as follows:

* The "set dpdk interface placement" command allows the user to move interface/queues to a different thread, but only a subset of threads is valid. Updated the "show dpdk interface placement" command to display all valid threads, even if all interface/queues have been moved off. Updated the "show dpdk interface hqos placement" command the same way.

* There is a command to modify the Subport attributes, but no way to display the changes. Added a "Subport" section to the "show dpdk interface hqos" command.

* Reworked the "set dpdk interface hqos subport" command.
  - The current implementation had a local rte_sched_subport_params structure, initialized it to default values, then overwrote it with what was input. The side effect of this is that if all the current data is non-default and a new command is entered with just one attribute, all the remaining attributes get set back to default under the covers. Very confusing for the user. Updated the code to read the current values and overwrite only the attributes that were input.
  - DPDK does not have a way to read subport data back, so there is no way to query the currently applied values. The set command was not updating the local copy that is created at init. Modified the code to store the updated values if the DPDK apply function was successful.
  - Several functions repeated the same code to get a pointer to the local HQoS data. Added a utility function, get_hqos(..), to perform this action. Did not port other code to use the new function.

* The "set dpdk interface hqos pktfield" command allows the user to set the packet fields required for classifying the incoming packet. The classification is across three fields (subport, pipe, tc). The command was using 0,1,2 to represent these three fields, but had no explanation regarding these magic numbers. Updated the command to take the three tokens (subport, pipe, tc) for more clarity. For legacy's sake, 0,1,2 are still accepted. Also updated the "show dpdk interface hqos" command to show these tokens.

* The "set dpdk interface hqos tctbl" command maps an interface and a value (0-63) to a traffic class and queue. The "show dpdk interface hqos" command showed the internal DPDK magic number for traffic class and queue. Updated the show command to display what was input instead of the magic number.

* The "show dpdk hqos queue" command always returns zeros by default because RTE_SCHED_COLLECT_STATS is not defined in DPDK. Took me a while to figure out why I wasn't getting values returned, so the command now returns an error message instead of zeros if RTE_SCHED_COLLECT_STATS is not defined.

Change-Id: I22b640d668245839ee977ef3602175c61d91d24c
Signed-off-by: Billy McFall
---
 src/vnet/devices/dpdk/cli.c | 678 ++++++++++++++++++++++++++++++++++++++++++-----
 src/vnet/devices/dpdk/qos_doc.md | 39 ++-
 2 files changed, 617 insertions(+), 100 deletions(-)
(limited to 'src/vnet/devices')

diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c index 1fc665ac..99998862 100644 --- a/src/vnet/devices/dpdk/cli.c +++ b/src/vnet/devices/dpdk/cli.c @@ -33,6 +33,56 @@ * Abstraction Layer and pcap Tx Trace.
*/ + +static clib_error_t * +get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd, + dpdk_device_config_t ** devconf) +{ + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + struct rte_eth_dev_info dev_info; + uword *p = 0; + clib_error_t *error = NULL; + + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + if (subport_id != 0) + { + error = clib_error_return (0, "Invalid subport"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + *xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get ((*xd)->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + (*devconf) = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + (*devconf) = &dm->conf->default_devconf; + +done: + return error; +} + static clib_error_t * pcap_trace_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -316,10 +366,19 @@ show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, return 0; } +/*? + * This command displays statistics of each DPDK mempool. + * + * @cliexpar + * Example of how to display DPDK buffer data: + * @cliexstart{show dpdk buffer} + * name="mbuf_pool_socket0" available = 15104 allocated = 1280 total = 16384 + * @cliexend +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_show_dpdk_bufferr,static) = { .path = "show dpdk buffer", - .short_help = "show dpdk buffer state", + .short_help = "show dpdk buffer", .function = show_dpdk_buffer, .is_mp_safe = 1, }; @@ -378,10 +437,36 @@ test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, return 0; } +/*? + * This command tests the allocation and freeing of DPDK buffers. + * If both 'allocate' and 'free' are entered in the + * same command, the 'free' is executed first. If no + * parameters are provided, this command displays how many DPDK buffers + * the test command has allocated. + * + * @cliexpar + * @parblock + * + * Example of how to display how many DPDK buffers the test command has allocated: + * @cliexstart{test dpdk buffer} + * Currently 0 buffers allocated + * @cliexend + * + * Example of how to allocate DPDK buffers using the test command: + * @cliexstart{test dpdk buffer allocate 10} + * Currently 10 buffers allocated + * @cliexend + * + * Example of how to free DPDK buffers allocated by the test command: + * @cliexstart{test dpdk buffer free 10} + * Currently 0 buffers allocated + * @cliexend + * @endparblock +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = { .path = "test dpdk buffer", - .short_help = "test dpdk buffer [allocate ][free ]", + .short_help = "test dpdk buffer [allocate ] [free ]", .function = test_dpdk_buffer, .is_mp_safe = 1, }; @@ -460,10 +545,20 @@ done: return error; } +/*? + * This command sets the number of DPDK 'rx' and + * 'tx' descriptors for the given physical interface. Use + * the command 'show hardware-interface' to display the + * current descriptor allocation.
+ * + * @cliexpar + * Example of how to set the DPDK interface descriptors: + * @cliexcmd{set dpdk interface descriptors GigabitEthernet0/8/0 rx 512 tx 512} +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = { .path = "set dpdk interface descriptors", - .short_help = "set dpdk interface descriptors [rx ] [tx ]", + .short_help = "set dpdk interface descriptors [rx ] [tx ]", .function = set_dpdk_if_desc, }; /* *INDENT-ON* */ @@ -482,7 +577,8 @@ show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, for (cpu = 0; cpu < vec_len (dm->devices_by_cpu); cpu++) { - if (vec_len (dm->devices_by_cpu[cpu])) + if (cpu >= dm->input_cpu_first_index && + cpu < (dm->input_cpu_first_index + dm->input_cpu_count)) vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, vlib_worker_threads[cpu].name, vlib_worker_threads[cpu].lcore_id); @@ -499,6 +595,21 @@ show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, return 0; } +/*? + * This command is used to display the thread and core each + * DPDK interface and queue is assigned to. + * + * @cliexpar + * Example of how to display the DPDK interface placement: + * @cliexstart{show dpdk interface placement} + * Thread 1 (vpp_wk_0 at lcore 1): + * GigabitEthernet0/8/0 queue 0 + * GigabitEthernet0/9/0 queue 0 + * Thread 2 (vpp_wk_1 at lcore 2): + * GigabitEthernet0/8/0 queue 1 + * GigabitEthernet0/9/0 queue 1 + * @cliexend +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_show_dpdk_if_placement,static) = { .path = "show dpdk interface placement", @@ -596,18 +707,18 @@ set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, rte_lcore_to_socket_id(vlib_worker_threads[cpu].lcore_id); vec_sort_with_function(dm->devices_by_cpu[i], - dpdk_device_queue_sort); + dpdk_device_queue_sort); vec_sort_with_function(dm->devices_by_cpu[cpu], - dpdk_device_queue_sort); + dpdk_device_queue_sort); if (vec_len(dm->devices_by_cpu[i]) == 0) vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, - VLIB_NODE_STATE_DISABLED); + VLIB_NODE_STATE_DISABLED); if (vec_len(dm->devices_by_cpu[cpu]) == 1) vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); + VLIB_NODE_STATE_POLLING); goto done; } @@ -623,10 +734,30 @@ done: return error; } +/*? + * This command is used to assign a given interface, and optionally a + * given queue, to a different thread. This will not create a thread, + * so the thread must already exist. Use '/etc/vpp/startup.conf' + * for the initial thread creation. If the 'queue' is not provided, + * it defaults to 0.
+ * + * @cliexpar + * Example of how to display the DPDK interface placement: + * @cliexstart{show dpdk interface placement} + * Thread 1 (vpp_wk_0 at lcore 1): + * GigabitEthernet0/8/0 queue 0 + * GigabitEthernet0/9/0 queue 0 + * Thread 2 (vpp_wk_1 at lcore 2): + * GigabitEthernet0/8/0 queue 1 + * GigabitEthernet0/9/0 queue 1 + * @cliexend + * Example of how to assign a DPDK interface and queue to a thread: + * @cliexcmd{set dpdk interface placement GigabitEthernet0/8/0 queue 1 thread 1} +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { .path = "set dpdk interface placement", - .short_help = "set dpdk interface placement [queue ] thread ", + .short_help = "set dpdk interface placement [queue ] thread ", .function = set_dpdk_if_placement, }; /* *INDENT-ON* */ @@ -645,7 +776,8 @@ show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, for (cpu = 0; cpu < vec_len (dm->devices_by_hqos_cpu); cpu++) { - if (vec_len (dm->devices_by_hqos_cpu[cpu])) + if (cpu >= dm->hqos_cpu_first_index && + cpu < (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, vlib_worker_threads[cpu].name, vlib_worker_threads[cpu].lcore_id); @@ -661,6 +793,19 @@ show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, return 0; } +/*? + * This command is used to display the thread and core each + * DPDK output interface and HQoS queue is assigned to. + * + * @cliexpar + * Example of how to display the DPDK output interface and HQoS queue placement: + * @cliexstart{show dpdk interface hqos placement} + * Thread 1 (vpp_hqos-threads_0 at lcore 3): + * GigabitEthernet0/8/0 queue 0 + * Thread 2 (vpp_hqos-threads_1 at lcore 4): + * GigabitEthernet0/9/0 queue 0 + * @cliexend +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos_placement, static) = { .path = "show dpdk interface hqos placement", @@ -749,10 +894,27 @@ done: return error; } +/*? + * This command is used to assign a given DPDK output interface and + * HQoS queue to a different thread. This will not create a thread, + * so the thread must already exist. Use '/etc/vpp/startup.conf' + * for the initial thread creation. See @ref qos_doc for more details. + * + * @cliexpar + * Example of how to display the DPDK output interface and HQoS queue placement: + * @cliexstart{show dpdk interface hqos placement} + * Thread 1 (vpp_hqos-threads_0 at lcore 3): + * GigabitEthernet0/8/0 queue 0 + * Thread 2 (vpp_hqos-threads_1 at lcore 4): + * GigabitEthernet0/9/0 queue 0 + * @cliexend + * Example of how to assign a DPDK output interface and HQoS queue to a thread: + * @cliexcmd{set dpdk interface hqos placement GigabitEthernet0/8/0 thread 2} +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_placement, static) = { .path = "set dpdk interface hqos placement", - .short_help = "set dpdk interface hqos placement thread ", + .short_help = "set dpdk interface hqos placement thread ", .function = set_dpdk_if_hqos_placement, }; /* *INDENT-ON* */ @@ -819,12 +981,28 @@ done: return error; } +/*? + * This command is used to change the profile associated with a HQoS pipe. The + * '' is zero based. Use the command + * 'show dpdk interface hqos' to display the content of each profile. + * See @ref qos_doc for more details. + * + * @note + * Currently there is not an API to create a new HQoS pipe profile. One is + * created by default in the code (search for 'hqos_pipe_params_default'). + * Additional profiles can be created in code and the code recompiled (see the sketch below).
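As a hypothetical illustration of that note (this block is not part of the patch): a second profile would be declared next to hqos_pipe_params_default in the DPDK device code and compiled in. The identifier and the rate numbers below are made up; the field names follow DPDK's struct rte_sched_pipe_params as used by this code base:

/* Hypothetical additional pipe profile; values are illustrative only. */
static struct rte_sched_pipe_params hqos_pipe_params_example = {
  .tb_rate = 305175,            /* token bucket rate, bytes/sec */
  .tb_size = 1000000,           /* token bucket size, bytes */
  .tc_rate = {305175, 305175, 305175, 305175},
  .tc_period = 40,              /* traffic class update period, ms */
#ifdef RTE_SCHED_SUBPORT_TC_OV
  .tc_ov_weight = 1,
#endif
  /* one WRR weight per queue (4 traffic classes x 4 queues) */
  .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
};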
Then use this + * command to assign it. + * + * @cliexpar + * Example of how to assign a new profile to a HQoS pipe: + * @cliexcmd{set dpdk interface hqos pipe GigabitEthernet0/8/0 subport 0 pipe 2 profile 1} +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pipe, static) = { .path = "set dpdk interface hqos pipe", - .short_help = "set dpdk interface hqos pipe subport pipe " - "profile ", + .short_help = "set dpdk interface hqos pipe subport pipe " + "profile ", .function = set_dpdk_if_hqos_pipe, }; /* *INDENT-ON* */ @@ -835,18 +1013,18 @@ set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, { unformat_input_t _line_input, *line_input = &_line_input; dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; + dpdk_device_t *xd = NULL; u32 hw_if_index = (u32) ~ 0; u32 subport_id = (u32) ~ 0; - struct rte_sched_subport_params p = { - .tb_rate = 1250000000, /* 10GbE */ - .tb_size = 1000000, - .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000}, - .tc_period = 10, - }; + struct rte_sched_subport_params p; int rv; clib_error_t *error = NULL; + u32 tb_rate = (u32) ~ 0; + u32 tb_size = (u32) ~ 0; + u32 tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = + { (u32) ~ 0, (u32) ~ 0, (u32) ~ 0, (u32) ~ 0 }; + u32 tc_period = (u32) ~ 0; + dpdk_device_config_t *devconf = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -859,24 +1037,19 @@ set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, ; else if (unformat (line_input, "subport %d", &subport_id)) ; - else if (unformat (line_input, "rate %d", &p.tb_rate)) - { - p.tc_rate[0] = p.tb_rate; - p.tc_rate[1] = p.tb_rate; - p.tc_rate[2] = p.tb_rate; - p.tc_rate[3] = p.tb_rate; - } - else if (unformat (line_input, "bktsize %d", &p.tb_size)) + else if (unformat (line_input, "rate %d", &tb_rate)) ; - else if (unformat (line_input, "tc0 %d", &p.tc_rate[0])) + else if (unformat (line_input, "bktsize %d", &tb_size)) ; - else if (unformat (line_input, "tc1 %d", &p.tc_rate[1])) + else if (unformat (line_input, "tc0 %d", &tc_rate[0])) ; - else if (unformat (line_input, "tc2 %d", &p.tc_rate[2])) + else if (unformat (line_input, "tc1 %d", &tc_rate[1])) ; - else if (unformat (line_input, "tc3 %d", &p.tc_rate[3])) + else if (unformat (line_input, "tc2 %d", &tc_rate[2])) ; - else if (unformat (line_input, "period %d", &p.tc_period)) + else if (unformat (line_input, "tc3 %d", &tc_rate[3])) + ; + else if (unformat (line_input, "period %d", &tc_period)) ; else { @@ -886,20 +1059,59 @@ set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, } } - if (hw_if_index == (u32) ~ 0) + error = get_hqos (hw_if_index, subport_id, &xd, &devconf); + + if (error == NULL) { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } + /* Copy the current values over to local structure. */ + memcpy (&p, &devconf->hqos.subport[subport_id], sizeof (p)); - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); + /* Update local structure with input values. 
*/
 + if (tb_rate != (u32) ~ 0)
 + {
 + p.tb_rate = tb_rate;
 + p.tc_rate[0] = tb_rate;
 + p.tc_rate[1] = tb_rate;
 + p.tc_rate[2] = tb_rate;
 + p.tc_rate[3] = tb_rate;
 + }
 + if (tb_size != (u32) ~ 0)
 + {
 + p.tb_size = tb_size;
 + }
 + if (tc_rate[0] != (u32) ~ 0)
 + {
 + p.tc_rate[0] = tc_rate[0];
 + }
 + if (tc_rate[1] != (u32) ~ 0)
 + {
 + p.tc_rate[1] = tc_rate[1];
 + }
 + if (tc_rate[2] != (u32) ~ 0)
 + {
 + p.tc_rate[2] = tc_rate[2];
 + }
 + if (tc_rate[3] != (u32) ~ 0)
 + {
 + p.tc_rate[3] = tc_rate[3];
 + }
 + if (tc_period != (u32) ~ 0)
 + {
 + p.tc_period = tc_period;
 + }
 - rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p);
 - if (rv)
 - {
 - error = clib_error_return (0, "subport configuration failed");
 - goto done;
 + /* Apply changes. */
 + rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p);
 + if (rv)
 + {
 + error = clib_error_return (0, "subport configuration failed");
 + goto done;
 + }
 + else
 + {
 + /* Successfully applied, so save off the input values. */
 + memcpy (&devconf->hqos.subport[subport_id], &p, sizeof (p));
 + }
 }
 done:
 @@ -908,10 +1120,25 @@ done:
 return error;
 }
 +/*?
 + * This command is used to set the subport level parameters such as token
 + * bucket rate (bytes per second), token bucket size (bytes), traffic class
 + * rates (bytes per second) and token update period (milliseconds).
 + *
 + * By default, the 'rate' is set to 1250000000 bytes/second (10GbE
 + * rate) and each of the four traffic classes is set to 100% of the port rate.
 + * If the 'rate' is updated by this command, all four traffic classes
 + * are assigned the same value. Each of the four traffic classes can be updated
 + * individually.
 + *
 + * @cliexpar
 + * Example of how to modify the subport attributes for a 1GbE link:
 + * @cliexcmd{set dpdk interface hqos subport GigabitEthernet0/8/0 subport 0 rate 125000000}
 +?*/
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_subport, static) = {
 .path = "set dpdk interface hqos subport",
 - .short_help = "set dpdk interface hqos subport subport "
 + .short_help = "set dpdk interface hqos subport subport "
 "[rate ] [bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] "
 "[period ]",
 .function = set_dpdk_if_hqos_subport,
 @@ -974,7 +1201,7 @@ set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input,
 }
 if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
 {
 - error = clib_error_return (0, "invalid traffic class");
 + error = clib_error_return (0, "invalid traffic class queue");
 goto done;
 }
 @@ -1004,10 +1231,31 @@ done:
 return error;
 }
 +/*?
 + * This command is used to set the traffic class translation table. The
 + * traffic class translation table is used to map 64 values (0-63) to one of
 + * four traffic classes and one of four HQoS input queues. Use the 'show
 + * dpdk interface hqos' command to display the traffic class translation
 + * table. See @ref qos_doc for more details.
 + *
 + * This command has the following parameters:
 + *
 + * - - Used to specify the output interface.
 + *
 + * - entry - Mapped value (0-63) to assign traffic class and queue to.
 + *
 + * - tc - Traffic class (0-3) to be used by the provided mapped value.
 + *
 + * - queue - HQoS input queue (0-3) to be used by the provided mapped value.
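 + *
 + * For reference, each table entry stores the traffic class and queue
 + * together; the 'show dpdk interface hqos' output in this file decodes an
 + * entry the same way:
 + *   tc    = tctbl[entry] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS  (0-3)
 + *   queue = tctbl[entry] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS  (0-3)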
+ *
 + * @cliexpar
 + * Example of how to modify the traffic class translation table:
 + * @cliexcmd{set dpdk interface hqos tctbl GigabitEthernet0/8/0 entry 16 tc 2 queue 2}
 +?*/
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_tctbl, static) = {
 .path = "set dpdk interface hqos tctbl",
 - .short_help = "set dpdk interface hqos tctbl entry tc queue ",
 + .short_help = "set dpdk interface hqos tctbl entry tc queue ",
 .function = set_dpdk_if_hqos_tctbl,
 };
 /* *INDENT-ON* */
 @@ -1058,6 +1306,12 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input,
 (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
 &hw_if_index))
 ;
 + else if (unformat (line_input, "id subport"))
 + id = 0;
 + else if (unformat (line_input, "id pipe"))
 + id = 1;
 + else if (unformat (line_input, "id tc"))
 + id = 2;
 else if (unformat (line_input, "id %d", &id))
 ;
 else if (unformat (line_input, "offset %d", &offset))
 @@ -1178,11 +1432,50 @@ done:
 return error;
 }
 +/*?
 + * This command is used to set the packet fields required for classifying the
 + * incoming packet. As a result of the classification process, packet field
 + * information will be mapped to 5 tuples (subport, pipe, traffic class, queue,
 + * color) and stored in the packet mbuf.
 + *
 + * This command has the following parameters:
 + *
 + * - - Used to specify the output interface.
 + *
 + * - id subport|pipe|tc - Classification occurs across three fields.
 + * This parameter indicates which of the three masks are being configured. Legacy
 + * code used 0-2 to represent these three fields, so 0-2 is still accepted.
 + * - subport|0 - Currently only one subport is supported, so only
 + * an empty mask is supported for the subport classification.
 + * - pipe|1 - Currently, 4096 pipes per subport are supported, so a
 + * 12-bit mask should be configured to map to the 0-4095 pipes.
 + * - tc|2 - The translation table (see 'set dpdk interface hqos
 + * tctbl' command) maps each value (0-63) into one of the 4 traffic classes
 + * per pipe. A 6-bit mask should be configured to map this field to a traffic class.
 + *
 + * - offset - Offset in the packet to apply the 64-bit mask for classification.
 + * The offset should be on an 8-byte boundary (0,8,16,24..).
 + *
 + * - mask - 64-bit mask to apply to packet at the given 'offset'.
 + * Bits must be contiguous and should not include '0x'.
 + *
 + * The default values for the 'pktfield' assume Ethernet/IPv4/UDP packets with
 + * no VLAN. Adjust based on expected packet format and desired classification field.
 + * - 'subport' is always empty (offset 0 mask 0000000000000000)
 + * - By default, 'pipe' maps to the UDP payload bits 12 ..
23 (offset 40 + * mask 0000000fff000000) + * - By default, 'tc' maps to the DSCP field in IP header (offset 48 mask + * 00000000000000fc) + * + * @cliexpar + * Example of how modify the 'pipe' classification filter to match VLAN: + * @cliexcmd{set dpdk interface hqos pktfield GigabitEthernet0/8/0 id pipe offset 8 mask 0000000000000FFF} +?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pktfield, static) = { .path = "set dpdk interface hqos pktfield", - .short_help = "set dpdk interface hqos pktfield id offset " - "mask ", + .short_help = "set dpdk interface hqos pktfield id subport|pipe|tc offset " + "mask ", .function = set_dpdk_if_hqos_pktfield, }; /* *INDENT-ON* */ @@ -1201,7 +1494,7 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, dpdk_device_hqos_per_worker_thread_t *wk; u32 *tctbl; u32 hw_if_index = (u32) ~ 0; - u32 profile_id, i; + u32 profile_id, subport_id, i; struct rte_eth_dev_info dev_info; dpdk_device_config_t *devconf = 0; vlib_thread_registration_t *tr; @@ -1284,40 +1577,156 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, ht->hqos_burst_deq); vlib_cli_output (vm, - " Packet field 0: slab position = %4u, slab bitmask = 0x%016llx", + " Packet field 0: slab position = %4u, slab bitmask = 0x%016llx (subport)", wk->hqos_field0_slabpos, wk->hqos_field0_slabmask); vlib_cli_output (vm, - " Packet field 1: slab position = %4u, slab bitmask = 0x%016llx", + " Packet field 1: slab position = %4u, slab bitmask = 0x%016llx (pipe)", wk->hqos_field1_slabpos, wk->hqos_field1_slabmask); vlib_cli_output (vm, - " Packet field 2: slab position = %4u, slab bitmask = 0x%016llx", + " Packet field 2: slab position = %4u, slab bitmask = 0x%016llx (tc)", wk->hqos_field2_slabpos, wk->hqos_field2_slabmask); - vlib_cli_output (vm, " Packet field 2 translation table:"); - vlib_cli_output (vm, " [ 0 .. 15]: " - "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u", - tctbl[0], tctbl[1], tctbl[2], tctbl[3], - tctbl[4], tctbl[5], tctbl[6], tctbl[7], - tctbl[8], tctbl[9], tctbl[10], tctbl[11], - tctbl[12], tctbl[13], tctbl[14], tctbl[15]); - vlib_cli_output (vm, " [16 .. 31]: " - "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u", - tctbl[16], tctbl[17], tctbl[18], tctbl[19], - tctbl[20], tctbl[21], tctbl[22], tctbl[23], - tctbl[24], tctbl[25], tctbl[26], tctbl[27], - tctbl[28], tctbl[29], tctbl[30], tctbl[31]); - vlib_cli_output (vm, " [32 .. 47]: " - "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u", - tctbl[32], tctbl[33], tctbl[34], tctbl[35], - tctbl[36], tctbl[37], tctbl[38], tctbl[39], - tctbl[40], tctbl[41], tctbl[42], tctbl[43], - tctbl[44], tctbl[45], tctbl[46], tctbl[47]); - vlib_cli_output (vm, " [48 .. 63]: " - "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u", - tctbl[48], tctbl[49], tctbl[50], tctbl[51], - tctbl[52], tctbl[53], tctbl[54], tctbl[55], - tctbl[56], tctbl[57], tctbl[58], tctbl[59], - tctbl[60], tctbl[61], tctbl[62], tctbl[63]); - + vlib_cli_output (vm, + " Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)"); + vlib_cli_output (vm, + " [ 0 .. 
15]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[0] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[0] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[1] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[1] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[2] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[2] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[3] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[3] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[4] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[4] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[5] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[5] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[6] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[6] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[7] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[7] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[8] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[8] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[9] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[9] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[10] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[10] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[11] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[11] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[12] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[12] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[13] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[13] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[14] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[14] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[15] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[15] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [16 .. 31]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[16] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[16] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[17] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[17] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[18] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[18] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[19] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[19] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[20] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[20] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[21] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[21] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[22] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[22] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[23] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[23] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[24] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[24] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[25] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[25] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[26] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[26] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[27] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[27] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[28] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[28] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[29] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[29] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[30] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[30] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[31] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[31] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [32 .. 
47]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[32] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[32] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[33] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[33] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[34] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[34] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[35] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[35] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[36] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[36] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[37] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[37] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[38] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[38] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[39] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[39] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[40] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[40] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[41] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[41] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[42] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[42] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[43] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[43] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[44] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[44] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[45] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[45] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[46] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[46] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[47] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[47] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [48 .. 63]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[48] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[48] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[49] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[49] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[50] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[50] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[51] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[51] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[52] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[52] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[53] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[53] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[54] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[54] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[55] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[55] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[56] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[56] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[57] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[57] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[58] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[58] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[59] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[59] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[60] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[60] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[61] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[61] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[62] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[62] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[63] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[63] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); vlib_cli_output (vm, " Port:"); vlib_cli_output (vm, " Rate = %u bytes/second", cfg->port.rate); 
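/* The " Port:" block here and the subport loop added below print the saved
   configuration: cfg->port holds the device's struct rte_sched_port_params and
   cfg->subport[i] its rte_sched_subport_params (struct names per the DPDK
   rte_sched API used throughout this file). */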
vlib_cli_output (vm, " MTU = %u bytes", cfg->port.mtu);
 @@ -1334,6 +1743,23 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input,
 vlib_cli_output (vm, " Number of pipe profiles = %u",
 cfg->port.n_pipe_profiles);
 + for (subport_id = 0; subport_id < vec_len (cfg->subport); subport_id++)
 + {
 + vlib_cli_output (vm, " Subport %u:", subport_id);
 + vlib_cli_output (vm, " Rate = %u bytes/second",
 + cfg->subport[subport_id].tb_rate);
 + vlib_cli_output (vm, " Token bucket size = %u bytes",
 + cfg->subport[subport_id].tb_size);
 + vlib_cli_output (vm,
 + " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second",
 + cfg->subport[subport_id].tc_rate[0],
 + cfg->subport[subport_id].tc_rate[1],
 + cfg->subport[subport_id].tc_rate[2],
 + cfg->subport[subport_id].tc_rate[3]);
 + vlib_cli_output (vm, " TC period = %u milliseconds",
 + cfg->subport[subport_id].tc_period);
 + }
 +
 for (profile_id = 0; profile_id < vec_len (cfg->pipe); profile_id++)
 {
 vlib_cli_output (vm, " Pipe profile %u:", profile_id);
 @@ -1398,10 +1824,53 @@ done:
 return error;
 }
 +/*?
 + * This command is used to display details of an output interface's HQoS
 + * settings.
 + *
 + * @cliexpar
 + * Example of how to display HQoS settings for an interface:
 + * @cliexstart{show dpdk interface hqos GigabitEthernet0/8/0}
 + * Thread:
 + * Input SWQ size = 4096 packets
 + * Enqueue burst size = 256 packets
 + * Dequeue burst size = 220 packets
 + * Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport)
 + * Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe)
 + * Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc)
 + * Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)
 + * [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
 + * [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
 + * [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
 + * [48 ..
63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
 + * Port:
 + * Rate = 1250000000 bytes/second
 + * MTU = 1514 bytes
 + * Frame overhead = 24 bytes
 + * Number of subports = 1
 + * Number of pipes per subport = 4096
 + * Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets
 + * Number of pipe profiles = 2
 + * Subport 0:
 + * Rate = 1250000000 bytes/second
 + * Token bucket size = 1000000 bytes
 + * Traffic class rate: TC0 = 1250000000, TC1 = 1250000000, TC2 = 1250000000, TC3 = 1250000000 bytes/second
 + * TC period = 10 milliseconds
 + * Pipe profile 0:
 + * Rate = 305175 bytes/second
 + * Token bucket size = 1000000 bytes
 + * Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second
 + * TC period = 40 milliseconds
 + * TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
 + * TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
 + * TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
 + * TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
 + * @cliexend
 +?*/
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos, static) = {
 .path = "show dpdk interface hqos",
 - .short_help = "show dpdk interface hqos ",
 + .short_help = "show dpdk interface hqos ",
 .function = show_dpdk_if_hqos,
 };
 @@ -1412,6 +1881,8 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input,
 vlib_cli_command_t * cmd)
 {
 unformat_input_t _line_input, *line_input = &_line_input;
 + clib_error_t *error = NULL;
 +#ifdef RTE_SCHED_COLLECT_STATS
 dpdk_main_t *dm = &dpdk_main;
 u32 hw_if_index = (u32) ~ 0;
 u32 subport = (u32) ~ 0;
 @@ -1426,7 +1897,6 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input,
 u32 qindex;
 struct rte_sched_queue_stats stats;
 u16 qlen;
 - clib_error_t *error = NULL;
 if (!unformat_user (input, unformat_line_input, line_input))
 return 0;
 @@ -1517,6 +1987,16 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input,
 vlib_cli_output (vm, "%=24s%=16d", "Bytes", stats.n_bytes);
 vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped);
 +#else
 +
 + /* Get a line of input */
 + if (!unformat_user (input, unformat_line_input, line_input))
 + return 0;
 +
 + vlib_cli_output (vm, "RTE_SCHED_COLLECT_STATS disabled in DPDK");
 + goto done;
 +
 +#endif
 done:
 unformat_free (line_input);
 @@ -1524,10 +2004,29 @@ done:
 return error;
 }
 +/*?
 + * This command is used to display statistics associated with a HQoS traffic class
 + * queue.
 + *
 + * @note
 + * Statistic collection by the scheduler is disabled by default in DPDK. In order to
 + * turn it on, add the following line to '../vpp/dpdk/Makefile':
 + * - $(call set,RTE_SCHED_COLLECT_STATS,y)
 + *
 + * @cliexpar
 + * Example of how to display statistics of a HQoS traffic class queue:
 + * @cliexstart{show dpdk hqos queue GigabitEthernet0/9/0 subport 0 pipe 3181 tc 0 tc_q 0}
 + * Stats Parameter Value
 + * Packets 140
 + * Packets dropped 0
 + * Bytes 8400
 + * Bytes dropped 0
 + * @cliexend
 +?*/
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = {
 .path = "show dpdk hqos queue",
 - .short_help = "show dpdk hqos queue subport pipe tc tc_q ",
 + .short_help = "show dpdk hqos queue subport pipe tc tc_q ",
 .function = show_dpdk_hqos_queue_stats,
 };
 /* *INDENT-ON* */
 @@ -1544,10 +2043,21 @@ show_dpdk_version_command_fn (vlib_main_t * vm,
 return 0;
 }
 +/*?
 + * This command is used to display the current DPDK version and
 + * the list of arguments passed to DPDK when started.
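 + * The argument list is built from the dpdk section of the startup
 + * configuration; as a rough sketch (stanza contents illustrative), a
 + * startup.conf entry such as "dpdk { socket-mem 256 dev 0000:00:08.0 }"
 + * is what produces EAL init args of the kind shown below.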
+ *
 + * @cliexpar
 + * Example of how to display the DPDK version and EAL init arguments:
 + * @cliexstart{show dpdk version}
 + * DPDK Version: DPDK 16.11.0
 + * DPDK EAL init args: -c 1 -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -w 0000:00:08.0 -w 0000:00:09.0 --master-lcore 0 --socket-mem 256
 + * @cliexend
 +?*/
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (show_vpe_version_command, static) = {
 .path = "show dpdk version",
 - .short_help = "show dpdk version information",
 + .short_help = "show dpdk version",
 .function = show_dpdk_version_command_fn,
 };
 /* *INDENT-ON* */
 diff --git a/src/vnet/devices/dpdk/qos_doc.md b/src/vnet/devices/dpdk/qos_doc.md
 index 9bd0659d..7c064246 100644
 --- a/src/vnet/devices/dpdk/qos_doc.md
 +++ b/src/vnet/devices/dpdk/qos_doc.md
 @@ -195,21 +195,22 @@ token bucket rate (bytes per second), token bucket size (bytes), traffic class
 rates (bytes per second) and token update period (milliseconds).
 ```
 -set dpdk interface hqos subport subport [rate ]
 +set dpdk interface hqos subport subport [rate ]
 [bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ]
 [period ]
 ```
 For setting the pipe profile, the following command can be used.
 ```
 -set dpdk interface hqos pipe subport pipe profile
 +set dpdk interface hqos pipe subport pipe
 + profile
 ```
 To assign a QoS scheduler instance to a specific thread, the following command
 can be used.
 ```
 -set dpdk interface hqos placement thread
 +set dpdk interface hqos placement thread
 ```
 The command below is used to set the packet fields required for classifying
 @@ -218,13 +219,14 @@ information will be mapped to 5 tuples (subport, pipe, traffic class, queue,
 color) and stored in packet mbuf.
 ```
 -set dpdk interface hqos pktfield id offset mask
 +set dpdk interface hqos pktfield id subport|pipe|tc offset
 + mask
 ```
 The DSCP table entries used for identifying the traffic class and queue can be
 set using the command below:
 ```
 -set dpdk interface hqos tctbl entry tc queue
 +set dpdk interface hqos tctbl entry tc queue
 ```
 @@ -238,14 +240,14 @@ The QoS Scheduler configuration can be displayed using the command below.
 Input SWQ size = 4096 packets
 Enqueue burst size = 256 packets
 Dequeue burst size = 220 packets
 - Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000
 - Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000
 - Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc
 - Packet field 2 translation table:
 - [ 0 .. 15]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
 - [16 .. 31]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
 - [32 .. 47]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
 - [48 .. 63]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
 + Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport)
 + Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe)
 + Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc)
 + Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)
 + [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
 + [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
 + [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
 + [48 .. 63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
 Port:
 Rate = 1250000000 bytes/second
 MTU = 1514 bytes
 @@ -254,7 +256,12 @@ The QoS Scheduler configuration can be displayed using the command below.
Number of pipes per subport = 4096 Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets Number of pipe profiles = 1 - Pipe profile 0: + Subport 0: + Rate = 120000000 bytes/second + Token bucket size = 1000000 bytes + Traffic class rate: TC0 = 120000000, TC1 = 120000000, TC2 = 120000000, TC3 = 120000000 bytes/second + TC period = 10 milliseconds + Pipe profile 0: Rate = 305175 bytes/second Token bucket size = 1000000 bytes Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second @@ -270,9 +277,9 @@ below command. ``` vpp# show dpdk interface hqos placement - Thread 5 (vpp_hqos-threads_0 at lcore 5): + Thread 5 (vpp_hqos-threads_0 at lcore 5): TenGigabitEthernet2/0/0 queue 0 - Thread 6 (vpp_hqos-threads_1 at lcore 6): + Thread 6 (vpp_hqos-threads_1 at lcore 6): TenGigabitEthernet4/0/1 queue 0 ``` -- cgit 1.2.3-korg From 02767e9f2eaf7ccfc4079f83d467ad2d3faee203 Mon Sep 17 00:00:00 2001 From: Radu Nicolau Date: Thu, 23 Feb 2017 14:28:49 +0000 Subject: Fixed QAT device binding and device unbinding when vpp package is removed Change-Id: I35ad6a42093cad0945df1df09a39c63c4560dce6 Signed-off-by: Radu Nicolau --- build-root/deb/debian/vpp.postrm | 2 +- build-root/rpm/vpp.spec | 2 +- src/vnet/devices/dpdk/init.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/vnet/devices') diff --git a/build-root/deb/debian/vpp.postrm b/build-root/deb/debian/vpp.postrm index ac16a459..24b4842f 100644 --- a/build-root/deb/debian/vpp.postrm +++ b/build-root/deb/debian/vpp.postrm @@ -7,7 +7,7 @@ pci_dirs=`find /sys/bus/pci/drivers -type d -name igb_uio -o -name uio_pci_gener for d in $pci_dirs; do for f in ${d}/*; do [ -e "${f}/config" ] || continue - echo 1 > ${f}/remove + echo ${f##*/} > ${d}/unbind basename `dirname ${f}` | xargs echo -n "Removing driver"; echo " for PCI ID" `basename ${f}` removed=y done diff --git a/build-root/rpm/vpp.spec b/build-root/rpm/vpp.spec index 95196e9b..7bc18ca4 100644 --- a/build-root/rpm/vpp.spec +++ b/build-root/rpm/vpp.spec @@ -244,7 +244,7 @@ pci_dirs=`find /sys/bus/pci/drivers -type d -name igb_uio -o -name uio_pci_gener for d in $pci_dirs; do for f in ${d}/*; do [ -e "${f}/config" ] || continue - echo 1 > ${f}/remove + echo ${f##*/} > ${d}/unbind basename `dirname ${f}` | xargs echo -n "Removing driver"; echo " for PCI ID" `basename ${f}` removed=y done diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index 7249cc52..c50c0659 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -858,7 +858,7 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) vec_reset_length (pci_addr); pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); - if (d->device_class != PCI_CLASS_NETWORK_ETHERNET) + if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO) continue; if (num_whitelisted) -- cgit 1.2.3-korg From ff542707733102b2573dca2496ea427b3dba3b10 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 27 Feb 2017 11:29:20 +0100 Subject: vlib: add VLIB_BUFFER_EXT_HDR_VALID flag Change-Id: If56c66dd12eded1cc997087de5fd1b975766c4e2 Signed-off-by: Damjan Marion --- src/vlib/buffer.h | 2 ++ src/vnet/buffer.h | 3 --- src/vnet/devices/dpdk/device.c | 2 +- src/vnet/devices/dpdk/init.c | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index fffb50c8..8ea79502 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -77,6 +77,7 @@ 
typedef struct
VLIB_BUFFER_REPL_FAIL: packet replication failure
VLIB_BUFFER_RECYCLE: as it says
VLIB_BUFFER_FLOW_REPORT: buffer is a flow report, +
VLIB_BUFFER_EXT_HDR_VALID: buffer contains valid external buffer manager header, set to avoid adding it to a flow report
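(the DPDK plugin, for instance, sets VLIB_BUFFER_EXT_HDR_VALID in its buffer flags template so that dpdk_validate_rte_mbuf() can skip re-initializing the rte_mbuf header; see the device.c and init.c hunks below)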
VLIB_BUFFER_FLAG_USER(n): user-defined bit N */ @@ -88,6 +89,7 @@ typedef struct #define VLIB_BUFFER_REPL_FAIL (1 << 4) #define VLIB_BUFFER_RECYCLE (1 << 5) #define VLIB_BUFFER_FLOW_REPORT (1 << 6) +#define VLIB_BUFFER_EXT_HDR_VALID (1 << 7) /* User defined buffer flags. */ #define LOG2_VLIB_BUFFER_FLAG_USER(n) (32 - (n)) diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 45fc352a..f1cc6371 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -61,9 +61,6 @@ #define ETH_BUFFER_VLAN_BITS (ETH_BUFFER_VLAN_1_DEEP | \ ETH_BUFFER_VLAN_2_DEEP) -#define LOG2_VNET_BUFFER_RTE_MBUF_VALID LOG2_VLIB_BUFFER_FLAG_USER(5) -#define VNET_BUFFER_RTE_MBUF_VALID (1 << LOG2_VNET_BUFFER_RTE_MBUF_VALID) - #define LOG2_BUFFER_HANDOFF_NEXT_VALID LOG2_VLIB_BUFFER_FLAG_USER(6) #define BUFFER_HANDOFF_NEXT_VALID (1 << LOG2_BUFFER_HANDOFF_NEXT_VALID) diff --git a/src/vnet/devices/dpdk/device.c b/src/vnet/devices/dpdk/device.c index cd32389c..c9d9a567 100644 --- a/src/vnet/devices/dpdk/device.c +++ b/src/vnet/devices/dpdk/device.c @@ -159,7 +159,7 @@ dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b, /* buffer is coming from non-dpdk source so we need to init rte_mbuf header */ - if (PREDICT_FALSE ((b->flags & VNET_BUFFER_RTE_MBUF_VALID) == 0)) + if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_EXT_HDR_VALID) == 0)) { vlib_buffer_t *b2 = b; last_mb = mb = rte_mbuf_from_vlib_buffer (b2); diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index c50c0659..ec008c20 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -1790,7 +1790,7 @@ dpdk_init (vlib_main_t * vm) /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ dm->buffer_flags_template = - (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_RTE_MBUF_VALID + (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; -- cgit 1.2.3-korg From c47ed032c6d036a9f942fc9ced48874fad55b48c Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 25 Jan 2017 14:18:03 +0100 Subject: vlib: add buffer cloning support Change-Id: I50070611af15b2b4cc29664a8bee4f821ac3c835 Signed-off-by: Damjan Marion --- src/scripts/vnet/mcast/ip4 | 19 +-- src/vlib/buffer.c | 254 ++++++++++------------------------------- src/vlib/buffer.h | 4 +- src/vlib/buffer_funcs.h | 113 +++++++++++++++++- src/vnet/devices/dpdk/buffer.c | 41 +++++-- src/vnet/devices/dpdk/device.c | 11 +- src/vnet/dpo/replicate_dpo.c | 76 ++++++------ src/vnet/dpo/replicate_dpo.h | 3 + 8 files changed, 256 insertions(+), 265 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/scripts/vnet/mcast/ip4 b/src/scripts/vnet/mcast/ip4 index 69f1ee00..eb6bab27 100644 --- a/src/scripts/vnet/mcast/ip4 +++ b/src/scripts/vnet/mcast/ip4 @@ -2,7 +2,7 @@ packet-generator new { name x limit 1 node ip4-input - size 64-64 + size 512-512 no-recycle data { ICMP: 1.0.0.2 -> 232.1.1.1 @@ -11,12 +11,15 @@ packet-generator new { } } -trace add pg-input 100 -loop create -loop create -set int state loop0 up -set int state loop1 up +create packet-generator interface pg1 +create packet-generator interface pg2 +create packet-generator interface pg3 + +set int state pg1 up +set int state pg2 up +set int state pg3 up ip mroute add 232.1.1.1 via pg0 Accept -ip mroute add 232.1.1.1 via loop0 Forward -ip mroute add 232.1.1.1 via loop1 Forward +ip mroute add 232.1.1.1 via pg1 Forward +ip mroute add 232.1.1.1 via pg2 Forward +ip mroute add 232.1.1.1 via pg3 Forward diff 
--git a/src/vlib/buffer.c b/src/vlib/buffer.c index 95b4344f..4f5eb09d 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -68,8 +68,9 @@ format_vlib_buffer (u8 * s, va_list * args) vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *); uword indent = format_get_indent (s); - s = format (s, "current data %d, length %d, free-list %d", - b->current_data, b->current_length, b->free_list_index); + s = format (s, "current data %d, length %d, free-list %d, clone-count %u", + b->current_data, b->current_length, b->free_list_index, + b->n_add_refs); if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID) s = format (s, ", totlen-nifb %d", @@ -84,8 +85,10 @@ format_vlib_buffer (u8 * s, va_list * args) u32 next_buffer = b->next_buffer; b = vlib_get_buffer (vm, next_buffer); - s = format (s, "\n%Unext-buffer 0x%x, segment length %d", - format_white_space, indent, next_buffer, b->current_length); + s = + format (s, "\n%Unext-buffer 0x%x, segment length %d, clone-count %u", + format_white_space, indent, next_buffer, b->current_length, + b->n_add_refs); } return s; @@ -262,7 +265,7 @@ vlib_main_t **vlib_mains; /* When dubugging validate that given buffers are either known allocated or known free. */ -static void __attribute__ ((unused)) +static void vlib_buffer_validate_alloc_free (vlib_main_t * vm, u32 * buffers, uword n_buffers, @@ -362,6 +365,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, /* Setup free buffer template. */ f->buffer_init_template.free_list_index = f->index; + f->buffer_init_template.n_add_refs = 0; if (is_public) { @@ -620,19 +624,11 @@ vlib_buffer_free_inline (vlib_main_t * vm, { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *fl; - static u32 *next_to_free[2]; /* smp bad */ - u32 i_next_to_free, *b, *n, *f, fi; - uword n_left; + u32 fi; int i; - static vlib_buffer_free_list_t **announce_list; - vlib_buffer_free_list_t *fl0 = 0, *fl1 = 0; - u32 bi0 = (u32) ~ 0, bi1 = (u32) ~ 0, fi0, fi1 = (u32) ~ 0; - u8 free0, free1 = 0, free_next0, free_next1; u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, u32 follow_buffer_next); - ASSERT (os_get_cpu_number () == 0); - cb = bm->buffer_free_callback; if (PREDICT_FALSE (cb != 0)) @@ -641,203 +637,68 @@ vlib_buffer_free_inline (vlib_main_t * vm, if (!n_buffers) return; - /* Use first buffer to get default free list. */ - { - u32 bi0 = buffers[0]; - vlib_buffer_t *b0; - - b0 = vlib_get_buffer (vm, bi0); - fl = vlib_buffer_get_buffer_free_list (vm, b0, &fi); - if (fl->buffers_added_to_freelist_function) - vec_add1 (announce_list, fl); - } - - vec_validate (next_to_free[0], n_buffers - 1); - vec_validate (next_to_free[1], n_buffers - 1); - - i_next_to_free = 0; - n_left = n_buffers; - b = buffers; - -again: - /* Verify that buffers are known allocated. */ - vlib_buffer_validate_alloc_free (vm, b, - n_left, VLIB_BUFFER_KNOWN_ALLOCATED); - - vec_add2_aligned (fl->buffers, f, n_left, CLIB_CACHE_LINE_BYTES); - - n = next_to_free[i_next_to_free]; - while (n_left >= 4) - { - vlib_buffer_t *b0, *b1, *binit0, *binit1, dummy_buffers[2]; - - bi0 = b[0]; - bi1 = b[1]; - - f[0] = bi0; - f[1] = bi1; - f += 2; - b += 2; - n_left -= 2; - - /* Prefetch buffers for next iteration. */ - vlib_prefetch_buffer_with_index (vm, b[0], WRITE); - vlib_prefetch_buffer_with_index (vm, b[1], WRITE); - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - free0 = (b0->flags & VLIB_BUFFER_RECYCLE) == 0; - free1 = (b1->flags & VLIB_BUFFER_RECYCLE) == 0; - - /* Must be before init which will over-write buffer flags. 
*/ - if (follow_buffer_next) - { - n[0] = b0->next_buffer; - free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0; - n += free_next0; - - n[0] = b1->next_buffer; - free_next1 = free1 && (b1->flags & VLIB_BUFFER_NEXT_PRESENT) != 0; - n += free_next1; - } - else - free_next0 = free_next1 = 0; - - /* Must be before init which will over-write buffer free list. */ - fi0 = b0->free_list_index; - fi1 = b1->free_list_index; - - if (PREDICT_FALSE (fi0 != fi || fi1 != fi)) - goto slow_path_x2; - - binit0 = free0 ? b0 : &dummy_buffers[0]; - binit1 = free1 ? b1 : &dummy_buffers[1]; - - vlib_buffer_init_two_for_free_list (binit0, binit1, fl); - continue; - - slow_path_x2: - /* Backup speculation. */ - f -= 2; - n -= free_next0 + free_next1; - - _vec_len (fl->buffers) = f - fl->buffers; - - fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0); - fl1 = pool_elt_at_index (bm->buffer_free_list_pool, fi1); - - vlib_buffer_add_to_free_list (vm, fl0, bi0, free0); - if (PREDICT_FALSE (fl0->buffers_added_to_freelist_function != 0)) - { - int i; - for (i = 0; i < vec_len (announce_list); i++) - if (fl0 == announce_list[i]) - goto no_fl0; - vec_add1 (announce_list, fl0); - } - no_fl0: - if (PREDICT_FALSE (fl1->buffers_added_to_freelist_function != 0)) - { - int i; - for (i = 0; i < vec_len (announce_list); i++) - if (fl1 == announce_list[i]) - goto no_fl1; - vec_add1 (announce_list, fl1); - } - - no_fl1: - vlib_buffer_add_to_free_list (vm, fl1, bi1, free1); - - /* Possibly change current free list. */ - if (fi0 != fi && fi1 != fi) - { - fi = fi1; - fl = pool_elt_at_index (bm->buffer_free_list_pool, fi); - } - - vec_add2_aligned (fl->buffers, f, n_left, CLIB_CACHE_LINE_BYTES); - } - - while (n_left >= 1) + for (i = 0; i < n_buffers; i++) { - vlib_buffer_t *b0, *binit0, dummy_buffers[1]; + vlib_buffer_t *b; + u32 bi = buffers[i]; - bi0 = b[0]; - f[0] = bi0; - f += 1; - b += 1; - n_left -= 1; - - b0 = vlib_get_buffer (vm, bi0); + b = vlib_get_buffer (vm, bi); - free0 = (b0->flags & VLIB_BUFFER_RECYCLE) == 0; + fl = vlib_buffer_get_buffer_free_list (vm, b, &fi); - /* Must be before init which will over-write buffer flags. */ - if (follow_buffer_next) + /* The only current use of this callback: multicast recycle */ + if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) { - n[0] = b0->next_buffer; - free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0; - n += free_next0; + int j; + + vlib_buffer_add_to_free_list + (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); + + for (j = 0; j < vec_len (bm->announce_list); j++) + { + if (fl == bm->announce_list[j]) + goto already_announced; + } + vec_add1 (bm->announce_list, fl); + already_announced: + ; } else - free_next0 = 0; - - /* Must be before init which will over-write buffer free list. */ - fi0 = b0->free_list_index; - - if (PREDICT_FALSE (fi0 != fi)) - goto slow_path_x1; - - binit0 = free0 ? b0 : &dummy_buffers[0]; - - vlib_buffer_init_for_free_list (binit0, fl); - continue; - - slow_path_x1: - /* Backup speculation. 
*/ - f -= 1; - n -= free_next0; - - _vec_len (fl->buffers) = f - fl->buffers; - - fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0); - - vlib_buffer_add_to_free_list (vm, fl0, bi0, free0); - if (PREDICT_FALSE (fl0->buffers_added_to_freelist_function != 0)) { - int i; - for (i = 0; i < vec_len (announce_list); i++) - if (fl0 == announce_list[i]) - goto no_fl00; - vec_add1 (announce_list, fl0); + if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) + { + u32 flags, next; + + do + { + vlib_buffer_t *nb = vlib_get_buffer (vm, bi); + flags = nb->flags; + next = nb->next_buffer; + if (nb->n_add_refs) + nb->n_add_refs--; + else + { + vlib_buffer_validate_alloc_free (vm, &bi, 1, + VLIB_BUFFER_KNOWN_ALLOCATED); + vlib_buffer_add_to_free_list (vm, fl, bi, 1); + } + bi = next; + } + while (follow_buffer_next + && (flags & VLIB_BUFFER_NEXT_PRESENT)); + + } } - - no_fl00: - fi = fi0; - fl = pool_elt_at_index (bm->buffer_free_list_pool, fi); - - vec_add2_aligned (fl->buffers, f, n_left, CLIB_CACHE_LINE_BYTES); } - - if (follow_buffer_next && ((n_left = n - next_to_free[i_next_to_free]) > 0)) - { - b = next_to_free[i_next_to_free]; - i_next_to_free ^= 1; - goto again; - } - - _vec_len (fl->buffers) = f - fl->buffers; - - if (vec_len (announce_list)) + if (vec_len (bm->announce_list)) { vlib_buffer_free_list_t *fl; - for (i = 0; i < vec_len (announce_list); i++) + for (i = 0; i < vec_len (bm->announce_list); i++) { - fl = announce_list[i]; + fl = bm->announce_list[i]; fl->buffers_added_to_freelist_function (vm, fl); } - _vec_len (announce_list) = 0; + _vec_len (bm->announce_list) = 0; } } @@ -922,6 +783,7 @@ vlib_packet_template_init (vlib_main_t * vm, fl->buffer_init_template.current_data = 0; fl->buffer_init_template.current_length = n_packet_data_bytes; fl->buffer_init_template.flags = 0; + fl->buffer_init_template.n_add_refs = 0; vlib_worker_thread_barrier_release (vm); } diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 8ea79502..b4015b30 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -119,7 +119,9 @@ typedef struct feature node */ - u8 dont_waste_me[3]; /**< Available space in the (precious) + u8 n_add_refs; /**< Number of additional references to this buffer. */ + + u8 dont_waste_me[2]; /**< Available space in the (precious) first 32 octets of buffer metadata Before allocating any of it, discussion required! 
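(the n_add_refs field above takes one of the three previously spare bytes, which is why dont_waste_me shrinks from [3] to [2])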
*/ diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 0b583a61..e0fde5f2 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -530,6 +530,110 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b) return fd; } +/** \brief Create multiple clones of buffer and store them in the supplied array + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param src_buffer - (u32) source buffer index + @param buffers - (u32 * ) buffer index array + @param n_buffers - (u8) number of buffer clones requested + @param head_end_offset - (u16) offset relative to current position + where packet head ends + @return - (u8) number of buffers actually cloned, may be + less than the number requested or zero +*/ + +always_inline u8 +vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, + u8 n_buffers, u16 head_end_offset) +{ + u8 i; + vlib_buffer_t *s = vlib_get_buffer (vm, src_buffer); + + ASSERT (s->n_add_refs == 0); + ASSERT (n_buffers); + + if (s->current_length <= head_end_offset + CLIB_CACHE_LINE_BYTES * 2) + { + buffers[0] = src_buffer; + for (i = 1; i < n_buffers; i++) + { + vlib_buffer_t *d; + d = vlib_buffer_copy (vm, s); + if (d == 0) + return i; + buffers[i] = vlib_get_buffer_index (vm, d); + + } + return n_buffers; + } + + n_buffers = vlib_buffer_alloc_from_free_list (vm, buffers, n_buffers, + s->free_list_index); + if (PREDICT_FALSE (n_buffers == 0)) + { + buffers[0] = src_buffer; + return 1; + } + + for (i = 0; i < n_buffers; i++) + { + vlib_buffer_t *d = vlib_get_buffer (vm, buffers[i]); + d->current_data = s->current_data; + d->current_length = head_end_offset; + d->free_list_index = s->free_list_index; + d->total_length_not_including_first_buffer = + s->total_length_not_including_first_buffer + s->current_length - + head_end_offset; + d->flags = s->flags | VLIB_BUFFER_NEXT_PRESENT; + d->flags &= ~VLIB_BUFFER_EXT_HDR_VALID; + clib_memcpy (d->opaque, s->opaque, sizeof (s->opaque)); + clib_memcpy (vlib_buffer_get_current (d), vlib_buffer_get_current (s), + head_end_offset); + d->next_buffer = src_buffer; + } + vlib_buffer_advance (s, head_end_offset); + s->n_add_refs = n_buffers - 1; + while (s->flags & VLIB_BUFFER_NEXT_PRESENT) + { + s = vlib_get_buffer (vm, s->next_buffer); + s->n_add_refs = n_buffers - 1; + } + + return n_buffers; +} + +/** \brief Attach cloned tail to the buffer + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param head - (vlib_buffer_t *) head buffer + @param tail - (Vlib buffer_t *) tail buffer to clone and attach to head +*/ + +always_inline void +vlib_buffer_attach_clone (vlib_main_t * vm, vlib_buffer_t * head, + vlib_buffer_t * tail) +{ + ASSERT ((head->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + ASSERT (head->free_list_index == tail->free_list_index); + + head->flags |= VLIB_BUFFER_NEXT_PRESENT; + head->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; + head->flags &= ~VLIB_BUFFER_EXT_HDR_VALID; + head->flags |= (tail->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID); + head->next_buffer = vlib_get_buffer_index (vm, tail); + head->total_length_not_including_first_buffer = tail->current_length + + tail->total_length_not_including_first_buffer; + +next_segment: + __sync_add_and_fetch (&tail->n_add_refs, 1); + + if (tail->flags & VLIB_BUFFER_NEXT_PRESENT) + { + tail = vlib_get_buffer (vm, tail->next_buffer); + goto next_segment; + } +} + /* Initializes the buffer as an empty packet with no chained buffers. 
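(For clone chains created by vlib_buffer_clone() above, by contrast, only the first head_end_offset bytes belong to each clone; the shared tail segments carry the extra references in n_add_refs and must be treated as read-only.)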
*/ always_inline void vlib_buffer_chain_init (vlib_buffer_t * first) @@ -695,7 +799,8 @@ vlib_buffer_init_for_free_list (vlib_buffer_t * dst, _(flags); _(free_list_index); #undef _ - ASSERT (dst->total_length_not_including_first_buffer == 0); + dst->total_length_not_including_first_buffer = 0; + ASSERT (dst->n_add_refs == 0); } always_inline void @@ -727,8 +832,10 @@ vlib_buffer_init_two_for_free_list (vlib_buffer_t * dst0, _(flags); _(free_list_index); #undef _ - ASSERT (dst0->total_length_not_including_first_buffer == 0); - ASSERT (dst1->total_length_not_including_first_buffer == 0); + dst0->total_length_not_including_first_buffer = 0; + dst1->total_length_not_including_first_buffer = 0; + ASSERT (dst0->n_add_refs == 0); + ASSERT (dst1->n_add_refs == 0); } #if CLIB_DEBUG > 0 diff --git a/src/vnet/devices/dpdk/buffer.c b/src/vnet/devices/dpdk/buffer.c index 007093e4..f95d4cb5 100644 --- a/src/vnet/devices/dpdk/buffer.c +++ b/src/vnet/devices/dpdk/buffer.c @@ -79,20 +79,46 @@ STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); +static_always_inline void +dpdk_rte_pktmbuf_free (vlib_main_t * vm, vlib_buffer_t * b) +{ + vlib_buffer_t *hb = b; + struct rte_mbuf *mb; + u32 next, flags; + mb = rte_mbuf_from_vlib_buffer (hb); + +next: + flags = b->flags; + next = b->next_buffer; + mb = rte_mbuf_from_vlib_buffer (b); + + if (PREDICT_FALSE (b->n_add_refs)) + { + rte_mbuf_refcnt_update (mb, b->n_add_refs); + b->n_add_refs = 0; + } + + rte_pktmbuf_free_seg (mb); + + if (flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, next); + goto next; + } +} + static void del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) { u32 i; - struct rte_mbuf *mb; vlib_buffer_t *b; for (i = 0; i < vec_len (f->buffers); i++) { b = vlib_get_buffer (vm, f->buffers[i]); - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); + dpdk_rte_pktmbuf_free (vm, b); } + vec_free (f->name); vec_free (f->buffers); } @@ -325,7 +351,6 @@ vlib_buffer_free_inline (vlib_main_t * vm, for (i = 0; i < n_buffers; i++) { vlib_buffer_t *b; - struct rte_mbuf *mb; b = vlib_get_buffer (vm, buffers[i]); @@ -351,11 +376,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, else { if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) - { - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } + dpdk_rte_pktmbuf_free (vm, b); } } if (vec_len (bm->announce_list)) diff --git a/src/vnet/devices/dpdk/device.c b/src/vnet/devices/dpdk/device.c index c9d9a567..17397900 100644 --- a/src/vnet/devices/dpdk/device.c +++ b/src/vnet/devices/dpdk/device.c @@ -168,13 +168,11 @@ dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b, { b2 = vlib_get_buffer (vm, b2->next_buffer); mb = rte_mbuf_from_vlib_buffer (b2); - last_mb->next = mb; - last_mb = mb; rte_pktmbuf_reset (mb); } } - first_mb = mb = rte_mbuf_from_vlib_buffer (b); + last_mb = first_mb = mb = rte_mbuf_from_vlib_buffer (b); first_mb->nb_segs = 1; mb->data_len = b->current_length; mb->pkt_len = maybe_multiseg ? 
vlib_buffer_length_in_chain (vm, b) : @@ -185,10 +183,17 @@ dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b, { b = vlib_get_buffer (vm, b->next_buffer); mb = rte_mbuf_from_vlib_buffer (b); + last_mb->next = mb; + last_mb = mb; mb->data_len = b->current_length; mb->pkt_len = b->current_length; mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; first_mb->nb_segs++; + if (PREDICT_FALSE (b->n_add_refs)) + { + rte_mbuf_refcnt_update (mb, b->n_add_refs); + b->n_add_refs = 0; + } } } diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c index a67b19c8..a9f334be 100644 --- a/src/vnet/dpo/replicate_dpo.c +++ b/src/vnet/dpo/replicate_dpo.c @@ -625,6 +625,7 @@ replicate_inline (vlib_main_t * vm, vlib_frame_t * frame) { vlib_combined_counter_main_t * cm = &replicate_main.repm_counters; + replicate_main_t * rm = &replicate_main; u32 n_left_from, * from, * to_next, next_index; u32 cpu_index = os_get_cpu_number(); @@ -645,13 +646,11 @@ replicate_inline (vlib_main_t * vm, const replicate_t *rep0; vlib_buffer_t * b0, *c0; const dpo_id_t *dpo0; + u8 num_cloned; bi0 = from[0]; - to_next[0] = bi0; from += 1; - to_next += 1; n_left_from -= 1; - n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; @@ -661,50 +660,21 @@ replicate_inline (vlib_main_t * vm, cm, cpu_index, repi0, 1, vlib_buffer_length_in_chain(vm, b0)); - /* ship the original to the first bucket */ - dpo0 = replicate_get_bucket_i(rep0, 0); - next0 = dpo0->dpoi_next_node; - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vec_validate (rm->clones[cpu_index], rep0->rep_n_buckets - 1); - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); - t->rep_index = repi0; - t->dpo = *dpo0; - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); + num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[cpu_index], rep0->rep_n_buckets, 128); - /* ship copies to the rest of the buckets */ - for (bucket = 1; bucket < rep0->rep_n_buckets; bucket++) - { - /* - * After the enqueue of the first buffer, and of all subsequent - * buffers in this loop, it is possible that we over-flow the - * frame of the to-next node. When this happens we need to 'put' - * that full frame to the node and get a fresh empty one. - * Note that these are macros with side effects that change - * to_next & n_left_to_next - */ - if (PREDICT_FALSE(0 == n_left_to_next)) - { - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - } + if (num_cloned != rep0->rep_n_buckets) + { + vlib_node_increment_counter + (vm, node->node_index, + REPLICATE_DPO_ERROR_BUFFER_ALLOCATION_FAILURE, 1); + } - /* Make a copy. This can fail, so deal with it. 
*/ - c0 = vlib_buffer_copy(vm, b0); - if (PREDICT_FALSE (c0 == 0)) - { - vlib_node_increment_counter - (vm, node->node_index, - REPLICATE_DPO_ERROR_BUFFER_ALLOCATION_FAILURE, - 1); - continue; - } - - ci0 = vlib_get_buffer_index(vm, c0); + for (bucket = 0; bucket < num_cloned; bucket++) + { + ci0 = rm->clones[cpu_index][bucket]; + c0 = vlib_get_buffer(vm, ci0); to_next[0] = ci0; to_next += 1; @@ -724,7 +694,13 @@ replicate_inline (vlib_main_t * vm, vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, ci0, next0); + if (PREDICT_FALSE (n_left_to_next == 0)) + { + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + } } + vec_reset_length (rm->clones[cpu_index]); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -797,3 +773,15 @@ VLIB_REGISTER_NODE (ip6_replicate_node) = { [0] = "error-drop", }, }; + +clib_error_t * +replicate_dpo_init (vlib_main_t * vm) +{ + replicate_main_t * rm = &replicate_main; + + vec_validate (rm->clones, vlib_num_workers()); + + return 0; +} + +VLIB_INIT_FUNCTION (replicate_dpo_init); diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h index a564739c..77273015 100644 --- a/src/vnet/dpo/replicate_dpo.h +++ b/src/vnet/dpo/replicate_dpo.h @@ -32,6 +32,9 @@ typedef struct replicate_main_t_ { vlib_combined_counter_main_t repm_counters; + + /* per-cpu vector of cloned packets */ + u32 **clones; } replicate_main_t; extern replicate_main_t replicate_main; -- cgit 1.2.3-korg From f7c379403a98cf060d28bac24916c51067c4ec90 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 28 Feb 2017 23:26:30 +0100 Subject: dpdk: retire support for dpdk 16.07 Change-Id: I8585552c026415340fe9fd0458cb8450da3c4ae2 Signed-off-by: Damjan Marion --- dpdk/Makefile | 1 - ...Add-packet_type-metadata-in-the-i40e-vPMD.patch | 1210 - ...0e-Enable-bad-checksum-flags-in-i40e-vPMD.patch | 111 - ...T_RX_VLAN_PKT-iff-returned-packet-has-VLA.patch | 42 - ...vert-ixgbe-fix-packet-type-from-vector-Rx.patch | 133 - ...DPAA2-Poll-Mode-Driver-Support-dpdk-16.07.patch | 40106 ------------------- ...ers-reset-packet_type-before-using-buffer.patch | 70 - ...low-applications-to-override-rte_delay_us.patch | 43 - ...rash-in-igb_uio-driver-when-the-device-is.patch | 38 - ...mporarily-disable-unthrottled-log-message.patch | 26 - ...bad-L4-checksum-ptype-set-on-ICMP-packets.patch | 18 - ...irtio-enable-indirect-descriptors-feature.patch | 34 - src/vnet/devices/dpdk/dpdk.h | 20 - src/vnet/devices/dpdk/format.c | 12 - src/vnet/devices/dpdk/init.c | 10 +- src/vnet/devices/dpdk/main.c | 7 +- src/vnet/devices/dpdk/node.c | 12 - 17 files changed, 3 insertions(+), 41890 deletions(-) delete mode 100644 dpdk/dpdk-16.07_patches/0001-i40e-Add-packet_type-metadata-in-the-i40e-vPMD.patch delete mode 100644 dpdk/dpdk-16.07_patches/0002-i40e-Enable-bad-checksum-flags-in-i40e-vPMD.patch delete mode 100644 dpdk/dpdk-16.07_patches/0003-enic-Set-PKT_RX_VLAN_PKT-iff-returned-packet-has-VLA.patch delete mode 100644 dpdk/dpdk-16.07_patches/0004-Revert-ixgbe-fix-packet-type-from-vector-Rx.patch delete mode 100644 dpdk/dpdk-16.07_patches/0005-NXP-DPAA2-Poll-Mode-Driver-Support-dpdk-16.07.patch delete mode 100644 dpdk/dpdk-16.07_patches/0005-drivers-reset-packet_type-before-using-buffer.patch delete mode 100644 dpdk/dpdk-16.07_patches/0006-Allow-applications-to-override-rte_delay_us.patch delete mode 100644 dpdk/dpdk-16.07_patches/0007-UIO-Fix-a-crash-in-igb_uio-driver-when-the-device-is.patch delete mode 
100644 dpdk/dpdk-16.07_patches/0008-Temporarily-disable-unthrottled-log-message.patch delete mode 100644 dpdk/dpdk-16.07_patches/0009-enic-bad-L4-checksum-ptype-set-on-ICMP-packets.patch delete mode 100644 dpdk/dpdk-16.07_patches/0010-virtio-enable-indirect-descriptors-feature.patch (limited to 'src/vnet/devices') diff --git a/dpdk/Makefile b/dpdk/Makefile index f8c85c96..8e187cc2 100644 --- a/dpdk/Makefile +++ b/dpdk/Makefile @@ -29,7 +29,6 @@ PKG_SUFFIX ?= vpp1 DPDK_BASE_URL ?= http://fast.dpdk.org/rel DPDK_TARBALL := dpdk-$(DPDK_VERSION).tar.xz DPDK_TAR_URL := $(DPDK_BASE_URL)/$(DPDK_TARBALL) -DPDK_16.07_TARBALL_MD5_CKSUM := 690a2bb570103e58d12f9806e8bf21be DPDK_16.11_TARBALL_MD5_CKSUM := 06c1c577795360719d0b4fafaeee21e9 DPDK_17.02_TARBALL_MD5_CKSUM := 6b9f7387c35641f4e8dbba3e528f2376 DPDK_SOURCE := $(B)/dpdk-$(DPDK_VERSION) diff --git a/dpdk/dpdk-16.07_patches/0001-i40e-Add-packet_type-metadata-in-the-i40e-vPMD.patch b/dpdk/dpdk-16.07_patches/0001-i40e-Add-packet_type-metadata-in-the-i40e-vPMD.patch deleted file mode 100644 index 74d9416e..00000000 --- a/dpdk/dpdk-16.07_patches/0001-i40e-Add-packet_type-metadata-in-the-i40e-vPMD.patch +++ /dev/null @@ -1,1210 +0,0 @@ -From 79a2ddaf4d7df7172faa54716ae7647ad7a549b9 Mon Sep 17 00:00:00 2001 -From: Damjan Marion -Date: Thu, 14 Jul 2016 09:59:01 -0700 -Subject: [PATCH 1/2] i40e: Add packet_type metadata in the i40e vPMD - -The ptype is decoded from the rx descriptor and stored -in the packet type field in the mbuf using the same function -as the non-vector driver. - -Signed-off-by: Damjan Marion -Signed-off-by: Jeff Shaw ---- - drivers/net/i40e/i40e_rxtx.c | 566 +-------------------------------------- - drivers/net/i40e/i40e_rxtx.h | 563 ++++++++++++++++++++++++++++++++++++++ - drivers/net/i40e/i40e_rxtx_vec.c | 16 ++ - 3 files changed, 581 insertions(+), 564 deletions(-) - -diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c -index d3cfb98..2903347 100644 ---- a/drivers/net/i40e/i40e_rxtx.c -+++ b/drivers/net/i40e/i40e_rxtx.c -@@ -174,569 +174,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t qword) - } - #endif - --/* For each value it means, datasheet of hardware can tell more details -- * -- * @note: fix i40e_dev_supported_ptypes_get() if any change here. 
-- */ --static inline uint32_t --i40e_rxd_pkt_type_mapping(uint8_t ptype) --{ -- static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = { -- /* L2 types */ -- /* [0] reserved */ -- [1] = RTE_PTYPE_L2_ETHER, -- [2] = RTE_PTYPE_L2_ETHER_TIMESYNC, -- /* [3] - [5] reserved */ -- [6] = RTE_PTYPE_L2_ETHER_LLDP, -- /* [7] - [10] reserved */ -- [11] = RTE_PTYPE_L2_ETHER_ARP, -- /* [12] - [21] reserved */ -- -- /* Non tunneled IPv4 */ -- [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_FRAG, -- [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_NONFRAG, -- [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_UDP, -- /* [25] reserved */ -- [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_TCP, -- [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_SCTP, -- [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_ICMP, -- -- /* IPv4 --> IPv4 */ -- [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [32] reserved */ -- [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv4 --> IPv6 */ -- [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [39] reserved */ -- [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv4 --> GRE/Teredo/VXLAN */ -- [43] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT, -- -- /* IPv4 --> GRE/Teredo/VXLAN --> IPv4 */ -- [44] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT 
| -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [47] reserved */ -- [48] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv4 --> GRE/Teredo/VXLAN --> IPv6 */ -- [51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [53] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [54] reserved */ -- [55] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [56] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [57] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv4 --> GRE/Teredo/VXLAN --> MAC */ -- [58] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, -- -- /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ -- [59] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [62] reserved */ -- [63] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [65] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */ -- [66] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [67] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [68] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- 
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [69] reserved */ -- [70] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [71] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [72] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN */ -- [73] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN, -- -- /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ -- [74] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [75] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [76] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [77] reserved */ -- [78] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [79] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [80] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ -- [81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [83] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [84] reserved */ -- [85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [87] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* Non tunneled IPv6 */ -- [88] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_L4_FRAG, -- [89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_L4_NONFRAG, -- 
[90] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_L4_UDP, -- /* [91] reserved */ -- [92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_L4_TCP, -- [93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_L4_SCTP, -- [94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_L4_ICMP, -- -- /* IPv6 --> IPv4 */ -- [95] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [96] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [97] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [98] reserved */ -- [99] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [100] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [101] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv6 --> IPv6 */ -- [102] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [103] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [104] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [105] reserved */ -- [106] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [107] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [108] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_IP | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv6 --> GRE/Teredo/VXLAN */ -- [109] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT, -- -- /* IPv6 --> GRE/Teredo/VXLAN --> IPv4 */ -- [110] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [111] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [112] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [113] reserved */ -- [114] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [115] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [116] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv6 --> 
GRE/Teredo/VXLAN --> IPv6 */ -- [117] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [118] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [119] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [120] reserved */ -- [121] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [122] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [123] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv6 --> GRE/Teredo/VXLAN --> MAC */ -- [124] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, -- -- /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ -- [125] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [126] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [127] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [128] reserved */ -- [129] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [130] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [131] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */ -- [132] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [133] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [134] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [135] reserved */ -- [136] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [137] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [138] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -- 
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN */ -- [139] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN, -- -- /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ -- [140] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [141] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [142] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [143] reserved */ -- [144] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [145] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [146] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ -- [147] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_FRAG, -- [148] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_NONFRAG, -- [149] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_UDP, -- /* [150] reserved */ -- [151] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_TCP, -- [152] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_SCTP, -- [153] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_TUNNEL_GRENAT | -- RTE_PTYPE_INNER_L2_ETHER_VLAN | -- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_INNER_L4_ICMP, -- -- /* L2 NSH packet type */ -- [154] = RTE_PTYPE_L2_ETHER_NSH, -- [155] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_FRAG, -- [156] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_NONFRAG, -- [157] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_UDP, -- [158] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_TCP, -- [159] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_SCTP, -- [160] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -- RTE_PTYPE_L4_ICMP, -- [161] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_L4_FRAG, -- [162] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- 
RTE_PTYPE_L4_NONFRAG, -- [163] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_L4_UDP, -- [164] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_L4_TCP, -- [165] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_L4_SCTP, -- [166] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -- RTE_PTYPE_L4_ICMP, -- -- /* All others reserved */ -- }; -- -- return type_table[ptype]; --} -- - #define I40E_RX_DESC_EXT_STATUS_FLEXBH_MASK 0x03 - #define I40E_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01 - #define I40E_RX_DESC_EXT_STATUS_FLEXBH_FLEX 0x02 -@@ -2136,7 +1573,8 @@ i40e_dev_supported_ptypes_get(struct rte_eth_dev *dev) - #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC - dev->rx_pkt_burst == i40e_recv_pkts_bulk_alloc || - #endif -- dev->rx_pkt_burst == i40e_recv_scattered_pkts) -+ dev->rx_pkt_burst == i40e_recv_scattered_pkts || -+ dev->rx_pkt_burst == i40e_recv_pkts_vec) - return ptypes; - return NULL; - } -diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h -index 98179f0..ecdb13c 100644 ---- a/drivers/net/i40e/i40e_rxtx.h -+++ b/drivers/net/i40e/i40e_rxtx.h -@@ -255,4 +255,567 @@ void i40e_set_tx_function_flag(struct rte_eth_dev *dev, - struct i40e_tx_queue *txq); - void i40e_set_tx_function(struct rte_eth_dev *dev); - -+/* For each value it means, datasheet of hardware can tell more details -+ * -+ * @note: fix i40e_dev_supported_ptypes_get() if any change here. -+ */ -+static inline uint32_t -+i40e_rxd_pkt_type_mapping(uint8_t ptype) -+{ -+ static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = { -+ /* L2 types */ -+ /* [0] reserved */ -+ [1] = RTE_PTYPE_L2_ETHER, -+ [2] = RTE_PTYPE_L2_ETHER_TIMESYNC, -+ /* [3] - [5] reserved */ -+ [6] = RTE_PTYPE_L2_ETHER_LLDP, -+ /* [7] - [10] reserved */ -+ [11] = RTE_PTYPE_L2_ETHER_ARP, -+ /* [12] - [21] reserved */ -+ -+ /* Non tunneled IPv4 */ -+ [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_FRAG, -+ [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_NONFRAG, -+ [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_UDP, -+ /* [25] reserved */ -+ [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_TCP, -+ [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_SCTP, -+ [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_ICMP, -+ -+ /* IPv4 --> IPv4 */ -+ [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [32] reserved */ -+ [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv4 --> IPv6 */ -+ [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ 
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [39] reserved */ -+ [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv4 --> GRE/Teredo/VXLAN */ -+ [43] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT, -+ -+ /* IPv4 --> GRE/Teredo/VXLAN --> IPv4 */ -+ [44] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [47] reserved */ -+ [48] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv4 --> GRE/Teredo/VXLAN --> IPv6 */ -+ [51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [53] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [54] reserved */ -+ [55] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [56] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [57] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv4 --> GRE/Teredo/VXLAN --> MAC */ -+ [58] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, -+ -+ /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ -+ [59] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ 
RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [62] reserved */ -+ [63] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [65] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */ -+ [66] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [67] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [68] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [69] reserved */ -+ [70] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [71] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [72] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN */ -+ [73] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN, -+ -+ /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ -+ [74] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [75] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [76] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [77] reserved */ -+ [78] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [79] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [80] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* 
IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ -+ [81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [83] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [84] reserved */ -+ [85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [87] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* Non tunneled IPv6 */ -+ [88] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_FRAG, -+ [89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_NONFRAG, -+ [90] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_UDP, -+ /* [91] reserved */ -+ [92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_TCP, -+ [93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_SCTP, -+ [94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_ICMP, -+ -+ /* IPv6 --> IPv4 */ -+ [95] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [96] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [97] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [98] reserved */ -+ [99] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [100] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [101] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv6 --> IPv6 */ -+ [102] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [103] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [104] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [105] reserved */ -+ [106] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [107] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN 
| -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [108] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_IP | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv6 --> GRE/Teredo/VXLAN */ -+ [109] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT, -+ -+ /* IPv6 --> GRE/Teredo/VXLAN --> IPv4 */ -+ [110] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [111] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [112] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [113] reserved */ -+ [114] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [115] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [116] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv6 --> GRE/Teredo/VXLAN --> IPv6 */ -+ [117] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [118] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [119] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [120] reserved */ -+ [121] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [122] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [123] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv6 --> GRE/Teredo/VXLAN --> MAC */ -+ [124] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, -+ -+ /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ -+ [125] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [126] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [127] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [128] reserved */ -+ [129] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [130] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ 
RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [131] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */ -+ [132] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [133] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [134] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [135] reserved */ -+ [136] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [137] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [138] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN */ -+ [139] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN, -+ -+ /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ -+ [140] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [141] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [142] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [143] reserved */ -+ [144] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [145] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [146] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ -+ [147] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_FRAG, -+ [148] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_NONFRAG, -+ [149] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | 
-+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_UDP, -+ /* [150] reserved */ -+ [151] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_TCP, -+ [152] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_SCTP, -+ [153] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_TUNNEL_GRENAT | -+ RTE_PTYPE_INNER_L2_ETHER_VLAN | -+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_INNER_L4_ICMP, -+ -+ /* L2 NSH packet type */ -+ [154] = RTE_PTYPE_L2_ETHER_NSH, -+ [155] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_FRAG, -+ [156] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_NONFRAG, -+ [157] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_UDP, -+ [158] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_TCP, -+ [159] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_SCTP, -+ [160] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | -+ RTE_PTYPE_L4_ICMP, -+ [161] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_FRAG, -+ [162] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_NONFRAG, -+ [163] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_UDP, -+ [164] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_TCP, -+ [165] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_SCTP, -+ [166] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | -+ RTE_PTYPE_L4_ICMP, -+ -+ /* All others reserved */ -+ }; -+ -+ return type_table[ptype]; -+} -+ - #endif /* _I40E_RXTX_H_ */ -diff --git a/drivers/net/i40e/i40e_rxtx_vec.c b/drivers/net/i40e/i40e_rxtx_vec.c -index 05cb415..e78ac63 100644 ---- a/drivers/net/i40e/i40e_rxtx_vec.c -+++ b/drivers/net/i40e/i40e_rxtx_vec.c -@@ -187,6 +187,21 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) - - #define PKTLEN_SHIFT 10 - -+static inline void -+desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts) -+{ -+ __m128i ptype0 = _mm_unpackhi_epi64(descs[0], descs[1]); -+ __m128i ptype1 = _mm_unpackhi_epi64(descs[2], descs[3]); -+ -+ ptype0 = _mm_srli_epi64(ptype0, 30); -+ ptype1 = _mm_srli_epi64(ptype1, 30); -+ -+ rx_pkts[0]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype0, 0)); -+ rx_pkts[1]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype0, 8)); -+ rx_pkts[2]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype1, 0)); -+ rx_pkts[3]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype1, 8)); -+} -+ - /* - * Notice: - * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet -@@ -393,6 +408,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, - pkt_mb2); - _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, - pkt_mb1); -+ desc_to_ptype_v(descs, &rx_pkts[pos]); - /* C.4 calc avaialbe number of desc */ - var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); - nb_pkts_recd += var; --- -2.7.4 - diff --git a/dpdk/dpdk-16.07_patches/0002-i40e-Enable-bad-checksum-flags-in-i40e-vPMD.patch b/dpdk/dpdk-16.07_patches/0002-i40e-Enable-bad-checksum-flags-in-i40e-vPMD.patch deleted file mode 100644 index 58256f19..00000000 --- 
a/dpdk/dpdk-16.07_patches/0002-i40e-Enable-bad-checksum-flags-in-i40e-vPMD.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 5917bd1cf9857979a7cae89f362d2c885f09d034 Mon Sep 17 00:00:00 2001 -From: Damjan Marion -Date: Thu, 14 Jul 2016 09:59:02 -0700 -Subject: [PATCH 2/2] i40e: Enable bad checksum flags in i40e vPMD - -Decode the checksum flags from the rx descriptor, setting -the appropriate bit in the mbuf ol_flags field when the flag -indicates a bad checksum. - -Signed-off-by: Damjan Marion -Signed-off-by: Jeff Shaw ---- - drivers/net/i40e/i40e_rxtx_vec.c | 48 +++++++++++++++++++++++----------------- - 1 file changed, 28 insertions(+), 20 deletions(-) - -diff --git a/drivers/net/i40e/i40e_rxtx_vec.c b/drivers/net/i40e/i40e_rxtx_vec.c -index e78ac63..ace51df 100644 ---- a/drivers/net/i40e/i40e_rxtx_vec.c -+++ b/drivers/net/i40e/i40e_rxtx_vec.c -@@ -138,19 +138,14 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq) - static inline void - desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) - { -- __m128i vlan0, vlan1, rss; -- union { -- uint16_t e[4]; -- uint64_t dword; -- } vol; -+ __m128i vlan0, vlan1, rss, l3_l4e; - - /* mask everything except RSS, flow director and VLAN flags - * bit2 is for VLAN tag, bit11 for flow director indication - * bit13:12 for RSS indication. - */ -- const __m128i rss_vlan_msk = _mm_set_epi16( -- 0x0000, 0x0000, 0x0000, 0x0000, -- 0x3804, 0x3804, 0x3804, 0x3804); -+ const __m128i rss_vlan_msk = _mm_set_epi32( -+ 0x1c03004, 0x1c03004, 0x1c03004, 0x1c03004); - - /* map rss and vlan type to rss hash and vlan flag */ - const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0, -@@ -163,23 +158,36 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) - PKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0, - 0, 0, PKT_RX_FDIR, 0); - -- vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]); -- vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]); -- vlan0 = _mm_unpacklo_epi32(vlan0, vlan1); -+ const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, -+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, -+ PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, -+ PKT_RX_EIP_CKSUM_BAD, -+ PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, -+ PKT_RX_L4_CKSUM_BAD, -+ PKT_RX_IP_CKSUM_BAD, -+ 0); -+ -+ vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]); -+ vlan1 = _mm_unpackhi_epi32(descs[2], descs[3]); -+ vlan0 = _mm_unpacklo_epi64(vlan0, vlan1); - - vlan1 = _mm_and_si128(vlan0, rss_vlan_msk); - vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1); - -- rss = _mm_srli_epi16(vlan1, 11); -+ rss = _mm_srli_epi32(vlan1, 12); - rss = _mm_shuffle_epi8(rss_flags, rss); - -+ l3_l4e = _mm_srli_epi32(vlan1, 22); -+ l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e); -+ - vlan0 = _mm_or_si128(vlan0, rss); -- vol.dword = _mm_cvtsi128_si64(vlan0); -+ vlan0 = _mm_or_si128(vlan0, l3_l4e); - -- rx_pkts[0]->ol_flags = vol.e[0]; -- rx_pkts[1]->ol_flags = vol.e[1]; -- rx_pkts[2]->ol_flags = vol.e[2]; -- rx_pkts[3]->ol_flags = vol.e[3]; -+ rx_pkts[0]->ol_flags = _mm_extract_epi16(vlan0, 0); -+ rx_pkts[1]->ol_flags = _mm_extract_epi16(vlan0, 2); -+ rx_pkts[2]->ol_flags = _mm_extract_epi16(vlan0, 4); -+ rx_pkts[3]->ol_flags = _mm_extract_epi16(vlan0, 6); - } - #else - #define desc_to_olflags_v(desc, rx_pkts) do {} while (0) -@@ -754,7 +762,8 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) - #ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE - /* whithout rx ol_flags, no VP flag report */ - if (rxmode->hw_vlan_strip != 0 || -- rxmode->hw_vlan_extend != 0) -+ 
rxmode->hw_vlan_extend != 0 || -+ rxmode->hw_ip_checksum != 0) - return -1; - #endif - -@@ -765,8 +774,7 @@ i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) - /* - no csum error report support - * - no header split support - */ -- if (rxmode->hw_ip_checksum == 1 || -- rxmode->header_split == 1) -+ if (rxmode->header_split == 1) - return -1; - - return 0; --- -2.7.4 - diff --git a/dpdk/dpdk-16.07_patches/0003-enic-Set-PKT_RX_VLAN_PKT-iff-returned-packet-has-VLA.patch b/dpdk/dpdk-16.07_patches/0003-enic-Set-PKT_RX_VLAN_PKT-iff-returned-packet-has-VLA.patch deleted file mode 100644 index 53264158..00000000 --- a/dpdk/dpdk-16.07_patches/0003-enic-Set-PKT_RX_VLAN_PKT-iff-returned-packet-has-VLA.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 6a7a9e52ed2ccfa86c2def3a66a368a5577f2fc2 Mon Sep 17 00:00:00 2001 -From: John Daley -Date: Tue, 3 May 2016 13:56:05 -0700 -Subject: [PATCH] enic: Set PKT_RX_VLAN_PKT iff returned packet has VLAN tag - -Only set the ol_flags PKT_RX_VLAN_PKT bit if the packet being passed -to the application contains a VLAN tag. This is true whether -stripping is enabled or disabled. - -This area of the API is in flux, so behaviour may change in the -future. - -Signed-off-by: John Daley ---- - drivers/net/enic/enic_rxtx.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c -index 02b54dd..6a95389 100644 ---- a/drivers/net/enic/enic_rxtx.c -+++ b/drivers/net/enic/enic_rxtx.c -@@ -195,12 +195,16 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) - if (unlikely(!enic_cq_rx_desc_eop(ciflags))) - goto mbuf_flags_done; - -- /* VLAN stripping */ -+ /* VLAN stripping. Set PKT_RX_VLAN_PKT only if there is a vlan tag -+ * in the packet passed up -+ */ - if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) { -- pkt_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED; -+ pkt_flags |= PKT_RX_VLAN_STRIPPED; - mbuf->vlan_tci = enic_cq_rx_desc_vlan(cqrd); - } else { - mbuf->vlan_tci = 0; -+ if (enic_cq_rx_desc_vlan(cqrd)) -+ pkt_flags |= PKT_RX_VLAN_PKT; - } - - /* RSS flag */ --- - diff --git a/dpdk/dpdk-16.07_patches/0004-Revert-ixgbe-fix-packet-type-from-vector-Rx.patch b/dpdk/dpdk-16.07_patches/0004-Revert-ixgbe-fix-packet-type-from-vector-Rx.patch deleted file mode 100644 index 1c4585f1..00000000 --- a/dpdk/dpdk-16.07_patches/0004-Revert-ixgbe-fix-packet-type-from-vector-Rx.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 44b3a182e791c2f023d2a237a03eb9d3014c7da6 Mon Sep 17 00:00:00 2001 -From: Ray Kinsella -Date: Thu, 4 Aug 2016 17:06:21 +0100 -Subject: [PATCH] Revert "ixgbe: fix packet type from vector Rx" - -This reverts commit d9a2009a81089093645fea2e04b51dd37edf3e6f. 
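The i40e patch above and the ixgbe revert below rely on the same design: the NIC reports a small packet-type index in each RX descriptor, and the vector receive path expands it into the full RTE_PTYPE_* bit set with a single table load, so packet_type can be filled in without leaving the SSE loop. The following standalone sketch models that pattern in scalar C; the PTYPE_* values are invented stand-ins, and only a few table indices are shown (they follow the i40e mapping above: 24 = IPv4/UDP, 26 = IPv4/TCP, 90 = IPv6/UDP, 92 = IPv6/TCP).

/* Scalar model of the descriptor-to-packet-type step used by the vector
 * RX paths.  Flag values are invented for illustration; the real driver
 * uses RTE_PTYPE_* constants and the full 256-entry cache-aligned table
 * from i40e_rxd_pkt_type_mapping() above. */
#include <stdint.h>
#include <stdio.h>

#define PTYPE_L2_ETHER (1u << 0)
#define PTYPE_L3_IPV4  (1u << 4)
#define PTYPE_L3_IPV6  (1u << 5)
#define PTYPE_L4_UDP   (1u << 8)
#define PTYPE_L4_TCP   (1u << 9)

/* One entry per possible 8-bit hardware ptype; reserved indices stay 0. */
static const uint32_t type_table[256] = {
	[24] = PTYPE_L2_ETHER | PTYPE_L3_IPV4 | PTYPE_L4_UDP,
	[26] = PTYPE_L2_ETHER | PTYPE_L3_IPV4 | PTYPE_L4_TCP,
	[90] = PTYPE_L2_ETHER | PTYPE_L3_IPV6 | PTYPE_L4_UDP,
	[92] = PTYPE_L2_ETHER | PTYPE_L3_IPV6 | PTYPE_L4_TCP,
};

/* desc_to_ptype_v() extracts the index four descriptors at a time with
 * unpacks, shifts and byte extracts; per packet the work then reduces
 * to this single load. */
static uint32_t
model_pkt_type(uint8_t hw_ptype)
{
	return type_table[hw_ptype];
}

int
main(void)
{
	printf("ptype 26 -> 0x%x\n", (unsigned)model_pkt_type(26)); /* IPv4/TCP */
	printf("ptype 0  -> 0x%x\n", (unsigned)model_pkt_type(0));  /* reserved */
	return 0;
}

Because the table is indexed directly, reporting packet_type costs one load per packet, which is why both patches can afford to add the vector receive functions to the list returned by the drivers' supported_ptypes_get() callbacks.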
- -Conflicts: - drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c - -Signed-off-by: Ray Kinsella ---- - drivers/net/ixgbe/ixgbe_ethdev.c | 4 +++- - drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c | 34 +++++++++++++++++++++++----------- - 2 files changed, 26 insertions(+), 12 deletions(-) - -diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c -index d478a15..63966a6 100644 ---- a/drivers/net/ixgbe/ixgbe_ethdev.c -+++ b/drivers/net/ixgbe/ixgbe_ethdev.c -@@ -3117,7 +3117,9 @@ ixgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev) - if (dev->rx_pkt_burst == ixgbe_recv_pkts || - dev->rx_pkt_burst == ixgbe_recv_pkts_lro_single_alloc || - dev->rx_pkt_burst == ixgbe_recv_pkts_lro_bulk_alloc || -- dev->rx_pkt_burst == ixgbe_recv_pkts_bulk_alloc) -+ dev->rx_pkt_burst == ixgbe_recv_pkts_bulk_alloc || -+ dev->rx_pkt_burst == ixgbe_recv_pkts_vec || -+ dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec) - return ptypes; - return NULL; - } -diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c -index 1c4fd7c..3aae401 100644 ---- a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c -+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c -@@ -231,6 +231,8 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, - ); - __m128i dd_check, eop_check; - uint8_t vlan_flags; -+ __m128i desc_mask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, -+ 0xFFFFFFFF, 0xFFFF07F0); - - /* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */ - nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST); -@@ -271,8 +273,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, - 13, 12, /* octet 12~13, 16 bits data_len */ - 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ - 13, 12, /* octet 12~13, low 16 bits pkt_len */ -- 0xFF, 0xFF, /* skip 32 bit pkt_type */ -- 0xFF, 0xFF -+ 0xFF, 0xFF, /* skip high 16 bits pkt_type */ -+ 1, /* octet 1, 8 bits pkt_type field */ -+ 0 /* octet 0, 4 bits offset 4 pkt_type field */ - ); - - /* Cache is empty -> need to scan the buffer rings, but first move -@@ -294,6 +297,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, - for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; - pos += RTE_IXGBE_DESCS_PER_LOOP, - rxdp += RTE_IXGBE_DESCS_PER_LOOP) { -+ __m128i descs0[RTE_IXGBE_DESCS_PER_LOOP]; - __m128i descs[RTE_IXGBE_DESCS_PER_LOOP]; - __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; - __m128i zero, staterr, sterr_tmp1, sterr_tmp2; -@@ -304,7 +308,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, - - /* Read desc statuses backwards to avoid race condition */ - /* A.1 load 4 pkts desc */ -- descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); -+ descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); - - /* B.2 copy 2 mbuf point into rx_pkts */ - _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); -@@ -312,10 +316,10 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, - /* B.1 load 1 mbuf point */ - mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]); - -- descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); -+ descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); - /* B.1 load 2 mbuf point */ -- descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); -- descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); -+ descs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); -+ descs0[0] = _mm_loadu_si128((__m128i *)(rxdp)); - - /* B.2 copy 2 mbuf point into rx_pkts */ - _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2); -@@ -327,6 +331,14 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct 
rte_mbuf **rx_pkts, - rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); - } - -+ /* A* mask out 0~3 bits RSS type */ -+ descs[3] = _mm_and_si128(descs0[3], desc_mask); -+ descs[2] = _mm_and_si128(descs0[2], desc_mask); -+ -+ /* A* mask out 0~3 bits RSS type */ -+ descs[1] = _mm_and_si128(descs0[1], desc_mask); -+ descs[0] = _mm_and_si128(descs0[0], desc_mask); -+ - /* avoid compiler reorder optimization */ - rte_compiler_barrier(); - -@@ -334,22 +346,22 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, - pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk); - pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk); - -- /* D.1 pkt 1,2 convert format from desc to pktmbuf */ -- pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk); -- pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk); -- - /* C.1 4=>2 filter staterr info only */ - sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]); - /* C.1 4=>2 filter staterr info only */ - sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]); - - /* set ol_flags with vlan packet type */ -- desc_to_olflags_v(descs, vlan_flags, &rx_pkts[pos]); -+ desc_to_olflags_v(descs0, vlan_flags, &rx_pkts[pos]); - - /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ - pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust); - pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust); - -+ /* D.1 pkt 1,2 convert format from desc to pktmbuf */ -+ pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk); -+ pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk); -+ - /* C.2 get 4 pkts staterr value */ - zero = _mm_xor_si128(dd_check, dd_check); - staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2); --- -1.9.1 - diff --git a/dpdk/dpdk-16.07_patches/0005-NXP-DPAA2-Poll-Mode-Driver-Support-dpdk-16.07.patch b/dpdk/dpdk-16.07_patches/0005-NXP-DPAA2-Poll-Mode-Driver-Support-dpdk-16.07.patch deleted file mode 100644 index 9bd3b2a5..00000000 --- a/dpdk/dpdk-16.07_patches/0005-NXP-DPAA2-Poll-Mode-Driver-Support-dpdk-16.07.patch +++ /dev/null @@ -1,40106 +0,0 @@ -From 5a2069b38e85771f3857af390e407360d66cd6ed Mon Sep 17 00:00:00 2001 -From: Sachin Saxena -Date: Fri, 5 Aug 2016 14:06:11 +0530 -Subject: [PATCH 5/5] NXP DPAA2 Poll Mode Driver Support (dpdk-16.07) - - Upstreaming of DPAA2 driver changes is in progress. This patch will - temporarily add the support in VPP's in-built DPDK. - - Two types of changes: - 1. Driver specific independent files. No impact on any other functionality. - 2. Changes in common EAL framework. These changes are done in compile time DPAA2 - specific flag, so no impact is expected on other existing features if not - compiling for DPAA2.
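The second category of changes can be pictured with the small sketch below. CONFIG_DPAA2 and soc_bus_scan() are invented placeholder names, not the symbols the patch actually defines; the point is only the mechanism: the EAL touch points are compiled in for DPAA2 targets and reduce to empty stubs everywhere else.

/* Sketch of gating platform-specific EAL hooks behind a build-time flag.
 * CONFIG_DPAA2 and soc_bus_scan() are illustrative names only. */
#include <stdio.h>

#ifdef CONFIG_DPAA2
static int
soc_bus_scan(void)
{
	/* DPAA2 builds: discover network objects on the platform bus. */
	printf("scanning platform bus\n");
	return 0;
}
#else
static int
soc_bus_scan(void)
{
	/* All other builds: the hook is an empty stub, so existing
	 * behaviour is unchanged. */
	return 0;
}
#endif

int
main(void)
{
	return soc_bus_scan();
}

Built without -DCONFIG_DPAA2 the stub compiles to nothing, which is the "no impact is expected" property claimed above.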
- -Signed-off-by: Sachin Saxena ---- - config/defconfig_arm64-dpaa2-linuxapp-gcc | 18 +- - drivers/net/Makefile | 1 + - drivers/net/dpaa2/Makefile | 102 + - drivers/net/dpaa2/dpaa2_logs.h | 78 + - drivers/net/dpaa2/mc/dpaiop.c | 457 ++++ - drivers/net/dpaa2/mc/dpbp.c | 432 ++++ - drivers/net/dpaa2/mc/dpci.c | 501 ++++ - drivers/net/dpaa2/mc/dpcon.c | 400 +++ - drivers/net/dpaa2/mc/dpdbg.c | 547 +++++ - drivers/net/dpaa2/mc/dpdcei.c | 449 ++++ - drivers/net/dpaa2/mc/dpdmai.c | 452 ++++ - drivers/net/dpaa2/mc/dpdmux.c | 567 +++++ - drivers/net/dpaa2/mc/dpio.c | 468 ++++ - drivers/net/dpaa2/mc/dpmac.c | 422 ++++ - drivers/net/dpaa2/mc/dpmcp.c | 312 +++ - drivers/net/dpaa2/mc/dpmng.c | 58 + - drivers/net/dpaa2/mc/dpni.c | 1907 +++++++++++++++ - drivers/net/dpaa2/mc/dprc.c | 786 ++++++ - drivers/net/dpaa2/mc/dprtc.c | 509 ++++ - drivers/net/dpaa2/mc/dpseci.c | 502 ++++ - drivers/net/dpaa2/mc/dpsw.c | 1639 +++++++++++++ - drivers/net/dpaa2/mc/fsl_dpaiop.h | 494 ++++ - drivers/net/dpaa2/mc/fsl_dpaiop_cmd.h | 190 ++ - drivers/net/dpaa2/mc/fsl_dpbp.h | 438 ++++ - drivers/net/dpaa2/mc/fsl_dpbp_cmd.h | 172 ++ - drivers/net/dpaa2/mc/fsl_dpci.h | 594 +++++ - drivers/net/dpaa2/mc/fsl_dpci_cmd.h | 200 ++ - drivers/net/dpaa2/mc/fsl_dpcon.h | 407 +++ - drivers/net/dpaa2/mc/fsl_dpcon_cmd.h | 162 ++ - drivers/net/dpaa2/mc/fsl_dpdbg.h | 635 +++++ - drivers/net/dpaa2/mc/fsl_dpdbg_cmd.h | 249 ++ - drivers/net/dpaa2/mc/fsl_dpdcei.h | 515 ++++ - drivers/net/dpaa2/mc/fsl_dpdcei_cmd.h | 182 ++ - drivers/net/dpaa2/mc/fsl_dpdmai.h | 521 ++++ - drivers/net/dpaa2/mc/fsl_dpdmai_cmd.h | 191 ++ - drivers/net/dpaa2/mc/fsl_dpdmux.h | 724 ++++++ - drivers/net/dpaa2/mc/fsl_dpdmux_cmd.h | 256 ++ - drivers/net/dpaa2/mc/fsl_dpio.h | 460 ++++ - drivers/net/dpaa2/mc/fsl_dpio_cmd.h | 184 ++ - drivers/net/dpaa2/mc/fsl_dpkg.h | 174 ++ - drivers/net/dpaa2/mc/fsl_dpmac.h | 593 +++++ - drivers/net/dpaa2/mc/fsl_dpmac_cmd.h | 195 ++ - drivers/net/dpaa2/mc/fsl_dpmcp.h | 332 +++ - drivers/net/dpaa2/mc/fsl_dpmcp_cmd.h | 135 + - drivers/net/dpaa2/mc/fsl_dpmng.h | 74 + - drivers/net/dpaa2/mc/fsl_dpmng_cmd.h | 46 + - drivers/net/dpaa2/mc/fsl_dpni.h | 2581 ++++++++++++++++++++ - drivers/net/dpaa2/mc/fsl_dpni_cmd.h | 1058 ++++++++ - drivers/net/dpaa2/mc/fsl_dprc.h | 1032 ++++++++ - drivers/net/dpaa2/mc/fsl_dprc_cmd.h | 755 ++++++ - drivers/net/dpaa2/mc/fsl_dprtc.h | 434 ++++ - drivers/net/dpaa2/mc/fsl_dprtc_cmd.h | 181 ++ - drivers/net/dpaa2/mc/fsl_dpseci.h | 647 +++++ - drivers/net/dpaa2/mc/fsl_dpseci_cmd.h | 241 ++ - drivers/net/dpaa2/mc/fsl_dpsw.h | 2164 ++++++++++++++++ - drivers/net/dpaa2/mc/fsl_dpsw_cmd.h | 916 +++++++ - drivers/net/dpaa2/mc/fsl_mc_cmd.h | 221 ++ - drivers/net/dpaa2/mc/fsl_mc_sys.h | 98 + - drivers/net/dpaa2/mc/fsl_net.h | 480 ++++ - drivers/net/dpaa2/mc/mc_sys.c | 127 + - drivers/net/dpaa2/qbman/driver/qbman_debug.c | 929 +++++++ - drivers/net/dpaa2/qbman/driver/qbman_debug.h | 140 ++ - drivers/net/dpaa2/qbman/driver/qbman_portal.c | 1441 +++++++++++ - drivers/net/dpaa2/qbman/driver/qbman_portal.h | 270 ++ - drivers/net/dpaa2/qbman/driver/qbman_private.h | 168 ++ - drivers/net/dpaa2/qbman/driver/qbman_sys.h | 373 +++ - drivers/net/dpaa2/qbman/driver/qbman_sys_decl.h | 69 + - drivers/net/dpaa2/qbman/include/compat.h | 637 +++++ - .../dpaa2/qbman/include/drivers/fsl_qbman_base.h | 151 ++ - .../dpaa2/qbman/include/drivers/fsl_qbman_portal.h | 1087 +++++++++ - drivers/net/dpaa2/rte_eth_dpaa2_pvt.h | 330 +++ - drivers/net/dpaa2/rte_eth_dpbp.c | 377 +++ - drivers/net/dpaa2/rte_eth_dpio.c | 336 +++ - 
drivers/net/dpaa2/rte_eth_dpni.c | 2269 +++++++++++++++++ - drivers/net/dpaa2/rte_eth_dpni_annot.h | 310 +++ - lib/librte_eal/common/eal_private.h | 7 + - lib/librte_eal/linuxapp/eal/Makefile | 4 + - lib/librte_eal/linuxapp/eal/eal.c | 5 + - lib/librte_eal/linuxapp/eal/eal_soc.c | 67 + - lib/librte_eal/linuxapp/eal/eal_vfio_fsl_mc.c | 650 +++++ - lib/librte_eal/linuxapp/eal/eal_vfio_fsl_mc.h | 98 + - lib/librte_mempool/rte_mempool.h | 8 + - mk/rte.app.mk | 1 + - 83 files changed, 39391 insertions(+), 1 deletion(-) - create mode 100644 drivers/net/dpaa2/Makefile - create mode 100644 drivers/net/dpaa2/dpaa2_logs.h - create mode 100644 drivers/net/dpaa2/mc/dpaiop.c - create mode 100644 drivers/net/dpaa2/mc/dpbp.c - create mode 100644 drivers/net/dpaa2/mc/dpci.c - create mode 100644 drivers/net/dpaa2/mc/dpcon.c - create mode 100644 drivers/net/dpaa2/mc/dpdbg.c - create mode 100644 drivers/net/dpaa2/mc/dpdcei.c - create mode 100644 drivers/net/dpaa2/mc/dpdmai.c - create mode 100644 drivers/net/dpaa2/mc/dpdmux.c - create mode 100644 drivers/net/dpaa2/mc/dpio.c - create mode 100644 drivers/net/dpaa2/mc/dpmac.c - create mode 100644 drivers/net/dpaa2/mc/dpmcp.c - create mode 100644 drivers/net/dpaa2/mc/dpmng.c - create mode 100644 drivers/net/dpaa2/mc/dpni.c - create mode 100644 drivers/net/dpaa2/mc/dprc.c - create mode 100644 drivers/net/dpaa2/mc/dprtc.c - create mode 100644 drivers/net/dpaa2/mc/dpseci.c - create mode 100644 drivers/net/dpaa2/mc/dpsw.c - create mode 100644 drivers/net/dpaa2/mc/fsl_dpaiop.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpaiop_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpbp.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpbp_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpci.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpci_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpcon.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpcon_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpdbg.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpdbg_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpdcei.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpdcei_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpdmai.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpdmai_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpdmux.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpdmux_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpio.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpio_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpkg.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpmac.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpmac_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpmcp.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpmcp_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpmng.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpmng_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpni.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpni_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dprc.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dprc_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dprtc.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dprtc_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpseci.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpseci_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpsw.h - create mode 100644 drivers/net/dpaa2/mc/fsl_dpsw_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_mc_cmd.h - create mode 100644 drivers/net/dpaa2/mc/fsl_mc_sys.h - create mode 100644 
drivers/net/dpaa2/mc/fsl_net.h - create mode 100644 drivers/net/dpaa2/mc/mc_sys.c - create mode 100644 drivers/net/dpaa2/qbman/driver/qbman_debug.c - create mode 100644 drivers/net/dpaa2/qbman/driver/qbman_debug.h - create mode 100644 drivers/net/dpaa2/qbman/driver/qbman_portal.c - create mode 100644 drivers/net/dpaa2/qbman/driver/qbman_portal.h - create mode 100644 drivers/net/dpaa2/qbman/driver/qbman_private.h - create mode 100644 drivers/net/dpaa2/qbman/driver/qbman_sys.h - create mode 100644 drivers/net/dpaa2/qbman/driver/qbman_sys_decl.h - create mode 100644 drivers/net/dpaa2/qbman/include/compat.h - create mode 100644 drivers/net/dpaa2/qbman/include/drivers/fsl_qbman_base.h - create mode 100644 drivers/net/dpaa2/qbman/include/drivers/fsl_qbman_portal.h - create mode 100644 drivers/net/dpaa2/rte_eth_dpaa2_pvt.h - create mode 100644 drivers/net/dpaa2/rte_eth_dpbp.c - create mode 100644 drivers/net/dpaa2/rte_eth_dpio.c - create mode 100644 drivers/net/dpaa2/rte_eth_dpni.c - create mode 100644 drivers/net/dpaa2/rte_eth_dpni_annot.h - create mode 100644 lib/librte_eal/linuxapp/eal/eal_soc.c - create mode 100644 lib/librte_eal/linuxapp/eal/eal_vfio_fsl_mc.c - create mode 100644 lib/librte_eal/linuxapp/eal/eal_vfio_fsl_mc.h - -diff --git a/config/defconfig_arm64-dpaa2-linuxapp-gcc b/config/defconfig_arm64-dpaa2-linuxapp-gcc -index 66df54c..e42fa90 100644 ---- a/config/defconfig_arm64-dpaa2-linuxapp-gcc -+++ b/config/defconfig_arm64-dpaa2-linuxapp-gcc -@@ -1,6 +1,6 @@ - # BSD LICENSE - # --# Copyright(c) 2016 Freescale Semiconductor, Inc. All rights reserved. -+# Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved. - # - # Redistribution and use in source and binary forms, with or without - # modification, are permitted provided that the following conditions -@@ -40,3 +40,19 @@ CONFIG_RTE_ARCH_ARM_TUNE="cortex-a57+fp+simd" - # - CONFIG_RTE_MAX_LCORE=8 - CONFIG_RTE_MAX_NUMA_NODES=1 -+ -+CONFIG_RTE_PKTMBUF_HEADROOM=256 -+# -+#Kernel KNI component - disable by default to avoid kernel -+#code dependency -+# -+CONFIG_RTE_KNI_KMOD=n -+ -+# Compile software PMD backed by FSL DPAA2 files -+# -+CONFIG_RTE_LIBRTE_DPAA2_PMD=y -+CONFIG_RTE_LIBRTE_DPAA2_USE_PHYS_IOVA=n -+CONFIG_RTE_LIBRTE_DPAA2_DEBUG_INIT=n -+CONFIG_RTE_LIBRTE_DPAA2_DEBUG_DRIVER=n -+CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n -+CONFIG_RTE_MBUF_DEFAULT_MEMPOOL_OPS="dpaa2" -diff --git a/drivers/net/Makefile b/drivers/net/Makefile -index bc93230..a71c14a 100644 ---- a/drivers/net/Makefile -+++ b/drivers/net/Makefile -@@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx - DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio - DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3 - DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt -+DIRS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += dpaa2 - - ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y) - DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost -diff --git a/drivers/net/dpaa2/Makefile b/drivers/net/dpaa2/Makefile -new file mode 100644 -index 0000000..3cf1782 ---- /dev/null -+++ b/drivers/net/dpaa2/Makefile -@@ -0,0 +1,102 @@ -+# BSD LICENSE -+# -+# Copyright (c) 2014 Freescale Semiconductor, Inc. All rights reserved. -+# -+# Redistribution and use in source and binary forms, with or without -+# modification, are permitted provided that the following conditions -+# are met: -+# -+# * Redistributions of source code must retain the above copyright -+# notice, this list of conditions and the following disclaimer. 
-+# * Redistributions in binary form must reproduce the above copyright -+# notice, this list of conditions and the following disclaimer in -+# the documentation and/or other materials provided with the -+# distribution. -+# * Neither the name of Freescale Semiconductor nor the names of its -+# contributors may be used to endorse or promote products derived -+# from this software without specific prior written permission. -+# -+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+include $(RTE_SDK)/mk/rte.vars.mk -+ -+# -+# library name -+# -+LIB = librte_pmd_dpaa2.a -+ -+ifeq ($(CONFIG_RTE_LIBRTE_DPAA2_DEBUG_INIT),y) -+CFLAGS += -O0 -g -+CFLAGS += "-Wno-error" -+else -+CFLAGS += -O3 -g -+CFLAGS += $(WERROR_FLAGS) -+endif -+CFLAGS +=-Wno-strict-aliasing -+CFLAGS +=-Wno-missing-prototypes -+CFLAGS +=-Wno-missing-declarations -+CFLAGS +=-Wno-unused-function -+ -+CFLAGS += -I$(RTE_SDK)/drivers/net/dpaa2/mc -+CFLAGS += -I$(RTE_SDK)/drivers/net/dpaa2/qbman/include -+CFLAGS += -I$(RTE_SDK)/drivers/net/dpaa2/qbman/include/drivers -+CFLAGS += -I$(RTE_SDK)/drivers/net/dpaa2/driver/ -+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include -+CFLAGS += -I$(RTE_SDK)/lib/librte_ether -+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/linuxapp/eal -+ -+EXPORT_MAP := rte_pmd_dpaa2_version.map -+ -+LIBABIVER := 1 -+# -+# all source are stored in SRCS-y -+# -+SRCS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += \ -+ mc/dprc.c \ -+ mc/dprtc.c \ -+ mc/dpaiop.c \ -+ mc/dpdbg.c \ -+ mc/dpdcei.c \ -+ mc/dpdmai.c \ -+ mc/dpmac.c \ -+ mc/dpmcp.c \ -+ mc/dpbp.c \ -+ mc/dpio.c \ -+ mc/dpni.c \ -+ mc/dpsw.c \ -+ mc/dpci.c \ -+ mc/dpcon.c \ -+ mc/dpseci.c \ -+ mc/dpmng.c \ -+ mc/dpdmux.c \ -+ mc/mc_sys.c -+ -+# -+# all source are stored in SRCS-y -+# -+SRCS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += \ -+ qbman/driver/qbman_portal.c \ -+ qbman/driver/qbman_debug.c -+ -+SRCS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += rte_eth_dpni.c -+SRCS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += rte_eth_dpio.c -+SRCS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += rte_eth_dpbp.c -+ -+# -+# Export include files -+# -+SYMLINK-y-include += -+ -+# this lib depends upon: -+DEPDIRS-y += lib/librte_eal -+include $(RTE_SDK)/mk/rte.lib.mk -diff --git a/drivers/net/dpaa2/dpaa2_logs.h b/drivers/net/dpaa2/dpaa2_logs.h -new file mode 100644 -index 0000000..534d4b5 ---- /dev/null -+++ b/drivers/net/dpaa2/dpaa2_logs.h -@@ -0,0 +1,78 @@ -+/*- -+ * BSD LICENSE -+ * -+ * Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer.
-+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in -+ * the documentation and/or other materials provided with the -+ * distribution. -+ * * Neither the name of Freescale Semiconductor, Inc nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#ifndef _DPAA2_LOGS_H_ -+#define _DPAA2_LOGS_H_ -+ -+#define PMD_INIT_LOG(level, fmt, args...) \ -+ RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ##args) -+ -+#ifdef RTE_LIBRTE_DPAA2_DEBUG_INIT -+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>") -+#else -+#define PMD_INIT_FUNC_TRACE() do { } while (0) -+#endif -+ -+#ifdef RTE_LIBRTE_DPAA2_DEBUG_RX -+#define PMD_RX_LOG(level, fmt, args...) \ -+ RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) -+#else -+#define PMD_RX_LOG(level, fmt, args...) do { } while (0) -+#endif -+ -+#ifdef RTE_LIBRTE_DPAA2_DEBUG_TX -+#define PMD_TX_LOG(level, fmt, args...) \ -+ RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) -+#else -+#define PMD_TX_LOG(level, fmt, args...) do { } while (0) -+#endif -+ -+#ifdef RTE_LIBRTE_DPAA2_DEBUG_TX_FREE -+#define PMD_TX_FREE_LOG(level, fmt, args...) \ -+ RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) -+#else -+#define PMD_TX_FREE_LOG(level, fmt, args...) do { } while (0) -+#endif -+ -+#ifdef RTE_LIBRTE_DPAA2_DEBUG_DRIVER -+#define PMD_DRV_LOG_RAW(level, fmt, args...) \ -+ RTE_LOG(level, PMD, "%s(): " fmt, __func__, ## args) -+#else -+#define PMD_DRV_LOG_RAW(level, fmt, args...) do { } while (0) -+#endif -+ -+#define PMD_DRV_LOG2(level, fmt, args...) do { } while (0) -+ -+#define PMD_DRV_LOG(level, fmt, args...) \ -+ PMD_DRV_LOG_RAW(level, fmt "\n", ## args) -+ -+#endif /* _DPAA2_LOGS_H_ */ -diff --git a/drivers/net/dpaa2/mc/dpaiop.c b/drivers/net/dpaa2/mc/dpaiop.c -new file mode 100644 -index 0000000..7c1ecff ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpaiop.c -@@ -0,0 +1,457 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. 
-+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#include <fsl_mc_sys.h> -+#include <fsl_mc_cmd.h> -+#include <fsl_dpaiop.h> -+#include <fsl_dpaiop_cmd.h> -+ -+int dpaiop_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpaiop_id, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_OPEN, -+ cmd_flags, -+ 0); -+ DPAIOP_CMD_OPEN(cmd, dpaiop_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return err; -+} -+ -+int dpaiop_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_CLOSE, cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpaiop_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpaiop_cfg *cfg, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ (void)(cfg); /* unused */ -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_CREATE, -+ cmd_flags, -+ 0); -+ DPAIOP_CMD_CREATE(cmd, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpaiop_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_DESTROY, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpaiop_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_RESET, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpaiop_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpaiop_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header
= mc_encode_cmd_header(DPAIOP_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ -+ DPAIOP_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpaiop_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpaiop_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ -+ DPAIOP_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPAIOP_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpaiop_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ -+ DPAIOP_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpaiop_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ -+ DPAIOP_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPAIOP_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpaiop_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ -+ DPAIOP_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpaiop_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ -+ DPAIOP_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPAIOP_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpaiop_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPAIOP_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPAIOP_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpaiop_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, 
-+ token); -+ DPAIOP_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpaiop_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpaiop_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPAIOP_RSP_GET_ATTRIBUTES(cmd, attr); -+ -+ return 0; -+} -+ -+int dpaiop_load(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpaiop_load_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_LOAD, -+ cmd_flags, -+ token); -+ DPAIOP_CMD_LOAD(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpaiop_run(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpaiop_run_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_RUN, -+ cmd_flags, -+ token); -+ DPAIOP_CMD_RUN(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpaiop_get_sl_version(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpaiop_sl_version *version) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_GET_SL_VERSION, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPAIOP_RSP_GET_SL_VERSION(cmd, version); -+ -+ return 0; -+} -+ -+int dpaiop_get_state(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint32_t *state) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_GET_STATE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPAIOP_RSP_GET_STATE(cmd, *state); -+ -+ return 0; -+} -+ -+int dpaiop_set_time_of_day(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint64_t time_of_day) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_SET_TIME_OF_DAY, -+ cmd_flags, -+ token); -+ -+ DPAIOP_CMD_SET_TIME_OF_DAY(cmd, time_of_day); -+ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpaiop_get_time_of_day(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint64_t *time_of_day) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ cmd.header = mc_encode_cmd_header(DPAIOP_CMDID_GET_TIME_OF_DAY, -+ cmd_flags, -+ token); -+ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ DPAIOP_RSP_GET_TIME_OF_DAY(cmd, *time_of_day); -+ -+ return 0; -+} -diff --git a/drivers/net/dpaa2/mc/dpbp.c b/drivers/net/dpaa2/mc/dpbp.c -new file mode 100644 -index 0000000..87899b8 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpbp.c -@@ -0,0 +1,432 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. 
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+#include <fsl_mc_sys.h> -+#include <fsl_mc_cmd.h> -+#include <fsl_dpbp.h> -+#include <fsl_dpbp_cmd.h> -+ -+int dpbp_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpbp_id, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_OPEN, -+ cmd_flags, -+ 0); -+ DPBP_CMD_OPEN(cmd, dpbp_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return err; -+} -+ -+int dpbp_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_CLOSE, cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpbp_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpbp_cfg *cfg, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ (void)(cfg); /* unused */ -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_CREATE, -+ cmd_flags, -+ 0); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpbp_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_DESTROY, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpbp_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_ENABLE, cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpbp_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_DISABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpbp_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_IS_ENABLED, cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPBP_RSP_IS_ENABLED(cmd, *en); -+ -+ return 0; -+} -+ -+int dpbp_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_RESET, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpbp_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpbp_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ -+ DPBP_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpbp_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+
uint8_t irq_index, -+ int *type, -+ struct dpbp_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ -+ DPBP_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPBP_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpbp_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ -+ DPBP_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpbp_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ -+ DPBP_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPBP_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpbp_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ -+ DPBP_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpbp_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ -+ DPBP_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPBP_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpbp_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ -+ DPBP_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPBP_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpbp_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ -+ DPBP_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpbp_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpbp_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare 
command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPBP_RSP_GET_ATTRIBUTES(cmd, attr); -+ -+ return 0; -+} -+ -+int dpbp_set_notifications(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpbp_notification_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_SET_NOTIFICATIONS, -+ cmd_flags, -+ token); -+ -+ DPBP_CMD_SET_NOTIFICATIONS(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpbp_get_notifications(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpbp_notification_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPBP_CMDID_GET_NOTIFICATIONS, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPBP_CMD_GET_NOTIFICATIONS(cmd, cfg); -+ -+ return 0; -+} -diff --git a/drivers/net/dpaa2/mc/dpci.c b/drivers/net/dpaa2/mc/dpci.c -new file mode 100644 -index 0000000..2ec02a1 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpci.c -@@ -0,0 +1,501 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+#include <fsl_mc_sys.h> -+#include <fsl_mc_cmd.h> -+#include <fsl_dpci.h> -+#include <fsl_dpci_cmd.h> -+ -+int dpci_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpci_id, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_OPEN, -+ cmd_flags, -+ 0); -+ DPCI_CMD_OPEN(cmd, dpci_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpci_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_CLOSE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpci_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpci_cfg *cfg, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_CREATE, -+ cmd_flags, -+ 0); -+ DPCI_CMD_CREATE(cmd, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpci_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_DESTROY, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpci_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_ENABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpci_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_DISABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpci_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_IS_ENABLED, cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCI_RSP_IS_ENABLED(cmd, *en); -+ -+ return 0; -+} -+ -+int dpci_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_RESET, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpci_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpci_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ DPCI_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpci_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+
uint8_t irq_index, -+ int *type, -+ struct dpci_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ DPCI_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCI_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpci_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPCI_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpci_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPCI_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCI_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpci_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPCI_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpci_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPCI_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCI_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpci_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPCI_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCI_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpci_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPCI_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpci_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpci_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ 
cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCI_RSP_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpci_get_peer_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpci_peer_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_PEER_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCI_RSP_GET_PEER_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpci_get_link_state(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *up) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_LINK_STATE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCI_RSP_GET_LINK_STATE(cmd, *up); -+ -+ return 0; -+} -+ -+int dpci_set_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, -+ const struct dpci_rx_queue_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_SET_RX_QUEUE, -+ cmd_flags, -+ token); -+ DPCI_CMD_SET_RX_QUEUE(cmd, priority, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpci_get_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, -+ struct dpci_rx_queue_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_RX_QUEUE, -+ cmd_flags, -+ token); -+ DPCI_CMD_GET_RX_QUEUE(cmd, priority); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCI_RSP_GET_RX_QUEUE(cmd, attr); -+ -+ return 0; -+} -+ -+int dpci_get_tx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, -+ struct dpci_tx_queue_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_TX_QUEUE, -+ cmd_flags, -+ token); -+ DPCI_CMD_GET_TX_QUEUE(cmd, priority); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCI_RSP_GET_TX_QUEUE(cmd, attr); -+ -+ return 0; -+} -diff --git a/drivers/net/dpaa2/mc/dpcon.c b/drivers/net/dpaa2/mc/dpcon.c -new file mode 100644 -index 0000000..56dbcf7 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpcon.c -@@ -0,0 +1,400 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. 
-+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#include <fsl_mc_sys.h> -+#include <fsl_mc_cmd.h> -+#include <fsl_dpcon.h> -+#include <fsl_dpcon_cmd.h> -+ -+int dpcon_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpcon_id, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_OPEN, -+ cmd_flags, -+ 0); -+ DPCON_CMD_OPEN(cmd, dpcon_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpcon_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_CLOSE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpcon_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpcon_cfg *cfg, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_CREATE, -+ cmd_flags, -+ 0); -+ DPCON_CMD_CREATE(cmd, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpcon_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_DESTROY, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpcon_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_ENABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpcon_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_DISABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ 
return mc_send_command(mc_io, &cmd); -+} -+ -+int dpcon_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_IS_ENABLED, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCON_RSP_IS_ENABLED(cmd, *en); -+ -+ return 0; -+} -+ -+int dpcon_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_RESET, -+ cmd_flags, token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpcon_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpcon_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ DPCON_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpcon_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpcon_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ DPCON_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCON_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpcon_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPCON_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpcon_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPCON_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCON_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpcon_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPCON_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpcon_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPCON_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = 
mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCON_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpcon_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPCON_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCON_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpcon_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPCON_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpcon_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpcon_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPCON_RSP_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpcon_set_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpcon_notification_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPCON_CMDID_SET_NOTIFICATION, -+ cmd_flags, -+ token); -+ DPCON_CMD_SET_NOTIFICATION(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -diff --git a/drivers/net/dpaa2/mc/dpdbg.c b/drivers/net/dpaa2/mc/dpdbg.c -new file mode 100644 -index 0000000..6f2a08d ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpdbg.c -@@ -0,0 +1,547 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#include -+#include -+#include -+#include -+ -+int dpdbg_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpdbg_id, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_OPEN, -+ cmd_flags, -+ 0); -+ DPDBG_CMD_OPEN(cmd, dpdbg_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return err; -+} -+ -+int dpdbg_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_CLOSE, cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdbg_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdbg_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDBG_RSP_GET_ATTRIBUTES(cmd, attr); -+ -+ return 0; -+} -+ -+int dpdbg_get_dpni_info(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpni_id, -+ struct dpdbg_dpni_info *info) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_GET_DPNI_INFO, -+ cmd_flags, -+ token); -+ DPDBG_CMD_GET_DPNI_INFO(cmd, dpni_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDBG_RSP_GET_DPNI_INFO(cmd, info); -+ -+ return 0; -+} -+ -+int dpdbg_get_dpni_priv_tx_conf_fqid(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpni_id, -+ uint8_t sender_id, -+ uint32_t *fqid) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header( -+ DPDBG_CMDID_GET_DPNI_PRIV_TX_CONF_FQID, -+ cmd_flags, -+ token); -+ DPDBG_CMD_GET_DPNI_PRIV_TX_CONF_FQID(cmd, dpni_id, sender_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDBG_RSP_GET_DPNI_PRIV_TX_CONF_FQID(cmd, *fqid); -+ -+ return 0; -+} -+ -+int dpdbg_get_dpcon_info(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpcon_id, -+ struct dpdbg_dpcon_info *info) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_GET_DPCON_INFO, -+ cmd_flags, -+ token); -+ DPDBG_CMD_GET_DPCON_INFO(cmd, dpcon_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response 
parameters */
-+ DPDBG_RSP_GET_DPCON_INFO(cmd, info);
-+
-+ return 0;
-+}
-+
-+int dpdbg_get_dpbp_info(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpbp_id,
-+ struct dpdbg_dpbp_info *info)
-+{
-+ struct mc_command cmd = { 0 };
-+ int err;
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_GET_DPBP_INFO,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_GET_DPBP_INFO(cmd, dpbp_id);
-+
-+ /* send command to mc*/
-+ err = mc_send_command(mc_io, &cmd);
-+ if (err)
-+ return err;
-+
-+ /* retrieve response parameters */
-+ DPDBG_RSP_GET_DPBP_INFO(cmd, info);
-+
-+ return 0;
-+}
-+
-+int dpdbg_get_dpci_fqid(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpci_id,
-+ uint8_t priority,
-+ uint32_t *fqid)
-+{
-+ struct mc_command cmd = { 0 };
-+ int err;
-+
-+ /* prepare command; the original encoded DPDBG_CMDID_GET_DPBP_INFO here,
-+ * apparently a copy-paste slip -- the DPCI FQID command id is what the
-+ * request/response macros below expect
-+ */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_GET_DPCI_FQID,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_GET_DPCI_FQID(cmd, dpci_id, priority);
-+
-+ /* send command to mc*/
-+ err = mc_send_command(mc_io, &cmd);
-+ if (err)
-+ return err;
-+
-+ /* retrieve response parameters */
-+ DPDBG_RSP_GET_DPCI_FQID(cmd, *fqid);
-+
-+ return 0;
-+}
-+
-+int dpdbg_prepare_ctlu_global_rule(struct dpkg_profile_cfg *dpkg_rule,
-+ uint8_t *rule_buf)
-+{
-+ int i, j;
-+ int offset = 0;
-+ int param = 1;
-+ uint64_t *params = (uint64_t *)rule_buf;
-+
-+ if (!rule_buf || !dpkg_rule)
-+ return -EINVAL;
-+
-+ /* reject out-of-range extract counts before encoding anything
-+ * (the original performed this check only after the first use)
-+ */
-+ if (dpkg_rule->num_extracts >= DPKG_MAX_NUM_OF_EXTRACTS)
-+ return -EINVAL;
-+
-+ params[0] |= mc_enc(0, 8, dpkg_rule->num_extracts);
-+ params[0] = cpu_to_le64(params[0]);
-+
-+ for (i = 0; i < dpkg_rule->num_extracts; i++) {
-+ switch (dpkg_rule->extracts[i].type) {
-+ case DPKG_EXTRACT_FROM_HDR:
-+ params[param] |= mc_enc(0, 8,
-+ dpkg_rule->extracts[i].extract.from_hdr.prot);
-+ params[param] |= mc_enc(8, 4,
-+ dpkg_rule->extracts[i].extract.from_hdr.type);
-+ params[param] |= mc_enc(16, 8,
-+ dpkg_rule->extracts[i].extract.from_hdr.size);
-+ params[param] |= mc_enc(24, 8,
-+ dpkg_rule->extracts[i].extract.from_hdr.offset);
-+ params[param] |= mc_enc(32, 32,
-+ dpkg_rule->extracts[i].extract.from_hdr.field);
-+ params[param] = cpu_to_le64(params[param]);
-+ param++;
-+ params[param] |= mc_enc(0, 8,
-+ dpkg_rule->extracts[i].extract.
-+ from_hdr.hdr_index);
-+ break;
-+ case DPKG_EXTRACT_FROM_DATA:
-+ params[param] |= mc_enc(16, 8,
-+ dpkg_rule->extracts[i].extract.from_data.size);
-+ params[param] |= mc_enc(24, 8,
-+ dpkg_rule->extracts[i].extract.
-+ from_data.offset);
-+ params[param] = cpu_to_le64(params[param]);
-+ param++;
-+ break;
-+ case DPKG_EXTRACT_FROM_PARSE:
-+ params[param] |= mc_enc(16, 8,
-+ dpkg_rule->extracts[i].extract.from_parse.size);
-+ params[param] |= mc_enc(24, 8,
-+ dpkg_rule->extracts[i].extract.
-+ from_parse.offset);
-+ params[param] = cpu_to_le64(params[param]);
-+ param++;
-+ break;
-+ default:
-+ return -EINVAL;
-+ }
-+ params[param] |= mc_enc(
-+ 24, 8, dpkg_rule->extracts[i].num_of_byte_masks);
-+ params[param] |= mc_enc(32, 4, dpkg_rule->extracts[i].type);
-+ params[param] = cpu_to_le64(params[param]);
-+ param++;
-+ for (offset = 0, j = 0;
-+ j < DPKG_NUM_OF_MASKS;
-+ offset += 16, j++) {
-+ params[param] |= mc_enc(
-+ (offset), 8,
-+ dpkg_rule->extracts[i].masks[j].mask);
-+ params[param] |= mc_enc(
-+ (offset + 8), 8,
-+ dpkg_rule->extracts[i].masks[j].offset);
-+ }
-+ params[param] = cpu_to_le64(params[param]);
-+ param++;
-+ }
-+ return 0;
-+}
-+
-+int dpdbg_set_ctlu_global_marking(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint8_t marking,
-+ struct dpdbg_rule_cfg *cfg)
-+{
-+ struct mc_command cmd = { 0 };
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_SET_CTLU_GLOBAL_MARKING,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_SET_CTLU_GLOBAL_MARKING(cmd, marking, cfg);
-+
-+ /* send command to mc*/
-+ return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpdbg_set_dpni_rx_marking(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpni_id,
-+ struct dpdbg_dpni_rx_marking_cfg *cfg)
-+{
-+ struct mc_command cmd = { 0 };
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_SET_DPNI_RX_MARKING,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_SET_DPNI_RX_MARKING(cmd, dpni_id, cfg);
-+
-+ /* send command to mc*/
-+ return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpdbg_set_dpni_tx_conf_marking(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpni_id,
-+ uint16_t sender_id,
-+ uint8_t marking)
-+{
-+ struct mc_command cmd = { 0 };
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_SET_DPNI_TX_CONF_MARKING,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_SET_DPNI_TX_CONF_MARKING(cmd, dpni_id, sender_id, marking);
-+
-+ /* send command to mc*/
-+ return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpdbg_set_dpio_marking(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpio_id,
-+ uint8_t marking)
-+{
-+ struct mc_command cmd = { 0 };
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_SET_DPIO_MARKING,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_SET_DPIO_MARKING(cmd, dpio_id, marking);
-+
-+ /* send command to mc*/
-+ return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpdbg_set_ctlu_global_trace(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ struct dpdbg_rule_cfg *cfg)
-+{
-+ struct mc_command cmd = { 0 };
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_SET_CTLU_GLOBAL_TRACE,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_SET_CTLU_GLOBAL_TRACE(cmd, cfg);
-+
-+ /* send command to mc*/
-+ return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpdbg_set_dpio_trace(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpio_id,
-+ struct dpdbg_dpio_trace_cfg
-+ trace_point[DPDBG_NUM_OF_DPIO_TRACE_POINTS])
-+{
-+ struct mc_command cmd = { 0 };
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_SET_DPIO_TRACE,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_SET_DPIO_TRACE(cmd, dpio_id, trace_point);
-+
-+ /* send command to mc*/
-+ return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpdbg_set_dpni_rx_trace(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpni_id,
-+ struct dpdbg_dpni_rx_trace_cfg *trace_cfg)
-+{
-+ struct mc_command cmd = { 0 };
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_SET_DPNI_RX_TRACE,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_SET_DPNI_RX_TRACE(cmd, dpni_id, trace_cfg);
-+
-+ /* send command to mc*/
-+ return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpdbg_set_dpni_tx_trace(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpni_id,
-+ uint16_t sender_id,
-+ struct dpdbg_dpni_tx_trace_cfg *trace_cfg)
-+{
-+ struct mc_command cmd = { 0 };
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_SET_DPNI_TX_TRACE,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_SET_DPNI_TX_TRACE(cmd, dpni_id, sender_id, trace_cfg);
-+
-+ /* send command to mc*/
-+ return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpdbg_set_dpcon_trace(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpcon_id,
-+ struct dpdbg_dpcon_trace_cfg
-+ trace_point[DPDBG_NUM_OF_DPCON_TRACE_POINTS])
-+{
-+ struct mc_command cmd = { 0 };
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_SET_DPCON_TRACE,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_SET_DPCON_TRACE(cmd, dpcon_id, trace_point);
-+
-+ /* send command to mc*/
-+ return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpdbg_set_dpseci_trace(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpseci_id,
-+ struct dpdbg_dpseci_trace_cfg
-+ trace_point[DPDBG_NUM_OF_DPSECI_TRACE_POINTS])
-+{
-+ struct mc_command cmd = { 0 };
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_SET_DPSECI_TRACE,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_SET_DPSECI_TRACE(cmd, dpseci_id, trace_point);
-+
-+ /* send command to mc*/
-+ return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpdbg_get_dpmac_counter(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpmac_id,
-+ enum dpmac_counter counter_type,
-+ uint64_t *counter)
-+{
-+ struct mc_command cmd = { 0 };
-+ int err;
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_GET_DPMAC_COUNTER,
-+ cmd_flags,
-+ token);
-+ DPDBG_CMD_GET_DPMAC_COUNTER(cmd, dpmac_id, counter_type);
-+
-+ /* send command to mc*/
-+ err = mc_send_command(mc_io, &cmd);
-+ if (err)
-+ return err;
-+
-+ /* retrieve response parameters */
-+ DPDBG_RSP_GET_DPMAC_COUNTER(cmd, *counter);
-+
-+ return 0;
-+}
-+
-+int dpdbg_get_dpni_counter(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ int dpni_id,
-+ enum dpni_counter counter_type,
-+ uint64_t *counter)
-+{
-+ struct mc_command cmd = { 0 };
-+ int err;
-+
-+ /* prepare command */
-+ cmd.header = mc_encode_cmd_header(DPDBG_CMDID_GET_DPNI_COUNTER,
-+ cmd_flags,
-+ token);
-+ /* the original filled the request with DPDBG_CMD_GET_DPMAC_COUNTER,
-+ * apparently a copy-paste slip; the DPNI variant (presumably defined
-+ * next to DPDBG_RSP_GET_DPNI_COUNTER) is what this function wants
-+ */
-+ DPDBG_CMD_GET_DPNI_COUNTER(cmd, dpni_id, counter_type);
-+
-+ /* send command to mc*/
-+ err = mc_send_command(mc_io, &cmd);
-+ if (err)
-+ return err;
-+
-+ /* retrieve response parameters */
-+ DPDBG_RSP_GET_DPNI_COUNTER(cmd, *counter);
-+
-+ return 0;
-+}
-diff --git a/drivers/net/dpaa2/mc/dpdcei.c b/drivers/net/dpaa2/mc/dpdcei.c
-new file mode 100644
-index 0000000..a5c4c47
---- /dev/null
-+++ b/drivers/net/dpaa2/mc/dpdcei.c
-@@ -0,0 +1,449 @@
-+/* Copyright 2013-2015 Freescale Semiconductor Inc.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions are met:
-+ * * Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#include -+#include -+#include -+#include -+ -+int dpdcei_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpdcei_id, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_OPEN, -+ cmd_flags, -+ 0); -+ DPDCEI_CMD_OPEN(cmd, dpdcei_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpdcei_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_CLOSE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdcei_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpdcei_cfg *cfg, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_CREATE, -+ cmd_flags, -+ 0); -+ DPDCEI_CMD_CREATE(cmd, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpdcei_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_DESTROY, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdcei_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_ENABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdcei_disable(struct 
fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_DISABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdcei_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_IS_ENABLED, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDCEI_RSP_IS_ENABLED(cmd, *en); -+ -+ return 0; -+} -+ -+int dpdcei_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_RESET, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdcei_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpdcei_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ DPDCEI_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDCEI_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpdcei_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpdcei_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ DPDCEI_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdcei_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPDCEI_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDCEI_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpdcei_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPDCEI_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdcei_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPDCEI_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDCEI_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ 
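
Every wrapper in these mc/*.c files is the same three-step exchange: encode a
64-bit command header plus any request fields, push it through the MC portal
with mc_send_command(), then unpack whatever response words the firmware wrote
back. A minimal caller is sketched below; it is illustrative only -- the zero
cmd_flags value and the already-initialized mc_io portal are assumptions, not
something this patch provides.

/* Sketch: open a DPDCEI object, toggle its IRQ 0, and close it again. */
static int dpdcei_irq_sketch(struct fsl_mc_io *mc_io, int dpdcei_id)
{
        uint16_t token;
        uint8_t enabled = 0;
        int err;

        /* open yields the token that authorizes all further commands */
        err = dpdcei_open(mc_io, 0, dpdcei_id, &token);
        if (err)
                return err;

        /* enable interrupt index 0, then read the setting back */
        err = dpdcei_set_irq_enable(mc_io, 0, token, 0, 1);
        if (!err)
                err = dpdcei_get_irq_enable(mc_io, 0, token, 0, &enabled);

        /* the token pins the object open; always release it */
        dpdcei_close(mc_io, 0, token);
        return err;
}
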
-+int dpdcei_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPDCEI_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdcei_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPDCEI_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDCEI_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpdcei_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPDCEI_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdcei_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdcei_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDCEI_RSP_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpdcei_set_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpdcei_rx_queue_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_SET_RX_QUEUE, -+ cmd_flags, -+ token); -+ DPDCEI_CMD_SET_RX_QUEUE(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdcei_get_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdcei_rx_queue_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_GET_RX_QUEUE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDCEI_RSP_GET_RX_QUEUE(cmd, attr); -+ -+ return 0; -+} -+ -+int dpdcei_get_tx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdcei_tx_queue_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDCEI_CMDID_GET_TX_QUEUE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDCEI_RSP_GET_TX_QUEUE(cmd, attr); -+ -+ return 0; -+} -diff --git a/drivers/net/dpaa2/mc/dpdmai.c b/drivers/net/dpaa2/mc/dpdmai.c -new file mode 100644 -index 0000000..154d2c6 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpdmai.c -@@ -0,0 +1,452 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. 
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+#include -+#include -+#include -+#include -+ -+int dpdmai_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpdmai_id, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_OPEN, -+ cmd_flags, -+ 0); -+ DPDMAI_CMD_OPEN(cmd, dpdmai_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpdmai_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_CLOSE, -+ cmd_flags, token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmai_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpdmai_cfg *cfg, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_CREATE, -+ cmd_flags, -+ 0); -+ DPDMAI_CMD_CREATE(cmd, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpdmai_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_DESTROY, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmai_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_ENABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmai_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_DISABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmai_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_IS_ENABLED, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMAI_RSP_IS_ENABLED(cmd, *en); -+ -+ return 0; -+} -+ -+int dpdmai_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_RESET, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmai_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpdmai_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ DPDMAI_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ 
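/* (annotation, not part of the original patch) mc_send_command() is
 * synchronous: it writes the command words to the MC portal and polls for
 * completion, so once it returns zero the firmware's response words are
 * already in cmd for the DPDMAI_RSP_* decoder just below.
 */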
-+ /* retrieve response parameters */ -+ DPDMAI_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpdmai_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpdmai_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ DPDMAI_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmai_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPDMAI_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMAI_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpdmai_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPDMAI_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmai_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPDMAI_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMAI_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpdmai_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPDMAI_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmai_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPDMAI_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMAI_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpdmai_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPDMAI_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmai_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct 
dpdmai_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMAI_RSP_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, -+ const struct dpdmai_rx_queue_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_SET_RX_QUEUE, -+ cmd_flags, -+ token); -+ DPDMAI_CMD_SET_RX_QUEUE(cmd, priority, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, struct dpdmai_rx_queue_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_RX_QUEUE, -+ cmd_flags, -+ token); -+ DPDMAI_CMD_GET_RX_QUEUE(cmd, priority); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMAI_RSP_GET_RX_QUEUE(cmd, attr); -+ -+ return 0; -+} -+ -+int dpdmai_get_tx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, -+ struct dpdmai_tx_queue_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_TX_QUEUE, -+ cmd_flags, -+ token); -+ DPDMAI_CMD_GET_TX_QUEUE(cmd, priority); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMAI_RSP_GET_TX_QUEUE(cmd, attr); -+ -+ return 0; -+} -diff --git a/drivers/net/dpaa2/mc/dpdmux.c b/drivers/net/dpaa2/mc/dpdmux.c -new file mode 100644 -index 0000000..dc07608 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpdmux.c -@@ -0,0 +1,567 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#include -+#include -+#include -+#include -+ -+int dpdmux_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpdmux_id, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_OPEN, -+ cmd_flags, -+ 0); -+ DPDMUX_CMD_OPEN(cmd, dpdmux_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpdmux_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_CLOSE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpdmux_cfg *cfg, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_CREATE, -+ cmd_flags, -+ 0); -+ DPDMUX_CMD_CREATE(cmd, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpdmux_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_DESTROY, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_ENABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_DISABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_IS_ENABLED, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMUX_RSP_IS_ENABLED(cmd, *en); -+ -+ return 0; -+} -+ -+int dpdmux_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_RESET, -+ 
cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpdmux_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpdmux_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMUX_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpdmux_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMUX_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpdmux_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMUX_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpdmux_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMUX_RSP_GET_IRQ_STATUS(cmd, *status); -+ 
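/* (annotation, not part of the original patch) the DPDMUX_CMD_* and
 * DPDMUX_RSP_* pairs used throughout this file are shift-and-mask helpers
 * over cmd.params[]: the CMD variant packs request fields (here irq_index)
 * into the outgoing words, and the RSP variant unpacks the reply (here the
 * interrupt status word) after mc_send_command() succeeds.
 */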
-+ return 0; -+} -+ -+int dpdmux_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdmux_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMUX_RSP_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpdmux_ul_set_max_frame_length(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t max_frame_length) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_UL_SET_MAX_FRAME_LENGTH, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_UL_SET_MAX_FRAME_LENGTH(cmd, max_frame_length); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_ul_reset_counters(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_UL_RESET_COUNTERS, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_if_set_accepted_frames(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpdmux_accepted_frames *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_IF_SET_ACCEPTED_FRAMES, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_IF_SET_ACCEPTED_FRAMES(cmd, if_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_if_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpdmux_if_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_IF_GET_ATTR, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_IF_GET_ATTR(cmd, if_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMUX_RSP_IF_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpdmux_if_remove_l2_rule(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpdmux_l2_rule *rule) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_IF_REMOVE_L2_RULE, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_IF_REMOVE_L2_RULE(cmd, if_id, rule); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_if_add_l2_rule(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpdmux_l2_rule *rule) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_IF_ADD_L2_RULE, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_IF_ADD_L2_RULE(cmd, if_id, rule); -+ -+ /* send command to mc*/ 
-+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_if_get_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ enum dpdmux_counter_type counter_type, -+ uint64_t *counter) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_IF_GET_COUNTER, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_IF_GET_COUNTER(cmd, if_id, counter_type); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMUX_RSP_IF_GET_COUNTER(cmd, *counter); -+ -+ return 0; -+} -+ -+int dpdmux_if_set_link_cfg(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpdmux_link_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_IF_SET_LINK_CFG, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_IF_SET_LINK_CFG(cmd, if_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpdmux_if_get_link_state(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpdmux_link_state *state) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPDMUX_CMDID_IF_GET_LINK_STATE, -+ cmd_flags, -+ token); -+ DPDMUX_CMD_IF_GET_LINK_STATE(cmd, if_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPDMUX_RSP_IF_GET_LINK_STATE(cmd, state); -+ -+ return 0; -+} -diff --git a/drivers/net/dpaa2/mc/dpio.c b/drivers/net/dpaa2/mc/dpio.c -new file mode 100644 -index 0000000..f511e29 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpio.c -@@ -0,0 +1,468 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#include -+#include -+#include -+#include -+ -+int dpio_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpio_id, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_OPEN, -+ cmd_flags, -+ 0); -+ DPIO_CMD_OPEN(cmd, dpio_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpio_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_CLOSE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpio_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpio_cfg *cfg, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_CREATE, -+ cmd_flags, -+ 0); -+ DPIO_CMD_CREATE(cmd, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpio_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_DESTROY, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpio_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_ENABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpio_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_DISABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpio_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_IS_ENABLED, cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPIO_RSP_IS_ENABLED(cmd, *en); -+ -+ return 0; -+} -+ -+int dpio_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_RESET, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ 
-+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpio_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpio_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ DPIO_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpio_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpio_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ DPIO_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPIO_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpio_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPIO_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpio_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPIO_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPIO_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpio_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPIO_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpio_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPIO_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPIO_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpio_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPIO_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPIO_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpio_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t 
token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPIO_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpio_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpio_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPIO_RSP_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpio_set_stashing_destination(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t sdest) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_SET_STASHING_DEST, -+ cmd_flags, -+ token); -+ DPIO_CMD_SET_STASHING_DEST(cmd, sdest); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpio_get_stashing_destination(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t *sdest) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_GET_STASHING_DEST, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPIO_RSP_GET_STASHING_DEST(cmd, *sdest); -+ -+ return 0; -+} -+ -+int dpio_add_static_dequeue_channel(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpcon_id, -+ uint8_t *channel_index) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPIO_CMDID_ADD_STATIC_DEQUEUE_CHANNEL, -+ cmd_flags, -+ token); -+ DPIO_CMD_ADD_STATIC_DEQUEUE_CHANNEL(cmd, dpcon_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPIO_RSP_ADD_STATIC_DEQUEUE_CHANNEL(cmd, *channel_index); -+ -+ return 0; -+} -+ -+int dpio_remove_static_dequeue_channel(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpcon_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header( -+ DPIO_CMDID_REMOVE_STATIC_DEQUEUE_CHANNEL, -+ cmd_flags, -+ token); -+ DPIO_CMD_REMOVE_STATIC_DEQUEUE_CHANNEL(cmd, dpcon_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -diff --git a/drivers/net/dpaa2/mc/dpmac.c b/drivers/net/dpaa2/mc/dpmac.c -new file mode 100644 -index 0000000..f31d949 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpmac.c -@@ -0,0 +1,422 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. 
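Every wrapper in these files follows one round trip: encode a 64-bit command header, pack the arguments into the command with the DPIO_CMD_*/DPIO_RSP_* macros, post it through mc_send_command(), and decode any response words. A minimal caller sketch for the dpio IRQ helpers above — the MC portal (struct fsl_mc_io), the object token from a prior dpio_open(), and the CMD_PRI_LOW priority flag from the MC command headers are all assumed, not part of this hunk:

static int dpio_irq_bringup_sketch(struct fsl_mc_io *mc_io, uint16_t token)
{
	struct dpio_attr attr;
	int err;

	/* unmask event bit 0 only, then enable the interrupt line */
	err = dpio_set_irq_mask(mc_io, CMD_PRI_LOW, token, 0, 0x1);
	if (err)
		return err;

	err = dpio_set_irq_enable(mc_io, CMD_PRI_LOW, token, 0, 1);
	if (err)
		return err;

	/* read the object attributes back over the same portal */
	return dpio_get_attributes(mc_io, CMD_PRI_LOW, token, &attr);
}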
-+ * * Neither the name of the above-listed copyright holders nor the
-+ * names of any contributors may be used to endorse or promote products
-+ * derived from this software without specific prior written permission.
-+ *
-+ *
-+ * ALTERNATIVELY, this software may be distributed under the terms of the
-+ * GNU General Public License ("GPL") as published by the Free Software
-+ * Foundation, either version 2 of that License or (at your option) any
-+ * later version.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
-+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-+ * POSSIBILITY OF SUCH DAMAGE.
-+ */
-+#include <fsl_mc_sys.h>
-+#include <fsl_mc_cmd.h>
-+#include <fsl_dpmac.h>
-+#include <fsl_dpmac_cmd.h>
-+
-+int dpmac_open(struct fsl_mc_io *mc_io,
-+	       uint32_t cmd_flags,
-+	       int dpmac_id,
-+	       uint16_t *token)
-+{
-+	struct mc_command cmd = { 0 };
-+	int err;
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMAC_CMDID_OPEN,
-+					  cmd_flags,
-+					  0);
-+	DPMAC_CMD_OPEN(cmd, dpmac_id);
-+
-+	/* send command to mc*/
-+	err = mc_send_command(mc_io, &cmd);
-+	if (err)
-+		return err;
-+
-+	/* retrieve response parameters */
-+	*token = MC_CMD_HDR_READ_TOKEN(cmd.header);
-+
-+	return err;
-+}
-+
-+int dpmac_close(struct fsl_mc_io *mc_io,
-+		uint32_t cmd_flags,
-+		uint16_t token)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMAC_CMDID_CLOSE, cmd_flags,
-+					  token);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpmac_create(struct fsl_mc_io *mc_io,
-+		 uint32_t cmd_flags,
-+		 const struct dpmac_cfg *cfg,
-+		 uint16_t *token)
-+{
-+	struct mc_command cmd = { 0 };
-+	int err;
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMAC_CMDID_CREATE,
-+					  cmd_flags,
-+					  0);
-+	DPMAC_CMD_CREATE(cmd, cfg);
-+
-+	/* send command to mc*/
-+	err = mc_send_command(mc_io, &cmd);
-+	if (err)
-+		return err;
-+
-+	/* retrieve response parameters */
-+	*token = MC_CMD_HDR_READ_TOKEN(cmd.header);
-+
-+	return 0;
-+}
-+
-+int dpmac_destroy(struct fsl_mc_io *mc_io,
-+		  uint32_t cmd_flags,
-+		  uint16_t token)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMAC_CMDID_DESTROY,
-+					  cmd_flags,
-+					  token);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpmac_set_irq(struct fsl_mc_io *mc_io,
-+		  uint32_t cmd_flags,
-+		  uint16_t token,
-+		  uint8_t irq_index,
-+		  struct dpmac_irq_cfg *irq_cfg)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMAC_CMDID_SET_IRQ,
-+					  cmd_flags,
-+					  token);
-+	DPMAC_CMD_SET_IRQ(cmd, irq_index, irq_cfg);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpmac_get_irq(struct fsl_mc_io *mc_io,
-+		  uint32_t cmd_flags,
-+		  uint16_t token,
-+		  uint8_t irq_index,
-+		  int *type,
-+		  struct dpmac_irq_cfg *irq_cfg)
-+{
-+
struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ DPMAC_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPMAC_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpmac_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPMAC_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpmac_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPMAC_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPMAC_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpmac_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPMAC_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpmac_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPMAC_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPMAC_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpmac_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPMAC_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPMAC_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpmac_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPMAC_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpmac_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmac_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_ATTR, 
-+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPMAC_RSP_GET_ATTRIBUTES(cmd, attr); -+ -+ return 0; -+} -+ -+int dpmac_mdio_read(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmac_mdio_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_MDIO_READ, -+ cmd_flags, -+ token); -+ DPMAC_CMD_MDIO_READ(cmd, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPMAC_RSP_MDIO_READ(cmd, cfg->data); -+ -+ return 0; -+} -+ -+int dpmac_mdio_write(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmac_mdio_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_MDIO_WRITE, -+ cmd_flags, -+ token); -+ DPMAC_CMD_MDIO_WRITE(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpmac_get_link_cfg(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmac_link_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err = 0; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_LINK_CFG, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ DPMAC_RSP_GET_LINK_CFG(cmd, cfg); -+ -+ return 0; -+} -+ -+int dpmac_set_link_state(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmac_link_state *link_state) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_SET_LINK_STATE, -+ cmd_flags, -+ token); -+ DPMAC_CMD_SET_LINK_STATE(cmd, link_state); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpmac_get_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ enum dpmac_counter type, -+ uint64_t *counter) -+{ -+ struct mc_command cmd = { 0 }; -+ int err = 0; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_COUNTER, -+ cmd_flags, -+ token); -+ DPMAC_CMD_GET_COUNTER(cmd, type); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ DPMAC_RSP_GET_COUNTER(cmd, *counter); -+ -+ return 0; -+} -diff --git a/drivers/net/dpaa2/mc/dpmcp.c b/drivers/net/dpaa2/mc/dpmcp.c -new file mode 100644 -index 0000000..dfd84b8 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpmcp.c -@@ -0,0 +1,312 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. 
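The dpmac_get_link_cfg()/dpmac_get_counter() pair just above is what a PMD's link-status and statistics paths would sit on. A hedged sketch of such a caller; the portal, the token from dpmac_open(), CMD_PRI_LOW, and the DPMAC_CNT_ING_FRAME enum value are assumptions drawn from the fsl_dpmac.h API, not part of this hunk:

static int dpmac_link_and_stats_sketch(struct fsl_mc_io *mc_io, uint16_t token)
{
	struct dpmac_link_cfg link_cfg;
	uint64_t in_frames = 0;
	int err;

	/* fetch the link configuration last requested for this MAC */
	err = dpmac_get_link_cfg(mc_io, CMD_PRI_LOW, token, &link_cfg);
	if (err)
		return err;

	/* read one hardware counter; the counter id is an assumed enum member */
	return dpmac_get_counter(mc_io, CMD_PRI_LOW, token,
				 DPMAC_CNT_ING_FRAME, &in_frames);
}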
-+ *
-+ *
-+ * ALTERNATIVELY, this software may be distributed under the terms of the
-+ * GNU General Public License ("GPL") as published by the Free Software
-+ * Foundation, either version 2 of that License or (at your option) any
-+ * later version.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
-+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-+ * POSSIBILITY OF SUCH DAMAGE.
-+ */
-+#include <fsl_mc_sys.h>
-+#include <fsl_mc_cmd.h>
-+#include <fsl_dpmcp.h>
-+#include <fsl_dpmcp_cmd.h>
-+
-+int dpmcp_open(struct fsl_mc_io *mc_io,
-+	       uint32_t cmd_flags,
-+	       int dpmcp_id,
-+	       uint16_t *token)
-+{
-+	struct mc_command cmd = { 0 };
-+	int err;
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMCP_CMDID_OPEN,
-+					  cmd_flags,
-+					  0);
-+	DPMCP_CMD_OPEN(cmd, dpmcp_id);
-+
-+	/* send command to mc*/
-+	err = mc_send_command(mc_io, &cmd);
-+	if (err)
-+		return err;
-+
-+	/* retrieve response parameters */
-+	*token = MC_CMD_HDR_READ_TOKEN(cmd.header);
-+
-+	return err;
-+}
-+
-+int dpmcp_close(struct fsl_mc_io *mc_io,
-+		uint32_t cmd_flags,
-+		uint16_t token)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMCP_CMDID_CLOSE, cmd_flags,
-+					  token);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpmcp_create(struct fsl_mc_io *mc_io,
-+		 uint32_t cmd_flags,
-+		 const struct dpmcp_cfg *cfg,
-+		 uint16_t *token)
-+{
-+	struct mc_command cmd = { 0 };
-+	int err;
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMCP_CMDID_CREATE,
-+					  cmd_flags,
-+					  0);
-+	DPMCP_CMD_CREATE(cmd, cfg);
-+
-+	/* send command to mc*/
-+	err = mc_send_command(mc_io, &cmd);
-+	if (err)
-+		return err;
-+
-+	/* retrieve response parameters */
-+	*token = MC_CMD_HDR_READ_TOKEN(cmd.header);
-+
-+	return 0;
-+}
-+
-+int dpmcp_destroy(struct fsl_mc_io *mc_io,
-+		  uint32_t cmd_flags,
-+		  uint16_t token)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMCP_CMDID_DESTROY,
-+					  cmd_flags,
-+					  token);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpmcp_reset(struct fsl_mc_io *mc_io,
-+		uint32_t cmd_flags,
-+		uint16_t token)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMCP_CMDID_RESET,
-+					  cmd_flags,
-+					  token);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpmcp_set_irq(struct fsl_mc_io *mc_io,
-+		  uint32_t cmd_flags,
-+		  uint16_t token,
-+		  uint8_t irq_index,
-+		  struct dpmcp_irq_cfg *irq_cfg)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMCP_CMDID_SET_IRQ,
-+					  cmd_flags,
-+					  token);
-+	DPMCP_CMD_SET_IRQ(cmd, irq_index, irq_cfg);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpmcp_get_irq(struct fsl_mc_io *mc_io,
-+		  uint32_t cmd_flags,
-+
uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpmcp_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMCP_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ DPMCP_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPMCP_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpmcp_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMCP_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPMCP_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpmcp_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMCP_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPMCP_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPMCP_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpmcp_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMCP_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPMCP_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpmcp_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMCP_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPMCP_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPMCP_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpmcp_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMCP_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPMCP_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPMCP_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpmcp_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmcp_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPMCP_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPMCP_RSP_GET_ATTRIBUTES(cmd, attr); -+ -+ return 0; -+} -diff --git a/drivers/net/dpaa2/mc/dpmng.c b/drivers/net/dpaa2/mc/dpmng.c -new file mode 100644 -index 
0000000..cac5ba5
---- /dev/null
-+++ b/drivers/net/dpaa2/mc/dpmng.c
-@@ -0,0 +1,58 @@
-+/* Copyright 2013-2015 Freescale Semiconductor Inc.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions are met:
-+ * * Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * * Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * * Neither the name of the above-listed copyright holders nor the
-+ * names of any contributors may be used to endorse or promote products
-+ * derived from this software without specific prior written permission.
-+ *
-+ *
-+ * ALTERNATIVELY, this software may be distributed under the terms of the
-+ * GNU General Public License ("GPL") as published by the Free Software
-+ * Foundation, either version 2 of that License or (at your option) any
-+ * later version.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
-+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-+ * POSSIBILITY OF SUCH DAMAGE.
-+ */
-+#include <fsl_mc_sys.h>
-+#include <fsl_mc_cmd.h>
-+#include <fsl_dpmng.h>
-+#include <fsl_dpmng_cmd.h>
-+
-+int mc_get_version(struct fsl_mc_io *mc_io,
-+		   uint32_t cmd_flags,
-+		   struct mc_version *mc_ver_info)
-+{
-+	struct mc_command cmd = { 0 };
-+	int err;
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPMNG_CMDID_GET_VERSION,
-+					  cmd_flags,
-+					  0);
-+
-+	/* send command to mc*/
-+	err = mc_send_command(mc_io, &cmd);
-+	if (err)
-+		return err;
-+
-+	/* retrieve response parameters */
-+	DPMNG_RSP_GET_VERSION(cmd, mc_ver_info);
-+
-+	return 0;
-+}
-diff --git a/drivers/net/dpaa2/mc/dpni.c b/drivers/net/dpaa2/mc/dpni.c
-new file mode 100644
-index 0000000..cdd2f37
---- /dev/null
-+++ b/drivers/net/dpaa2/mc/dpni.c
-@@ -0,0 +1,1907 @@
-+/* Copyright 2013-2015 Freescale Semiconductor Inc.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions are met:
-+ * * Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * * Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * * Neither the name of the above-listed copyright holders nor the
-+ * names of any contributors may be used to endorse or promote products
-+ * derived from this software without specific prior written permission.
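mc_get_version() above is typically the first command a driver issues: it doubles as a portal sanity check and gives the firmware level to log. A small sketch, assuming the major/minor/revision field names of struct mc_version from fsl_dpmng.h and the CMD_PRI_LOW flag:

#include <stdio.h>

static void mc_version_sketch(struct fsl_mc_io *mc_io)
{
	struct mc_version ver = { 0 };

	/* field names assumed from the fsl_dpmng.h definition */
	if (mc_get_version(mc_io, CMD_PRI_LOW, &ver) == 0)
		printf("MC firmware %u.%u.%u\n",
		       ver.major, ver.minor, ver.revision);
}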
-+ *
-+ *
-+ * ALTERNATIVELY, this software may be distributed under the terms of the
-+ * GNU General Public License ("GPL") as published by the Free Software
-+ * Foundation, either version 2 of that License or (at your option) any
-+ * later version.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
-+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-+ * POSSIBILITY OF SUCH DAMAGE.
-+ */
-+#include <fsl_mc_sys.h>
-+#include <fsl_mc_cmd.h>
-+#include <fsl_dpni.h>
-+#include <fsl_dpni_cmd.h>
-+
-+int dpni_prepare_key_cfg(const struct dpkg_profile_cfg *cfg,
-+			 uint8_t *key_cfg_buf)
-+{
-+	int i, j;
-+	int offset = 0;
-+	int param = 1;
-+	uint64_t *params = (uint64_t *)key_cfg_buf;
-+
-+	if (!key_cfg_buf || !cfg)
-+		return -EINVAL;
-+
-+	params[0] |= mc_enc(0, 8, cfg->num_extracts);
-+	params[0] = cpu_to_le64(params[0]);
-+
-+	if (cfg->num_extracts >= DPKG_MAX_NUM_OF_EXTRACTS)
-+		return -EINVAL;
-+
-+	for (i = 0; i < cfg->num_extracts; i++) {
-+		switch (cfg->extracts[i].type) {
-+		case DPKG_EXTRACT_FROM_HDR:
-+			params[param] |= mc_enc(0, 8,
-+					cfg->extracts[i].extract.from_hdr.prot);
-+			params[param] |= mc_enc(8, 4,
-+					cfg->extracts[i].extract.from_hdr.type);
-+			params[param] |= mc_enc(16, 8,
-+					cfg->extracts[i].extract.from_hdr.size);
-+			params[param] |= mc_enc(24, 8,
-+					cfg->extracts[i].extract.
-+					from_hdr.offset);
-+			params[param] |= mc_enc(32, 32,
-+					cfg->extracts[i].extract.
-+					from_hdr.field);
-+			params[param] = cpu_to_le64(params[param]);
-+			param++;
-+			params[param] |= mc_enc(0, 8,
-+					cfg->extracts[i].extract.
-+					from_hdr.hdr_index);
-+			break;
-+		case DPKG_EXTRACT_FROM_DATA:
-+			params[param] |= mc_enc(16, 8,
-+					cfg->extracts[i].extract.
-+					from_data.size);
-+			params[param] |= mc_enc(24, 8,
-+					cfg->extracts[i].extract.
-+					from_data.offset);
-+			params[param] = cpu_to_le64(params[param]);
-+			param++;
-+			break;
-+		case DPKG_EXTRACT_FROM_PARSE:
-+			params[param] |= mc_enc(16, 8,
-+					cfg->extracts[i].extract.
-+					from_parse.size);
-+			params[param] |= mc_enc(24, 8,
-+					cfg->extracts[i].extract.
-+ from_parse.offset); -+ params[param] = cpu_to_le64(params[param]); -+ param++; -+ break; -+ default: -+ return -EINVAL; -+ } -+ params[param] |= mc_enc( -+ 24, 8, cfg->extracts[i].num_of_byte_masks); -+ params[param] |= mc_enc(32, 4, cfg->extracts[i].type); -+ params[param] = cpu_to_le64(params[param]); -+ param++; -+ for (offset = 0, j = 0; -+ j < DPKG_NUM_OF_MASKS; -+ offset += 16, j++) { -+ params[param] |= mc_enc( -+ (offset), 8, cfg->extracts[i].masks[j].mask); -+ params[param] |= mc_enc( -+ (offset + 8), 8, -+ cfg->extracts[i].masks[j].offset); -+ } -+ params[param] = cpu_to_le64(params[param]); -+ param++; -+ } -+ return 0; -+} -+ -+int dpni_prepare_extended_cfg(const struct dpni_extended_cfg *cfg, -+ uint8_t *ext_cfg_buf) -+{ -+ uint64_t *ext_params = (uint64_t *)ext_cfg_buf; -+ -+ DPNI_PREP_EXTENDED_CFG(ext_params, cfg); -+ -+ return 0; -+} -+ -+int dpni_extract_extended_cfg(struct dpni_extended_cfg *cfg, -+ const uint8_t *ext_cfg_buf) -+{ -+ const uint64_t *ext_params = (const uint64_t *)ext_cfg_buf; -+ -+ DPNI_EXT_EXTENDED_CFG(ext_params, cfg); -+ -+ return 0; -+} -+ -+int dpni_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpni_id, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_OPEN, -+ cmd_flags, -+ 0); -+ DPNI_CMD_OPEN(cmd, dpni_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpni_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLOSE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpni_cfg *cfg, -+ uint16_t *token) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_CREATE, -+ cmd_flags, -+ 0); -+ DPNI_CMD_CREATE(cmd, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpni_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_DESTROY, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_pools(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_pools_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_POOLS, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_POOLS(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_ENABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare 
command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_DISABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_IS_ENABLED, cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_IS_ENABLED(cmd, *en); -+ -+ return 0; -+} -+ -+int dpni_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_RESET, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpni_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpni_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpni_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpni_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ 
DPNI_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpni_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpni_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPNI_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_ATTR(cmd, attr); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpni_set_errors_behavior(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_error_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_ERRORS_BEHAVIOR, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_ERRORS_BEHAVIOR(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_rx_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_buffer_layout *layout) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_RX_BUFFER_LAYOUT, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_RX_BUFFER_LAYOUT(cmd, layout); -+ -+ return 0; -+} -+ -+int dpni_set_rx_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_buffer_layout *layout) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_RX_BUFFER_LAYOUT, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_RX_BUFFER_LAYOUT(cmd, layout); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_tx_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_buffer_layout *layout) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_TX_BUFFER_LAYOUT, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ 
DPNI_RSP_GET_TX_BUFFER_LAYOUT(cmd, layout); -+ -+ return 0; -+} -+ -+int dpni_set_tx_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_buffer_layout *layout) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TX_BUFFER_LAYOUT, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_TX_BUFFER_LAYOUT(cmd, layout); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_tx_conf_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_buffer_layout *layout) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_TX_CONF_BUFFER_LAYOUT, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_TX_CONF_BUFFER_LAYOUT(cmd, layout); -+ -+ return 0; -+} -+ -+int dpni_set_tx_conf_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_buffer_layout *layout) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TX_CONF_BUFFER_LAYOUT, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_TX_CONF_BUFFER_LAYOUT(cmd, layout); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_l3_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_L3_CHKSUM_VALIDATION, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_L3_CHKSUM_VALIDATION(cmd, *en); -+ -+ return 0; -+} -+ -+int dpni_set_l3_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_L3_CHKSUM_VALIDATION, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_L3_CHKSUM_VALIDATION(cmd, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_l4_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_L4_CHKSUM_VALIDATION, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_L4_CHKSUM_VALIDATION(cmd, *en); -+ -+ return 0; -+} -+ -+int dpni_set_l4_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_L4_CHKSUM_VALIDATION, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_L4_CHKSUM_VALIDATION(cmd, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_qdid(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *qdid) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_QDID, -+ cmd_flags, -+ token); -+ -+ /* 
send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_QDID(cmd, *qdid); -+ -+ return 0; -+} -+ -+int dpni_get_sp_info(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_sp_info *sp_info) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_SP_INFO, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_SP_INFO(cmd, sp_info); -+ -+ return 0; -+} -+ -+int dpni_get_tx_data_offset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *data_offset) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_TX_DATA_OFFSET, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_TX_DATA_OFFSET(cmd, *data_offset); -+ -+ return 0; -+} -+ -+int dpni_get_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ enum dpni_counter counter, -+ uint64_t *value) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_COUNTER, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_COUNTER(cmd, counter); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_COUNTER(cmd, *value); -+ -+ return 0; -+} -+ -+int dpni_set_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ enum dpni_counter counter, -+ uint64_t value) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_COUNTER, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_COUNTER(cmd, counter, value); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_link_cfg(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_link_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_LINK_CFG, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_LINK_CFG(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_link_state(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_link_state *state) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_LINK_STATE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_LINK_STATE(cmd, state); -+ -+ return 0; -+} -+ -+int dpni_set_tx_shaping(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_tx_shaping_cfg *tx_shaper) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TX_SHAPING, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_TX_SHAPING(cmd, tx_shaper); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_max_frame_length(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t 
max_frame_length) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_MAX_FRAME_LENGTH, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_MAX_FRAME_LENGTH(cmd, max_frame_length); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_max_frame_length(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *max_frame_length) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_MAX_FRAME_LENGTH, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_MAX_FRAME_LENGTH(cmd, *max_frame_length); -+ -+ return 0; -+} -+ -+int dpni_set_mtu(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t mtu) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_MTU, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_MTU(cmd, mtu); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_mtu(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *mtu) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_MTU, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_MTU(cmd, *mtu); -+ -+ return 0; -+} -+ -+int dpni_set_multicast_promisc(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_MCAST_PROMISC, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_MULTICAST_PROMISC(cmd, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_multicast_promisc(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_MCAST_PROMISC, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_MULTICAST_PROMISC(cmd, *en); -+ -+ return 0; -+} -+ -+int dpni_set_unicast_promisc(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_UNICAST_PROMISC, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_UNICAST_PROMISC(cmd, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_unicast_promisc(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_UNICAST_PROMISC, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_UNICAST_PROMISC(cmd, *en); -+ -+ return 0; -+} -+ -+int dpni_set_primary_mac_addr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const uint8_t mac_addr[6]) -+{ -+ struct mc_command 
cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_PRIM_MAC, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_PRIMARY_MAC_ADDR(cmd, mac_addr); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_primary_mac_addr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t mac_addr[6]) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_PRIM_MAC, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_PRIMARY_MAC_ADDR(cmd, mac_addr); -+ -+ return 0; -+} -+ -+int dpni_add_mac_addr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const uint8_t mac_addr[6]) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_ADD_MAC_ADDR, -+ cmd_flags, -+ token); -+ DPNI_CMD_ADD_MAC_ADDR(cmd, mac_addr); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_remove_mac_addr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const uint8_t mac_addr[6]) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_REMOVE_MAC_ADDR, -+ cmd_flags, -+ token); -+ DPNI_CMD_REMOVE_MAC_ADDR(cmd, mac_addr); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_clear_mac_filters(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int unicast, -+ int multicast) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLR_MAC_FILTERS, -+ cmd_flags, -+ token); -+ DPNI_CMD_CLEAR_MAC_FILTERS(cmd, unicast, multicast); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_vlan_filters(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_VLAN_FILTERS, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_VLAN_FILTERS(cmd, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_add_vlan_id(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_ADD_VLAN_ID, -+ cmd_flags, -+ token); -+ DPNI_CMD_ADD_VLAN_ID(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_remove_vlan_id(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_REMOVE_VLAN_ID, -+ cmd_flags, -+ token); -+ DPNI_CMD_REMOVE_VLAN_ID(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_clear_vlan_filters(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLR_VLAN_FILTERS, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_tx_selection(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct 
dpni_tx_selection_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TX_SELECTION, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_TX_SELECTION(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_rx_tc_dist(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_rx_tc_dist_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_RX_TC_DIST, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_RX_TC_DIST(cmd, tc_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_tx_flow(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *flow_id, -+ const struct dpni_tx_flow_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TX_FLOW, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_TX_FLOW(cmd, *flow_id, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_SET_TX_FLOW(cmd, *flow_id); -+ -+ return 0; -+} -+ -+int dpni_get_tx_flow(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t flow_id, -+ struct dpni_tx_flow_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_TX_FLOW, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_TX_FLOW(cmd, flow_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_TX_FLOW(cmd, attr); -+ -+ return 0; -+} -+ -+int dpni_set_rx_flow(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint16_t flow_id, -+ const struct dpni_queue_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_RX_FLOW, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_RX_FLOW(cmd, tc_id, flow_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_rx_flow(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint16_t flow_id, -+ struct dpni_queue_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_RX_FLOW, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_RX_FLOW(cmd, tc_id, flow_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_RX_FLOW(cmd, attr); -+ -+ return 0; -+} -+ -+int dpni_set_rx_err_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_queue_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_RX_ERR_QUEUE, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_RX_ERR_QUEUE(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_rx_err_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_queue_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_RX_ERR_QUEUE, -+ cmd_flags, -+ token); -+ 
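A typical bring-up sequence chains several of the dpni helpers from earlier in this file: configure the filtering and frame-size knobs first, then call dpni_enable() last so traffic only starts on a fully configured object. A minimal sketch, again assuming an initialized portal, a token from dpni_open(), and CMD_PRI_LOW:

static int dpni_basic_setup_sketch(struct fsl_mc_io *mc_io, uint16_t token)
{
	int err;

	/* accept all multicast traffic on this interface */
	err = dpni_set_multicast_promisc(mc_io, CMD_PRI_LOW, token, 1);
	if (err)
		return err;

	/* cap received frames at the classic Ethernet maximum */
	err = dpni_set_max_frame_length(mc_io, CMD_PRI_LOW, token, 1518);
	if (err)
		return err;

	/* enable the object only after all knobs are set */
	return dpni_enable(mc_io, CMD_PRI_LOW, token);
}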
-+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPNI_RSP_GET_RX_ERR_QUEUE(cmd, attr); -+ -+ return 0; -+} -+ -+int dpni_set_tx_conf_revoke(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int revoke) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TX_CONF_REVOKE, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_TX_CONF_REVOKE(cmd, revoke); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_qos_table(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_qos_tbl_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_QOS_TBL, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_QOS_TABLE(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_add_qos_entry(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_rule_cfg *cfg, -+ uint8_t tc_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_ADD_QOS_ENT, -+ cmd_flags, -+ token); -+ DPNI_CMD_ADD_QOS_ENTRY(cmd, cfg, tc_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_remove_qos_entry(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_rule_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_REMOVE_QOS_ENT, -+ cmd_flags, -+ token); -+ DPNI_CMD_REMOVE_QOS_ENTRY(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_clear_qos_table(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLR_QOS_TBL, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_add_fs_entry(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_rule_cfg *cfg, -+ uint16_t flow_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_ADD_FS_ENT, -+ cmd_flags, -+ token); -+ DPNI_CMD_ADD_FS_ENTRY(cmd, tc_id, cfg, flow_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_remove_fs_entry(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_rule_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_REMOVE_FS_ENT, -+ cmd_flags, -+ token); -+ DPNI_CMD_REMOVE_FS_ENTRY(cmd, tc_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_clear_fs_entries(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLR_FS_ENT, -+ cmd_flags, -+ token); -+ DPNI_CMD_CLEAR_FS_ENTRIES(cmd, tc_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_vlan_insertion(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare 
command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_VLAN_INSERTION, -+ cmd_flags, token); -+ DPNI_CMD_SET_VLAN_INSERTION(cmd, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_vlan_removal(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_VLAN_REMOVAL, -+ cmd_flags, token); -+ DPNI_CMD_SET_VLAN_REMOVAL(cmd, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_ipr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_IPR, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_IPR(cmd, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_ipf(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_IPF, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_IPF(cmd, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_rx_tc_policing(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_rx_tc_policing_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_RX_TC_POLICING, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_RX_TC_POLICING(cmd, tc_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_rx_tc_policing(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ struct dpni_rx_tc_policing_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_RX_TC_POLICING, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_RX_TC_POLICING(cmd, tc_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ DPNI_RSP_GET_RX_TC_POLICING(cmd, cfg); -+ -+ return 0; -+} -+ -+void dpni_prepare_early_drop(const struct dpni_early_drop_cfg *cfg, -+ uint8_t *early_drop_buf) -+{ -+ uint64_t *ext_params = (uint64_t *)early_drop_buf; -+ -+ DPNI_PREP_EARLY_DROP(ext_params, cfg); -+} -+ -+void dpni_extract_early_drop(struct dpni_early_drop_cfg *cfg, -+ const uint8_t *early_drop_buf) -+{ -+ const uint64_t *ext_params = (const uint64_t *)early_drop_buf; -+ -+ DPNI_EXT_EARLY_DROP(ext_params, cfg); -+} -+ -+int dpni_set_rx_tc_early_drop(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint64_t early_drop_iova) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_RX_TC_EARLY_DROP, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_RX_TC_EARLY_DROP(cmd, tc_id, early_drop_iova); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_rx_tc_early_drop(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint64_t early_drop_iova) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_RX_TC_EARLY_DROP, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_RX_TC_EARLY_DROP(cmd, tc_id, early_drop_iova); -+ -+ /* send command to mc*/ 
-+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_tx_tc_early_drop(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint64_t early_drop_iova) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TX_TC_EARLY_DROP, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_TX_TC_EARLY_DROP(cmd, tc_id, early_drop_iova); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_tx_tc_early_drop(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint64_t early_drop_iova) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_TX_TC_EARLY_DROP, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_TX_TC_EARLY_DROP(cmd, tc_id, early_drop_iova); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_set_rx_tc_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_congestion_notification_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header( -+ DPNI_CMDID_SET_RX_TC_CONGESTION_NOTIFICATION, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_RX_TC_CONGESTION_NOTIFICATION(cmd, tc_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_rx_tc_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ struct dpni_congestion_notification_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header( -+ DPNI_CMDID_GET_RX_TC_CONGESTION_NOTIFICATION, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_RX_TC_CONGESTION_NOTIFICATION(cmd, tc_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ DPNI_RSP_GET_RX_TC_CONGESTION_NOTIFICATION(cmd, cfg); -+ -+ return 0; -+} -+ -+int dpni_set_tx_tc_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_congestion_notification_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header( -+ DPNI_CMDID_SET_TX_TC_CONGESTION_NOTIFICATION, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_TX_TC_CONGESTION_NOTIFICATION(cmd, tc_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_tx_tc_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ struct dpni_congestion_notification_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header( -+ DPNI_CMDID_GET_TX_TC_CONGESTION_NOTIFICATION, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_TX_TC_CONGESTION_NOTIFICATION(cmd, tc_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ DPNI_RSP_GET_TX_TC_CONGESTION_NOTIFICATION(cmd, cfg); -+ -+ return 0; -+} -+ -+int dpni_set_tx_conf(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t flow_id, -+ const struct dpni_tx_conf_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TX_CONF, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_TX_CONF(cmd, flow_id, cfg); -+ -+ /* send command to mc*/ -+ return 
mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_tx_conf(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t flow_id, -+ struct dpni_tx_conf_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_TX_CONF, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_TX_CONF(cmd, flow_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ DPNI_RSP_GET_TX_CONF(cmd, attr); -+ -+ return 0; -+} -+ -+int dpni_set_tx_conf_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t flow_id, -+ const struct dpni_congestion_notification_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header( -+ DPNI_CMDID_SET_TX_CONF_CONGESTION_NOTIFICATION, -+ cmd_flags, -+ token); -+ DPNI_CMD_SET_TX_CONF_CONGESTION_NOTIFICATION(cmd, flow_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpni_get_tx_conf_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t flow_id, -+ struct dpni_congestion_notification_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header( -+ DPNI_CMDID_GET_TX_CONF_CONGESTION_NOTIFICATION, -+ cmd_flags, -+ token); -+ DPNI_CMD_GET_TX_CONF_CONGESTION_NOTIFICATION(cmd, flow_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ DPNI_RSP_GET_TX_CONF_CONGESTION_NOTIFICATION(cmd, cfg); -+ -+ return 0; -+} -diff --git a/drivers/net/dpaa2/mc/dprc.c b/drivers/net/dpaa2/mc/dprc.c -new file mode 100644 -index 0000000..75c6a68 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dprc.c -@@ -0,0 +1,786 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
-+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-+ * POSSIBILITY OF SUCH DAMAGE.
-+ */
-+#include <fsl_mc_sys.h>
-+#include <fsl_mc_cmd.h>
-+#include <fsl_dprc.h>
-+#include <fsl_dprc_cmd.h>
-+
-+int dprc_get_container_id(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
-+        int *container_id)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_CONT_ID, cmd_flags, 0);
-+
-+    /* send command to mc*/
-+    err = mc_send_command(mc_io, &cmd);
-+    if (err)
-+        return err;
-+
-+    /* retrieve response parameters */
-+    DPRC_RSP_GET_CONTAINER_ID(cmd, *container_id);
-+
-+    return 0;
-+}
-+
-+int dprc_open(struct fsl_mc_io *mc_io, uint32_t cmd_flags, int container_id,
-+        uint16_t *token)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPRC_CMDID_OPEN, cmd_flags, 0);
-+    DPRC_CMD_OPEN(cmd, container_id);
-+
-+    /* send command to mc*/
-+    err = mc_send_command(mc_io, &cmd);
-+    if (err)
-+        return err;
-+
-+    /* retrieve response parameters */
-+    *token = MC_CMD_HDR_READ_TOKEN(cmd.header);
-+
-+    return 0;
-+}
-+
-+int dprc_close(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPRC_CMDID_CLOSE, cmd_flags, token);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dprc_create_container(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
-+        uint16_t token, struct dprc_cfg *cfg, int *child_container_id,
-+        uint64_t *child_portal_paddr)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+
-+    /* prepare command */
-+    DPRC_CMD_CREATE_CONTAINER(cmd, cfg);
-+
-+    cmd.header = mc_encode_cmd_header(DPRC_CMDID_CREATE_CONT, cmd_flags,
-+            token);
-+
-+    /* send command to mc*/
-+    err = mc_send_command(mc_io, &cmd);
-+    if (err)
-+        return err;
-+
-+    /* retrieve response parameters */
-+    DPRC_RSP_CREATE_CONTAINER(cmd, *child_container_id, *child_portal_paddr);
-+
-+    return 0;
-+}
-+
-+int dprc_destroy_container(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
-+        uint16_t token, int child_container_id)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPRC_CMDID_DESTROY_CONT, cmd_flags,
-+            token);
-+    DPRC_CMD_DESTROY_CONTAINER(cmd, child_container_id);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dprc_reset_container(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
-+        uint16_t token, int child_container_id)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPRC_CMDID_RESET_CONT, cmd_flags,
-+            token);
-+    DPRC_CMD_RESET_CONTAINER(cmd, child_container_id);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dprc_get_irq(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token,
-+        uint8_t irq_index, int *type, struct dprc_irq_cfg *irq_cfg)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+
-+    /* prepare command */
-+
cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dprc_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dprc_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ DPRC_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprc_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dprc_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPRC_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprc_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dprc_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPRC_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprc_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dprc_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPRC_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command 
to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprc_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dprc_attributes *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_ATTRIBUTES(cmd, attr); -+ -+ return 0; -+} -+ -+int dprc_set_res_quota(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int child_container_id, -+ char *type, -+ uint16_t quota) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_SET_RES_QUOTA, -+ cmd_flags, -+ token); -+ DPRC_CMD_SET_RES_QUOTA(cmd, child_container_id, type, quota); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprc_get_res_quota(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int child_container_id, -+ char *type, -+ uint16_t *quota) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_RES_QUOTA, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_RES_QUOTA(cmd, child_container_id, type); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_RES_QUOTA(cmd, *quota); -+ -+ return 0; -+} -+ -+int dprc_assign(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int container_id, -+ struct dprc_res_req *res_req) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_ASSIGN, -+ cmd_flags, -+ token); -+ DPRC_CMD_ASSIGN(cmd, container_id, res_req); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprc_unassign(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int child_container_id, -+ struct dprc_res_req *res_req) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_UNASSIGN, -+ cmd_flags, -+ token); -+ DPRC_CMD_UNASSIGN(cmd, child_container_id, res_req); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprc_get_pool_count(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *pool_count) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_POOL_COUNT, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_POOL_COUNT(cmd, *pool_count); -+ -+ return 0; -+} -+ -+int dprc_get_pool(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int pool_index, -+ char *type) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_POOL, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_POOL(cmd, pool_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_POOL(cmd, type); -+ -+ return 0; -+} -+ -+int dprc_get_obj_count(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *obj_count) -+{ -+ struct mc_command cmd = { 0 
}; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_OBJ_COUNT, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_OBJ_COUNT(cmd, *obj_count); -+ -+ return 0; -+} -+ -+int dprc_get_obj(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int obj_index, -+ struct dprc_obj_desc *obj_desc) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_OBJ, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_OBJ(cmd, obj_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_OBJ(cmd, obj_desc); -+ -+ return 0; -+} -+ -+int dprc_get_obj_desc(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ char *obj_type, -+ int obj_id, -+ struct dprc_obj_desc *obj_desc) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_OBJ_DESC, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_OBJ_DESC(cmd, obj_type, obj_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_OBJ_DESC(cmd, obj_desc); -+ -+ return 0; -+} -+ -+int dprc_set_obj_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ char *obj_type, -+ int obj_id, -+ uint8_t irq_index, -+ struct dprc_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_SET_OBJ_IRQ, -+ cmd_flags, -+ token); -+ DPRC_CMD_SET_OBJ_IRQ(cmd, -+ obj_type, -+ obj_id, -+ irq_index, -+ irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprc_get_obj_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ char *obj_type, -+ int obj_id, -+ uint8_t irq_index, -+ int *type, -+ struct dprc_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_OBJ_IRQ, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_OBJ_IRQ(cmd, obj_type, obj_id, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_OBJ_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dprc_get_res_count(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ char *type, -+ int *res_count) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ *res_count = 0; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_RES_COUNT, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_RES_COUNT(cmd, type); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_RES_COUNT(cmd, *res_count); -+ -+ return 0; -+} -+ -+int dprc_get_res_ids(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ char *type, -+ struct dprc_res_ids_range_desc *range_desc) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_RES_IDS, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_RES_IDS(cmd, range_desc, type); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) 
-+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_RES_IDS(cmd, range_desc); -+ -+ return 0; -+} -+ -+int dprc_get_obj_region(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ char *obj_type, -+ int obj_id, -+ uint8_t region_index, -+ struct dprc_region_desc *region_desc) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_OBJ_REG, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_OBJ_REGION(cmd, obj_type, obj_id, region_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_OBJ_REGION(cmd, region_desc); -+ -+ return 0; -+} -+ -+int dprc_set_obj_label(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ char *obj_type, -+ int obj_id, -+ char *label) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_SET_OBJ_LABEL, -+ cmd_flags, -+ token); -+ DPRC_CMD_SET_OBJ_LABEL(cmd, obj_type, obj_id, label); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprc_connect(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dprc_endpoint *endpoint1, -+ const struct dprc_endpoint *endpoint2, -+ const struct dprc_connection_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_CONNECT, -+ cmd_flags, -+ token); -+ DPRC_CMD_CONNECT(cmd, endpoint1, endpoint2, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprc_disconnect(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dprc_endpoint *endpoint) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_DISCONNECT, -+ cmd_flags, -+ token); -+ DPRC_CMD_DISCONNECT(cmd, endpoint); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprc_get_connection(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dprc_endpoint *endpoint1, -+ struct dprc_endpoint *endpoint2, -+ int *state) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRC_CMDID_GET_CONNECTION, -+ cmd_flags, -+ token); -+ DPRC_CMD_GET_CONNECTION(cmd, endpoint1); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRC_RSP_GET_CONNECTION(cmd, endpoint2, *state); -+ -+ return 0; -+} -diff --git a/drivers/net/dpaa2/mc/dprtc.c b/drivers/net/dpaa2/mc/dprtc.c -new file mode 100644 -index 0000000..73667af ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dprtc.c -@@ -0,0 +1,509 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. 
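
Every wrapper in these dpaa2 MC hunks follows the same three-phase round-trip: encode a command header, hand the command structure to the MC portal, then decode any response words back out of the same buffer. Below is a minimal, compilable sketch of that shape; the struct layouts, the header bit-packing, and example_get() are stand-ins invented for illustration, not the real fsl_mc_sys.h/fsl_mc_cmd.h definitions.

    #include <stdint.h>
    #include <stdio.h>

    struct mc_command {            /* assumed layout: header word + params */
        uint64_t header;
        uint64_t params[7];
    };
    struct fsl_mc_io { void *portal; };   /* opaque stand-in */

    /* stand-in for mc_encode_cmd_header(); field packing is illustrative */
    static uint64_t mc_encode_cmd_header(uint16_t cmd_id, uint32_t cmd_flags,
            uint16_t token)
    {
        return ((uint64_t)cmd_id << 48) |
               ((uint64_t)(cmd_flags & 0xffff) << 32) | token;
    }

    /* stand-in for mc_send_command(); the real one writes the command to
     * the hardware portal and polls for completion */
    static int mc_send_command(struct fsl_mc_io *mc_io, struct mc_command *cmd)
    {
        (void)mc_io;
        cmd->params[0] = 7;        /* pretend the MC answered */
        return 0;
    }

    /* hypothetical wrapper showing the shape shared by every get-style
     * function in these files */
    static int example_get(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
            uint16_t token, uint64_t *out)
    {
        struct mc_command cmd = { 0 };
        int err;

        /* prepare command */
        cmd.header = mc_encode_cmd_header(0x123 /* hypothetical id */,
                cmd_flags, token);

        /* send command to mc */
        err = mc_send_command(mc_io, &cmd);
        if (err)
            return err;

        /* retrieve response parameters (the DP*_RSP_* macros do this) */
        *out = cmd.params[0];
        return 0;
    }

    int main(void)
    {
        struct fsl_mc_io io = { 0 };
        uint64_t v;

        if (example_get(&io, 0, 0, &v) == 0)
            printf("response word: %llu\n", (unsigned long long)v);
        return 0;
    }
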
-+ * * Neither the name of the above-listed copyright holders nor the
-+ * names of any contributors may be used to endorse or promote products
-+ * derived from this software without specific prior written permission.
-+ *
-+ *
-+ * ALTERNATIVELY, this software may be distributed under the terms of the
-+ * GNU General Public License ("GPL") as published by the Free Software
-+ * Foundation, either version 2 of that License or (at your option) any
-+ * later version.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
-+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-+ * POSSIBILITY OF SUCH DAMAGE.
-+ */
-+#include <fsl_mc_sys.h>
-+#include <fsl_mc_cmd.h>
-+#include <fsl_dprtc.h>
-+#include <fsl_dprtc_cmd.h>
-+
-+int dprtc_open(struct fsl_mc_io *mc_io, uint32_t cmd_flags, int dprtc_id,
-+        uint16_t *token)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPRTC_CMDID_OPEN, cmd_flags, 0);
-+    DPRTC_CMD_OPEN(cmd, dprtc_id);
-+
-+    /* send command to mc*/
-+    err = mc_send_command(mc_io, &cmd);
-+    if (err)
-+        return err;
-+
-+    /* retrieve response parameters */
-+    *token = MC_CMD_HDR_READ_TOKEN(cmd.header);
-+
-+    return err;
-+}
-+
-+int dprtc_close(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPRTC_CMDID_CLOSE, cmd_flags, token);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dprtc_create(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
-+        const struct dprtc_cfg *cfg, uint16_t *token)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+
-+    (void)(cfg); /* unused */
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPRTC_CMDID_CREATE, cmd_flags, 0);
-+
-+    /* send command to mc*/
-+    err = mc_send_command(mc_io, &cmd);
-+    if (err)
-+        return err;
-+
-+    /* retrieve response parameters */
-+    *token = MC_CMD_HDR_READ_TOKEN(cmd.header);
-+
-+    return 0;
-+}
-+
-+int dprtc_destroy(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPRTC_CMDID_DESTROY, cmd_flags, token);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dprtc_enable(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPRTC_CMDID_ENABLE, cmd_flags, token);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dprtc_disable(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPRTC_CMDID_DISABLE, cmd_flags, token);
-+
-+    /* send command to mc*/
-+
return mc_send_command(mc_io, &cmd); -+} -+ -+int dprtc_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_IS_ENABLED, cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRTC_RSP_IS_ENABLED(cmd, *en); -+ -+ return 0; -+} -+ -+int dprtc_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_RESET, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprtc_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dprtc_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprtc_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dprtc_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRTC_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dprtc_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprtc_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRTC_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dprtc_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprtc_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = 
mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRTC_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dprtc_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRTC_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dprtc_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprtc_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dprtc_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRTC_RSP_GET_ATTRIBUTES(cmd, attr); -+ -+ return 0; -+} -+ -+int dprtc_set_clock_offset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int64_t offset) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_CLOCK_OFFSET, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_SET_CLOCK_OFFSET(cmd, offset); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprtc_set_freq_compensation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint32_t freq_compensation) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_FREQ_COMPENSATION, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_SET_FREQ_COMPENSATION(cmd, freq_compensation); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprtc_get_freq_compensation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint32_t *freq_compensation) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_FREQ_COMPENSATION, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRTC_RSP_GET_FREQ_COMPENSATION(cmd, *freq_compensation); -+ -+ return 0; -+} -+ -+int dprtc_get_time(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint64_t *time) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_TIME, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPRTC_RSP_GET_TIME(cmd, *time); -+ -+ return 0; -+} -+ -+int dprtc_set_time(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, 
-+ uint64_t time) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_TIME, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_SET_TIME(cmd, time); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dprtc_set_alarm(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, uint64_t time) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_ALARM, -+ cmd_flags, -+ token); -+ -+ DPRTC_CMD_SET_ALARM(cmd, time); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -diff --git a/drivers/net/dpaa2/mc/dpseci.c b/drivers/net/dpaa2/mc/dpseci.c -new file mode 100644 -index 0000000..a4b932a ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpseci.c -@@ -0,0 +1,502 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
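
The dpseci wrappers that follow are driven through an open/token/close lifecycle: dpseci_open() returns a token read back out of the response header, every subsequent call authenticates with that token, and dpseci_close() releases it. A short sketch of a caller, with stub definitions standing in for the real driver so it compiles on its own; query_dpseci() and the zero cmd_flags value are invented for illustration.

    #include <stdint.h>

    struct fsl_mc_io { int dummy; };        /* opaque stand-in */
    struct dpseci_attr { int id; };         /* stand-in attribute block */

    /* stubs with the same signatures as the wrappers below, so the
     * sketch is self-contained */
    static int dpseci_open(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
            int dpseci_id, uint16_t *token)
    { (void)mc_io; (void)cmd_flags; (void)dpseci_id; *token = 1; return 0; }

    static int dpseci_get_attributes(struct fsl_mc_io *mc_io,
            uint32_t cmd_flags, uint16_t token, struct dpseci_attr *attr)
    { (void)mc_io; (void)cmd_flags; (void)token; attr->id = 0; return 0; }

    static int dpseci_close(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
            uint16_t token)
    { (void)mc_io; (void)cmd_flags; (void)token; return 0; }

    /* hypothetical caller: open to get a token, use it on every call,
     * always close so the token is released even on error */
    int query_dpseci(struct fsl_mc_io *mc_io, int obj_id,
            struct dpseci_attr *attr)
    {
        uint16_t token;
        int err;

        err = dpseci_open(mc_io, 0 /* cmd_flags: assumed default */, obj_id,
                &token);
        if (err)
            return err;

        err = dpseci_get_attributes(mc_io, 0, token, attr);

        (void)dpseci_close(mc_io, 0, token);   /* keep the first error */
        return err;
    }
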
-+ */
-+#include <fsl_mc_sys.h>
-+#include <fsl_mc_cmd.h>
-+#include <fsl_dpseci.h>
-+#include <fsl_dpseci_cmd.h>
-+
-+int dpseci_open(struct fsl_mc_io *mc_io, uint32_t cmd_flags, int dpseci_id,
-+        uint16_t *token)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSECI_CMDID_OPEN, cmd_flags, 0);
-+    DPSECI_CMD_OPEN(cmd, dpseci_id);
-+
-+    /* send command to mc*/
-+    err = mc_send_command(mc_io, &cmd);
-+    if (err)
-+        return err;
-+
-+    /* retrieve response parameters */
-+    *token = MC_CMD_HDR_READ_TOKEN(cmd.header);
-+
-+    return 0;
-+}
-+
-+int dpseci_close(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSECI_CMDID_CLOSE, cmd_flags, token);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpseci_create(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
-+        const struct dpseci_cfg *cfg, uint16_t *token)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSECI_CMDID_CREATE, cmd_flags, 0);
-+    DPSECI_CMD_CREATE(cmd, cfg);
-+
-+    /* send command to mc*/
-+    err = mc_send_command(mc_io, &cmd);
-+    if (err)
-+        return err;
-+
-+    /* retrieve response parameters */
-+    *token = MC_CMD_HDR_READ_TOKEN(cmd.header);
-+
-+    return 0;
-+}
-+
-+int dpseci_destroy(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
-+        uint16_t token)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSECI_CMDID_DESTROY, cmd_flags, token);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpseci_enable(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSECI_CMDID_ENABLE, cmd_flags, token);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpseci_disable(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
-+        uint16_t token)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSECI_CMDID_DISABLE, cmd_flags, token);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpseci_is_enabled(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
-+        uint16_t token, int *en)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSECI_CMDID_IS_ENABLED, cmd_flags,
-+            token);
-+
-+    /* send command to mc*/
-+    err = mc_send_command(mc_io, &cmd);
-+    if (err)
-+        return err;
-+
-+    /* retrieve response parameters */
-+    DPSECI_RSP_IS_ENABLED(cmd, *en);
-+
-+    return 0;
-+}
-+
-+int dpseci_reset(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSECI_CMDID_RESET, cmd_flags, token);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpseci_get_irq(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
-+        uint16_t token, uint8_t irq_index, int *type,
-+        struct dpseci_irq_cfg *irq_cfg)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_IRQ, cmd_flags, token);
-+    DPSECI_CMD_GET_IRQ(cmd, irq_index);
-+
-+    /* send command to mc*/
-+    err = mc_send_command(mc_io, &cmd);
-+    if (err)
-+        return err;
-+ -+ /* retrieve response parameters */ -+ DPSECI_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpseci_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpseci_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ DPSECI_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpseci_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPSECI_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSECI_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpseci_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPSECI_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpseci_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPSECI_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSECI_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpseci_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPSECI_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpseci_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPSECI_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSECI_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpseci_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPSECI_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpseci_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct 
dpseci_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSECI_RSP_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpseci_set_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t queue, -+ const struct dpseci_rx_queue_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_SET_RX_QUEUE, -+ cmd_flags, -+ token); -+ DPSECI_CMD_SET_RX_QUEUE(cmd, queue, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpseci_get_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t queue, -+ struct dpseci_rx_queue_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_RX_QUEUE, -+ cmd_flags, -+ token); -+ DPSECI_CMD_GET_RX_QUEUE(cmd, queue); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSECI_RSP_GET_RX_QUEUE(cmd, attr); -+ -+ return 0; -+} -+ -+int dpseci_get_tx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t queue, -+ struct dpseci_tx_queue_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_TX_QUEUE, -+ cmd_flags, -+ token); -+ DPSECI_CMD_GET_TX_QUEUE(cmd, queue); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSECI_RSP_GET_TX_QUEUE(cmd, attr); -+ -+ return 0; -+} -+ -+int dpseci_get_sec_attr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpseci_sec_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_SEC_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSECI_RSP_GET_SEC_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpseci_get_sec_counters(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpseci_sec_counters *counters) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_SEC_COUNTERS, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSECI_RSP_GET_SEC_COUNTERS(cmd, counters); -+ -+ return 0; -+} -diff --git a/drivers/net/dpaa2/mc/dpsw.c b/drivers/net/dpaa2/mc/dpsw.c -new file mode 100644 -index 0000000..2034b55 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/dpsw.c -@@ -0,0 +1,1639 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. 
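
The dpsw.c hunk below opens with two helpers, build_if_id_bitmap() and read_if_id_bitmap(), that pack a list of switch interface ids into 64-bit command words and unpack them again. Here is a reduced, runnable sketch of the same encode/decode idea using plain shifts; mc_enc()/mc_dec() in the patch perform the equivalent offset/width packing, while MAX_IF, set_if_bits(), and get_if_ids() are illustrative names and the 64-interface bound is an assumption.

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_IF 64   /* stand-in for DPSW_MAX_IF; real value may differ */

    /* encode: set one bit per interface id, 64 ids per command word */
    static void set_if_bits(uint64_t *words, const uint16_t *if_id, uint16_t n)
    {
        for (uint16_t i = 0; i < n && i < MAX_IF; i++)
            words[if_id[i] / 64] |= 1ULL << (if_id[i] % 64);
    }

    /* decode: walk every bit position and collect the set ids in order */
    static uint16_t get_if_ids(const uint64_t *words, uint16_t *if_id)
    {
        uint16_t n = 0;

        for (uint16_t i = 0; i < MAX_IF; i++)
            if (words[i / 64] & (1ULL << (i % 64)))
                if_id[n++] = i;
        return n;
    }

    int main(void)
    {
        uint64_t words[MAX_IF / 64] = { 0 };
        uint16_t in[] = { 1, 5, 63 }, out[MAX_IF];

        set_if_bits(words, in, 3);
        uint16_t n = get_if_ids(words, out);
        for (uint16_t i = 0; i < n; i++)
            printf("if %u\n", out[i]);   /* prints 1, 5, 63 */
        return 0;
    }
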
-+ * * Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * * Neither the name of the above-listed copyright holders nor the
-+ * names of any contributors may be used to endorse or promote products
-+ * derived from this software without specific prior written permission.
-+ *
-+ *
-+ * ALTERNATIVELY, this software may be distributed under the terms of the
-+ * GNU General Public License ("GPL") as published by the Free Software
-+ * Foundation, either version 2 of that License or (at your option) any
-+ * later version.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
-+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-+ * POSSIBILITY OF SUCH DAMAGE.
-+ */
-+#include <fsl_mc_sys.h>
-+#include <fsl_mc_cmd.h>
-+#include <fsl_dpsw.h>
-+#include <fsl_dpsw_cmd.h>
-+
-+/* internal functions */
-+static void build_if_id_bitmap(const uint16_t *if_id, const uint16_t num_ifs,
-+        struct mc_command *cmd, int start_param)
-+{
-+    int i;
-+
-+    for (i = 0; (i < num_ifs) && (i < DPSW_MAX_IF); i++)
-+        cmd->params[start_param + (if_id[i] / 64)] |=
-+            mc_enc((if_id[i] % 64), 1, 1);
-+}
-+
-+static int read_if_id_bitmap(uint16_t *if_id, uint16_t *num_ifs,
-+        struct mc_command *cmd, int start_param)
-+{
-+    int bitmap[DPSW_MAX_IF] = { 0 };
-+    int i, j = 0;
-+    int count = 0;
-+
-+    for (i = 0; i < DPSW_MAX_IF; i++) {
-+        bitmap[i] = (int)mc_dec(cmd->params[start_param + i / 64],
-+                i % 64, 1);
-+        count += bitmap[i];
-+    }
-+
-+    *num_ifs = (uint16_t)count;
-+
-+    for (i = 0; (i < DPSW_MAX_IF) && (j < count); i++) {
-+        if (bitmap[i]) {
-+            if_id[j] = (uint16_t)i;
-+            j++;
-+        }
-+    }
-+
-+    return 0;
-+}
-+
-+/* DPSW APIs */
-+int dpsw_open(struct fsl_mc_io *mc_io, uint32_t cmd_flags, int dpsw_id,
-+        uint16_t *token)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSW_CMDID_OPEN, cmd_flags, 0);
-+    DPSW_CMD_OPEN(cmd, dpsw_id);
-+
-+    /* send command to mc*/
-+    err = mc_send_command(mc_io, &cmd);
-+    if (err)
-+        return err;
-+
-+    /* retrieve response parameters */
-+    *token = MC_CMD_HDR_READ_TOKEN(cmd.header);
-+
-+    return 0;
-+}
-+
-+int dpsw_close(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token)
-+{
-+    struct mc_command cmd = { 0 };
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSW_CMDID_CLOSE, cmd_flags, token);
-+
-+    /* send command to mc*/
-+    return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpsw_create(struct fsl_mc_io *mc_io, uint32_t cmd_flags,
-+        const struct dpsw_cfg *cfg, uint16_t *token)
-+{
-+    struct mc_command cmd = { 0 };
-+    int err;
-+
-+    /* prepare command */
-+    cmd.header = mc_encode_cmd_header(DPSW_CMDID_CREATE, cmd_flags, 0);
-+    DPSW_CMD_CREATE(cmd, cfg);
-+
-+    /* send command to mc*/
-+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ *token = MC_CMD_HDR_READ_TOKEN(cmd.header); -+ -+ return 0; -+} -+ -+int dpsw_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_DESTROY, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_ENABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_DISABLE, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IS_ENABLED, cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_IS_ENABLED(cmd, *en); -+ -+ return 0; -+} -+ -+int dpsw_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_RESET, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpsw_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_IRQ, -+ cmd_flags, -+ token); -+ DPSW_CMD_SET_IRQ(cmd, irq_index, irq_cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpsw_irq_cfg *irq_cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_IRQ, -+ cmd_flags, -+ token); -+ DPSW_CMD_GET_IRQ(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_GET_IRQ(cmd, *type, irq_cfg); -+ -+ return 0; -+} -+ -+int dpsw_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ DPSW_CMD_SET_IRQ_ENABLE(cmd, irq_index, en); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_IRQ_ENABLE, -+ cmd_flags, -+ token); -+ 
DPSW_CMD_GET_IRQ_ENABLE(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_GET_IRQ_ENABLE(cmd, *en); -+ -+ return 0; -+} -+ -+int dpsw_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPSW_CMD_SET_IRQ_MASK(cmd, irq_index, mask); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_IRQ_MASK, -+ cmd_flags, -+ token); -+ DPSW_CMD_GET_IRQ_MASK(cmd, irq_index); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_GET_IRQ_MASK(cmd, *mask); -+ -+ return 0; -+} -+ -+int dpsw_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPSW_CMD_GET_IRQ_STATUS(cmd, irq_index, *status); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_GET_IRQ_STATUS(cmd, *status); -+ -+ return 0; -+} -+ -+int dpsw_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_CLEAR_IRQ_STATUS, -+ cmd_flags, -+ token); -+ DPSW_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpsw_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_ATTR, -+ cmd_flags, -+ token); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpsw_set_reflection_if(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_REFLECTION_IF, -+ cmd_flags, -+ token); -+ DPSW_CMD_SET_REFLECTION_IF(cmd, if_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpsw_link_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_LINK_CFG, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_SET_LINK_CFG(cmd, if_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_get_link_state(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, 
-+			   uint16_t if_id,
-+			   struct dpsw_link_state *state)
-+{
-+	struct mc_command cmd = { 0 };
-+	int err;
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_LINK_STATE,
-+					  cmd_flags,
-+					  token);
-+	DPSW_CMD_IF_GET_LINK_STATE(cmd, if_id);
-+
-+	/* send command to mc*/
-+	err = mc_send_command(mc_io, &cmd);
-+	if (err)
-+		return err;
-+
-+	/* retrieve response parameters */
-+	DPSW_RSP_IF_GET_LINK_STATE(cmd, state);
-+
-+	return 0;
-+}
-+
-+int dpsw_if_set_flooding(struct fsl_mc_io *mc_io,
-+			 uint32_t cmd_flags,
-+			 uint16_t token,
-+			 uint16_t if_id,
-+			 int en)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_FLOODING,
-+					  cmd_flags,
-+					  token);
-+	DPSW_CMD_IF_SET_FLOODING(cmd, if_id, en);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpsw_if_set_broadcast(struct fsl_mc_io *mc_io,
-+			  uint32_t cmd_flags,
-+			  uint16_t token,
-+			  uint16_t if_id,
-+			  int en)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_BROADCAST,
-+					  cmd_flags,
-+					  token);
-+	/* note: the FLOODING encoding macro is reused here; the broadcast
-+	 * command takes the same (if_id, en) arguments */
-+	DPSW_CMD_IF_SET_FLOODING(cmd, if_id, en);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpsw_if_set_multicast(struct fsl_mc_io *mc_io,
-+			  uint32_t cmd_flags,
-+			  uint16_t token,
-+			  uint16_t if_id,
-+			  int en)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_MULTICAST,
-+					  cmd_flags,
-+					  token);
-+	/* note: the FLOODING encoding macro is reused here as well */
-+	DPSW_CMD_IF_SET_FLOODING(cmd, if_id, en);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpsw_if_set_tci(struct fsl_mc_io *mc_io,
-+		    uint32_t cmd_flags,
-+		    uint16_t token,
-+		    uint16_t if_id,
-+		    const struct dpsw_tci_cfg *cfg)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_TCI,
-+					  cmd_flags,
-+					  token);
-+	DPSW_CMD_IF_SET_TCI(cmd, if_id, cfg);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
-+		    uint32_t cmd_flags,
-+		    uint16_t token,
-+		    uint16_t if_id,
-+		    struct dpsw_tci_cfg *cfg)
-+{
-+	struct mc_command cmd = { 0 };
-+	int err = 0;
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_TCI,
-+					  cmd_flags,
-+					  token);
-+	DPSW_CMD_IF_GET_TCI(cmd, if_id);
-+
-+	/* send command to mc*/
-+	err = mc_send_command(mc_io, &cmd);
-+	if (err)
-+		return err;
-+
-+	/* retrieve response parameters */
-+	DPSW_RSP_IF_GET_TCI(cmd, cfg);
-+
-+	return 0;
-+}
-+
-+int dpsw_if_set_stp(struct fsl_mc_io *mc_io,
-+		    uint32_t cmd_flags,
-+		    uint16_t token,
-+		    uint16_t if_id,
-+		    const struct dpsw_stp_cfg *cfg)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_STP,
-+					  cmd_flags,
-+					  token);
-+	DPSW_CMD_IF_SET_STP(cmd, if_id, cfg);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpsw_if_set_accepted_frames(struct fsl_mc_io *mc_io,
-+				uint32_t cmd_flags,
-+				uint16_t token,
-+				uint16_t if_id,
-+				const struct dpsw_accepted_frames_cfg *cfg)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_ACCEPTED_FRAMES,
-+					  cmd_flags,
-+					  token);
-+	DPSW_CMD_IF_SET_ACCEPTED_FRAMES(cmd, if_id, cfg);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpsw_if_set_accept_all_vlan(struct fsl_mc_io
*mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ int accept_all) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_IF_ACCEPT_ALL_VLAN, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_SET_ACCEPT_ALL_VLAN(cmd, if_id, accept_all); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_get_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ enum dpsw_counter type, -+ uint64_t *counter) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_COUNTER, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_GET_COUNTER(cmd, if_id, type); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_IF_GET_COUNTER(cmd, *counter); -+ -+ return 0; -+} -+ -+int dpsw_if_set_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ enum dpsw_counter type, -+ uint64_t counter) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_COUNTER, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_SET_COUNTER(cmd, if_id, type, counter); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_set_tx_selection(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpsw_tx_selection_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_TX_SELECTION, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_SET_TX_SELECTION(cmd, if_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_add_reflection(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpsw_reflection_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_ADD_REFLECTION, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_ADD_REFLECTION(cmd, if_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpsw_reflection_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_REMOVE_REFLECTION, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_REMOVE_REFLECTION(cmd, if_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_set_flooding_metering(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpsw_metering_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_FLOODING_METERING, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_SET_FLOODING_METERING(cmd, if_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_set_metering(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ uint8_t tc_id, -+ const struct dpsw_metering_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_METERING, -+ cmd_flags, -+ token); -+ 
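The counter accessors above follow the same request/response pattern as the rest of the file. A sketch of reading one per-interface statistic, assuming stdio.h and fsl_dpsw.h are included, a token from dpsw_open(), and that DPSW_CNT_ING_FRAME is among the enum dpsw_counter values declared in that header:

        static int show_ingress_frames(struct fsl_mc_io *mc_io, uint16_t token,
                                       uint16_t if_id)
        {
                uint64_t frames = 0;
                int err = dpsw_if_get_counter(mc_io, 0, token, if_id,
                                              DPSW_CNT_ING_FRAME, &frames);

                if (!err)
                        printf("if %u ingress frames: %llu\n", if_id,
                               (unsigned long long)frames);
                return err;
        }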
DPSW_CMD_IF_SET_METERING(cmd, if_id, tc_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+void dpsw_prepare_early_drop(const struct dpsw_early_drop_cfg *cfg, -+ uint8_t *early_drop_buf) -+{ -+ uint64_t *ext_params = (uint64_t *)early_drop_buf; -+ -+ DPSW_PREP_EARLY_DROP(ext_params, cfg); -+} -+ -+int dpsw_if_set_early_drop(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ uint8_t tc_id, -+ uint64_t early_drop_iova) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_EARLY_DROP, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_SET_EARLY_DROP(cmd, if_id, tc_id, early_drop_iova); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_add_custom_tpid(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpsw_custom_tpid_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_ADD_CUSTOM_TPID, -+ cmd_flags, -+ token); -+ DPSW_CMD_ADD_CUSTOM_TPID(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_remove_custom_tpid(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpsw_custom_tpid_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_REMOVE_CUSTOM_TPID, -+ cmd_flags, -+ token); -+ DPSW_CMD_REMOVE_CUSTOM_TPID(cmd, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_ENABLE, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_ENABLE(cmd, if_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_DISABLE, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_DISABLE(cmd, if_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpsw_if_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_ATTR, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_GET_ATTR(cmd, if_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_IF_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ uint16_t frame_length) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_MAX_FRAME_LENGTH, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_SET_MAX_FRAME_LENGTH(cmd, if_id, frame_length); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_if_get_max_frame_length(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ uint16_t *frame_length) -+{ -+ struct mc_command cmd = { 0 }; -+ int 
err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_MAX_FRAME_LENGTH, -+ cmd_flags, -+ token); -+ DPSW_CMD_IF_GET_MAX_FRAME_LENGTH(cmd, if_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ DPSW_RSP_IF_GET_MAX_FRAME_LENGTH(cmd, *frame_length); -+ -+ return 0; -+} -+ -+int dpsw_vlan_add(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_ADD(cmd, vlan_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_vlan_add_if(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ build_if_id_bitmap(cfg->if_id, cfg->num_ifs, &cmd, 1); -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD_IF, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_ADD_IF(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ build_if_id_bitmap(cfg->if_id, cfg->num_ifs, &cmd, 1); -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD_IF_UNTAGGED, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_ADD_IF_UNTAGGED(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_vlan_add_if_flooding(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ build_if_id_bitmap(cfg->if_id, cfg->num_ifs, &cmd, 1); -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD_IF_FLOODING, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_ADD_IF_FLOODING(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ build_if_id_bitmap(cfg->if_id, cfg->num_ifs, &cmd, 1); -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE_IF, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_REMOVE_IF(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ build_if_id_bitmap(cfg->if_id, cfg->num_ifs, &cmd, 1); -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE_IF_UNTAGGED, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_REMOVE_IF_UNTAGGED(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_vlan_remove_if_flooding(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ build_if_id_bitmap(cfg->if_id, cfg->num_ifs, &cmd, 1); -+ cmd.header = 
mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE_IF_FLOODING, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_REMOVE_IF_FLOODING(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_vlan_remove(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_REMOVE(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_vlan_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ struct dpsw_vlan_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_GET_ATTRIBUTES, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_GET_ATTR(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_VLAN_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpsw_vlan_get_if(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ struct dpsw_vlan_if_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_GET_IF, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_GET_IF(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_VLAN_GET_IF(cmd, cfg); -+ read_if_id_bitmap(cfg->if_id, &cfg->num_ifs, &cmd, 1); -+ -+ return 0; -+} -+ -+int dpsw_vlan_get_if_flooding(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ struct dpsw_vlan_if_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_GET_IF_FLOODING, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_GET_IF_FLOODING(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_VLAN_GET_IF_FLOODING(cmd, cfg); -+ read_if_id_bitmap(cfg->if_id, &cfg->num_ifs, &cmd, 1); -+ -+ return 0; -+} -+ -+int dpsw_vlan_get_if_untagged(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ struct dpsw_vlan_if_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_GET_IF_UNTAGGED, -+ cmd_flags, -+ token); -+ DPSW_CMD_VLAN_GET_IF_UNTAGGED(cmd, vlan_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_VLAN_GET_IF(cmd, cfg); -+ read_if_id_bitmap(cfg->if_id, &cfg->num_ifs, &cmd, 1); -+ -+ return 0; -+} -+ -+int dpsw_fdb_add(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *fdb_id, -+ const struct dpsw_fdb_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD, -+ cmd_flags, -+ token); -+ DPSW_CMD_FDB_ADD(cmd, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_FDB_ADD(cmd, *fdb_id); -+ -+ return 0; -+} -+ -+int 
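Every dpsw_vlan_*_if() call above routes its interface list through build_if_id_bitmap() before the command header is written. A hedged sketch of creating VLAN 100 and adding two ports as untagged members, using only the num_ifs/if_id fields that the code above dereferences and leaving the rest of struct dpsw_vlan_cfg at defaults:

        static int add_vlan_100(struct fsl_mc_io *mc_io, uint16_t token)
        {
                struct dpsw_vlan_cfg vcfg = { 0 };  /* defaults; see fsl_dpsw.h */
                struct dpsw_vlan_if_cfg ifs = {
                        .num_ifs = 2,
                        .if_id = { 0, 1 },          /* switch ports 0 and 1 */
                };
                int err;

                err = dpsw_vlan_add(mc_io, 0, token, 100, &vcfg);
                if (err)
                        return err;
                /* membership first, then the untagged set for the same ports */
                err = dpsw_vlan_add_if(mc_io, 0, token, 100, &ifs);
                if (err)
                        return err;
                return dpsw_vlan_add_if_untagged(mc_io, 0, token, 100, &ifs);
        }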
dpsw_fdb_remove(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE, -+ cmd_flags, -+ token); -+ DPSW_CMD_FDB_REMOVE(cmd, fdb_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ const struct dpsw_fdb_unicast_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD_UNICAST, -+ cmd_flags, -+ token); -+ DPSW_CMD_FDB_ADD_UNICAST(cmd, fdb_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_fdb_get_unicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ struct dpsw_fdb_unicast_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_GET_UNICAST, -+ cmd_flags, -+ token); -+ DPSW_CMD_FDB_GET_UNICAST(cmd, fdb_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_FDB_GET_UNICAST(cmd, cfg); -+ -+ return 0; -+} -+ -+int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ const struct dpsw_fdb_unicast_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE_UNICAST, -+ cmd_flags, -+ token); -+ DPSW_CMD_FDB_REMOVE_UNICAST(cmd, fdb_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ const struct dpsw_fdb_multicast_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ build_if_id_bitmap(cfg->if_id, cfg->num_ifs, &cmd, 2); -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD_MULTICAST, -+ cmd_flags, -+ token); -+ DPSW_CMD_FDB_ADD_MULTICAST(cmd, fdb_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_fdb_get_multicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ struct dpsw_fdb_multicast_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_GET_MULTICAST, -+ cmd_flags, -+ token); -+ DPSW_CMD_FDB_GET_MULTICAST(cmd, fdb_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_FDB_GET_MULTICAST(cmd, cfg); -+ read_if_id_bitmap(cfg->if_id, &cfg->num_ifs, &cmd, 2); -+ -+ return 0; -+} -+ -+int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ const struct dpsw_fdb_multicast_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ build_if_id_bitmap(cfg->if_id, cfg->num_ifs, &cmd, 2); -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE_MULTICAST, -+ cmd_flags, -+ token); -+ DPSW_CMD_FDB_REMOVE_MULTICAST(cmd, fdb_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_fdb_set_learning_mode(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, 
-+ uint16_t fdb_id, -+ enum dpsw_fdb_learning_mode mode) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_SET_LEARNING_MODE, -+ cmd_flags, -+ token); -+ DPSW_CMD_FDB_SET_LEARNING_MODE(cmd, fdb_id, mode); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_fdb_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ struct dpsw_fdb_attr *attr) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_GET_ATTR, -+ cmd_flags, -+ token); -+ DPSW_CMD_FDB_GET_ATTR(cmd, fdb_id); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_FDB_GET_ATTR(cmd, attr); -+ -+ return 0; -+} -+ -+int dpsw_acl_add(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *acl_id, -+ const struct dpsw_acl_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ int err; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD, -+ cmd_flags, -+ token); -+ DPSW_CMD_ACL_ADD(cmd, cfg); -+ -+ /* send command to mc*/ -+ err = mc_send_command(mc_io, &cmd); -+ if (err) -+ return err; -+ -+ /* retrieve response parameters */ -+ DPSW_RSP_ACL_ADD(cmd, *acl_id); -+ -+ return 0; -+} -+ -+int dpsw_acl_remove(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t acl_id) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE, -+ cmd_flags, -+ token); -+ DPSW_CMD_ACL_REMOVE(cmd, acl_id); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+void dpsw_acl_prepare_entry_cfg(const struct dpsw_acl_key *key, -+ uint8_t *entry_cfg_buf) -+{ -+ uint64_t *ext_params = (uint64_t *)entry_cfg_buf; -+ -+ DPSW_PREP_ACL_ENTRY(ext_params, key); -+} -+ -+int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t acl_id, -+ const struct dpsw_acl_entry_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD_ENTRY, -+ cmd_flags, -+ token); -+ DPSW_CMD_ACL_ADD_ENTRY(cmd, acl_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t acl_id, -+ const struct dpsw_acl_entry_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE_ENTRY, -+ cmd_flags, -+ token); -+ DPSW_CMD_ACL_REMOVE_ENTRY(cmd, acl_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_acl_add_if(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t acl_id, -+ const struct dpsw_acl_if_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare command */ -+ build_if_id_bitmap(cfg->if_id, cfg->num_ifs, &cmd, 1); -+ cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD_IF, -+ cmd_flags, -+ token); -+ DPSW_CMD_ACL_ADD_IF(cmd, acl_id, cfg); -+ -+ /* send command to mc*/ -+ return mc_send_command(mc_io, &cmd); -+} -+ -+int dpsw_acl_remove_if(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t acl_id, -+ const struct dpsw_acl_if_cfg *cfg) -+{ -+ struct mc_command cmd = { 0 }; -+ -+ /* prepare 
command */
-+	build_if_id_bitmap(cfg->if_id, cfg->num_ifs, &cmd, 1);
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE_IF,
-+					  cmd_flags,
-+					  token);
-+	DPSW_CMD_ACL_REMOVE_IF(cmd, acl_id, cfg);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpsw_acl_get_attributes(struct fsl_mc_io *mc_io,
-+			    uint32_t cmd_flags,
-+			    uint16_t token,
-+			    uint16_t acl_id,
-+			    struct dpsw_acl_attr *attr)
-+{
-+	struct mc_command cmd = { 0 };
-+	int err;
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_GET_ATTR,
-+					  cmd_flags,
-+					  token);
-+	DPSW_CMD_ACL_GET_ATTR(cmd, acl_id);
-+
-+	/* send command to mc*/
-+	err = mc_send_command(mc_io, &cmd);
-+	if (err)
-+		return err;
-+
-+	/* retrieve response parameters */
-+	DPSW_RSP_ACL_GET_ATTR(cmd, attr);
-+
-+	return 0;
-+}
-+
-+int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io,
-+				uint32_t cmd_flags,
-+				uint16_t token,
-+				struct dpsw_ctrl_if_attr *attr)
-+{
-+	struct mc_command cmd = { 0 };
-+	int err;
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_GET_ATTR,
-+					  cmd_flags,
-+					  token);
-+
-+	/* send command to mc*/
-+	err = mc_send_command(mc_io, &cmd);
-+	if (err)
-+		return err;
-+
-+	/* retrieve response parameters */
-+	DPSW_RSP_CTRL_IF_GET_ATTR(cmd, attr);
-+
-+	return 0;
-+}
-+
-+int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io,
-+			   uint32_t cmd_flags,
-+			   uint16_t token,
-+			   const struct dpsw_ctrl_if_pools_cfg *pools)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_SET_POOLS,
-+					  cmd_flags,
-+					  token);
-+	DPSW_CMD_CTRL_IF_SET_POOLS(cmd, pools);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io,
-+			uint32_t cmd_flags,
-+			uint16_t token)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_ENABLE,
-+					  cmd_flags,
-+					  token);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-+
-+/**
-+ * dpsw_ctrl_if_disable() - Disable the control interface
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPSW object
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io,
-+			 uint32_t cmd_flags,
-+			 uint16_t token)
-+{
-+	struct mc_command cmd = { 0 };
-+
-+	/* prepare command */
-+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_DISABLE,
-+					  cmd_flags,
-+					  token);
-+
-+	/* send command to mc*/
-+	return mc_send_command(mc_io, &cmd);
-+}
-diff --git a/drivers/net/dpaa2/mc/fsl_dpaiop.h b/drivers/net/dpaa2/mc/fsl_dpaiop.h
-new file mode 100644
-index 0000000..b039b2a
---- /dev/null
-+++ b/drivers/net/dpaa2/mc/fsl_dpaiop.h
-@@ -0,0 +1,494 @@
-+/* Copyright 2013-2015 Freescale Semiconductor Inc.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions are met:
-+ * * Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * * Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPAIOP_H -+#define __FSL_DPAIOP_H -+ -+struct fsl_mc_io; -+ -+/* Data Path AIOP API -+ * Contains initialization APIs and runtime control APIs for DPAIOP -+ */ -+ -+/** -+ * dpaiop_open() - Open a control session for the specified object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpaiop_id: DPAIOP unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpaiop_create function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpaiop_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpaiop_id, -+ uint16_t *token); -+ -+/** -+ * dpaiop_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPAIOP object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpaiop_close(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token); -+ -+/** -+ * struct dpaiop_cfg - Structure representing DPAIOP configuration -+ * @aiop_id: AIOP ID -+ * @aiop_container_id: AIOP container ID -+ */ -+struct dpaiop_cfg { -+ int aiop_id; -+ int aiop_container_id; -+}; -+ -+/** -+ * dpaiop_create() - Create the DPAIOP object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPAIOP object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. 
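As the surrounding comments explain, the entry point that hands out the token depends on how the object came to exist: DPL-declared objects are opened, programmatically created ones get their token from dpaiop_create(). A sketch of the open path, assuming fsl_dpaiop.h is included and mc_io is an initialized MC portal:

        static int query_aiop(struct fsl_mc_io *mc_io, int dpaiop_id)
        {
                struct dpaiop_attr attr;
                uint16_t token;
                int err = dpaiop_open(mc_io, 0, dpaiop_id, &token);

                if (err)
                        return err;
                err = dpaiop_get_attributes(mc_io, 0, token, &attr);
                /* attr.version.major/minor identify the DPAIOP revision */
                dpaiop_close(mc_io, 0, token);
                return err;
        }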
-+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. For objects that are created using the -+ * DPL file, call dpaiop_open function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpaiop_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpaiop_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpaiop_destroy() - Destroy the DPAIOP object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPAIOP object -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpaiop_destroy(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token); -+ -+/** -+ * dpaiop_reset() - Reset the DPAIOP, returns the object to initial state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPAIOP object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpaiop_reset(struct fsl_mc_io *mc_io, uint32_t cmd_flags, uint16_t token); -+ -+/** -+ * struct dpaiop_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpaiop_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpaiop_set_irq() - Set IRQ information for the DPAIOP to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPAIOP object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpaiop_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpaiop_irq_cfg *irq_cfg); -+ -+/** -+ * dpaiop_get_irq() - Get IRQ information from the DPAIOP. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPAIOP object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpaiop_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpaiop_irq_cfg *irq_cfg); -+ -+/** -+ * dpaiop_set_irq_enable() - Set overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPAIOP object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable control's the -+ * overall interrupt state. if the interrupt is disabled no causes will cause -+ * an interrupt. -+ * -+ * Return: '0' on Success; Error code otherwise. 
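The per-cause mask and the overall enable described above are independent controls, and both must be set before any cause can raise an interrupt. A short sketch under the same assumptions as the previous example (token from dpaiop_open(), irq_index 0):

        static int arm_dpaiop_irq(struct fsl_mc_io *mc_io, uint16_t token)
        {
                int err;

                /* unmask all 32 possible causes, then enable the line itself */
                err = dpaiop_set_irq_mask(mc_io, 0, token, 0, 0xffffffff);
                if (err)
                        return err;
                return dpaiop_set_irq_enable(mc_io, 0, token, 0, 1);
        }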
-+ */ -+int dpaiop_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpaiop_get_irq_enable() - Get overall interrupt state -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPAIOP object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpaiop_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpaiop_set_irq_mask() - Set interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPAIOP object -+ * @irq_index: The interrupt index to configure -+ * @mask: Event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpaiop_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpaiop_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPAIOP object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpaiop_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpaiop_get_irq_status() - Get the current status of any pending interrupts. -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPAIOP object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpaiop_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpaiop_clear_irq_status() - Clear a pending interrupt's status -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPAIOP object -+ * @irq_index: The interrupt index to configure -+ * @status: Bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. 
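The write-1-to-clear convention documented above pairs naturally with dpaiop_get_irq_status(): read the pending causes, handle them, then write the same bits back to acknowledge. A sketch:

        static int ack_dpaiop_irq(struct fsl_mc_io *mc_io, uint16_t token)
        {
                uint32_t status = 0;
                int err = dpaiop_get_irq_status(mc_io, 0, token, 0, &status);

                if (err || !status)
                        return err;
                /* ... dispatch on the set cause bits here ... */
                return dpaiop_clear_irq_status(mc_io, 0, token, 0, status);
        }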
-+ */
-+int dpaiop_clear_irq_status(struct fsl_mc_io *mc_io,
-+			    uint32_t cmd_flags,
-+			    uint16_t token,
-+			    uint8_t irq_index,
-+			    uint32_t status);
-+
-+/**
-+ * struct dpaiop_attr - Structure representing DPAIOP attributes
-+ * @id: AIOP ID
-+ * @version: DPAIOP version
-+ */
-+struct dpaiop_attr {
-+	int id;
-+	/**
-+	 * struct version - Structure representing DPAIOP version
-+	 * @major: DPAIOP major version
-+	 * @minor: DPAIOP minor version
-+	 */
-+	struct {
-+		uint16_t major;
-+		uint16_t minor;
-+	} version;
-+};
-+
-+/**
-+ * dpaiop_get_attributes() - Retrieve DPAIOP attributes.
-+ *
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPAIOP object
-+ * @attr: Returned object's attributes
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpaiop_get_attributes(struct fsl_mc_io *mc_io,
-+			  uint32_t cmd_flags,
-+			  uint16_t token,
-+			  struct dpaiop_attr *attr);
-+
-+/**
-+ * struct dpaiop_load_cfg - AIOP load configuration
-+ * @options: AIOP load options
-+ * @img_iova: I/O virtual address of AIOP ELF image
-+ * @img_size: Size of AIOP ELF image in memory (in bytes)
-+ */
-+struct dpaiop_load_cfg {
-+	uint64_t options;
-+	uint64_t img_iova;
-+	uint32_t img_size;
-+};
-+
-+/**
-+ * dpaiop_load() - Loads an image to AIOP
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPAIOP object
-+ * @cfg: AIOP load configurations
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpaiop_load(struct fsl_mc_io *mc_io,
-+		uint32_t cmd_flags,
-+		uint16_t token,
-+		struct dpaiop_load_cfg *cfg);
-+
-+#define DPAIOP_RUN_OPT_DEBUG	0x0000000000000001ULL
-+
-+/**
-+ * struct dpaiop_run_cfg - AIOP run configuration
-+ * @cores_mask: Mask of AIOP cores to run (core 0 in most significant bit)
-+ * @options: Execution options (currently only DPAIOP_RUN_OPT_DEBUG is defined)
-+ * @args_iova: I/O virtual address of AIOP arguments
-+ * @args_size: Size of AIOP arguments in memory (in bytes)
-+ */
-+struct dpaiop_run_cfg {
-+	uint64_t cores_mask;
-+	uint64_t options;
-+	uint64_t args_iova;
-+	uint32_t args_size;
-+};
-+
-+/**
-+ * dpaiop_run() - Starts AIOP execution
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPAIOP object
-+ * @cfg: AIOP run configuration
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpaiop_run(struct fsl_mc_io *mc_io,
-+	       uint32_t cmd_flags,
-+	       uint16_t token,
-+	       const struct dpaiop_run_cfg *cfg);
-+
-+/**
-+ * struct dpaiop_sl_version - AIOP SL (Service Layer) version
-+ * @major: AIOP SL major version number
-+ * @minor: AIOP SL minor version number
-+ * @revision: AIOP SL revision number
-+ */
-+struct dpaiop_sl_version {
-+	uint32_t major;
-+	uint32_t minor;
-+	uint32_t revision;
-+};
-+
-+/**
-+ * dpaiop_get_sl_version() - Get AIOP SL (Service Layer) version
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPAIOP object
-+ * @version: AIOP SL version number
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpaiop_get_sl_version(struct fsl_mc_io *mc_io,
-+			  uint32_t cmd_flags,
-+			  uint16_t token,
-+			  struct dpaiop_sl_version *version);
-+
-+/**
-+ * AIOP states
-+ *
-+ * AIOP internal states, can be retrieved by calling dpaiop_get_state() routine
-+ */
-+
-+/**
-+ * AIOP reset successfully completed.
-+ */
-+#define DPAIOP_STATE_RESET_DONE		0x00000000
-+/**
-+ * AIOP reset is ongoing.
-+ */
-+#define DPAIOP_STATE_RESET_ONGOING	0x00000001
-+
-+/**
-+ * AIOP image loading successfully completed.
-+ */
-+#define DPAIOP_STATE_LOAD_DONE		0x00000002
-+/**
-+ * AIOP image loading is ongoing.
-+ */
-+#define DPAIOP_STATE_LOAD_ONGOING	0x00000004
-+/**
-+ * AIOP image loading completed with error.
-+ */
-+#define DPAIOP_STATE_LOAD_ERROR		0x00000008
-+
-+/**
-+ * Boot process of AIOP cores is ongoing.
-+ */
-+#define DPAIOP_STATE_BOOT_ONGOING	0x00000010
-+/**
-+ * Boot process of AIOP cores completed with an error.
-+ */
-+#define DPAIOP_STATE_BOOT_ERROR		0x00000020
-+/**
-+ * AIOP cores are functional and running.
-+ */
-+#define DPAIOP_STATE_RUNNING		0x00000040
-+
-+/**
-+ * dpaiop_get_state() - Get AIOP state
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPAIOP object
-+ * @state: AIOP state
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpaiop_get_state(struct fsl_mc_io *mc_io,
-+		     uint32_t cmd_flags,
-+		     uint16_t token,
-+		     uint32_t *state);
-+
-+/**
-+ * dpaiop_set_time_of_day() - Set AIOP internal time-of-day
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPAIOP object
-+ * @time_of_day: Current number of milliseconds since the Epoch
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpaiop_set_time_of_day(struct fsl_mc_io *mc_io,
-+			   uint32_t cmd_flags,
-+			   uint16_t token,
-+			   uint64_t time_of_day);
-+
-+/**
-+ * dpaiop_get_time_of_day() - Get AIOP internal time-of-day
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPAIOP object
-+ * @time_of_day: Current number of milliseconds since the Epoch
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpaiop_get_time_of_day(struct fsl_mc_io *mc_io,
-+			   uint32_t cmd_flags,
-+			   uint16_t token,
-+			   uint64_t *time_of_day);
-+
-+#endif /* __FSL_DPAIOP_H */
-diff --git a/drivers/net/dpaa2/mc/fsl_dpaiop_cmd.h b/drivers/net/dpaa2/mc/fsl_dpaiop_cmd.h
-new file mode 100644
-index 0000000..5b77bb8
---- /dev/null
-+++ b/drivers/net/dpaa2/mc/fsl_dpaiop_cmd.h
-@@ -0,0 +1,190 @@
-+/* Copyright 2013-2015 Freescale Semiconductor Inc.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions are met:
-+ * * Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * * Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * * Neither the name of the above-listed copyright holders nor the
-+ * names of any contributors may be used to endorse or promote products
-+ * derived from this software without specific prior written permission.
-+ *
-+ *
-+ * ALTERNATIVELY, this software may be distributed under the terms of the
-+ * GNU General Public License ("GPL") as published by the Free Software
-+ * Foundation, either version 2 of that License or (at your option) any
-+ * later version.
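The load/run/state triple declared above composes into the usual AIOP boot sequence: load the ELF image, start the cores, then poll dpaiop_get_state() until DPAIOP_STATE_RUNNING or one of the error bits appears. A hedged sketch (busy-wait without a timeout, hypothetical image parameters, -1 standing in for a proper error code); note cores_mask puts core 0 in the most significant bit, per the structure comment:

        static int boot_aiop(struct fsl_mc_io *mc_io, uint16_t token,
                             uint64_t img_iova, uint32_t img_size)
        {
                struct dpaiop_load_cfg load = {
                        .img_iova = img_iova, .img_size = img_size,
                };
                struct dpaiop_run_cfg run = { .cores_mask = 1ULL << 63 }; /* core 0 */
                uint32_t state = 0;
                int err;

                err = dpaiop_load(mc_io, 0, token, &load);
                if (!err)
                        err = dpaiop_run(mc_io, 0, token, &run);

                while (!err && !(state & (DPAIOP_STATE_RUNNING |
                                          DPAIOP_STATE_BOOT_ERROR |
                                          DPAIOP_STATE_LOAD_ERROR)))
                        err = dpaiop_get_state(mc_io, 0, token, &state);

                return err ? err : ((state & DPAIOP_STATE_RUNNING) ? 0 : -1);
        }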
-+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef _FSL_DPAIOP_CMD_H -+#define _FSL_DPAIOP_CMD_H -+ -+/* DPAIOP Version */ -+#define DPAIOP_VER_MAJOR 1 -+#define DPAIOP_VER_MINOR 2 -+ -+/* Command IDs */ -+#define DPAIOP_CMDID_CLOSE 0x800 -+#define DPAIOP_CMDID_OPEN 0x80a -+#define DPAIOP_CMDID_CREATE 0x90a -+#define DPAIOP_CMDID_DESTROY 0x900 -+ -+#define DPAIOP_CMDID_GET_ATTR 0x004 -+#define DPAIOP_CMDID_RESET 0x005 -+ -+#define DPAIOP_CMDID_SET_IRQ 0x010 -+#define DPAIOP_CMDID_GET_IRQ 0x011 -+#define DPAIOP_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPAIOP_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPAIOP_CMDID_SET_IRQ_MASK 0x014 -+#define DPAIOP_CMDID_GET_IRQ_MASK 0x015 -+#define DPAIOP_CMDID_GET_IRQ_STATUS 0x016 -+#define DPAIOP_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPAIOP_CMDID_LOAD 0x280 -+#define DPAIOP_CMDID_RUN 0x281 -+#define DPAIOP_CMDID_GET_SL_VERSION 0x282 -+#define DPAIOP_CMDID_GET_STATE 0x283 -+#define DPAIOP_CMDID_SET_TIME_OF_DAY 0x284 -+#define DPAIOP_CMDID_GET_TIME_OF_DAY 0x285 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_OPEN(cmd, dpaiop_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpaiop_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_CREATE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, cfg->aiop_id);\ -+ MC_CMD_OP(cmd, 0, 32, 32, int, cfg->aiop_container_id);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_SET_IRQ_ENABLE(cmd, irq_index, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, en); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_RSP_GET_IRQ_ENABLE(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, en) -+ -+/* cmd, param, offset, width, type, arg_name 
*/ -+#define DPAIOP_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_RSP_GET_ATTRIBUTES(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_LOAD(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, cfg->img_size); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->img_iova); \ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->options); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_RUN(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, cfg->args_size); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->cores_mask); \ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->options); \ -+ MC_CMD_OP(cmd, 3, 0, 64, uint64_t, cfg->args_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_RSP_GET_SL_VERSION(cmd, version) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, version->major);\ -+ MC_RSP_OP(cmd, 0, 32, 32, uint32_t, version->minor);\ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, version->revision);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_RSP_GET_STATE(cmd, state) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, state) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_CMD_SET_TIME_OF_DAY(cmd, time_of_day) \ -+ MC_CMD_OP(cmd, 0, 0, 64, uint64_t, time_of_day) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPAIOP_RSP_GET_TIME_OF_DAY(cmd, time_of_day) \ -+ MC_RSP_OP(cmd, 0, 0, 64, uint64_t, time_of_day) -+ -+#endif /* _FSL_DPAIOP_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpbp.h b/drivers/net/dpaa2/mc/fsl_dpbp.h -new file mode 100644 -index 0000000..9856bb8 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpbp.h -@@ -0,0 +1,438 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. 
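The DPAIOP_CMD_*/DPAIOP_RSP_* macros above are thin wrappers over one idea: write or read a value of a given width at a given bit offset within one of the 64-bit command parameters. A stand-alone sketch of that encode/decode pair (hypothetical helper names; the real MC_CMD_OP/MC_RSP_OP macros also take the parameter index and a type argument):

        #include <assert.h>
        #include <stdint.h>

        /* put val into bits [off, off + width) of *param; width < 64 */
        static void enc_field(uint64_t *param, int off, int width, uint64_t val)
        {
                uint64_t mask = ((1ULL << width) - 1) << off;

                *param = (*param & ~mask) | ((val << off) & mask);
        }

        static uint64_t dec_field(uint64_t param, int off, int width)
        {
                return (param >> off) & ((1ULL << width) - 1);
        }

        int main(void)
        {
                uint64_t p = 0;

                enc_field(&p, 32, 8, 5);        /* e.g. irq_index at offset 32 */
                assert(dec_field(p, 32, 8) == 5);
                return 0;
        }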
-+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPBP_H -+#define __FSL_DPBP_H -+ -+/* Data Path Buffer Pool API -+ * Contains initialization APIs and runtime control APIs for DPBP -+ */ -+ -+struct fsl_mc_io; -+ -+/** -+ * dpbp_open() - Open a control session for the specified object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpbp_id: DPBP unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpbp_create function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpbp_id, -+ uint16_t *token); -+ -+/** -+ * dpbp_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpbp_cfg - Structure representing DPBP configuration -+ * @options: place holder -+ */ -+struct dpbp_cfg { -+ uint32_t options; -+}; -+ -+/** -+ * dpbp_create() - Create the DPBP object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPBP object, allocate required resources and -+ * perform required initialization. 
-+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. For objects that are created using the -+ * DPL file, call dpbp_open function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpbp_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpbp_destroy() - Destroy the DPBP object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpbp_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpbp_enable() - Enable the DPBP. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpbp_disable() - Disable the DPBP. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpbp_is_enabled() - Check if the DPBP is enabled. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @en: Returns '1' if object is enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpbp_reset() - Reset the DPBP, returns the object to initial state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpbp_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpbp_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpbp_set_irq() - Set IRQ information for the DPBP to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpbp_irq_cfg *irq_cfg); -+ -+/** -+ * dpbp_get_irq() - Get IRQ information from the DPBP. 
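The open/enable/close calls documented above form the usual DPBP control-session lifecycle. A minimal sketch of that flow, assuming an already-initialized MC portal `mc_io` and a DPBP declared in the DPL with ID 0 (both placeholders), and passing 0 for `cmd_flags` (i.e. no MC_CMD_FLAG_ options set):

#include <stdint.h>
#include "fsl_dpbp.h"

static int dpbp_bring_up(struct fsl_mc_io *mc_io)
{
	uint16_t token;
	int en = 0;
	int err;

	/* Open a control session; the returned token authenticates
	 * every subsequent command for this object. */
	err = dpbp_open(mc_io, 0 /* cmd_flags */, 0 /* dpbp_id */, &token);
	if (err)
		return err;

	err = dpbp_enable(mc_io, 0, token);
	if (!err)
		err = dpbp_is_enabled(mc_io, 0, token, &en); /* en == 1 now */

	/* Close the session when done; the token becomes invalid. */
	dpbp_close(mc_io, 0, token);
	return err;
}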
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpbp_irq_cfg *irq_cfg); -+ -+/** -+ * dpbp_set_irq_enable() - Set overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable controls the -+ * overall interrupt state. If the interrupt is disabled, no cause will raise -+ * an interrupt. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpbp_get_irq_enable() - Get overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpbp_set_irq_mask() - Set interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @irq_index: The interrupt index to configure -+ * @mask: Event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpbp_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpbp_get_irq_status() - Get the current status of any pending interrupts.
-+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupt status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpbp_clear_irq_status() - Clear a pending interrupt's status -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @irq_index: The interrupt index to configure -+ * @status: Bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+ -+/** -+ * struct dpbp_attr - Structure representing DPBP attributes -+ * @id: DPBP object ID -+ * @version: DPBP version -+ * @bpid: Hardware buffer pool ID; should be used as an argument in -+ * acquire/release operations on buffers -+ */ -+struct dpbp_attr { -+ int id; -+ /** -+ * struct version - Structure representing DPBP version -+ * @major: DPBP major version -+ * @minor: DPBP minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+ uint16_t bpid; -+}; -+ -+/** -+ * dpbp_get_attributes() - Retrieve DPBP attributes. -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @attr: Returned object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpbp_attr *attr); -+ -+/** -+ * DPBP notification options -+ */ -+ -+/** -+ * BPSCN write will attempt to allocate into a cache (coherent write) -+ */ -+#define DPBP_NOTIF_OPT_COHERENT_WRITE 0x00000001 -+ -+/** -+ * struct dpbp_notification_cfg - Structure representing DPBP notifications -+ * towards software -+ * @depletion_entry: below this threshold the pool is "depleted"; -+ * set it to '0' to disable it -+ * @depletion_exit: at or above this threshold the pool exits its -+ * "depleted" state -+ * @surplus_entry: above this threshold the pool is in "surplus" state; -+ * set it to '0' to disable it -+ * @surplus_exit: at or below this threshold the pool exits its -+ * "surplus" state -+ * @message_iova: MUST be given if either 'depletion_entry' or 'surplus_entry' -+ * is not '0' (enable); I/O virtual address (must be in DMA-able memory), -+ * must be 16B aligned.
-+ * @message_ctx: The context that will be part of the BPSCN message and will -+ * be written to 'message_iova' -+ * @options: Mask of available options; use 'DPBP_NOTIF_OPT_' values -+ */ -+struct dpbp_notification_cfg { -+ uint32_t depletion_entry; -+ uint32_t depletion_exit; -+ uint32_t surplus_entry; -+ uint32_t surplus_exit; -+ uint64_t message_iova; -+ uint64_t message_ctx; -+ uint16_t options; -+}; -+ -+/** -+ * dpbp_set_notifications() - Set notifications towards software -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @cfg: notifications configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_set_notifications(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpbp_notification_cfg *cfg); -+ -+/** -+ * dpbp_get_notifications() - Get the notifications configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPBP object -+ * @cfg: notifications configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpbp_get_notifications(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpbp_notification_cfg *cfg); -+ -+#endif /* __FSL_DPBP_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpbp_cmd.h b/drivers/net/dpaa2/mc/fsl_dpbp_cmd.h -new file mode 100644 -index 0000000..71ad96a ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpbp_cmd.h -@@ -0,0 +1,172 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
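To see how the depletion/surplus thresholds above fit together, here is a hedged sketch that requests a BPSCN message once the pool drops below 64 free buffers and again once it recovers to 128 or more; the threshold values, the 16B-aligned DMA-able IOVA, and the context value are all illustrative placeholders:

#include <stdint.h>
#include <string.h>
#include "fsl_dpbp.h"

static int dpbp_watch_depletion(struct fsl_mc_io *mc_io, uint16_t token,
				uint64_t msg_iova /* 16B-aligned, DMA-able */)
{
	struct dpbp_notification_cfg cfg;

	memset(&cfg, 0, sizeof(cfg));
	cfg.depletion_entry = 64;  /* pool is "depleted" below this */
	cfg.depletion_exit = 128;  /* leaves "depleted" at or above this */
	cfg.surplus_entry = 0;     /* 0 disables surplus notifications */
	cfg.surplus_exit = 0;
	cfg.message_iova = msg_iova;  /* required once an entry threshold is set */
	cfg.message_ctx = 0x1234;     /* echoed back in each BPSCN message */
	cfg.options = DPBP_NOTIF_OPT_COHERENT_WRITE;

	return dpbp_set_notifications(mc_io, 0 /* cmd_flags */, token, &cfg);
}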
-+ */ -+#ifndef _FSL_DPBP_CMD_H -+#define _FSL_DPBP_CMD_H -+ -+/* DPBP Version */ -+#define DPBP_VER_MAJOR 2 -+#define DPBP_VER_MINOR 2 -+ -+/* Command IDs */ -+#define DPBP_CMDID_CLOSE 0x800 -+#define DPBP_CMDID_OPEN 0x804 -+#define DPBP_CMDID_CREATE 0x904 -+#define DPBP_CMDID_DESTROY 0x900 -+ -+#define DPBP_CMDID_ENABLE 0x002 -+#define DPBP_CMDID_DISABLE 0x003 -+#define DPBP_CMDID_GET_ATTR 0x004 -+#define DPBP_CMDID_RESET 0x005 -+#define DPBP_CMDID_IS_ENABLED 0x006 -+ -+#define DPBP_CMDID_SET_IRQ 0x010 -+#define DPBP_CMDID_GET_IRQ 0x011 -+#define DPBP_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPBP_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPBP_CMDID_SET_IRQ_MASK 0x014 -+#define DPBP_CMDID_GET_IRQ_MASK 0x015 -+#define DPBP_CMDID_GET_IRQ_STATUS 0x016 -+#define DPBP_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPBP_CMDID_SET_NOTIFICATIONS 0x01b0 -+#define DPBP_CMDID_GET_NOTIFICATIONS 0x01b1 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_CMD_OPEN(cmd, dpbp_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpbp_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_RSP_IS_ENABLED(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_CMD_SET_IRQ_ENABLE(cmd, irq_index, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, en); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_RSP_GET_IRQ_ENABLE(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, 
uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_RSP_GET_ATTRIBUTES(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 16, 16, uint16_t, attr->bpid); \ -+ MC_RSP_OP(cmd, 0, 32, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_CMD_SET_NOTIFICATIONS(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, cfg->depletion_entry); \ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, cfg->depletion_exit);\ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->surplus_entry);\ -+ MC_CMD_OP(cmd, 1, 32, 32, uint32_t, cfg->surplus_exit);\ -+ MC_CMD_OP(cmd, 2, 0, 16, uint16_t, cfg->options);\ -+ MC_CMD_OP(cmd, 3, 0, 64, uint64_t, cfg->message_ctx);\ -+ MC_CMD_OP(cmd, 4, 0, 64, uint64_t, cfg->message_iova);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPBP_CMD_GET_NOTIFICATIONS(cmd, cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, cfg->depletion_entry); \ -+ MC_RSP_OP(cmd, 0, 32, 32, uint32_t, cfg->depletion_exit);\ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, cfg->surplus_entry);\ -+ MC_RSP_OP(cmd, 1, 32, 32, uint32_t, cfg->surplus_exit);\ -+ MC_RSP_OP(cmd, 2, 0, 16, uint16_t, cfg->options);\ -+ MC_RSP_OP(cmd, 3, 0, 64, uint64_t, cfg->message_ctx);\ -+ MC_RSP_OP(cmd, 4, 0, 64, uint64_t, cfg->message_iova);\ -+} while (0) -+#endif /* _FSL_DPBP_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpci.h b/drivers/net/dpaa2/mc/fsl_dpci.h -new file mode 100644 -index 0000000..d885935 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpci.h -@@ -0,0 +1,594 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPCI_H -+#define __FSL_DPCI_H -+ -+/* Data Path Communication Interface API -+ * Contains initialization APIs and runtime control APIs for DPCI -+ */ -+ -+struct fsl_mc_io; -+ -+/** General DPCI macros */ -+ -+/** -+ * Maximum number of Tx/Rx priorities per DPCI object -+ */ -+#define DPCI_PRIO_NUM 2 -+ -+/** -+ * Indicates an invalid frame queue -+ */ -+#define DPCI_FQID_NOT_VALID (uint32_t)(-1) -+ -+/** -+ * All queues considered; see dpci_set_rx_queue() -+ */ -+#define DPCI_ALL_QUEUES (uint8_t)(-1) -+ -+/** -+ * dpci_open() - Open a control session for the specified object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpci_id: DPCI unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpci_create() function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpci_id, -+ uint16_t *token); -+ -+/** -+ * dpci_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpci_cfg - Structure representing DPCI configuration -+ * @num_of_priorities: Number of receive priorities (queues) for the DPCI; -+ * note, that the number of transmit priorities (queues) -+ * is determined by the number of receive priorities of -+ * the peer DPCI object -+ */ -+struct dpci_cfg { -+ uint8_t num_of_priorities; -+}; -+ -+/** -+ * dpci_create() - Create the DPCI object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPCI object, allocate required resources and perform required -+ * initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. 
For objects that are created using the -+ * DPL file, call dpci_open() function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpci_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpci_destroy() - Destroy the DPCI object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpci_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpci_enable() - Enable the DPCI, allow sending and receiving frames. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpci_disable() - Disable the DPCI, stop sending and receiving frames. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpci_is_enabled() - Check if the DPCI is enabled. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @en: Returns '1' if object is enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpci_reset() - Reset the DPCI, returns the object to initial state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** DPCI IRQ Index and Events */ -+ -+/** -+ * IRQ index -+ */ -+#define DPCI_IRQ_INDEX 0 -+ -+/** -+ * IRQ event - indicates a change in link state -+ */ -+#define DPCI_IRQ_EVENT_LINK_CHANGED 0x00000001 -+/** -+ * IRQ event - indicates a connection event -+ */ -+#define DPCI_IRQ_EVENT_CONNECTED 0x00000002 -+/** -+ * IRQ event - indicates a disconnection event -+ */ -+#define DPCI_IRQ_EVENT_DISCONNECTED 0x00000004 -+ -+/** -+ * struct dpci_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpci_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpci_set_irq() - Set IRQ information for the DPCI to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. 
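As a rough illustration of the creation path just described, a sketch that creates a DPCI with the maximum two Rx priorities and enables it; it assumes an initialized MC portal `mc_io` and passes 0 for `cmd_flags` (no MC_CMD_FLAG_ options), with error handling abbreviated:

#include <stdint.h>
#include "fsl_dpci.h"

static int dpci_create_and_enable(struct fsl_mc_io *mc_io, uint16_t *token)
{
	struct dpci_cfg cfg = {
		/* Rx priorities; Tx priorities follow the peer's Rx count. */
		.num_of_priorities = DPCI_PRIO_NUM,
	};
	int err;

	/* dpci_create() already returns an authentication token, so no
	 * separate dpci_open() is needed for objects created this way. */
	err = dpci_create(mc_io, 0 /* cmd_flags */, &cfg, token);
	if (err)
		return err;

	return dpci_enable(mc_io, 0, *token);
}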
-+ */ -+int dpci_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpci_irq_cfg *irq_cfg); -+ -+/** -+ * dpci_get_irq() - Get IRQ information from the DPCI. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpci_irq_cfg *irq_cfg); -+ -+/** -+ * dpci_set_irq_enable() - Set overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable controls the -+ * overall interrupt state. If the interrupt is disabled, no cause will raise -+ * an interrupt. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpci_get_irq_enable() - Get overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpci_set_irq_mask() - Set interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @irq_index: The interrupt index to configure -+ * @mask: Event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpci_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpci_get_irq_status() - Get the current status of any pending interrupts.
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupt status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpci_clear_irq_status() - Clear a pending interrupt's status -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @irq_index: The interrupt index to configure -+ * @status: Bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+ -+/** -+ * struct dpci_attr - Structure representing DPCI attributes -+ * @id: DPCI object ID -+ * @version: DPCI version -+ * @num_of_priorities: Number of receive priorities -+ */ -+struct dpci_attr { -+ int id; -+ /** -+ * struct version - Structure representing DPCI version -+ * @major: DPCI major version -+ * @minor: DPCI minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+ uint8_t num_of_priorities; -+}; -+ -+/** -+ * dpci_get_attributes() - Retrieve DPCI attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @attr: Returned object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpci_attr *attr); -+ -+/** -+ * struct dpci_peer_attr - Structure representing the peer DPCI attributes -+ * @peer_id: DPCI peer ID; if no peer is connected returns (-1) -+ * @num_of_priorities: The peer's number of receive priorities; determines the -+ * number of transmit priorities for the local DPCI object -+ */ -+struct dpci_peer_attr { -+ int peer_id; -+ uint8_t num_of_priorities; -+}; -+ -+/** -+ * dpci_get_peer_attributes() - Retrieve peer DPCI attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @attr: Returned peer attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_get_peer_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpci_peer_attr *attr); -+ -+/** -+ * dpci_get_link_state() - Retrieve the DPCI link state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @up: Returned link state; returns '1' if link is up, '0' otherwise -+ * -+ * A DPCI can be connected to another DPCI; together they -+ * create a 'link'. In order to use the DPCI Tx and Rx queues, -+ * both objects must be enabled. -+ * -+ * Return: '0' on Success; Error code otherwise.
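Since both peers must be enabled before the link carries frames, a typical caller checks for a peer and then polls the link state after bring-up. A minimal sketch, again with `cmd_flags` of 0 as an assumption:

#include <stdint.h>
#include "fsl_dpci.h"

/* Returns 1 when the DPCI link to the peer is up, 0 when it is down
 * or no peer is connected, or a negative error code from the MC. */
static int dpci_link_is_up(struct fsl_mc_io *mc_io, uint16_t token)
{
	struct dpci_peer_attr peer;
	int up = 0;
	int err;

	err = dpci_get_peer_attributes(mc_io, 0 /* cmd_flags */, token, &peer);
	if (err)
		return err;
	if (peer.peer_id == -1)
		return 0; /* no peer connected yet */

	err = dpci_get_link_state(mc_io, 0, token, &up);
	return err ? err : up;
}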
-+ */ -+int dpci_get_link_state(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *up); -+ -+/** -+ * enum dpci_dest - DPCI destination types -+ * @DPCI_DEST_NONE: Unassigned destination; The queue is set in parked mode -+ * and does not generate FQDAN notifications; user is -+ * expected to dequeue from the queue based on polling or -+ * other user-defined method -+ * @DPCI_DEST_DPIO: The queue is set in schedule mode and generates FQDAN -+ * notifications to the specified DPIO; user is expected -+ * to dequeue from the queue only after notification is -+ * received -+ * @DPCI_DEST_DPCON: The queue is set in schedule mode and does not generate -+ * FQDAN notifications, but is connected to the specified -+ * DPCON object; -+ * user is expected to dequeue from the DPCON channel -+ */ -+enum dpci_dest { -+ DPCI_DEST_NONE = 0, -+ DPCI_DEST_DPIO = 1, -+ DPCI_DEST_DPCON = 2 -+}; -+ -+/** -+ * struct dpci_dest_cfg - Structure representing DPCI destination configuration -+ * @dest_type: Destination type -+ * @dest_id: Either DPIO ID or DPCON ID, depending on the destination type -+ * @priority: Priority selection within the DPIO or DPCON channel; valid -+ * values are 0-1 or 0-7, depending on the number of priorities -+ * in that channel; not relevant for 'DPCI_DEST_NONE' option -+ */ -+struct dpci_dest_cfg { -+ enum dpci_dest dest_type; -+ int dest_id; -+ uint8_t priority; -+}; -+ -+/** DPCI queue modification options */ -+ -+/** -+ * Select to modify the user's context associated with the queue -+ */ -+#define DPCI_QUEUE_OPT_USER_CTX 0x00000001 -+ -+/** -+ * Select to modify the queue's destination -+ */ -+#define DPCI_QUEUE_OPT_DEST 0x00000002 -+ -+/** -+ * struct dpci_rx_queue_cfg - Structure representing RX queue configuration -+ * @options: Flags representing the suggested modifications to the queue; -+ * Use any combination of 'DPCI_QUEUE_OPT_' flags -+ * @user_ctx: User context value provided in the frame descriptor of each -+ * dequeued frame; -+ * valid only if 'DPCI_QUEUE_OPT_USER_CTX' is contained in -+ * 'options' -+ * @dest_cfg: Queue destination parameters; -+ * valid only if 'DPCI_QUEUE_OPT_DEST' is contained in 'options' -+ */ -+struct dpci_rx_queue_cfg { -+ uint32_t options; -+ uint64_t user_ctx; -+ struct dpci_dest_cfg dest_cfg; -+}; -+ -+/** -+ * dpci_set_rx_queue() - Set Rx queue configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @priority: Select the queue relative to number of -+ * priorities configured at DPCI creation; use -+ * DPCI_ALL_QUEUES to configure all Rx queues -+ * identically. -+ * @cfg: Rx queue configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_set_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, -+ const struct dpci_rx_queue_cfg *cfg); -+ -+/** -+ * struct dpci_rx_queue_attr - Structure representing Rx queue attributes -+ * @user_ctx: User context value provided in the frame descriptor of each -+ * dequeued frame -+ * @dest_cfg: Queue destination configuration -+ * @fqid: Virtual FQID value to be used for dequeue operations -+ */ -+struct dpci_rx_queue_attr { -+ uint64_t user_ctx; -+ struct dpci_dest_cfg dest_cfg; -+ uint32_t fqid; -+}; -+ -+/** -+ * dpci_get_rx_queue() - Retrieve Rx queue attributes. 
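Putting the destination types and option flags above together, a hedged sketch that steers every Rx queue of a DPCI to one DPIO notification channel; the DPIO ID and user context are illustrative placeholders:

#include <stdint.h>
#include "fsl_dpci.h"

static int dpci_rx_to_dpio(struct fsl_mc_io *mc_io, uint16_t token,
			   int dpio_id, uint64_t user_ctx)
{
	struct dpci_rx_queue_cfg cfg = {
		/* The two fields below are only applied because of these flags. */
		.options = DPCI_QUEUE_OPT_USER_CTX | DPCI_QUEUE_OPT_DEST,
		.user_ctx = user_ctx, /* returned in each dequeued frame descriptor */
		.dest_cfg = {
			.dest_type = DPCI_DEST_DPIO, /* FQDANs go to the DPIO */
			.dest_id = dpio_id,
			.priority = 0, /* within the DPIO channel */
		},
	};

	/* DPCI_ALL_QUEUES applies the same settings to every Rx queue. */
	return dpci_set_rx_queue(mc_io, 0 /* cmd_flags */, token,
				 DPCI_ALL_QUEUES, &cfg);
}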
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @priority: Select the queue relative to number of -+ * priorities configured at DPCI creation -+ * @attr: Returned Rx queue attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_get_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, -+ struct dpci_rx_queue_attr *attr); -+ -+/** -+ * struct dpci_tx_queue_attr - Structure representing attributes of Tx queues -+ * @fqid: Virtual FQID to be used for sending frames to peer DPCI; -+ * returns 'DPCI_FQID_NOT_VALID' if no peer is connected or if -+ * the selected priority exceeds the number of priorities of the -+ * peer DPCI object -+ */ -+struct dpci_tx_queue_attr { -+ uint32_t fqid; -+}; -+ -+/** -+ * dpci_get_tx_queue() - Retrieve Tx queue attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCI object -+ * @priority: Select the queue relative to number of -+ * priorities of the peer DPCI object -+ * @attr: Returned Tx queue attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpci_get_tx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, -+ struct dpci_tx_queue_attr *attr); -+ -+#endif /* __FSL_DPCI_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpci_cmd.h b/drivers/net/dpaa2/mc/fsl_dpci_cmd.h -new file mode 100644 -index 0000000..f45e435 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpci_cmd.h -@@ -0,0 +1,200 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE.
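Before transmitting, the virtual FQID for a given priority is fetched and validated against DPCI_FQID_NOT_VALID, per the Tx queue attributes just described. A short sketch under the same assumptions as the earlier ones:

#include <stdint.h>
#include "fsl_dpci.h"

static int dpci_tx_fqid(struct fsl_mc_io *mc_io, uint16_t token,
			uint8_t priority, uint32_t *fqid)
{
	struct dpci_tx_queue_attr attr;
	int err;

	err = dpci_get_tx_queue(mc_io, 0 /* cmd_flags */, token,
				priority, &attr);
	if (err)
		return err;

	/* NOT_VALID means no peer, or priority exceeds the peer's range. */
	if (attr.fqid == DPCI_FQID_NOT_VALID)
		return -1;

	*fqid = attr.fqid;
	return 0;
}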
-+ */ -+#ifndef _FSL_DPCI_CMD_H -+#define _FSL_DPCI_CMD_H -+ -+/* DPCI Version */ -+#define DPCI_VER_MAJOR 2 -+#define DPCI_VER_MINOR 2 -+ -+/* Command IDs */ -+#define DPCI_CMDID_CLOSE 0x800 -+#define DPCI_CMDID_OPEN 0x807 -+#define DPCI_CMDID_CREATE 0x907 -+#define DPCI_CMDID_DESTROY 0x900 -+ -+#define DPCI_CMDID_ENABLE 0x002 -+#define DPCI_CMDID_DISABLE 0x003 -+#define DPCI_CMDID_GET_ATTR 0x004 -+#define DPCI_CMDID_RESET 0x005 -+#define DPCI_CMDID_IS_ENABLED 0x006 -+ -+#define DPCI_CMDID_SET_IRQ 0x010 -+#define DPCI_CMDID_GET_IRQ 0x011 -+#define DPCI_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPCI_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPCI_CMDID_SET_IRQ_MASK 0x014 -+#define DPCI_CMDID_GET_IRQ_MASK 0x015 -+#define DPCI_CMDID_GET_IRQ_STATUS 0x016 -+#define DPCI_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPCI_CMDID_SET_RX_QUEUE 0x0e0 -+#define DPCI_CMDID_GET_LINK_STATE 0x0e1 -+#define DPCI_CMDID_GET_PEER_ATTR 0x0e2 -+#define DPCI_CMDID_GET_RX_QUEUE 0x0e3 -+#define DPCI_CMDID_GET_TX_QUEUE 0x0e4 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_OPEN(cmd, dpci_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpci_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_CREATE(cmd, cfg) \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, cfg->num_of_priorities) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_RSP_IS_ENABLED(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_SET_IRQ_ENABLE(cmd, irq_index, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, en); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_RSP_GET_IRQ_ENABLE(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+/* cmd, param, offset, width, type, 
arg_name */ -+#define DPCI_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_RSP_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 0, 48, 8, uint8_t, attr->num_of_priorities);\ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_RSP_GET_PEER_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->peer_id);\ -+ MC_RSP_OP(cmd, 1, 0, 8, uint8_t, attr->num_of_priorities);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_RSP_GET_LINK_STATE(cmd, up) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, up) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_SET_RX_QUEUE(cmd, priority, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, cfg->dest_cfg.dest_id);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->dest_cfg.priority);\ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, priority);\ -+ MC_CMD_OP(cmd, 0, 48, 4, enum dpci_dest, cfg->dest_cfg.dest_type);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->user_ctx);\ -+ MC_CMD_OP(cmd, 2, 0, 32, uint32_t, cfg->options);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_GET_RX_QUEUE(cmd, priority) \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, priority) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_RSP_GET_RX_QUEUE(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->dest_cfg.dest_id);\ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, attr->dest_cfg.priority);\ -+ MC_RSP_OP(cmd, 0, 48, 4, enum dpci_dest, attr->dest_cfg.dest_type);\ -+ MC_RSP_OP(cmd, 1, 0, 8, uint64_t, attr->user_ctx);\ -+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, attr->fqid);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_CMD_GET_TX_QUEUE(cmd, priority) \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, priority) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCI_RSP_GET_TX_QUEUE(cmd, attr) \ -+ MC_RSP_OP(cmd, 0, 32, 32, uint32_t, attr->fqid) -+ -+#endif /* _FSL_DPCI_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpcon.h b/drivers/net/dpaa2/mc/fsl_dpcon.h -new file mode 100644 -index 0000000..2555be5 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpcon.h -@@ -0,0 +1,407 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. 
-+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPCON_H -+#define __FSL_DPCON_H -+ -+/* Data Path Concentrator API -+ * Contains initialization APIs and runtime control APIs for DPCON -+ */ -+ -+struct fsl_mc_io; -+ -+/** General DPCON macros */ -+ -+/** -+ * Use it to disable notifications; see dpcon_set_notification() -+ */ -+#define DPCON_INVALID_DPIO_ID (int)(-1) -+ -+/** -+ * dpcon_open() - Open a control session for the specified object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpcon_id: DPCON unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpcon_create() function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpcon_id, -+ uint16_t *token); -+ -+/** -+ * dpcon_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpcon_cfg - Structure representing DPCON configuration -+ * @num_priorities: Number of priorities for the DPCON channel (1-8) -+ */ -+struct dpcon_cfg { -+ uint8_t num_priorities; -+}; -+ -+/** -+ * dpcon_create() - Create the DPCON object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPCON object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. 
-+ * -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. For objects that are created using the -+ * DPL file, call dpcon_open() function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpcon_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpcon_destroy() - Destroy the DPCON object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpcon_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpcon_enable() - Enable the DPCON -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * -+ * Return: '0' on Success; Error code otherwise -+ */ -+int dpcon_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpcon_disable() - Disable the DPCON -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * -+ * Return: '0' on Success; Error code otherwise -+ */ -+int dpcon_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpcon_is_enabled() - Check if the DPCON is enabled. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * @en: Returns '1' if object is enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpcon_reset() - Reset the DPCON, returns the object to initial state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpcon_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpcon_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpcon_set_irq() - Set IRQ information for the DPCON to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpcon_irq_cfg *irq_cfg); -+ -+/** -+ * dpcon_get_irq() - Get IRQ information from the DPCON. 
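A brief sketch of the channel bring-up described above, for a DPCON already declared in the DPL; the DPCON ID is a placeholder and `cmd_flags` of 0 is an assumption:

#include <stdint.h>
#include "fsl_dpcon.h"

static int dpcon_bring_up(struct fsl_mc_io *mc_io, int dpcon_id,
			  uint16_t *token)
{
	int err;

	err = dpcon_open(mc_io, 0 /* cmd_flags */, dpcon_id, token);
	if (err)
		return err;

	err = dpcon_enable(mc_io, 0, *token);
	if (err)
		dpcon_close(mc_io, 0, *token); /* drop the session on failure */

	return err;
}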
-+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpcon_irq_cfg *irq_cfg); -+ -+/** -+ * dpcon_set_irq_enable() - Set overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable controls the -+ * overall interrupt state. If the interrupt is disabled, no cause will raise -+ * an interrupt. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpcon_get_irq_enable() - Get overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpcon_set_irq_mask() - Set interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * @irq_index: The interrupt index to configure -+ * @mask: Event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpcon_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpcon_get_irq_status() - Get the current status of any pending interrupts.
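The same 32-cause mask/status model recurs across the DPBP, DPCI and DPCON objects. A generic service-routine sketch for a DPCON, using dpcon_get_irq_status() and dpcon_clear_irq_status() as documented just below; cause handling itself is left as a placeholder:

#include <stdint.h>
#include "fsl_dpcon.h"

static int dpcon_service_irq(struct fsl_mc_io *mc_io, uint16_t token,
			     uint8_t irq_index)
{
	uint32_t status = 0;
	int err;

	err = dpcon_get_irq_status(mc_io, 0 /* cmd_flags */, token,
				   irq_index, &status);
	if (err || !status)
		return err;

	/* ... inspect the per-cause bits in 'status' here ... */

	/* W1C semantics: writing a 1 clears that cause's status bit. */
	return dpcon_clear_irq_status(mc_io, 0, token, irq_index, status);
}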
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * @irq_index: The interrupt index to configure -+ * @status: interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpcon_clear_irq_status() - Clear a pending interrupt's status -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * @irq_index: The interrupt index to configure -+ * @status: bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+ -+/** -+ * struct dpcon_attr - Structure representing DPCON attributes -+ * @id: DPCON object ID -+ * @version: DPCON version -+ * @qbman_ch_id: Channel ID to be used by dequeue operation -+ * @num_priorities: Number of priorities for the DPCON channel (1-8) -+ */ -+struct dpcon_attr { -+ int id; -+ /** -+ * struct version - DPCON version -+ * @major: DPCON major version -+ * @minor: DPCON minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+ uint16_t qbman_ch_id; -+ uint8_t num_priorities; -+}; -+ -+/** -+ * dpcon_get_attributes() - Retrieve DPCON attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * @attr: Object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpcon_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpcon_attr *attr); -+ -+/** -+ * struct dpcon_notification_cfg - Structure representing notification parameters -+ * @dpio_id: DPIO object ID; must be configured with a notification channel; -+ * to disable notifications set it to 'DPCON_INVALID_DPIO_ID'; -+ * @priority: Priority selection within the DPIO channel; valid values -+ * are 0-7, depending on the number of priorities in that channel -+ * @user_ctx: User context value provided with each CDAN message -+ */ -+struct dpcon_notification_cfg { -+ int dpio_id; -+ uint8_t priority; -+ uint64_t user_ctx; -+}; -+ -+/** -+ * dpcon_set_notification() - Set DPCON notification destination -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPCON object -+ * @cfg: Notification parameters -+ * -+ * Return: '0' on Success; Error code otherwise -+ */ -+int dpcon_set_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpcon_notification_cfg *cfg); -+ -+#endif /* __FSL_DPCON_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpcon_cmd.h b/drivers/net/dpaa2/mc/fsl_dpcon_cmd.h -new file mode 100644 -index 0000000..ecb40d0 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpcon_cmd.h -@@ -0,0 +1,162 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. 
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
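The DPCON runtime calls documented above follow the usual MC object pattern: open the object to obtain a token, configure it, enable it, and close the session. The following is a minimal usage sketch, not part of the original patch; it assumes an initialized 'mc_io' portal, a DPCON and DPIO instantiated from the DPL, and the dpcon_open()/dpcon_close() declarations from earlier in this header (dpcon_setup_notification is a hypothetical helper name):

#include <fsl_dpcon.h>

static int dpcon_setup_notification(struct fsl_mc_io *mc_io, int dpcon_id,
				    int dpio_id)
{
	struct dpcon_notification_cfg ncfg;
	uint16_t token;
	int err;

	err = dpcon_open(mc_io, 0 /* cmd_flags */, dpcon_id, &token);
	if (err)
		return err;

	/* Deliver channel notifications (CDANs) to the given DPIO */
	ncfg.dpio_id = dpio_id;
	ncfg.priority = 0;	/* within 0..num_priorities-1 of the channel */
	ncfg.user_ctx = (uint64_t)dpcon_id;
	err = dpcon_set_notification(mc_io, 0, token, &ncfg);

	if (!err)
		err = dpcon_enable(mc_io, 0, token);

	dpcon_close(mc_io, 0, token);
	return err;
}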
-+ */ -+#ifndef _FSL_DPCON_CMD_H -+#define _FSL_DPCON_CMD_H -+ -+/* DPCON Version */ -+#define DPCON_VER_MAJOR 2 -+#define DPCON_VER_MINOR 2 -+ -+/* Command IDs */ -+#define DPCON_CMDID_CLOSE 0x800 -+#define DPCON_CMDID_OPEN 0x808 -+#define DPCON_CMDID_CREATE 0x908 -+#define DPCON_CMDID_DESTROY 0x900 -+ -+#define DPCON_CMDID_ENABLE 0x002 -+#define DPCON_CMDID_DISABLE 0x003 -+#define DPCON_CMDID_GET_ATTR 0x004 -+#define DPCON_CMDID_RESET 0x005 -+#define DPCON_CMDID_IS_ENABLED 0x006 -+ -+#define DPCON_CMDID_SET_IRQ 0x010 -+#define DPCON_CMDID_GET_IRQ 0x011 -+#define DPCON_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPCON_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPCON_CMDID_SET_IRQ_MASK 0x014 -+#define DPCON_CMDID_GET_IRQ_MASK 0x015 -+#define DPCON_CMDID_GET_IRQ_STATUS 0x016 -+#define DPCON_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPCON_CMDID_SET_NOTIFICATION 0x100 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_CMD_OPEN(cmd, dpcon_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpcon_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_CMD_CREATE(cmd, cfg) \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, cfg->num_priorities) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_RSP_IS_ENABLED(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val);\ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_CMD_SET_IRQ_ENABLE(cmd, irq_index, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, en); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_RSP_GET_IRQ_ENABLE(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, 
offset, width, type, arg_name */ -+#define DPCON_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_RSP_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 0, 32, 16, uint16_t, attr->qbman_ch_id);\ -+ MC_RSP_OP(cmd, 0, 48, 8, uint8_t, attr->num_priorities);\ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPCON_CMD_SET_NOTIFICATION(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, cfg->dpio_id);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->priority);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->user_ctx);\ -+} while (0) -+ -+#endif /* _FSL_DPCON_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpdbg.h b/drivers/net/dpaa2/mc/fsl_dpdbg.h -new file mode 100644 -index 0000000..ead22e8 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpdbg.h -@@ -0,0 +1,635 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPDBG_H -+#define __FSL_DPDBG_H -+ -+#include <fsl_dpkg.h> -+#include <fsl_dpmac.h> -+#include <fsl_dpni.h> -+ -+/* Data Path Debug API -+ * Contains initialization APIs and runtime control APIs for DPDBG -+ */ -+ -+struct fsl_mc_io; -+ -+/** -+ * dpdbg_open() - Open a control session for the specified object.
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpdbg_id: DPDBG unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpdbg_id, -+ uint16_t *token); -+ -+/** -+ * dpdbg_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpdbg_attr - Structure representing DPDBG attributes -+ * @id: DPDBG object ID -+ * @version: DPDBG version -+ */ -+struct dpdbg_attr { -+ int id; -+ /** -+ * struct version - Structure representing DPDBG version -+ * @major: DPDBG major version -+ * @minor: DPDBG minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+}; -+ -+/** -+ * dpdbg_get_attributes() - Retrieve DPDBG attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @attr: Returned object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdbg_attr *attr); -+ -+/** -+ * struct dpdbg_dpni_info - Info of DPNI -+ * @max_senders: Maximum number of different senders; used as the number -+ * of dedicated Tx flows; Non-power-of-2 values are rounded -+ * up to the next power-of-2 value as hardware demands it; -+ * '0' will be treated as '1' -+ * @qdid: Virtual QDID. -+ * @err_fqid: Virtual FQID for error queues -+ * @tx_conf_fqid: Virtual FQID for global TX confirmation queue -+ */ -+struct dpdbg_dpni_info { -+ uint8_t max_senders; -+ uint32_t qdid; -+ uint32_t err_fqid; -+ uint32_t tx_conf_fqid; -+}; -+ -+/** -+ * dpdbg_get_dpni_info() - Retrieve info for a specific DPNI -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpni_id: The requested DPNI ID -+ * @info: The returned info -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_get_dpni_info(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpni_id, -+ struct dpdbg_dpni_info *info); -+ -+/** -+ * dpdbg_get_dpni_priv_tx_conf_fqid() - Retrieve the virtual TX confirmation -+ * queue FQID of the required DPNI -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpni_id: The requested DPNI ID -+ * @sender_id: The requested sender ID -+ * @fqid: The returned virtual private TX confirmation FQID. -+ * -+ * Return: '0' on Success; Error code otherwise.
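As a usage sketch for the open/query calls above (illustrative only, not part of the original patch; assumes an initialized 'mc_io', object IDs from the DPL, and <stdio.h> for the printout; dpdbg_dump_dpni is a hypothetical helper name):

#include <stdio.h>
#include <fsl_dpdbg.h>

static int dpdbg_dump_dpni(struct fsl_mc_io *mc_io, int dpdbg_id, int dpni_id)
{
	struct dpdbg_dpni_info info;
	uint16_t token;
	int err;

	err = dpdbg_open(mc_io, 0, dpdbg_id, &token);
	if (err)
		return err;

	err = dpdbg_get_dpni_info(mc_io, 0, token, dpni_id, &info);
	if (!err)
		printf("dpni.%d: qdid=%u err_fqid=%u tx_conf_fqid=%u max_senders=%u\n",
		       dpni_id, info.qdid, info.err_fqid, info.tx_conf_fqid,
		       info.max_senders);

	dpdbg_close(mc_io, 0, token);
	return err;
}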
-+ */ -+int dpdbg_get_dpni_priv_tx_conf_fqid(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpni_id, -+ uint8_t sender_id, -+ uint32_t *fqid); -+ -+/** -+ * struct dpdbg_dpcon_info - Info of DPCON -+ * @ch_id: Channel ID -+ */ -+struct dpdbg_dpcon_info { -+ uint32_t ch_id; -+}; -+ -+/** -+ * dpdbg_get_dpcon_info() - Retrieve info of DPCON -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpcon_id: The requested DPCON ID -+ * @info: The returned info. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_get_dpcon_info(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpcon_id, -+ struct dpdbg_dpcon_info *info); -+ -+/** -+ * struct dpdbg_dpbp_info - Info of DPBP -+ * @bpid: Virtual buffer pool ID -+ */ -+struct dpdbg_dpbp_info { -+ uint32_t bpid; -+}; -+ -+/** -+ * dpdbg_get_dpbp_info() - Retrieve info of DPBP -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpbp_id: The requested DPBP ID -+ * @info: The returned info. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_get_dpbp_info(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpbp_id, -+ struct dpdbg_dpbp_info *info); -+ -+/** -+ * dpdbg_get_dpci_fqid() - Retrieve the virtual FQID of the required DPCI -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpci_id: The requested DPCI ID -+ * @priority: Select the queue relative to number of priorities configured at -+ * DPCI creation -+ * @fqid: The returned virtual FQID. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_get_dpci_fqid(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpci_id, -+ uint8_t priority, -+ uint32_t *fqid); -+ -+/** -+ * Maximum size for rule match (in bytes) -+ */ -+#define DPDBG_MAX_RULE_SIZE 56 -+/** -+ * Disable marking -+ */ -+#define DPDBG_DISABLE_MARKING 0xFF -+ -+/** -+ * dpdbg_prepare_ctlu_global_rule() - Prepare the extract parameters -+ * @dpkg_rule: Rule defining a full Key Generation profile -+ * @rule_buf: Zeroed 256 bytes of memory before mapping it to DMA -+ * -+ * This function has to be called before dpdbg_set_ctlu_global_marking() -+ */ -+int dpdbg_prepare_ctlu_global_rule(struct dpkg_profile_cfg *dpkg_rule, -+ uint8_t *rule_buf); -+ -+/** -+ * struct dpdbg_rule_cfg - Rule configuration for table lookup -+ * @key_iova: I/O virtual address of the key (must be in DMA-able memory) -+ * @rule_iova: I/O virtual address of the rule (must be in DMA-able memory) -+ * @mask_iova: I/O virtual address of the mask (must be in DMA-able memory) -+ * @key_size: key and mask size (in bytes) -+ */ -+struct dpdbg_rule_cfg { -+ uint64_t key_iova; -+ uint64_t mask_iova; -+ uint64_t rule_iova; -+ uint8_t key_size; -+}; -+ -+/** -+ * dpdbg_set_ctlu_global_marking() - Set marking for all match rule frames -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @marking: The requested Debug marking -+ * @cfg: Marking rule to add -+ * -+ * Warning: must be called after dpdbg_prepare_ctlu_global_rule() -+ * -+ * Return: '0' on Success; Error code otherwise.
-+ */ -+int dpdbg_set_ctlu_global_marking(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t marking, -+ struct dpdbg_rule_cfg *cfg); -+ -+/** -+ * All traffic classes considered -+ */ -+#define DPDBG_DPNI_ALL_TCS (uint8_t)(-1) -+/** -+ * All flows within traffic class considered -+ */ -+#define DPDBG_DPNI_ALL_TC_FLOWS (uint8_t)(-1) -+/** -+ * All buffer pools considered -+ */ -+#define DPDBG_DPNI_ALL_DPBP (uint8_t)(-1) -+ -+/** -+ * struct dpdbg_dpni_rx_marking_cfg - Ingress frame configuration -+ * @tc_id: Traffic class ID (0-7); DPDBG_DPNI_ALL_TCS for all traffic classes. -+ * @flow_id: Rx flow id within the traffic class; use -+ * 'DPDBG_DPNI_ALL_TC_FLOWS' to set all flows within this tc_id; -+ * ignored if tc_id is set to 'DPDBG_DPNI_ALL_TCS'; -+ * @dpbp_id: buffer pool ID; 'DPDBG_DPNI_ALL_DPBP' to set all DPBP -+ * @marking: Marking for match frames; -+ * 'DPDBG_DISABLE_MARKING' for disable marking -+ */ -+struct dpdbg_dpni_rx_marking_cfg { -+ uint8_t tc_id; -+ uint16_t flow_id; -+ uint16_t dpbp_id; -+ uint8_t marking; -+}; -+ -+/** -+ * dpdbg_set_dpni_rx_marking() - Set Rx frame marking for DPNI -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpni_id: The requested DPNI ID -+ * @cfg: RX frame marking configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_set_dpni_rx_marking(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpni_id, -+ struct dpdbg_dpni_rx_marking_cfg *cfg); -+ -+/* selects global confirmation queues */ -+#define DPDBG_DPNI_GLOBAL_TX_CONF_QUEUE (uint16_t)(-1) -+ -+/** -+ * dpdbg_set_dpni_tx_conf_marking() - Set Tx frame marking for DPNI -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpni_id: The requested DPNI ID -+ * @sender_id: Sender Id for the confirmation queue; -+ * 'DPDBG_DPNI_GLOBAL_TX_CONF_QUEUE' for global confirmation queue -+ * @marking: The requested marking; -+ * 'DPDBG_DISABLE_MARKING' for disable marking -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_set_dpni_tx_conf_marking(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpni_id, -+ uint16_t sender_id, -+ uint8_t marking); -+ -+/** -+ * dpdbg_set_dpio_marking() - Set debug frame marking on enqueue -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpio_id: The requested DPIO ID -+ * @marking: The requested marking; -+ * 'DPDBG_DISABLE_MARKING' for disable marking -+ * Return: '0' on Success; Error code otherwise. 
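All of the marking setters above take the DD code point to stamp into matching frames. For example, to mark everything a DPNI receives (all traffic classes, flows, and buffer pools) with code point 0x2, a sketch assuming a token already obtained from dpdbg_open() (dpdbg_mark_all_rx is a hypothetical helper name):

#include <fsl_dpdbg.h>

static int dpdbg_mark_all_rx(struct fsl_mc_io *mc_io, uint16_t token,
			     int dpni_id)
{
	struct dpdbg_dpni_rx_marking_cfg cfg = {
		.tc_id   = DPDBG_DPNI_ALL_TCS,
		.flow_id = DPDBG_DPNI_ALL_TC_FLOWS,
		.dpbp_id = DPDBG_DPNI_ALL_DPBP,
		.marking = 0x2,	/* DPDBG_DISABLE_MARKING turns it back off */
	};

	return dpdbg_set_dpni_rx_marking(mc_io, 0, token, dpni_id, &cfg);
}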
-+ */ -+int dpdbg_set_dpio_marking(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpio_id, -+ uint8_t marking); -+ -+/** -+ * enum dpdbg_verbosity_level - Trace verbosity level -+ * @DPDBG_VERBOSITY_LEVEL_DISABLE: Trace disabled -+ * @DPDBG_VERBOSITY_LEVEL_TERSE: Terse trace -+ * @DPDBG_VERBOSITY_LEVEL_VERBOSE: Verbose trace -+ */ -+enum dpdbg_verbosity_level { -+ DPDBG_VERBOSITY_LEVEL_DISABLE = 0, -+ DPDBG_VERBOSITY_LEVEL_TERSE, -+ DPDBG_VERBOSITY_LEVEL_VERBOSE -+}; -+ -+/** -+ * dpdbg_set_ctlu_global_trace() - Set global trace configuration for the CTLU -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @cfg: Trace rule to add -+ * -+ * Warning: must be called after dpdbg_prepare_ctlu_global_rule() -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_set_ctlu_global_trace(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdbg_rule_cfg *cfg); -+ -+/** -+ * Number of DPIO trace points -+ */ -+#define DPDBG_NUM_OF_DPIO_TRACE_POINTS 2 -+ -+/** -+ * enum dpdbg_dpio_trace_type - Define Trace point type -+ * @DPDBG_DPIO_TRACE_TYPE_ENQUEUE: This trace point triggers when an enqueue -+ * command, received via this portal, -+ * and containing a marked frame, is executed -+ * @DPDBG_DPIO_TRACE_TYPE_DEFERRED: This trace point triggers when the deferred -+ * enqueue of a marked frame received via this -+ * portal completes -+ */ -+enum dpdbg_dpio_trace_type { -+ DPDBG_DPIO_TRACE_TYPE_ENQUEUE = 0, -+ DPDBG_DPIO_TRACE_TYPE_DEFERRED = 1 -+}; -+ -+/** -+ * struct dpdbg_dpio_trace_cfg - Configure the behavior of a trace point -+ * when a frame marked with the specified DD code point is -+ * encountered -+ * @marking: this field will be written into the DD field of every FD -+ * enqueued in this DPIO. -+ * 'DPDBG_DISABLE_MARKING' for disable marking -+ * @verbosity: Verbosity level -+ * @enqueue_type: Enqueue trace point type -+ */ -+struct dpdbg_dpio_trace_cfg { -+ uint8_t marking; -+ enum dpdbg_verbosity_level verbosity; -+ enum dpdbg_dpio_trace_type enqueue_type; -+}; -+ -+/** -+ * dpdbg_set_dpio_trace() - Set trace for DPIO for every enqueued frame to -+ * the portal -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpio_id: The requested DPIO ID -+ * @trace_point: Trace points configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_set_dpio_trace(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpio_id, -+ struct dpdbg_dpio_trace_cfg -+ trace_point[DPDBG_NUM_OF_DPIO_TRACE_POINTS]); -+ -+/** -+ * struct dpdbg_dpni_rx_trace_cfg - Configure the behavior of a trace point -+ * when a frame marked with the specified DD code point is -+ * encountered -+ * @tc_id: Traffic class ID (0-7); DPDBG_DPNI_ALL_TCS for all traffic classes. -+ * @flow_id: Rx flow id within the traffic class; use -+ * 'DPDBG_DPNI_ALL_TC_FLOWS' to set all flows within this tc_id; -+ * ignored if tc_id is set to 'DPDBG_DPNI_ALL_TCS'; -+ * @dpbp_id: buffer pool ID; 'DPDBG_DPNI_ALL_DPBP' to set all DPBP -+ * @marking: Marking for match frames; -+ * 'DPDBG_DISABLE_MARKING' for disable marking -+ */ -+struct dpdbg_dpni_rx_trace_cfg { -+ uint8_t tc_id; -+ uint16_t flow_id; -+ uint16_t dpbp_id; -+ uint8_t marking; -+}; -+ -+/** -+ * dpdbg_set_dpni_rx_trace() - Set trace for DPNI ingress (WRIOP ingress).
-+ * In case of multiple requests for different DPNIs, the trace -+ * will be for the latest DPNI requested. -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpni_id: The requested DPNI ID -+ * @trace_cfg: Trace configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_set_dpni_rx_trace(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpni_id, -+ struct dpdbg_dpni_rx_trace_cfg *trace_cfg); -+ -+/** -+ * All DPNI senders -+ */ -+#define DPDBG_DPNI_ALL_SENDERS (uint16_t)(-1) -+ -+/** -+ * struct dpdbg_dpni_tx_trace_cfg - Configure the behavior of a trace point when a -+ * frame marked with the specified DD code point is encountered -+ * @marking: The requested debug marking; -+ * 'DPDBG_DISABLE_MARKING' for disable marking -+ */ -+struct dpdbg_dpni_tx_trace_cfg { -+ uint8_t marking; -+}; -+ -+/** -+ * dpdbg_set_dpni_tx_trace() - Set trace for DPNI dequeued frames -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpni_id: The requested DPNI ID -+ * @sender_id: Sender ID; 'DPDBG_DPNI_ALL_SENDERS' for all senders -+ * @trace_cfg: Trace configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_set_dpni_tx_trace(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpni_id, -+ uint16_t sender_id, -+ struct dpdbg_dpni_tx_trace_cfg *trace_cfg); -+ -+/** -+ * Number of DPCON trace points -+ */ -+#define DPDBG_NUM_OF_DPCON_TRACE_POINTS 2 -+ -+/** -+ * struct dpdbg_dpcon_trace_cfg - Configure the behavior of a trace point when a -+ * frame marked with the specified DD code point is encountered -+ * @marking: The requested debug marking; -+ * 'DPDBG_DISABLE_MARKING' for disable marking -+ * @verbosity: Verbosity level -+ */ -+struct dpdbg_dpcon_trace_cfg { -+ uint8_t marking; -+ enum dpdbg_verbosity_level verbosity; -+}; -+ -+/** -+ * dpdbg_set_dpcon_trace() - Set trace for DPCON when a frame marked with a -+ * specified marking is dequeued from a WQ in the -+ * channel selected -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpcon_id: The requested DPCON ID -+ * @trace_point: Trace points configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_set_dpcon_trace(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpcon_id, -+ struct dpdbg_dpcon_trace_cfg -+ trace_point[DPDBG_NUM_OF_DPCON_TRACE_POINTS]); -+ -+/** -+ * Number of DPSECI trace points -+ */ -+#define DPDBG_NUM_OF_DPSECI_TRACE_POINTS 2 -+ -+/** -+ * struct dpdbg_dpseci_trace_cfg - Configure the behavior of a trace point when -+ * a frame marked with the specified DD code point is -+ * encountered -+ * @marking: The requested debug marking; -+ * 'DPDBG_DISABLE_MARKING' for disable marking -+ * @verbosity: Verbosity level -+ */ -+struct dpdbg_dpseci_trace_cfg { -+ uint8_t marking; -+ enum dpdbg_verbosity_level verbosity; -+}; -+ -+/** -+ * dpdbg_set_dpseci_trace() - Set trace for DPSECI when a frame marked with the -+ * specific marking is enqueued via this portal.
-+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpseci_id: The requested DPSECI ID -+ * @trace_point: Trace points configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_set_dpseci_trace(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpseci_id, -+ struct dpdbg_dpseci_trace_cfg -+ trace_point[DPDBG_NUM_OF_DPSECI_TRACE_POINTS]); -+ -+/** -+ * dpdbg_get_dpmac_counter() - DPMAC packet throughput -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpmac_id: The requested DPMAC ID -+ * @counter_type: The requested DPMAC counter -+ * @counter: Returned counter value -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_get_dpmac_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpmac_id, -+ enum dpmac_counter counter_type, -+ uint64_t *counter); -+ -+/** -+ * dpdbg_get_dpni_counter() - DPNI packet throughput -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDBG object -+ * @dpni_id: The requested DPNI ID -+ * @counter_type: The requested DPNI counter -+ * @counter: Returned counter value -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdbg_get_dpni_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpni_id, -+ enum dpni_counter counter_type, -+ uint64_t *counter); -+ -+#endif /* __FSL_DPDBG_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpdbg_cmd.h b/drivers/net/dpaa2/mc/fsl_dpdbg_cmd.h -new file mode 100644 -index 0000000..b672788 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpdbg_cmd.h -@@ -0,0 +1,249 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef _FSL_DPDBG_CMD_H -+#define _FSL_DPDBG_CMD_H -+ -+/* DPDBG Version */ -+#define DPDBG_VER_MAJOR 1 -+#define DPDBG_VER_MINOR 0 -+ -+/* Command IDs */ -+#define DPDBG_CMDID_CLOSE 0x800 -+#define DPDBG_CMDID_OPEN 0x80F -+ -+#define DPDBG_CMDID_GET_ATTR 0x004 -+ -+#define DPDBG_CMDID_GET_DPNI_INFO 0x130 -+#define DPDBG_CMDID_GET_DPNI_PRIV_TX_CONF_FQID 0x131 -+#define DPDBG_CMDID_GET_DPCON_INFO 0x132 -+#define DPDBG_CMDID_GET_DPBP_INFO 0x133 -+#define DPDBG_CMDID_GET_DPCI_FQID 0x134 -+ -+#define DPDBG_CMDID_SET_CTLU_GLOBAL_MARKING 0x135 -+#define DPDBG_CMDID_SET_DPNI_RX_MARKING 0x136 -+#define DPDBG_CMDID_SET_DPNI_TX_CONF_MARKING 0x137 -+#define DPDBG_CMDID_SET_DPIO_MARKING 0x138 -+ -+#define DPDBG_CMDID_SET_CTLU_GLOBAL_TRACE 0x140 -+#define DPDBG_CMDID_SET_DPIO_TRACE 0x141 -+#define DPDBG_CMDID_SET_DPNI_RX_TRACE 0x142 -+#define DPDBG_CMDID_SET_DPNI_TX_TRACE 0x143 -+#define DPDBG_CMDID_SET_DPCON_TRACE 0x145 -+#define DPDBG_CMDID_SET_DPSECI_TRACE 0x146 -+ -+#define DPDBG_CMDID_GET_DPMAC_COUNTER 0x150 -+#define DPDBG_CMDID_GET_DPNI_COUNTER 0x151 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_OPEN(cmd, dpdbg_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpdbg_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_RSP_GET_ATTRIBUTES(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 32, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_GET_DPNI_INFO(cmd, dpni_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpni_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_RSP_GET_DPNI_INFO(cmd, info) \ -+do { \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, info->qdid);\ -+ MC_RSP_OP(cmd, 1, 32, 8, uint8_t, info->max_senders);\ -+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, info->err_fqid);\ -+ MC_RSP_OP(cmd, 2, 32, 32, uint32_t, info->tx_conf_fqid);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_GET_DPNI_PRIV_TX_CONF_FQID(cmd, dpni_id, sender_id) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpni_id);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, sender_id);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_RSP_GET_DPNI_PRIV_TX_CONF_FQID(cmd, fqid) \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, fqid) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_GET_DPCON_INFO(cmd, dpcon_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpcon_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_RSP_GET_DPCON_INFO(cmd, info) \ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, info->ch_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_GET_DPBP_INFO(cmd, dpbp_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpbp_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_RSP_GET_DPBP_INFO(cmd, info) \ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, info->bpid) -+ -+/* 
cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_GET_DPCI_FQID(cmd, dpci_id, priority) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpci_id);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, priority);\ -+} while (0) -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_RSP_GET_DPCI_FQID(cmd, fqid) \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, fqid) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_SET_CTLU_GLOBAL_MARKING(cmd, marking, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, marking);\ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, cfg->key_size); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->key_iova); \ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->mask_iova); \ -+ MC_CMD_OP(cmd, 3, 0, 64, uint64_t, cfg->rule_iova); \ -+} while (0) -+ -+#define DPDBG_CMD_SET_DPNI_RX_MARKING(cmd, dpni_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpni_id);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->tc_id);\ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, cfg->flow_id);\ -+ MC_CMD_OP(cmd, 1, 0, 16, uint16_t, cfg->dpbp_id);\ -+ MC_CMD_OP(cmd, 1, 16, 8, uint8_t, cfg->marking);\ -+} while (0) -+ -+#define DPDBG_CMD_SET_DPNI_TX_CONF_MARKING(cmd, dpni_id, sender_id, marking) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpni_id);\ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, sender_id);\ -+ MC_CMD_OP(cmd, 1, 16, 8, uint8_t, marking);\ -+} while (0) -+ -+#define DPDBG_CMD_SET_DPIO_MARKING(cmd, dpio_id, marking) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpio_id);\ -+ MC_CMD_OP(cmd, 1, 16, 8, uint8_t, marking);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_SET_CTLU_GLOBAL_TRACE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, cfg->key_size); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->key_iova); \ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->mask_iova); \ -+ MC_CMD_OP(cmd, 3, 0, 64, uint64_t, cfg->rule_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_SET_DPIO_TRACE(cmd, dpio_id, trace_point) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpio_id);\ -+ MC_CMD_OP(cmd, 1, 0, 4, enum dpdbg_verbosity_level, \ -+ trace_point[0].verbosity); \ -+ MC_CMD_OP(cmd, 1, 4, 4, enum dpdbg_dpio_trace_type, \ -+ trace_point[0].enqueue_type); \ -+ MC_CMD_OP(cmd, 1, 8, 8, uint8_t, trace_point[0].marking); \ -+ MC_CMD_OP(cmd, 1, 32, 4, enum dpdbg_verbosity_level, \ -+ trace_point[1].verbosity); \ -+ MC_CMD_OP(cmd, 1, 36, 4, enum dpdbg_dpio_trace_type, \ -+ trace_point[1].enqueue_type); \ -+ MC_CMD_OP(cmd, 1, 40, 8, uint8_t, trace_point[1].marking); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_SET_DPNI_RX_TRACE(cmd, dpni_id, trace_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpni_id);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, trace_cfg->tc_id);\ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, trace_cfg->flow_id);\ -+ MC_CMD_OP(cmd, 1, 0, 16, uint16_t, trace_cfg->dpbp_id);\ -+ MC_CMD_OP(cmd, 1, 16, 8, uint8_t, trace_cfg->marking);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_SET_DPNI_TX_TRACE(cmd, dpni_id, sender_id, trace_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpni_id);\ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, sender_id);\ -+ MC_CMD_OP(cmd, 1, 16, 8, uint8_t, trace_cfg->marking);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_SET_DPCON_TRACE(cmd, dpcon_id, trace_point) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpcon_id);\ -+ MC_CMD_OP(cmd, 1, 0, 4, enum dpdbg_verbosity_level, \ -+ 
trace_point[0].verbosity); \ -+ MC_CMD_OP(cmd, 1, 8, 8, uint8_t, trace_point[0].marking); \ -+ MC_CMD_OP(cmd, 1, 32, 4, enum dpdbg_verbosity_level, \ -+ trace_point[1].verbosity); \ -+ MC_CMD_OP(cmd, 1, 40, 8, uint8_t, trace_point[1].marking); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_SET_DPSECI_TRACE(cmd, dpseci_id, trace_point) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpseci_id);\ -+ MC_CMD_OP(cmd, 1, 0, 4, enum dpdbg_verbosity_level, \ -+ trace_point[0].verbosity); \ -+ MC_CMD_OP(cmd, 1, 8, 8, uint8_t, trace_point[0].marking); \ -+ MC_CMD_OP(cmd, 1, 32, 4, enum dpdbg_verbosity_level, \ -+ trace_point[1].verbosity); \ -+ MC_CMD_OP(cmd, 1, 40, 8, uint8_t, trace_point[1].marking); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_GET_DPMAC_COUNTER(cmd, dpmac_id, counter_type) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpmac_id);\ -+ MC_CMD_OP(cmd, 0, 32, 16, enum dpmac_counter, counter_type);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_RSP_GET_DPMAC_COUNTER(cmd, counter) \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, counter) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_CMD_GET_DPNI_COUNTER(cmd, dpni_id, counter_type) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpni_id);\ -+ MC_CMD_OP(cmd, 0, 32, 16, enum dpni_counter, counter_type);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDBG_RSP_GET_DPNI_COUNTER(cmd, counter) \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, counter) -+ -+#endif /* _FSL_DPDBG_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpdcei.h b/drivers/net/dpaa2/mc/fsl_dpdcei.h -new file mode 100644 -index 0000000..319795c ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpdcei.h -@@ -0,0 +1,515 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPDCEI_H -+#define __FSL_DPDCEI_H -+ -+/* Data Path DCE Interface API -+ * Contains initialization APIs and runtime control APIs for DPDCEI -+ */ -+ -+struct fsl_mc_io; -+ -+/** General DPDCEI macros */ -+ -+/** -+ * Indicates an invalid frame queue -+ */ -+#define DPDCEI_FQID_NOT_VALID (uint32_t)(-1) -+ -+/** -+ * enum dpdcei_engine - DCE engine block -+ * @DPDCEI_ENGINE_COMPRESSION: Engine compression -+ * @DPDCEI_ENGINE_DECOMPRESSION: Engine decompression -+ */ -+enum dpdcei_engine { -+ DPDCEI_ENGINE_COMPRESSION, -+ DPDCEI_ENGINE_DECOMPRESSION -+}; -+ -+/** -+ * dpdcei_open() - Open a control session for the specified object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpdcei_id: DPDCEI unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpdcei_create() function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpdcei_id, -+ uint16_t *token); -+ -+/** -+ * dpdcei_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpdcei_cfg - Structure representing DPDCEI configuration -+ * @engine: compression or decompression engine to be selected -+ * @priority: Priority for the DCE hardware processing (valid values 1-8). -+ */ -+struct dpdcei_cfg { -+ enum dpdcei_engine engine; -+ uint8_t priority; -+}; -+ -+/** -+ * dpdcei_create() - Create the DPDCEI object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration parameters -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPDCEI object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object.
For objects that are created using the -+ * DPL file, call dpdcei_open() function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpdcei_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpdcei_destroy() - Destroy the DPDCEI object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpdcei_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpdcei_enable() - Enable the DPDCEI, allow sending and receiving frames. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpdcei_disable() - Disable the DPDCEI, stop sending and receiving frames. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpdcei_is_enabled() - Check if the DPDCEI is enabled. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @en: Return '1' for object enabled/'0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpdcei_reset() - Reset the DPDCEI, returns the object to initial state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpdcei_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpdcei_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpdcei_set_irq() - Set IRQ information for the DPDCEI to trigger an interrupt -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. 
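A sketch of bringing up a compression instance with the lifecycle calls above (illustrative, not part of the original patch; assumes an initialized 'mc_io'; dce_comp_setup is a hypothetical helper name):

#include <fsl_dpdcei.h>

static int dce_comp_setup(struct fsl_mc_io *mc_io, uint16_t *token)
{
	struct dpdcei_cfg cfg = {
		.engine   = DPDCEI_ENGINE_COMPRESSION,
		.priority = 1,	/* DCE hardware priority, valid range 1-8 */
	};
	int err;

	err = dpdcei_create(mc_io, 0, &cfg, token);
	if (err)
		return err;

	/* Allow frame send/receive on the new object */
	return dpdcei_enable(mc_io, 0, *token);
}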
-+ */ -+int dpdcei_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpdcei_irq_cfg *irq_cfg); -+ -+/** -+ * dpdcei_get_irq() - Get IRQ information from the DPDCEI -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpdcei_irq_cfg *irq_cfg); -+ -+/** -+ * dpdcei_set_irq_enable() - Set overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable controls the -+ * overall interrupt state. If the interrupt is disabled, no causes will cause -+ * an interrupt. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpdcei_get_irq_enable() - Get overall interrupt state -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned Interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpdcei_set_irq_mask() - Set interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @irq_index: The interrupt index to configure -+ * @mask: Event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpdcei_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise.
-+ */ -+int dpdcei_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpdcei_get_irq_status() - Get the current status of any pending interrupts -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpdcei_clear_irq_status() - Clear a pending interrupt's status -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @irq_index: The interrupt index to configure -+ * @status: bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+/** -+ * struct dpdcei_attr - Structure representing DPDCEI attributes -+ * @id: DPDCEI object ID -+ * @engine: DCE engine block -+ * @version: DPDCEI version -+ */ -+struct dpdcei_attr { -+ int id; -+ enum dpdcei_engine engine; -+ /** -+ * struct version - DPDCEI version -+ * @major: DPDCEI major version -+ * @minor: DPDCEI minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+}; -+ -+/** -+ * dpdcei_get_attributes() - Retrieve DPDCEI attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @attr: Returned object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise. 
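The status word returned by dpdcei_get_irq_status() is cleared write-1-to-clear style through dpdcei_clear_irq_status(), so a service routine typically reads it, dispatches on the cause bits, then writes the same bits back. A sketch, assuming a token from dpdcei_open() (dpdcei_irq_service is a hypothetical helper name):

#include <fsl_dpdcei.h>

static int dpdcei_irq_service(struct fsl_mc_io *mc_io, uint16_t token)
{
	uint32_t status = 0;
	int err;

	err = dpdcei_get_irq_status(mc_io, 0, token, 0 /* irq_index */, &status);
	if (err || !status)
		return err;

	/* ... handle the pending cause bits in 'status' here ... */

	return dpdcei_clear_irq_status(mc_io, 0, token, 0, status);
}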
-+ */ -+int dpdcei_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdcei_attr *attr); -+ -+/** -+ * enum dpdcei_dest - DPDCEI destination types -+ * @DPDCEI_DEST_NONE: Unassigned destination; The queue is set in parked mode -+ * and does not generate FQDAN notifications; -+ * user is expected to dequeue from the queue based on -+ * polling or other user-defined method -+ * @DPDCEI_DEST_DPIO: The queue is set in schedule mode and generates FQDAN -+ * notifications to the specified DPIO; user is expected to -+ * dequeue from the queue only after notification is -+ * received -+ * @DPDCEI_DEST_DPCON: The queue is set in schedule mode and does not generate -+ * FQDAN notifications, but is connected to the specified -+ * DPCON object; -+ * user is expected to dequeue from the DPCON channel -+ */ -+enum dpdcei_dest { -+ DPDCEI_DEST_NONE = 0, -+ DPDCEI_DEST_DPIO = 1, -+ DPDCEI_DEST_DPCON = 2 -+}; -+ -+/** -+ * struct dpdcei_dest_cfg - Structure representing DPDCEI destination parameters -+ * @dest_type: Destination type -+ * @dest_id: Either DPIO ID or DPCON ID, depending on the destination type -+ * @priority: Priority selection within the DPIO or DPCON channel; valid values -+ * are 0-1 or 0-7, depending on the number of priorities in that -+ * channel; not relevant for 'DPDCEI_DEST_NONE' option -+ */ -+struct dpdcei_dest_cfg { -+ enum dpdcei_dest dest_type; -+ int dest_id; -+ uint8_t priority; -+}; -+ -+/** DPDCEI queue modification options */ -+ -+/** -+ * Select to modify the user's context associated with the queue -+ */ -+#define DPDCEI_QUEUE_OPT_USER_CTX 0x00000001 -+ -+/** -+ * Select to modify the queue's destination -+ */ -+#define DPDCEI_QUEUE_OPT_DEST 0x00000002 -+ -+/** -+ * struct dpdcei_rx_queue_cfg - RX queue configuration -+ * @options: Flags representing the suggested modifications to the queue; -+ * Use any combination of 'DPDCEI_QUEUE_OPT_' flags -+ * @user_ctx: User context value provided in the frame descriptor of each -+ * dequeued frame; -+ * valid only if 'DPDCEI_QUEUE_OPT_USER_CTX' is contained in 'options' -+ * @dest_cfg: Queue destination parameters; -+ * valid only if 'DPDCEI_QUEUE_OPT_DEST' is contained in 'options' -+ */ -+struct dpdcei_rx_queue_cfg { -+ uint32_t options; -+ uint64_t user_ctx; -+ struct dpdcei_dest_cfg dest_cfg; -+}; -+ -+/** -+ * dpdcei_set_rx_queue() - Set Rx queue configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @cfg: Rx queue configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_set_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpdcei_rx_queue_cfg *cfg); -+ -+/** -+ * struct dpdcei_rx_queue_attr - Structure representing attributes of Rx queues -+ * @user_ctx: User context value provided in the frame descriptor of each -+ * dequeued frame -+ * @dest_cfg: Queue destination configuration -+ * @fqid: Virtual FQID value to be used for dequeue operations -+ */ -+struct dpdcei_rx_queue_attr { -+ uint64_t user_ctx; -+ struct dpdcei_dest_cfg dest_cfg; -+ uint32_t fqid; -+}; -+ -+/** -+ * dpdcei_get_rx_queue() - Retrieve Rx queue attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @attr: Returned Rx queue attributes -+ * -+ * Return: '0' on Success; Error code otherwise. 
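The 'options' bitmask selects which fields of struct dpdcei_rx_queue_cfg take effect. A sketch that points the Rx (DCE output) side at a DPCON channel and tags dequeued frames with a caller-chosen context (illustrative; dce_bind_rx_to_dpcon is a hypothetical helper name):

#include <fsl_dpdcei.h>

static int dce_bind_rx_to_dpcon(struct fsl_mc_io *mc_io, uint16_t token,
				int dpcon_id, uint64_t ctx)
{
	struct dpdcei_rx_queue_cfg cfg = {
		.options  = DPDCEI_QUEUE_OPT_USER_CTX | DPDCEI_QUEUE_OPT_DEST,
		.user_ctx = ctx,
		.dest_cfg = {
			.dest_type = DPDCEI_DEST_DPCON,
			.dest_id   = dpcon_id,
			.priority  = 0,
		},
	};

	return dpdcei_set_rx_queue(mc_io, 0, token, &cfg);
}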
-+ */ -+int dpdcei_get_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdcei_rx_queue_attr *attr); -+ -+/** -+ * struct dpdcei_tx_queue_attr - Structure representing attributes of Tx queues -+ * @fqid: Virtual FQID to be used for sending frames to DCE hardware -+ */ -+struct dpdcei_tx_queue_attr { -+ uint32_t fqid; -+}; -+ -+/** -+ * dpdcei_get_tx_queue() - Retrieve Tx queue attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDCEI object -+ * @attr: Returned Tx queue attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdcei_get_tx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdcei_tx_queue_attr *attr); -+ -+#endif /* __FSL_DPDCEI_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpdcei_cmd.h b/drivers/net/dpaa2/mc/fsl_dpdcei_cmd.h -new file mode 100644 -index 0000000..8452d88 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpdcei_cmd.h -@@ -0,0 +1,182 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+#ifndef _FSL_DPDCEI_CMD_H -+#define _FSL_DPDCEI_CMD_H -+ -+/* DPDCEI Version */ -+#define DPDCEI_VER_MAJOR 1 -+#define DPDCEI_VER_MINOR 2 -+ -+/* Command IDs */ -+#define DPDCEI_CMDID_CLOSE 0x800 -+#define DPDCEI_CMDID_OPEN 0x80D -+#define DPDCEI_CMDID_CREATE 0x90D -+#define DPDCEI_CMDID_DESTROY 0x900 -+ -+#define DPDCEI_CMDID_ENABLE 0x002 -+#define DPDCEI_CMDID_DISABLE 0x003 -+#define DPDCEI_CMDID_GET_ATTR 0x004 -+#define DPDCEI_CMDID_RESET 0x005 -+#define DPDCEI_CMDID_IS_ENABLED 0x006 -+ -+#define DPDCEI_CMDID_SET_IRQ 0x010 -+#define DPDCEI_CMDID_GET_IRQ 0x011 -+#define DPDCEI_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPDCEI_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPDCEI_CMDID_SET_IRQ_MASK 0x014 -+#define DPDCEI_CMDID_GET_IRQ_MASK 0x015 -+#define DPDCEI_CMDID_GET_IRQ_STATUS 0x016 -+#define DPDCEI_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPDCEI_CMDID_SET_RX_QUEUE 0x1B0 -+#define DPDCEI_CMDID_GET_RX_QUEUE 0x1B1 -+#define DPDCEI_CMDID_GET_TX_QUEUE 0x1B2 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_CMD_OPEN(cmd, dpdcei_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpdcei_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_CMD_CREATE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 8, 8, enum dpdcei_engine, cfg->engine);\ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->priority);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_RSP_IS_ENABLED(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_CMD_SET_IRQ_ENABLE(cmd, irq_index, enable_state) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, enable_state); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_RSP_GET_IRQ_ENABLE(cmd, enable_state) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, enable_state) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, 
status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_RSP_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->id); \ -+ MC_RSP_OP(cmd, 0, 32, 8, enum dpdcei_engine, attr->engine); \ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_CMD_SET_RX_QUEUE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, cfg->dest_cfg.dest_id); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->dest_cfg.priority); \ -+ MC_CMD_OP(cmd, 0, 48, 4, enum dpdcei_dest, cfg->dest_cfg.dest_type); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->user_ctx); \ -+ MC_CMD_OP(cmd, 2, 0, 32, uint32_t, cfg->options);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_RSP_GET_RX_QUEUE(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->dest_cfg.dest_id);\ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, attr->dest_cfg.priority);\ -+ MC_RSP_OP(cmd, 0, 48, 4, enum dpdcei_dest, attr->dest_cfg.dest_type);\ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, attr->user_ctx);\ -+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, attr->fqid);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDCEI_RSP_GET_TX_QUEUE(cmd, attr) \ -+ MC_RSP_OP(cmd, 0, 32, 32, uint32_t, attr->fqid) -+ -+#endif /* _FSL_DPDCEI_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpdmai.h b/drivers/net/dpaa2/mc/fsl_dpdmai.h -new file mode 100644 -index 0000000..e931ce1 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpdmai.h -@@ -0,0 +1,521 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPDMAI_H -+#define __FSL_DPDMAI_H -+ -+struct fsl_mc_io; -+ -+/* Data Path DMA Interface API -+ * Contains initialization APIs and runtime control APIs for DPDMAI -+ */ -+ -+/* General DPDMAI macros */ -+ -+/** -+ * Maximum number of Tx/Rx priorities per DPDMAI object -+ */ -+#define DPDMAI_PRIO_NUM 2 -+ -+/** -+ * All queues considered; see dpdmai_set_rx_queue() -+ */ -+#define DPDMAI_ALL_QUEUES (uint8_t)(-1) -+ -+/** -+ * dpdmai_open() - Open a control session for the specified object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpdmai_id: DPDMAI unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpdmai_create() function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpdmai_id, -+ uint16_t *token); -+ -+/** -+ * dpdmai_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpdmai_cfg - Structure representing DPDMAI configuration -+ * @priorities: Priorities for the DMA hardware processing; valid priorities are -+ * configured with values 1-8; the entry following the last valid entry -+ * should be configured with 0 -+ */ -+struct dpdmai_cfg { -+ uint8_t priorities[DPDMAI_PRIO_NUM]; -+}; -+ -+/** -+ * dpdmai_create() - Create the DPDMAI object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPDMAI object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. For objects that are created using the -+ * DPL file, call the dpdmai_open() function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise.
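Read together with the enable/disable/close calls declared below, this gives the usual bring-up bracket. A sketch under the same assumptions as the other examples here (portal pointer in hand, cmd_flags of 0):

    /* Sketch: bring up a DPDMAI with both priority levels in use;
     * {1, 2} fills all DPDMAI_PRIO_NUM entries, so no 0 terminator
     * is needed. */
    struct dpdmai_cfg cfg = { .priorities = { 1, 2 } };
    uint16_t token;

    if (dpdmai_create(mc_io, 0 /* cmd_flags */, &cfg, &token) == 0) {
            dpdmai_enable(mc_io, 0, token);
            /* ... enqueue DMA jobs on the Tx FQIDs ... */
            dpdmai_disable(mc_io, 0, token);
            dpdmai_close(mc_io, 0, token);
    }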
-+ */ -+int dpdmai_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpdmai_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpdmai_destroy() - Destroy the DPDMAI object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpdmai_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpdmai_enable() - Enable the DPDMAI, allow sending and receiving frames. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpdmai_disable() - Disable the DPDMAI, stop sending and receiving frames. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpdmai_is_enabled() - Check if the DPDMAI is enabled. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @en: Returns '1' if object is enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpdmai_reset() - Reset the DPDMAI, returns the object to initial state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpdmai_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpdmai_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpdmai_set_irq() - Set IRQ information for the DPDMAI to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpdmai_irq_cfg *irq_cfg); -+ -+/** -+ * dpdmai_get_irq() - Get IRQ information from the DPDMAI -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. 
-+ */ -+int dpdmai_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpdmai_irq_cfg *irq_cfg); -+ -+/** -+ * dpdmai_set_irq_enable() - Set overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable controls the -+ * overall interrupt state. If the interrupt is disabled, no causes will cause -+ * an interrupt. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpdmai_get_irq_enable() - Get overall interrupt state -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpdmai_set_irq_mask() - Set interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @irq_index: The interrupt index to configure -+ * @mask: event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpdmai_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpdmai_get_irq_status() - Get the current status of any pending interrupts -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise.
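The status pair implements a read/write-1-to-clear protocol: read the pending causes, service them, then write back exactly the bits that were handled (dpdmai_clear_irq_status() is declared just below). An illustrative sketch, assuming mc_io, token and irq_index are already in hand:

    /* Sketch: service pending DPDMAI IRQ causes, then W1C-acknowledge
     * only the bits that were actually observed. */
    uint32_t status = 0;

    if (dpdmai_get_irq_status(mc_io, 0, token, irq_index, &status) == 0 &&
        status != 0) {
            /* ... handle each cause bit set in 'status' ... */
            dpdmai_clear_irq_status(mc_io, 0, token, irq_index, status);
    }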
-+ */ -+int dpdmai_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpdmai_clear_irq_status() - Clear a pending interrupt's status -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @irq_index: The interrupt index to configure -+ * @status: bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+ -+/** -+ * struct dpdmai_attr - Structure representing DPDMAI attributes -+ * @id: DPDMAI object ID -+ * @version: DPDMAI version -+ * @num_of_priorities: number of priorities -+ */ -+struct dpdmai_attr { -+ int id; -+ /** -+ * struct version - DPDMAI version -+ * @major: DPDMAI major version -+ * @minor: DPDMAI minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+ uint8_t num_of_priorities; -+}; -+ -+/** -+ * dpdmai_get_attributes() - Retrieve DPDMAI attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @attr: Returned object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdmai_attr *attr); -+ -+/** -+ * enum dpdmai_dest - DPDMAI destination types -+ * @DPDMAI_DEST_NONE: Unassigned destination; The queue is set in parked mode -+ * and does not generate FQDAN notifications; user is expected to dequeue -+ * from the queue based on polling or other user-defined method -+ * @DPDMAI_DEST_DPIO: The queue is set in schedule mode and generates FQDAN -+ * notifications to the specified DPIO; user is expected to dequeue -+ * from the queue only after notification is received -+ * @DPDMAI_DEST_DPCON: The queue is set in schedule mode and does not generate -+ * FQDAN notifications, but is connected to the specified DPCON object; -+ * user is expected to dequeue from the DPCON channel -+ */ -+enum dpdmai_dest { -+ DPDMAI_DEST_NONE = 0, -+ DPDMAI_DEST_DPIO = 1, -+ DPDMAI_DEST_DPCON = 2 -+}; -+ -+/** -+ * struct dpdmai_dest_cfg - Structure representing DPDMAI destination parameters -+ * @dest_type: Destination type -+ * @dest_id: Either DPIO ID or DPCON ID, depending on the destination type -+ * @priority: Priority selection within the DPIO or DPCON channel; valid values -+ * are 0-1 or 0-7, depending on the number of priorities in that -+ * channel; not relevant for 'DPDMAI_DEST_NONE' option -+ */ -+struct dpdmai_dest_cfg { -+ enum dpdmai_dest dest_type; -+ int dest_id; -+ uint8_t priority; -+}; -+ -+/* DPDMAI queue modification options */ -+ -+/** -+ * Select to modify the user's context associated with the queue -+ */ -+#define DPDMAI_QUEUE_OPT_USER_CTX 0x00000001 -+ -+/** -+ * Select to modify the queue's destination -+ */ -+#define DPDMAI_QUEUE_OPT_DEST 0x00000002 -+ -+/** -+ * struct dpdmai_rx_queue_cfg - DPDMAI RX queue configuration -+ * @options: Flags representing the suggested modifications to the queue; -+ * Use any combination of 'DPDMAI_QUEUE_OPT_' flags -+ * @user_ctx: User context value provided in the frame descriptor of each -+ * dequeued frame; -+ * valid only if 'DPDMAI_QUEUE_OPT_USER_CTX' is contained 
in 'options' -+ * @dest_cfg: Queue destination parameters; -+ * valid only if 'DPDMAI_QUEUE_OPT_DEST' is contained in 'options' -+ */ -+struct dpdmai_rx_queue_cfg { -+ uint32_t options; -+ uint64_t user_ctx; -+ struct dpdmai_dest_cfg dest_cfg; -+}; -+ -+/** -+ * dpdmai_set_rx_queue() - Set Rx queue configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @priority: Select the queue relative to number of -+ * priorities configured at DPDMAI creation; use -+ * DPDMAI_ALL_QUEUES to configure all Rx queues -+ * identically. -+ * @cfg: Rx queue configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, -+ const struct dpdmai_rx_queue_cfg *cfg); -+ -+/** -+ * struct dpdmai_rx_queue_attr - Structure representing attributes of Rx queues -+ * @user_ctx: User context value provided in the frame descriptor of each -+ * dequeued frame -+ * @dest_cfg: Queue destination configuration -+ * @fqid: Virtual FQID value to be used for dequeue operations -+ */ -+struct dpdmai_rx_queue_attr { -+ uint64_t user_ctx; -+ struct dpdmai_dest_cfg dest_cfg; -+ uint32_t fqid; -+}; -+ -+/** -+ * dpdmai_get_rx_queue() - Retrieve Rx queue attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @priority: Select the queue relative to number of -+ * priorities configured at DPDMAI creation -+ * @attr: Returned Rx queue attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, -+ struct dpdmai_rx_queue_attr *attr); -+ -+/** -+ * struct dpdmai_tx_queue_attr - Structure representing attributes of Tx queues -+ * @fqid: Virtual FQID to be used for sending frames to DMA hardware -+ */ -+struct dpdmai_tx_queue_attr { -+ uint32_t fqid; -+}; -+ -+/** -+ * dpdmai_get_tx_queue() - Retrieve Tx queue attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMAI object -+ * @priority: Select the queue relative to number of -+ * priorities configured at DPDMAI creation -+ * @attr: Returned Tx queue attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmai_get_tx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t priority, -+ struct dpdmai_tx_queue_attr *attr); -+ -+#endif /* __FSL_DPDMAI_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpdmai_cmd.h b/drivers/net/dpaa2/mc/fsl_dpdmai_cmd.h -new file mode 100644 -index 0000000..7c4a31a ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpdmai_cmd.h -@@ -0,0 +1,191 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution.
-+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef _FSL_DPDMAI_CMD_H -+#define _FSL_DPDMAI_CMD_H -+ -+/* DPDMAI Version */ -+#define DPDMAI_VER_MAJOR 2 -+#define DPDMAI_VER_MINOR 2 -+ -+/* Command IDs */ -+#define DPDMAI_CMDID_CLOSE 0x800 -+#define DPDMAI_CMDID_OPEN 0x80E -+#define DPDMAI_CMDID_CREATE 0x90E -+#define DPDMAI_CMDID_DESTROY 0x900 -+ -+#define DPDMAI_CMDID_ENABLE 0x002 -+#define DPDMAI_CMDID_DISABLE 0x003 -+#define DPDMAI_CMDID_GET_ATTR 0x004 -+#define DPDMAI_CMDID_RESET 0x005 -+#define DPDMAI_CMDID_IS_ENABLED 0x006 -+ -+#define DPDMAI_CMDID_SET_IRQ 0x010 -+#define DPDMAI_CMDID_GET_IRQ 0x011 -+#define DPDMAI_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPDMAI_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPDMAI_CMDID_SET_IRQ_MASK 0x014 -+#define DPDMAI_CMDID_GET_IRQ_MASK 0x015 -+#define DPDMAI_CMDID_GET_IRQ_STATUS 0x016 -+#define DPDMAI_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPDMAI_CMDID_SET_RX_QUEUE 0x1A0 -+#define DPDMAI_CMDID_GET_RX_QUEUE 0x1A1 -+#define DPDMAI_CMDID_GET_TX_QUEUE 0x1A2 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_OPEN(cmd, dpdmai_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpdmai_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_CREATE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, cfg->priorities[0]);\ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->priorities[1]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_RSP_IS_ENABLED(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 
32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_SET_IRQ_ENABLE(cmd, irq_index, enable_state) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, enable_state); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_RSP_GET_IRQ_ENABLE(cmd, enable_state) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, enable_state) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_RSP_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->id); \ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, attr->num_of_priorities); \ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_SET_RX_QUEUE(cmd, priority, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, cfg->dest_cfg.dest_id); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->dest_cfg.priority); \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, priority); \ -+ MC_CMD_OP(cmd, 0, 48, 4, enum dpdmai_dest, cfg->dest_cfg.dest_type); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->user_ctx); \ -+ MC_CMD_OP(cmd, 2, 0, 32, uint32_t, cfg->options);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_GET_RX_QUEUE(cmd, priority) \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, priority) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_RSP_GET_RX_QUEUE(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->dest_cfg.dest_id);\ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, attr->dest_cfg.priority);\ -+ MC_RSP_OP(cmd, 0, 48, 4, enum dpdmai_dest, attr->dest_cfg.dest_type);\ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, attr->user_ctx);\ -+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, attr->fqid);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_CMD_GET_TX_QUEUE(cmd, priority) \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, priority) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMAI_RSP_GET_TX_QUEUE(cmd, attr) \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, attr->fqid) -+ -+#endif /* _FSL_DPDMAI_CMD_H */ -diff --git 
a/drivers/net/dpaa2/mc/fsl_dpdmux.h b/drivers/net/dpaa2/mc/fsl_dpdmux.h -new file mode 100644 -index 0000000..455a042 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpdmux.h -@@ -0,0 +1,724 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPDMUX_H -+#define __FSL_DPDMUX_H -+ -+#include <fsl_net.h> -+ -+struct fsl_mc_io; -+ -+/* Data Path Demux API -+ * Contains API for handling DPDMUX topology and functionality -+ */ -+ -+/** -+ * dpdmux_open() - Open a control session for the specified object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpdmux_id: DPDMUX unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpdmux_create() function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpdmux_id, -+ uint16_t *token); -+ -+/** -+ * dpdmux_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise.
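Because the token binds one object ID to one MC portal, every control session is strictly bracketed by open/close. Illustrative only (dpdmux_id would come from the DPL; cmd_flags is 0):

    /* Sketch: a DPDMUX control session. */
    uint16_t token;

    if (dpdmux_open(mc_io, 0 /* cmd_flags */, dpdmux_id, &token) == 0) {
            /* ... all commands against this DPDMUX pass 'token' ... */
            dpdmux_close(mc_io, 0, token);  /* token is invalid afterwards */
    }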
-+ */ -+int dpdmux_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * DPDMUX general options -+ */ -+ -+/** -+ * Enable bridging between internal interfaces -+ */ -+#define DPDMUX_OPT_BRIDGE_EN 0x0000000000000002ULL -+ -+#define DPDMUX_IRQ_INDEX_IF 0x0000 -+#define DPDMUX_IRQ_INDEX 0x0001 -+ -+/** -+ * IRQ event - Indicates that the link state changed -+ */ -+#define DPDMUX_IRQ_EVENT_LINK_CHANGED 0x0001 -+ -+/** -+ * enum dpdmux_manip - DPDMUX manipulation operations -+ * @DPDMUX_MANIP_NONE: No manipulation on frames -+ * @DPDMUX_MANIP_ADD_REMOVE_S_VLAN: Add S-VLAN on egress, remove it on ingress -+ */ -+enum dpdmux_manip { -+ DPDMUX_MANIP_NONE = 0x0, -+ DPDMUX_MANIP_ADD_REMOVE_S_VLAN = 0x1 -+}; -+ -+/** -+ * enum dpdmux_method - DPDMUX method options -+ * @DPDMUX_METHOD_NONE: no DPDMUX method -+ * @DPDMUX_METHOD_C_VLAN_MAC: DPDMUX based on C-VLAN and MAC address -+ * @DPDMUX_METHOD_MAC: DPDMUX based on MAC address -+ * @DPDMUX_METHOD_C_VLAN: DPDMUX based on C-VLAN -+ * @DPDMUX_METHOD_S_VLAN: DPDMUX based on S-VLAN -+ */ -+enum dpdmux_method { -+ DPDMUX_METHOD_NONE = 0x0, -+ DPDMUX_METHOD_C_VLAN_MAC = 0x1, -+ DPDMUX_METHOD_MAC = 0x2, -+ DPDMUX_METHOD_C_VLAN = 0x3, -+ DPDMUX_METHOD_S_VLAN = 0x4 -+}; -+ -+/** -+ * struct dpdmux_cfg - DPDMUX configuration parameters -+ * @method: Defines the operation method for the DPDMUX address table -+ * @manip: Required manipulation operation -+ * @num_ifs: Number of interfaces (excluding the uplink interface) -+ * @adv: Advanced parameters; default is all zeros; -+ * use this structure to change default settings -+ */ -+struct dpdmux_cfg { -+ enum dpdmux_method method; -+ enum dpdmux_manip manip; -+ uint16_t num_ifs; -+ /** -+ * struct adv - Advanced parameters -+ * @options: DPDMUX options - combination of 'DPDMUX_OPT_' flags -+ * @max_dmat_entries: Maximum entries in DPDMUX address table -+ * 0 - indicates default: 64 entries per interface. -+ * @max_mc_groups: Number of multicast groups in DPDMUX table -+ * 0 - indicates default: 32 multicast groups -+ * @max_vlan_ids: Maximum VLAN IDs allowed in the system; -+ * relevant only when working in the MAC+VLAN method. -+ * 0 - indicates default: 16 VLAN IDs. -+ */ -+ struct { -+ uint64_t options; -+ uint16_t max_dmat_entries; -+ uint16_t max_mc_groups; -+ uint16_t max_vlan_ids; -+ } adv; -+}; -+ -+/** -+ * dpdmux_create() - Create the DPDMUX object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPDMUX object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. For objects that are created using the -+ * DPL file, call the dpdmux_open() function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpdmux_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpdmux_destroy() - Destroy the DPDMUX object and release all its resources.
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpdmux_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpdmux_enable() - Enable DPDMUX functionality -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpdmux_disable() - Disable DPDMUX functionality -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpdmux_is_enabled() - Check if the DPDMUX is enabled. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @en: Returns '1' if object is enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpdmux_reset() - Reset the DPDMUX, returns the object to initial state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpdmux_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpdmux_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpdmux_set_irq() - Set IRQ information for the DPDMUX to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpdmux_irq_cfg *irq_cfg); -+ -+/** -+ * dpdmux_get_irq() - Get IRQ information from the DPDMUX. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpdmux_irq_cfg *irq_cfg); -+ -+/** -+ * dpdmux_set_irq_enable() - Set overall interrupt state. 
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable controls the -+ * overall interrupt state. If the interrupt is disabled, no causes will cause -+ * an interrupt. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpdmux_get_irq_enable() - Get overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpdmux_set_irq_mask() - Set interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @irq_index: The interrupt index to configure -+ * @mask: event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpdmux_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpdmux_get_irq_status() - Get the current status of any pending interrupts. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise.
-+ */ -+int dpdmux_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpdmux_clear_irq_status() - Clear a pending interrupt's status -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @irq_index: The interrupt index to configure -+ * @status: bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+ -+/** -+ * struct dpdmux_attr - Structure representing DPDMUX attributes -+ * @id: DPDMUX object ID -+ * @version: DPDMUX version -+ * @options: Configuration options (bitmap) -+ * @method: DPDMUX address table method -+ * @manip: DPDMUX manipulation type -+ * @num_ifs: Number of interfaces (excluding the uplink interface) -+ * @mem_size: DPDMUX frame storage memory size -+ */ -+struct dpdmux_attr { -+ int id; -+ /** -+ * struct version - DPDMUX version -+ * @major: DPDMUX major version -+ * @minor: DPDMUX minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+ uint64_t options; -+ enum dpdmux_method method; -+ enum dpdmux_manip manip; -+ uint16_t num_ifs; -+ uint16_t mem_size; -+}; -+ -+/** -+ * dpdmux_get_attributes() - Retrieve DPDMUX attributes -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @attr: Returned object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpdmux_attr *attr); -+ -+/** -+ * dpdmux_ul_set_max_frame_length() - Set the maximum frame length in DPDMUX -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @max_frame_length: The required maximum frame length -+ * -+ * Return: '0' on Success; Error code otherwise. 
-+ */ -+int dpdmux_ul_set_max_frame_length(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t max_frame_length); -+ -+/** -+ * enum dpdmux_counter_type - Counter types -+ * @DPDMUX_CNT_ING_FRAME: Counts ingress frames -+ * @DPDMUX_CNT_ING_BYTE: Counts ingress bytes -+ * @DPDMUX_CNT_ING_FLTR_FRAME: Counts filtered ingress frames -+ * @DPDMUX_CNT_ING_FRAME_DISCARD: Counts discarded ingress frames -+ * @DPDMUX_CNT_ING_MCAST_FRAME: Counts ingress multicast frames -+ * @DPDMUX_CNT_ING_MCAST_BYTE: Counts ingress multicast bytes -+ * @DPDMUX_CNT_ING_BCAST_FRAME: Counts ingress broadcast frames -+ * @DPDMUX_CNT_ING_BCAST_BYTES: Counts ingress broadcast bytes -+ * @DPDMUX_CNT_EGR_FRAME: Counts egress frames -+ * @DPDMUX_CNT_EGR_BYTE: Counts egress bytes -+ * @DPDMUX_CNT_EGR_FRAME_DISCARD: Counts discarded egress frames -+ */ -+enum dpdmux_counter_type { -+ DPDMUX_CNT_ING_FRAME = 0x0, -+ DPDMUX_CNT_ING_BYTE = 0x1, -+ DPDMUX_CNT_ING_FLTR_FRAME = 0x2, -+ DPDMUX_CNT_ING_FRAME_DISCARD = 0x3, -+ DPDMUX_CNT_ING_MCAST_FRAME = 0x4, -+ DPDMUX_CNT_ING_MCAST_BYTE = 0x5, -+ DPDMUX_CNT_ING_BCAST_FRAME = 0x6, -+ DPDMUX_CNT_ING_BCAST_BYTES = 0x7, -+ DPDMUX_CNT_EGR_FRAME = 0x8, -+ DPDMUX_CNT_EGR_BYTE = 0x9, -+ DPDMUX_CNT_EGR_FRAME_DISCARD = 0xa -+}; -+ -+/** -+ * enum dpdmux_accepted_frames_type - DPDMUX frame types -+ * @DPDMUX_ADMIT_ALL: The device accepts VLAN tagged, untagged and -+ * priority-tagged frames -+ * @DPDMUX_ADMIT_ONLY_VLAN_TAGGED: The device discards untagged frames or -+ * priority-tagged frames that are received on this -+ * interface -+ * @DPDMUX_ADMIT_ONLY_UNTAGGED: Untagged frames or priority-tagged frames -+ * received on this interface are accepted -+ */ -+enum dpdmux_accepted_frames_type { -+ DPDMUX_ADMIT_ALL = 0, -+ DPDMUX_ADMIT_ONLY_VLAN_TAGGED = 1, -+ DPDMUX_ADMIT_ONLY_UNTAGGED = 2 -+}; -+ -+/** -+ * enum dpdmux_action - DPDMUX action for un-accepted frames -+ * @DPDMUX_ACTION_DROP: Drop un-accepted frames -+ * @DPDMUX_ACTION_REDIRECT_TO_CTRL: Redirect un-accepted frames to the -+ * control interface -+ */ -+enum dpdmux_action { -+ DPDMUX_ACTION_DROP = 0, -+ DPDMUX_ACTION_REDIRECT_TO_CTRL = 1 -+}; -+ -+/** -+ * struct dpdmux_accepted_frames - Frame types configuration -+ * @type: Defines ingress accepted frames -+ * @unaccept_act: Defines action on frames not accepted -+ */ -+struct dpdmux_accepted_frames { -+ enum dpdmux_accepted_frames_type type; -+ enum dpdmux_action unaccept_act; -+}; -+ -+/** -+ * dpdmux_if_set_accepted_frames() - Set the accepted frame types -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @if_id: Interface ID (0 for uplink, or 1-num_ifs); -+ * @cfg: Frame types configuration -+ * -+ * If 'DPDMUX_ADMIT_ONLY_VLAN_TAGGED' is set - untagged frames or -+ * priority-tagged frames are discarded. -+ * If 'DPDMUX_ADMIT_ONLY_UNTAGGED' is set - untagged frames or -+ * priority-tagged frames are accepted. -+ * If 'DPDMUX_ADMIT_ALL' is set (default mode) - all VLAN tagged, -+ * untagged and priority-tagged frames are accepted. -+ * -+ * Return: '0' on Success; Error code otherwise.
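For example, to keep only VLAN-tagged traffic on a downlink interface and silently drop the rest (hedged sketch; interface 1 is an arbitrary choice and cmd_flags is 0):

    /* Sketch: admit only VLAN-tagged frames on interface 1, drop others. */
    struct dpdmux_accepted_frames frames = {
            .type         = DPDMUX_ADMIT_ONLY_VLAN_TAGGED,
            .unaccept_act = DPDMUX_ACTION_DROP,
    };

    dpdmux_if_set_accepted_frames(mc_io, 0, token, 1 /* if_id */, &frames);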
-+ */ -+int dpdmux_if_set_accepted_frames(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpdmux_accepted_frames *cfg); -+ -+/** -+ * struct dpdmux_if_attr - Structure representing frame types configuration -+ * @rate: Configured interface rate (in bits per second) -+ * @enabled: Indicates if interface is enabled -+ * @accept_frame_type: Indicates type of accepted frames for the interface -+ */ -+struct dpdmux_if_attr { -+ uint32_t rate; -+ int enabled; -+ enum dpdmux_accepted_frames_type accept_frame_type; -+}; -+ -+/** -+ * dpdmux_if_get_attributes() - Obtain DPDMUX interface attributes -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @if_id: Interface ID (0 for uplink, or 1-num_ifs); -+ * @attr: Interface attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_if_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpdmux_if_attr *attr); -+ -+/** -+ * struct dpdmux_l2_rule - Structure representing L2 rule -+ * @mac_addr: MAC address -+ * @vlan_id: VLAN ID -+ */ -+struct dpdmux_l2_rule { -+ uint8_t mac_addr[6]; -+ uint16_t vlan_id; -+}; -+ -+/** -+ * dpdmux_if_remove_l2_rule() - Remove L2 rule from DPDMUX table -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @if_id: Destination interface ID -+ * @rule: L2 rule -+ * -+ * Function removes an L2 rule from DPDMUX table -+ * or removes an interface from an existing multicast address -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_if_remove_l2_rule(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpdmux_l2_rule *rule); -+ -+/** -+ * dpdmux_if_add_l2_rule() - Add L2 rule into DPDMUX table -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @if_id: Destination interface ID -+ * @rule: L2 rule -+ * -+ * Function adds an L2 rule into DPDMUX table -+ * or adds an interface to an existing multicast address -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_if_add_l2_rule(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpdmux_l2_rule *rule); -+ -+/** -+ * dpdmux_if_get_counter() - Function obtains a specific counter of an interface -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @if_id: Interface ID -+ * @counter_type: Counter type -+ * @counter: Returned specific counter information -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_if_get_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ enum dpdmux_counter_type counter_type, -+ uint64_t *counter); -+ -+/** -+ * dpdmux_ul_reset_counters() - Function resets the uplink counters -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * -+ * Return: '0' on Success; Error code otherwise.
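A typical statistics loop snapshots counters per interface and only zeroes the uplink side explicitly (dpdmux_ul_reset_counters() is declared just below; interface 0 is the uplink, per the if_id convention above). Illustrative sketch:

    /* Sketch: snapshot uplink ingress bytes, then reset uplink counters. */
    uint64_t ing_bytes = 0;

    dpdmux_if_get_counter(mc_io, 0, token, 0 /* uplink if_id */,
                          DPDMUX_CNT_ING_BYTE, &ing_bytes);
    dpdmux_ul_reset_counters(mc_io, 0, token);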
-+ */ -+int dpdmux_ul_reset_counters(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * Enable auto-negotiation -+ */ -+#define DPDMUX_LINK_OPT_AUTONEG 0x0000000000000001ULL -+/** -+ * Enable half-duplex mode -+ */ -+#define DPDMUX_LINK_OPT_HALF_DUPLEX 0x0000000000000002ULL -+/** -+ * Enable pause frames -+ */ -+#define DPDMUX_LINK_OPT_PAUSE 0x0000000000000004ULL -+/** -+ * Enable asymmetric pause frames -+ */ -+#define DPDMUX_LINK_OPT_ASYM_PAUSE 0x0000000000000008ULL -+ -+/** -+ * struct dpdmux_link_cfg - Structure representing DPDMUX link configuration -+ * @rate: Rate -+ * @options: Mask of available options; use 'DPDMUX_LINK_OPT_' values -+ */ -+struct dpdmux_link_cfg { -+ uint32_t rate; -+ uint64_t options; -+}; -+ -+/** -+ * dpdmux_if_set_link_cfg() - Set the link configuration. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @if_id: Interface ID -+ * @cfg: Link configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_if_set_link_cfg(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpdmux_link_cfg *cfg); -+ -+/** -+ * struct dpdmux_link_state - Structure representing DPDMUX link state -+ * @rate: Rate -+ * @options: Mask of available options; use 'DPDMUX_LINK_OPT_' values -+ * @up: 0 - down, 1 - up -+ */ -+struct dpdmux_link_state { -+ uint32_t rate; -+ uint64_t options; -+ int up; -+}; -+ -+/** -+ * dpdmux_if_get_link_state() - Return the link state -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPDMUX object -+ * @if_id: Interface ID -+ * @state: Returned link state -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpdmux_if_get_link_state(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpdmux_link_state *state); -+ -+#endif /* __FSL_DPDMUX_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpdmux_cmd.h b/drivers/net/dpaa2/mc/fsl_dpdmux_cmd.h -new file mode 100644 -index 0000000..0a5cf17 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpdmux_cmd.h -@@ -0,0 +1,256 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef _FSL_DPDMUX_CMD_H -+#define _FSL_DPDMUX_CMD_H -+ -+/* DPDMUX Version */ -+#define DPDMUX_VER_MAJOR 5 -+#define DPDMUX_VER_MINOR 0 -+ -+/* Command IDs */ -+#define DPDMUX_CMDID_CLOSE 0x800 -+#define DPDMUX_CMDID_OPEN 0x806 -+#define DPDMUX_CMDID_CREATE 0x906 -+#define DPDMUX_CMDID_DESTROY 0x900 -+ -+#define DPDMUX_CMDID_ENABLE 0x002 -+#define DPDMUX_CMDID_DISABLE 0x003 -+#define DPDMUX_CMDID_GET_ATTR 0x004 -+#define DPDMUX_CMDID_RESET 0x005 -+#define DPDMUX_CMDID_IS_ENABLED 0x006 -+ -+#define DPDMUX_CMDID_SET_IRQ 0x010 -+#define DPDMUX_CMDID_GET_IRQ 0x011 -+#define DPDMUX_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPDMUX_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPDMUX_CMDID_SET_IRQ_MASK 0x014 -+#define DPDMUX_CMDID_GET_IRQ_MASK 0x015 -+#define DPDMUX_CMDID_GET_IRQ_STATUS 0x016 -+#define DPDMUX_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPDMUX_CMDID_UL_SET_MAX_FRAME_LENGTH 0x0a1 -+ -+#define DPDMUX_CMDID_UL_RESET_COUNTERS 0x0a3 -+ -+#define DPDMUX_CMDID_IF_SET_ACCEPTED_FRAMES 0x0a7 -+#define DPDMUX_CMDID_IF_GET_ATTR 0x0a8 -+ -+#define DPDMUX_CMDID_IF_ADD_L2_RULE 0x0b0 -+#define DPDMUX_CMDID_IF_REMOVE_L2_RULE 0x0b1 -+#define DPDMUX_CMDID_IF_GET_COUNTER 0x0b2 -+#define DPDMUX_CMDID_IF_SET_LINK_CFG 0x0b3 -+#define DPDMUX_CMDID_IF_GET_LINK_STATE 0x0b4 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_OPEN(cmd, dpdmux_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpdmux_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_CREATE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, enum dpdmux_method, cfg->method);\ -+ MC_CMD_OP(cmd, 0, 8, 8, enum dpdmux_manip, cfg->manip);\ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->num_ifs);\ -+ MC_CMD_OP(cmd, 1, 0, 16, uint16_t, cfg->adv.max_dmat_entries);\ -+ MC_CMD_OP(cmd, 1, 16, 16, uint16_t, cfg->adv.max_mc_groups);\ -+ MC_CMD_OP(cmd, 1, 32, 16, uint16_t, cfg->adv.max_vlan_ids);\ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->adv.options);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_RSP_IS_ENABLED(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define 
DPDMUX_CMD_SET_IRQ_ENABLE(cmd, irq_index, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, en);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_RSP_GET_IRQ_ENABLE(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+} while (0) -+ -+#define DPDMUX_RSP_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 8, enum dpdmux_method, attr->method);\ -+ MC_RSP_OP(cmd, 0, 8, 8, enum dpdmux_manip, attr->manip);\ -+ MC_RSP_OP(cmd, 0, 16, 16, uint16_t, attr->num_ifs);\ -+ MC_RSP_OP(cmd, 0, 32, 16, uint16_t, attr->mem_size);\ -+ MC_RSP_OP(cmd, 2, 0, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 3, 0, 64, uint64_t, attr->options);\ -+ MC_RSP_OP(cmd, 4, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 4, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_UL_SET_MAX_FRAME_LENGTH(cmd, max_frame_length) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, max_frame_length) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_IF_SET_ACCEPTED_FRAMES(cmd, if_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 4, enum dpdmux_accepted_frames_type, cfg->type);\ -+ MC_CMD_OP(cmd, 0, 20, 4, enum dpdmux_unaccepted_frames_action, \ -+ cfg->unaccept_act);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_IF_GET_ATTR(cmd, if_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_RSP_IF_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 56, 4, enum dpdmux_accepted_frames_type, \ -+ attr->accept_frame_type);\ -+ MC_RSP_OP(cmd, 0, 24, 1, int, attr->enabled);\ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, attr->rate);\ -+} while (0) -+ -+#define DPDMUX_CMD_IF_REMOVE_L2_RULE(cmd, if_id, l2_rule) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, l2_rule->mac_addr[5]);\ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, l2_rule->mac_addr[4]);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, l2_rule->mac_addr[3]);\ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, l2_rule->mac_addr[2]);\ -+ MC_CMD_OP(cmd, 0, 48, 8, uint8_t,
l2_rule->mac_addr[1]);\ -+ MC_CMD_OP(cmd, 0, 56, 8, uint8_t, l2_rule->mac_addr[0]);\ -+ MC_CMD_OP(cmd, 1, 32, 16, uint16_t, l2_rule->vlan_id);\ -+} while (0) -+ -+#define DPDMUX_CMD_IF_ADD_L2_RULE(cmd, if_id, l2_rule) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, l2_rule->mac_addr[5]);\ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, l2_rule->mac_addr[4]);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, l2_rule->mac_addr[3]);\ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, l2_rule->mac_addr[2]);\ -+ MC_CMD_OP(cmd, 0, 48, 8, uint8_t, l2_rule->mac_addr[1]);\ -+ MC_CMD_OP(cmd, 0, 56, 8, uint8_t, l2_rule->mac_addr[0]);\ -+ MC_CMD_OP(cmd, 1, 32, 16, uint16_t, l2_rule->vlan_id);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_IF_GET_COUNTER(cmd, if_id, counter_type) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 8, enum dpdmux_counter_type, counter_type);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_RSP_IF_GET_COUNTER(cmd, counter) \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, counter) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_IF_SET_LINK_CFG(cmd, if_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->rate);\ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->options);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_CMD_IF_GET_LINK_STATE(cmd, if_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPDMUX_RSP_IF_GET_LINK_STATE(cmd, state) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 32, 1, int, state->up);\ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, state->rate);\ -+ MC_RSP_OP(cmd, 2, 0, 64, uint64_t, state->options);\ -+} while (0) -+ -+#endif /* _FSL_DPDMUX_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpio.h b/drivers/net/dpaa2/mc/fsl_dpio.h -new file mode 100644 -index 0000000..88a492f ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpio.h -@@ -0,0 +1,460 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPIO_H -+#define __FSL_DPIO_H -+ -+/* Data Path I/O Portal API -+ * Contains initialization APIs and runtime control APIs for DPIO -+ */ -+ -+struct fsl_mc_io; -+ -+/** -+ * dpio_open() - Open a control session for the specified object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpio_id: DPIO unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpio_create() function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpio_id, -+ uint16_t *token); -+ -+/** -+ * dpio_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * enum dpio_channel_mode - DPIO notification channel mode -+ * @DPIO_NO_CHANNEL: No support for notification channel -+ * @DPIO_LOCAL_CHANNEL: Notifications on data availability can be received by a -+ * dedicated channel in the DPIO; user should point the queue's -+ * destination in the relevant interface to this DPIO -+ */ -+enum dpio_channel_mode { -+ DPIO_NO_CHANNEL = 0, -+ DPIO_LOCAL_CHANNEL = 1, -+}; -+ -+/** -+ * struct dpio_cfg - Structure representing DPIO configuration -+ * @channel_mode: Notification channel mode -+ * @num_priorities: Number of priorities for the notification channel (1-8); -+ * relevant only if 'channel_mode = DPIO_LOCAL_CHANNEL' -+ */ -+struct dpio_cfg { -+ enum dpio_channel_mode channel_mode; -+ uint8_t num_priorities; -+}; -+ -+/** -+ * dpio_create() - Create the DPIO object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPIO object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. For objects that are created using the -+ * DPL file, call dpio_open() function to get an authentication -+ * token first. 
-+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpio_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpio_destroy() - Destroy the DPIO object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * -+ * Return: '0' on Success; Error code otherwise -+ */ -+int dpio_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpio_enable() - Enable the DPIO, allow I/O portal operations. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * -+ * Return: '0' on Success; Error code otherwise -+ */ -+int dpio_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpio_disable() - Disable the DPIO, stop any I/O portal operation. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * -+ * Return: '0' on Success; Error code otherwise -+ */ -+int dpio_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpio_is_enabled() - Check if the DPIO is enabled. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @en: Returns '1' if object is enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpio_reset() - Reset the DPIO, returning the object to its initial state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpio_set_stashing_destination() - Set the stashing destination. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @sdest: stashing destination value -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_set_stashing_destination(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t sdest); -+ -+/** -+ * dpio_get_stashing_destination() - Get the stashing destination. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @sdest: Returns the stashing destination value -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_get_stashing_destination(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t *sdest); -+ -+/** -+ * dpio_add_static_dequeue_channel() - Add a static dequeue channel. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @dpcon_id: DPCON object ID -+ * @channel_index: Returned channel index to be used in qbman API -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_add_static_dequeue_channel(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpcon_id, -+ uint8_t *channel_index);
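
A brief lifecycle sketch (not part of the patch) tying the calls above together: open a control session for a DPIO already declared in the DPL, enable it, and verify the state. 'mc_io' is assumed to be an initialized MC portal and 'dpio_id' to come from the container layout.

/* Sketch only: open, enable and verify a DPIO object.
 * On success the caller keeps '*token' for all subsequent commands. */
static int dpio_bring_up(struct fsl_mc_io *mc_io, int dpio_id,
			 uint16_t *token)
{
	int err, en = 0;

	err = dpio_open(mc_io, 0 /* cmd_flags */, dpio_id, token);
	if (err)
		return err;

	err = dpio_enable(mc_io, 0, *token);
	if (!err)
		err = dpio_is_enabled(mc_io, 0, *token, &en);

	if (err || !en) {
		/* Roll back the control session on any failure. */
		dpio_close(mc_io, 0, *token);
		return err ? err : -1;
	}
	return 0;
}
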
-+ -+/** -+ * dpio_remove_static_dequeue_channel() - Remove a static dequeue channel. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @dpcon_id: DPCON object ID -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_remove_static_dequeue_channel(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int dpcon_id); -+ -+/** -+ * DPIO IRQ Index and Events -+ */ -+ -+/** -+ * IRQ software-portal index -+ */ -+#define DPIO_IRQ_SWP_INDEX 0 -+ -+/** -+ * struct dpio_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpio_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpio_set_irq() - Set IRQ information for the DPIO to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpio_irq_cfg *irq_cfg); -+ -+/** -+ * dpio_get_irq() - Get IRQ information from the DPIO. -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpio_irq_cfg *irq_cfg); -+ -+/** -+ * dpio_set_irq_enable() - Set overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable controls the -+ * overall interrupt state. If the interrupt is disabled, no cause will trigger -+ * an interrupt. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpio_get_irq_enable() - Get overall interrupt state -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpio_set_irq_mask() - Set interrupt mask.
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @irq_index: The interrupt index to configure -+ * @mask: event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpio_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpio_get_irq_status() - Get the current status of any pending interrupts. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpio_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpio_clear_irq_status() - Clear a pending interrupt's status -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @irq_index: The interrupt index to configure -+ * @status: bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. 
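
The status/clear pair documented above follows a write-one-to-clear (W1C) convention. A small sketch (not part of the patch) of the usual service pattern, using the DPIO_IRQ_SWP_INDEX constant defined earlier in this header:

/* Sketch only: fetch pending IRQ causes and acknowledge exactly the
 * bits that were observed (write-one-to-clear). Assumes an open DPIO
 * session (mc_io + token). */
static int dpio_service_irq(struct fsl_mc_io *mc_io, uint16_t token)
{
	uint32_t status = 0;
	int err;

	err = dpio_get_irq_status(mc_io, 0 /* cmd_flags */, token,
				  DPIO_IRQ_SWP_INDEX, &status);
	if (err || !status)
		return err;

	/* ... per-cause handling of 'status' would go here ... */

	return dpio_clear_irq_status(mc_io, 0, token,
				     DPIO_IRQ_SWP_INDEX, status);
}
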
-+ */ -+int dpio_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+ -+/** -+ * struct dpio_attr - Structure representing DPIO attributes -+ * @id: DPIO object ID -+ * @version: DPIO version -+ * @qbman_portal_ce_offset: offset of the software portal cache-enabled area -+ * @qbman_portal_ci_offset: offset of the software portal cache-inhibited area -+ * @qbman_portal_id: Software portal ID -+ * @channel_mode: Notification channel mode -+ * @num_priorities: Number of priorities for the notification channel (1-8); -+ * relevant only if 'channel_mode = DPIO_LOCAL_CHANNEL' -+ * @qbman_version: QBMAN version -+ */ -+struct dpio_attr { -+ int id; -+ /** -+ * struct version - DPIO version -+ * @major: DPIO major version -+ * @minor: DPIO minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+ uint64_t qbman_portal_ce_offset; -+ uint64_t qbman_portal_ci_offset; -+ uint16_t qbman_portal_id; -+ enum dpio_channel_mode channel_mode; -+ uint8_t num_priorities; -+ uint32_t qbman_version; -+}; -+ -+/** -+ * dpio_get_attributes() - Retrieve DPIO attributes -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPIO object -+ * @attr: Returned object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise -+ */ -+int dpio_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpio_attr *attr); -+#endif /* __FSL_DPIO_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpio_cmd.h b/drivers/net/dpaa2/mc/fsl_dpio_cmd.h -new file mode 100644 -index 0000000..f339cd6 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpio_cmd.h -@@ -0,0 +1,184 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef _FSL_DPIO_CMD_H -+#define _FSL_DPIO_CMD_H -+ -+/* DPIO Version */ -+#define DPIO_VER_MAJOR 3 -+#define DPIO_VER_MINOR 2 -+ -+/* Command IDs */ -+#define DPIO_CMDID_CLOSE 0x800 -+#define DPIO_CMDID_OPEN 0x803 -+#define DPIO_CMDID_CREATE 0x903 -+#define DPIO_CMDID_DESTROY 0x900 -+ -+#define DPIO_CMDID_ENABLE 0x002 -+#define DPIO_CMDID_DISABLE 0x003 -+#define DPIO_CMDID_GET_ATTR 0x004 -+#define DPIO_CMDID_RESET 0x005 -+#define DPIO_CMDID_IS_ENABLED 0x006 -+ -+#define DPIO_CMDID_SET_IRQ 0x010 -+#define DPIO_CMDID_GET_IRQ 0x011 -+#define DPIO_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPIO_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPIO_CMDID_SET_IRQ_MASK 0x014 -+#define DPIO_CMDID_GET_IRQ_MASK 0x015 -+#define DPIO_CMDID_GET_IRQ_STATUS 0x016 -+#define DPIO_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPIO_CMDID_SET_STASHING_DEST 0x120 -+#define DPIO_CMDID_GET_STASHING_DEST 0x121 -+#define DPIO_CMDID_ADD_STATIC_DEQUEUE_CHANNEL 0x122 -+#define DPIO_CMDID_REMOVE_STATIC_DEQUEUE_CHANNEL 0x123 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_OPEN(cmd, dpio_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpio_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_CREATE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 16, 2, enum dpio_channel_mode, \ -+ cfg->channel_mode);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->num_priorities);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_RSP_IS_ENABLED(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_SET_IRQ_ENABLE(cmd, irq_index, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, en); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_RSP_GET_IRQ_ENABLE(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, 
uint32_t, mask); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_RSP_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 0, 32, 16, uint16_t, attr->qbman_portal_id);\ -+ MC_RSP_OP(cmd, 0, 48, 8, uint8_t, attr->num_priorities);\ -+ MC_RSP_OP(cmd, 0, 56, 4, enum dpio_channel_mode, attr->channel_mode);\ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, attr->qbman_portal_ce_offset);\ -+ MC_RSP_OP(cmd, 2, 0, 64, uint64_t, attr->qbman_portal_ci_offset);\ -+ MC_RSP_OP(cmd, 3, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 3, 16, 16, uint16_t, attr->version.minor);\ -+ MC_RSP_OP(cmd, 3, 32, 32, uint32_t, attr->qbman_version);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_SET_STASHING_DEST(cmd, sdest) \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, sdest) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_RSP_GET_STASHING_DEST(cmd, sdest) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, sdest) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_ADD_STATIC_DEQUEUE_CHANNEL(cmd, dpcon_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpcon_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_RSP_ADD_STATIC_DEQUEUE_CHANNEL(cmd, channel_index) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, channel_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPIO_CMD_REMOVE_STATIC_DEQUEUE_CHANNEL(cmd, dpcon_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpcon_id) -+#endif /* _FSL_DPIO_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpkg.h b/drivers/net/dpaa2/mc/fsl_dpkg.h -new file mode 100644 -index 0000000..b2bceaf ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpkg.h -@@ -0,0 +1,174 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. 
-+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPKG_H_ -+#define __FSL_DPKG_H_ -+ -+#include <fsl_net.h> -+ -+/* Data Path Key Generator API -+ * Contains initialization APIs and runtime APIs for the Key Generator -+ */ -+ -+/** Key Generator properties */ -+ -+/** -+ * Number of masks per key extraction -+ */ -+#define DPKG_NUM_OF_MASKS 4 -+/** -+ * Number of extractions per key profile -+ */ -+#define DPKG_MAX_NUM_OF_EXTRACTS 10 -+ -+/** -+ * enum dpkg_extract_from_hdr_type - Selecting extraction by header types -+ * @DPKG_FROM_HDR: Extract selected bytes from header, by offset -+ * @DPKG_FROM_FIELD: Extract selected bytes from header, by offset from field -+ * @DPKG_FULL_FIELD: Extract a full field -+ */ -+enum dpkg_extract_from_hdr_type { -+ DPKG_FROM_HDR = 0, -+ DPKG_FROM_FIELD = 1, -+ DPKG_FULL_FIELD = 2 -+}; -+ -+/** -+ * enum dpkg_extract_type - Enumeration for selecting extraction type -+ * @DPKG_EXTRACT_FROM_HDR: Extract from the header -+ * @DPKG_EXTRACT_FROM_DATA: Extract from data not in specific header -+ * @DPKG_EXTRACT_FROM_PARSE: Extract from parser-result; -+ * e.g.
can be used to extract header existence; -+ * please refer to 'Parse Result definition' section in the parser BG -+ */ -+enum dpkg_extract_type { -+ DPKG_EXTRACT_FROM_HDR = 0, -+ DPKG_EXTRACT_FROM_DATA = 1, -+ DPKG_EXTRACT_FROM_PARSE = 3 -+}; -+ -+/** -+ * struct dpkg_mask - A structure for defining a single extraction mask -+ * @mask: Byte mask for the extracted content -+ * @offset: Offset within the extracted content -+ */ -+struct dpkg_mask { -+ uint8_t mask; -+ uint8_t offset; -+}; -+ -+/** -+ * struct dpkg_extract - A structure for defining a single extraction -+ * @type: Determines how the union below is interpreted: -+ * DPKG_EXTRACT_FROM_HDR: selects 'from_hdr'; -+ * DPKG_EXTRACT_FROM_DATA: selects 'from_data'; -+ * DPKG_EXTRACT_FROM_PARSE: selects 'from_parse' -+ * @extract: Selects extraction method -+ * @num_of_byte_masks: Defines the number of valid entries in the array below; -+ * This is also the number of bytes to be used as masks -+ * @masks: Masks parameters -+ */ -+struct dpkg_extract { -+ enum dpkg_extract_type type; -+ /** -+ * union extract - Selects extraction method -+ * @from_hdr - Used when 'type = DPKG_EXTRACT_FROM_HDR' -+ * @from_data - Used when 'type = DPKG_EXTRACT_FROM_DATA' -+ * @from_parse - Used when 'type = DPKG_EXTRACT_FROM_PARSE' -+ */ -+ union { -+ /** -+ * struct from_hdr - Used when 'type = DPKG_EXTRACT_FROM_HDR' -+ * @prot: Any of the supported headers -+ * @type: Defines the type of header extraction: -+ * DPKG_FROM_HDR: use size & offset below; -+ * DPKG_FROM_FIELD: use field, size and offset below; -+ * DPKG_FULL_FIELD: use field below -+ * @field: One of the supported fields (NH_FLD_) -+ * -+ * @size: Size in bytes -+ * @offset: Byte offset -+ * @hdr_index: Clear for cases not listed below; -+ * Used for protocols that may have more than a single -+ * header, 0 indicates an outer header; -+ * Supported protocols (possible values): -+ * NET_PROT_VLAN (0, HDR_INDEX_LAST); -+ * NET_PROT_MPLS (0, 1, HDR_INDEX_LAST); -+ * NET_PROT_IP(0, HDR_INDEX_LAST); -+ * NET_PROT_IPv4(0, HDR_INDEX_LAST); -+ * NET_PROT_IPv6(0, HDR_INDEX_LAST); -+ */ -+ -+ struct { -+ enum net_prot prot; -+ enum dpkg_extract_from_hdr_type type; -+ uint32_t field; -+ uint8_t size; -+ uint8_t offset; -+ uint8_t hdr_index; -+ } from_hdr; -+ /** -+ * struct from_data - Used when 'type = DPKG_EXTRACT_FROM_DATA' -+ * @size: Size in bytes -+ * @offset: Byte offset -+ */ -+ struct { -+ uint8_t size; -+ uint8_t offset; -+ } from_data; -+ -+ /** -+ * struct from_parse - Used when 'type = DPKG_EXTRACT_FROM_PARSE' -+ * @size: Size in bytes -+ * @offset: Byte offset -+ */ -+ struct { -+ uint8_t size; -+ uint8_t offset; -+ } from_parse; -+ } extract; -+ -+ uint8_t num_of_byte_masks; -+ struct dpkg_mask masks[DPKG_NUM_OF_MASKS]; -+}; -+ -+/** -+ * struct dpkg_profile_cfg - A structure for defining a full Key Generation -+ * profile (rule) -+ * @num_extracts: Defines the number of valid entries in the array below -+ * @extracts: Array of required extractions -+ */ -+struct dpkg_profile_cfg { -+ uint8_t num_extracts; -+ struct dpkg_extract extracts[DPKG_MAX_NUM_OF_EXTRACTS]; -+}; -+ -+#endif /* __FSL_DPKG_H_ */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpmac.h b/drivers/net/dpaa2/mc/fsl_dpmac.h -new file mode 100644 -index 0000000..ad27772 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpmac.h -@@ -0,0 +1,593 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. 
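
To make the extraction structures above concrete, a sketch (not part of the patch) that builds a two-extraction key-generation profile. NET_PROT_IP and NH_FLD_IP_SRC are assumed to be provided by fsl_net.h; only fields shown in the structures above are used.

/* Sketch only: key profile = full IPv4 source-address field plus one
 * masked byte of raw frame data. */
static void build_sample_key_profile(struct dpkg_profile_cfg *cfg)
{
	*cfg = (struct dpkg_profile_cfg){ 0 };
	cfg->num_extracts = 2;

	/* Extraction 0: a full header field (DPKG_FULL_FIELD). */
	cfg->extracts[0].type = DPKG_EXTRACT_FROM_HDR;
	cfg->extracts[0].extract.from_hdr.prot = NET_PROT_IP;    /* assumed, from fsl_net.h */
	cfg->extracts[0].extract.from_hdr.type = DPKG_FULL_FIELD;
	cfg->extracts[0].extract.from_hdr.field = NH_FLD_IP_SRC; /* assumed, from fsl_net.h */

	/* Extraction 1: one raw byte at offset 0, masked to its low nibble. */
	cfg->extracts[1].type = DPKG_EXTRACT_FROM_DATA;
	cfg->extracts[1].extract.from_data.size = 1;
	cfg->extracts[1].extract.from_data.offset = 0;
	cfg->extracts[1].num_of_byte_masks = 1;
	cfg->extracts[1].masks[0].mask = 0x0f;
	cfg->extracts[1].masks[0].offset = 0;
}
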
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPMAC_H -+#define __FSL_DPMAC_H -+ -+/* Data Path MAC API -+ * Contains initialization APIs and runtime control APIs for DPMAC -+ */ -+ -+struct fsl_mc_io; -+ -+/** -+ * dpmac_open() - Open a control session for the specified object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpmac_id: DPMAC unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpmac_create function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpmac_id, -+ uint16_t *token); -+ -+/** -+ * dpmac_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. 
-+ */ -+int dpmac_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * enum dpmac_link_type - DPMAC link type -+ * @DPMAC_LINK_TYPE_NONE: No link -+ * @DPMAC_LINK_TYPE_FIXED: Link is fixed type -+ * @DPMAC_LINK_TYPE_PHY: Link by PHY ID -+ * @DPMAC_LINK_TYPE_BACKPLANE: Backplane link type -+ */ -+enum dpmac_link_type { -+ DPMAC_LINK_TYPE_NONE, -+ DPMAC_LINK_TYPE_FIXED, -+ DPMAC_LINK_TYPE_PHY, -+ DPMAC_LINK_TYPE_BACKPLANE -+}; -+ -+/** -+ * enum dpmac_eth_if - DPMAC Ethernet interface -+ * @DPMAC_ETH_IF_MII: MII interface -+ * @DPMAC_ETH_IF_RMII: RMII interface -+ * @DPMAC_ETH_IF_SMII: SMII interface -+ * @DPMAC_ETH_IF_GMII: GMII interface -+ * @DPMAC_ETH_IF_RGMII: RGMII interface -+ * @DPMAC_ETH_IF_SGMII: SGMII interface -+ * @DPMAC_ETH_IF_QSGMII: QSGMII interface -+ * @DPMAC_ETH_IF_XAUI: XAUI interface -+ * @DPMAC_ETH_IF_XFI: XFI interface -+ */ -+enum dpmac_eth_if { -+ DPMAC_ETH_IF_MII, -+ DPMAC_ETH_IF_RMII, -+ DPMAC_ETH_IF_SMII, -+ DPMAC_ETH_IF_GMII, -+ DPMAC_ETH_IF_RGMII, -+ DPMAC_ETH_IF_SGMII, -+ DPMAC_ETH_IF_QSGMII, -+ DPMAC_ETH_IF_XAUI, -+ DPMAC_ETH_IF_XFI -+}; -+ -+/** -+ * struct dpmac_cfg - Structure representing DPMAC configuration -+ * @mac_id: Represents the Hardware MAC ID; in case of multiple WRIOP, -+ * the MAC IDs are contiguous. -+ * For example: 2 WRIOPs, 16 MACs in each: -+ * MAC IDs for the 1st WRIOP: 1-16, -+ * MAC IDs for the 2nd WRIOP: 17-32. -+ */ -+struct dpmac_cfg { -+ int mac_id; -+}; -+ -+/** -+ * dpmac_create() - Create the DPMAC object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPMAC object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. For objects that are created using the -+ * DPL file, call the dpmac_open() function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpmac_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpmac_destroy() - Destroy the DPMAC object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * -+ * Return: '0' on Success; Error code otherwise.
-+ */ -+int dpmac_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * DPMAC IRQ Index and Events -+ */ -+ -+/** -+ * IRQ index -+ */ -+#define DPMAC_IRQ_INDEX 0 -+/** -+ * IRQ event - indicates a link configuration request -+ */ -+#define DPMAC_IRQ_EVENT_LINK_CFG_REQ 0x00000001 -+/** -+ * IRQ event - Indicates that the link state changed -+ */ -+#define DPMAC_IRQ_EVENT_LINK_CHANGED 0x00000002 -+ -+/** -+ * struct dpmac_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpmac_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpmac_set_irq() - Set IRQ information for the DPMAC to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpmac_irq_cfg *irq_cfg); -+ -+/** -+ * dpmac_get_irq() - Get IRQ information from the DPMAC. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpmac_irq_cfg *irq_cfg); -+ -+/** -+ * dpmac_set_irq_enable() - Set overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable controls the -+ * overall interrupt state. If the interrupt is disabled, no cause will trigger -+ * an interrupt. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpmac_get_irq_enable() - Get overall interrupt state -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpmac_set_irq_mask() - Set interrupt mask.
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @irq_index: The interrupt index to configure -+ * @mask: Event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpmac_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpmac_get_irq_status() - Get the current status of any pending interrupts. -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpmac_clear_irq_status() - Clear a pending interrupt's status -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @irq_index: The interrupt index to configure -+ * @status: Bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+ -+/** -+ * struct dpmac_attr - Structure representing DPMAC attributes -+ * @id: DPMAC object ID -+ * @phy_id: PHY ID -+ * @link_type: link type -+ * @eth_if: Ethernet interface -+ * @max_rate: Maximum supported rate - in Mbps -+ * @version: DPMAC version -+ */ -+struct dpmac_attr { -+ int id; -+ int phy_id; -+ enum dpmac_link_type link_type; -+ enum dpmac_eth_if eth_if; -+ uint32_t max_rate; -+ /** -+ * struct version - Structure representing DPMAC version -+ * @major: DPMAC major version -+ * @minor: DPMAC minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+}; -+ -+/** -+ * dpmac_get_attributes - Retrieve DPMAC attributes. -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @attr: Returned object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise. 
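
A short sketch (not part of the patch) of the usual query pattern for the attribute structure above: open the object, fetch its attributes inside a temporary control session, and close the session again. 'mc_io' and 'dpmac_id' are assumed to be supplied by the caller.

/* Sketch only: fetch DPMAC attributes inside a temporary control session. */
static int dpmac_query_attr(struct fsl_mc_io *mc_io, int dpmac_id,
			    struct dpmac_attr *attr)
{
	uint16_t token;
	int err;

	err = dpmac_open(mc_io, 0 /* cmd_flags */, dpmac_id, &token);
	if (err)
		return err;

	err = dpmac_get_attributes(mc_io, 0, token, attr);
	/* attr->version.major/minor and attr->max_rate are valid on success. */

	dpmac_close(mc_io, 0, token);
	return err;
}
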
-+ */ -+int dpmac_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmac_attr *attr); -+ -+/** -+ * struct dpmac_mdio_cfg - DPMAC MDIO read/write parameters -+ * @phy_addr: MDIO device address -+ * @reg: Address of the register within the Clause 45 PHY device that is -+ * to be read or written -+ * @data: Data read/write from/to MDIO -+ */ -+struct dpmac_mdio_cfg { -+ uint8_t phy_addr; -+ uint8_t reg; -+ uint16_t data; -+}; -+ -+/** -+ * dpmac_mdio_read() - Perform MDIO read transaction -+ * @mc_io: Pointer to opaque I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @cfg: Structure with MDIO transaction parameters -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_mdio_read(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmac_mdio_cfg *cfg); -+ -+/** -+ * dpmac_mdio_write() - Perform MDIO write transaction -+ * @mc_io: Pointer to opaque I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @cfg: Structure with MDIO transaction parameters -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_mdio_write(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmac_mdio_cfg *cfg); -+ -+/** -+ * DPMAC link configuration/state options -+ */ -+ -+/** -+ * Enable auto-negotiation -+ */ -+#define DPMAC_LINK_OPT_AUTONEG 0x0000000000000001ULL -+/** -+ * Enable half-duplex mode -+ */ -+#define DPMAC_LINK_OPT_HALF_DUPLEX 0x0000000000000002ULL -+/** -+ * Enable pause frames -+ */ -+#define DPMAC_LINK_OPT_PAUSE 0x0000000000000004ULL -+/** -+ * Enable asymmetric pause frames -+ */ -+#define DPMAC_LINK_OPT_ASYM_PAUSE 0x0000000000000008ULL -+ -+/** -+ * struct dpmac_link_cfg - Structure representing DPMAC link configuration -+ * @rate: Link's rate - in Mbps -+ * @options: Enable/Disable DPMAC link cfg features (bitmap) -+ */ -+struct dpmac_link_cfg { -+ uint32_t rate; -+ uint64_t options; -+}; -+ -+/** -+ * dpmac_get_link_cfg() - Get Ethernet link configuration -+ * @mc_io: Pointer to opaque I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @cfg: Returned structure with the link configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_get_link_cfg(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmac_link_cfg *cfg); -+ -+/** -+ * struct dpmac_link_state - DPMAC link configuration request -+ * @rate: Rate in Mbps -+ * @options: Enable/Disable DPMAC link cfg features (bitmap) -+ * @up: Link state -+ */ -+struct dpmac_link_state { -+ uint32_t rate; -+ uint64_t options; -+ int up; -+}; -+ -+/** -+ * dpmac_set_link_state() - Set the Ethernet link status -+ * @mc_io: Pointer to opaque I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @link_state: Link state configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_set_link_state(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmac_link_state *link_state); -+ -+/** -+ * enum dpmac_counter - DPMAC counter types -+ * @DPMAC_CNT_ING_FRAME_64: counts 64-byte frames, good or bad. -+ * @DPMAC_CNT_ING_FRAME_127: counts 65- to 127-byte frames, good or bad. -+ * @DPMAC_CNT_ING_FRAME_255: counts 128- to 255-byte frames, good or bad.
-+ * @DPMAC_CNT_ING_FRAME_511: counts 256- to 511-byte frames, good or bad. -+ * @DPMAC_CNT_ING_FRAME_1023: counts 512- to 1023-byte frames, good or bad. -+ * @DPMAC_CNT_ING_FRAME_1518: counts 1024- to 1518-byte frames, good or bad. -+ * @DPMAC_CNT_ING_FRAME_1519_MAX: counts 1519-byte frames and larger -+ * (up to max frame length specified), -+ * good or bad. -+ * @DPMAC_CNT_ING_FRAG: counts frames shorter than 64 bytes that were received -+ * with a wrong CRC -+ * @DPMAC_CNT_ING_JABBER: counts frames longer than the maximum frame length -+ * specified, with a bad frame check sequence. -+ * @DPMAC_CNT_ING_FRAME_DISCARD: counts dropped frames due to internal errors. -+ * Occurs when a receive FIFO overflows. -+ * Also includes frames truncated as a result of -+ * the receive FIFO overflow. -+ * @DPMAC_CNT_ING_ALIGN_ERR: counts frames with an alignment error -+ * (optionally used for wrong SFD). -+ * @DPMAC_CNT_EGR_UNDERSIZED: counts transmitted frames that were less than 64 -+ * bytes long with a good CRC. -+ * @DPMAC_CNT_ING_OVERSIZED: counts frames longer than the maximum frame length -+ * specified, with a good frame check sequence. -+ * @DPMAC_CNT_ING_VALID_PAUSE_FRAME: counts valid pause frames (regular and PFC) -+ * @DPMAC_CNT_EGR_VALID_PAUSE_FRAME: counts valid pause frames transmitted -+ * (regular and PFC). -+ * @DPMAC_CNT_ING_BYTE: counts bytes received except preamble for all valid -+ * frames and valid pause frames. -+ * @DPMAC_CNT_ING_MCAST_FRAME: counts received multicast frames. -+ * @DPMAC_CNT_ING_BCAST_FRAME: counts received broadcast frames. -+ * @DPMAC_CNT_ING_ALL_FRAME: counts all frames received, good or bad. -+ * @DPMAC_CNT_ING_UCAST_FRAME: counts received unicast frames. -+ * @DPMAC_CNT_ING_ERR_FRAME: counts frames received with an error -+ * (except for undersized/fragment frames). -+ * @DPMAC_CNT_EGR_BYTE: counts bytes transmitted except preamble for all valid -+ * frames and valid pause frames transmitted. -+ * @DPMAC_CNT_EGR_MCAST_FRAME: counts transmitted multicast frames. -+ * @DPMAC_CNT_EGR_BCAST_FRAME: counts transmitted broadcast frames. -+ * @DPMAC_CNT_EGR_UCAST_FRAME: counts transmitted unicast frames. -+ * @DPMAC_CNT_EGR_ERR_FRAME: counts frames transmitted with an error. -+ * @DPMAC_CNT_ING_GOOD_FRAME: counts frames received without error, including -+ * pause frames. -+ * @DPMAC_CNT_ENG_GOOD_FRAME: counts frames transmitted without error, including -+ * pause frames.
-+ */ -+enum dpmac_counter { -+ DPMAC_CNT_ING_FRAME_64, -+ DPMAC_CNT_ING_FRAME_127, -+ DPMAC_CNT_ING_FRAME_255, -+ DPMAC_CNT_ING_FRAME_511, -+ DPMAC_CNT_ING_FRAME_1023, -+ DPMAC_CNT_ING_FRAME_1518, -+ DPMAC_CNT_ING_FRAME_1519_MAX, -+ DPMAC_CNT_ING_FRAG, -+ DPMAC_CNT_ING_JABBER, -+ DPMAC_CNT_ING_FRAME_DISCARD, -+ DPMAC_CNT_ING_ALIGN_ERR, -+ DPMAC_CNT_EGR_UNDERSIZED, -+ DPMAC_CNT_ING_OVERSIZED, -+ DPMAC_CNT_ING_VALID_PAUSE_FRAME, -+ DPMAC_CNT_EGR_VALID_PAUSE_FRAME, -+ DPMAC_CNT_ING_BYTE, -+ DPMAC_CNT_ING_MCAST_FRAME, -+ DPMAC_CNT_ING_BCAST_FRAME, -+ DPMAC_CNT_ING_ALL_FRAME, -+ DPMAC_CNT_ING_UCAST_FRAME, -+ DPMAC_CNT_ING_ERR_FRAME, -+ DPMAC_CNT_EGR_BYTE, -+ DPMAC_CNT_EGR_MCAST_FRAME, -+ DPMAC_CNT_EGR_BCAST_FRAME, -+ DPMAC_CNT_EGR_UCAST_FRAME, -+ DPMAC_CNT_EGR_ERR_FRAME, -+ DPMAC_CNT_ING_GOOD_FRAME, -+ DPMAC_CNT_ENG_GOOD_FRAME -+}; -+ -+/** -+ * dpmac_get_counter() - Read a specific DPMAC counter -+ * @mc_io: Pointer to opaque I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMAC object -+ * @type: The requested counter -+ * @counter: Returned counter value -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmac_get_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ enum dpmac_counter type, -+ uint64_t *counter); -+ -+#endif /* __FSL_DPMAC_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpmac_cmd.h b/drivers/net/dpaa2/mc/fsl_dpmac_cmd.h -new file mode 100644 -index 0000000..dc00590 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpmac_cmd.h -@@ -0,0 +1,195 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE.
-+ */ -+#ifndef _FSL_DPMAC_CMD_H -+#define _FSL_DPMAC_CMD_H -+ -+/* DPMAC Version */ -+#define DPMAC_VER_MAJOR 3 -+#define DPMAC_VER_MINOR 2 -+ -+/* Command IDs */ -+#define DPMAC_CMDID_CLOSE 0x800 -+#define DPMAC_CMDID_OPEN 0x80c -+#define DPMAC_CMDID_CREATE 0x90c -+#define DPMAC_CMDID_DESTROY 0x900 -+ -+#define DPMAC_CMDID_GET_ATTR 0x004 -+#define DPMAC_CMDID_RESET 0x005 -+ -+#define DPMAC_CMDID_SET_IRQ 0x010 -+#define DPMAC_CMDID_GET_IRQ 0x011 -+#define DPMAC_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPMAC_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPMAC_CMDID_SET_IRQ_MASK 0x014 -+#define DPMAC_CMDID_GET_IRQ_MASK 0x015 -+#define DPMAC_CMDID_GET_IRQ_STATUS 0x016 -+#define DPMAC_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPMAC_CMDID_MDIO_READ 0x0c0 -+#define DPMAC_CMDID_MDIO_WRITE 0x0c1 -+#define DPMAC_CMDID_GET_LINK_CFG 0x0c2 -+#define DPMAC_CMDID_SET_LINK_STATE 0x0c3 -+#define DPMAC_CMDID_GET_COUNTER 0x0c4 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_CREATE(cmd, cfg) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, cfg->mac_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_OPEN(cmd, dpmac_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpmac_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_SET_IRQ_ENABLE(cmd, irq_index, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, en); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_RSP_GET_IRQ_ENABLE(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) 
\ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_RSP_GET_ATTRIBUTES(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->phy_id);\ -+ MC_RSP_OP(cmd, 0, 32, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->version.minor);\ -+ MC_RSP_OP(cmd, 1, 32, 8, enum dpmac_link_type, attr->link_type);\ -+ MC_RSP_OP(cmd, 1, 40, 8, enum dpmac_eth_if, attr->eth_if);\ -+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, attr->max_rate);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_MDIO_READ(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, cfg->phy_addr); \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, cfg->reg); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_RSP_MDIO_READ(cmd, data) \ -+ MC_RSP_OP(cmd, 0, 16, 16, uint16_t, data) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_MDIO_WRITE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, cfg->phy_addr); \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, cfg->reg); \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->data); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_RSP_GET_LINK_CFG(cmd, cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 64, uint64_t, cfg->options); \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, cfg->rate); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_SET_LINK_STATE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 64, uint64_t, cfg->options); \ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->rate); \ -+ MC_CMD_OP(cmd, 2, 0, 1, int, cfg->up); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_CMD_GET_COUNTER(cmd, type) \ -+ MC_CMD_OP(cmd, 0, 0, 8, enum dpmac_counter, type) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMAC_RSP_GET_COUNTER(cmd, counter) \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, counter) -+ -+#endif /* _FSL_DPMAC_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpmcp.h b/drivers/net/dpaa2/mc/fsl_dpmcp.h -new file mode 100644 -index 0000000..80f238e ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpmcp.h -@@ -0,0 +1,332 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. 
-+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPMCP_H -+#define __FSL_DPMCP_H -+ -+/* Data Path Management Command Portal API -+ * Contains initialization APIs and runtime control APIs for DPMCP -+ */ -+ -+struct fsl_mc_io; -+ -+/** -+ * dpmcp_open() - Open a control session for the specified object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpmcp_id: DPMCP unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpmcp_create function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmcp_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpmcp_id, -+ uint16_t *token); -+ -+/** -+ * Get portal ID from pool -+ */ -+#define DPMCP_GET_PORTAL_ID_FROM_POOL (-1) -+ -+/** -+ * dpmcp_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMCP object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmcp_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpmcp_cfg - Structure representing DPMCP configuration -+ * @portal_id: Portal ID; 'DPMCP_GET_PORTAL_ID_FROM_POOL' to get the portal ID -+ * from pool -+ */ -+struct dpmcp_cfg { -+ int portal_id; -+}; -+ -+/** -+ * dpmcp_create() - Create the DPMCP object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPMCP object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. For objects that are created using the -+ * DPL file, call dpmcp_open function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. 
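-+ *
-+ * A minimal creation sketch (illustrative only; it assumes 'mc_io' is an
-+ * initialized MC portal, passes no command flags, and draws the portal
-+ * ID from the pool):
-+ *
-+ *	struct dpmcp_cfg cfg = { .portal_id = DPMCP_GET_PORTAL_ID_FROM_POOL };
-+ *	uint16_t token;
-+ *	int err = dpmcp_create(mc_io, 0, &cfg, &token);
-+ *	if (err)
-+ *		return err;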
-+ */ -+int dpmcp_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpmcp_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpmcp_destroy() - Destroy the DPMCP object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMCP object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmcp_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpmcp_reset() - Reset the DPMCP, returns the object to initial state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMCP object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmcp_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * IRQ -+ */ -+ -+/** -+ * IRQ Index -+ */ -+#define DPMCP_IRQ_INDEX 0 -+/** -+ * IRQ event - indicates that a command has been completed -+ */ -+#define DPMCP_IRQ_EVENT_CMD_DONE 0x00000001 -+ -+/** -+ * struct dpmcp_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpmcp_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpmcp_set_irq() - Set IRQ information for the DPMCP to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMCP object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmcp_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpmcp_irq_cfg *irq_cfg); -+ -+/** -+ * dpmcp_get_irq() - Get IRQ information from the DPMCP. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMCP object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmcp_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpmcp_irq_cfg *irq_cfg); -+ -+/** -+ * dpmcp_set_irq_enable() - Set overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMCP object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable setting controls -+ * the overall interrupt state. If the interrupt is disabled, no causes will -+ * trigger an interrupt. -+ * -+ * Return: '0' on Success; Error code otherwise.
-+ */ -+int dpmcp_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpmcp_get_irq_enable() - Get overall interrupt state -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMCP object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmcp_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpmcp_set_irq_mask() - Set interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMCP object -+ * @irq_index: The interrupt index to configure -+ * @mask: Event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmcp_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpmcp_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMCP object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmcp_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpmcp_get_irq_status() - Get the current status of any pending interrupts. -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMCP object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpmcp_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * struct dpmcp_attr - Structure representing DPMCP attributes -+ * @id: DPMCP object ID -+ * @version: DPMCP version -+ */ -+struct dpmcp_attr { -+ int id; -+ /** -+ * struct version - Structure representing DPMCP version -+ * @major: DPMCP major version -+ * @minor: DPMCP minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+}; -+ -+/** -+ * dpmcp_get_attributes() - Retrieve DPMCP attributes. -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPMCP object -+ * @attr: Returned object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise.
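-+ *
-+ * A version-check sketch (illustrative only; 'warn' stands in for
-+ * whatever logging routine the caller has available, and
-+ * DPMCP_VER_MAJOR comes from fsl_dpmcp_cmd.h):
-+ *
-+ *	struct dpmcp_attr attr;
-+ *	int err = dpmcp_get_attributes(mc_io, 0, token, &attr);
-+ *	if (!err && attr.version.major != DPMCP_VER_MAJOR)
-+ *		warn("unexpected DPMCP major version %u", attr.version.major);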
-+ */ -+int dpmcp_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpmcp_attr *attr); -+ -+#endif /* __FSL_DPMCP_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpmcp_cmd.h b/drivers/net/dpaa2/mc/fsl_dpmcp_cmd.h -new file mode 100644 -index 0000000..8f710bd ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpmcp_cmd.h -@@ -0,0 +1,135 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+#ifndef _FSL_DPMCP_CMD_H -+#define _FSL_DPMCP_CMD_H -+ -+/* DPMCP Version */ -+#define DPMCP_VER_MAJOR 3 -+#define DPMCP_VER_MINOR 0 -+ -+/* Command IDs */ -+#define DPMCP_CMDID_CLOSE 0x800 -+#define DPMCP_CMDID_OPEN 0x80b -+#define DPMCP_CMDID_CREATE 0x90b -+#define DPMCP_CMDID_DESTROY 0x900 -+ -+#define DPMCP_CMDID_GET_ATTR 0x004 -+#define DPMCP_CMDID_RESET 0x005 -+ -+#define DPMCP_CMDID_SET_IRQ 0x010 -+#define DPMCP_CMDID_GET_IRQ 0x011 -+#define DPMCP_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPMCP_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPMCP_CMDID_SET_IRQ_MASK 0x014 -+#define DPMCP_CMDID_GET_IRQ_MASK 0x015 -+#define DPMCP_CMDID_GET_IRQ_STATUS 0x016 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_CMD_OPEN(cmd, dpmcp_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpmcp_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_CMD_CREATE(cmd, cfg) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, cfg->portal_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_CMD_SET_IRQ_ENABLE(cmd, irq_index, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, en); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_RSP_GET_IRQ_ENABLE(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMCP_RSP_GET_ATTRIBUTES(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 32, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+#endif /* _FSL_DPMCP_CMD_H */ -diff --git 
a/drivers/net/dpaa2/mc/fsl_dpmng.h b/drivers/net/dpaa2/mc/fsl_dpmng.h -new file mode 100644 -index 0000000..4468dea ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpmng.h -@@ -0,0 +1,74 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPMNG_H -+#define __FSL_DPMNG_H -+ -+/* Management Complex General API -+ * Contains general API for the Management Complex firmware -+ */ -+ -+struct fsl_mc_io; -+ -+/** -+ * Management Complex firmware version information -+ */ -+#define MC_VER_MAJOR 9 -+#define MC_VER_MINOR 0 -+ -+/** -+ * struct mc_version -+ * @major: Major version number: incremented on API compatibility changes -+ * @minor: Minor version number: incremented on API additions (that are -+ * backward compatible); reset when major version is incremented -+ * @revision: Internal revision number: incremented on implementation changes -+ * and/or bug fixes that have no impact on API -+ */ -+struct mc_version { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t revision; -+}; -+ -+/** -+ * mc_get_version() - Retrieves the Management Complex firmware -+ * version information -+ * @mc_io: Pointer to opaque I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @mc_ver_info: Returned version information structure -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int mc_get_version(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ struct mc_version *mc_ver_info); -+ -+#endif /* __FSL_DPMNG_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpmng_cmd.h b/drivers/net/dpaa2/mc/fsl_dpmng_cmd.h -new file mode 100644 -index 0000000..c34ca3a ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpmng_cmd.h -@@ -0,0 +1,46 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc.
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPMNG_CMD_H -+#define __FSL_DPMNG_CMD_H -+ -+/* Command IDs */ -+#define DPMNG_CMDID_GET_VERSION 0x831 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPMNG_RSP_GET_VERSION(cmd, mc_ver_info) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mc_ver_info->revision); \ -+ MC_RSP_OP(cmd, 0, 32, 32, uint32_t, mc_ver_info->major); \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, mc_ver_info->minor); \ -+} while (0) -+ -+#endif /* __FSL_DPMNG_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpni.h b/drivers/net/dpaa2/mc/fsl_dpni.h -new file mode 100644 -index 0000000..c820086 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpni.h -@@ -0,0 +1,2581 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. 
-+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPNI_H -+#define __FSL_DPNI_H -+ -+#include <fsl_net.h> -+ -+struct fsl_mc_io; -+ -+/** -+ * Data Path Network Interface API -+ * Contains initialization APIs and runtime control APIs for DPNI -+ */ -+ -+/** General DPNI macros */ -+ -+/** -+ * Maximum number of traffic classes -+ */ -+#define DPNI_MAX_TC 8 -+/** -+ * Maximum number of buffer pools per DPNI -+ */ -+#define DPNI_MAX_DPBP 8 -+/** -+ * Maximum number of storage-profiles per DPNI -+ */ -+#define DPNI_MAX_SP 2 -+ -+/** -+ * All traffic classes considered; see dpni_set_rx_flow() -+ */ -+#define DPNI_ALL_TCS (uint8_t)(-1) -+/** -+ * All flows within traffic class considered; see dpni_set_rx_flow() -+ */ -+#define DPNI_ALL_TC_FLOWS (uint16_t)(-1) -+/** -+ * Generate new flow ID; see dpni_set_tx_flow() -+ */ -+#define DPNI_NEW_FLOW_ID (uint16_t)(-1) -+/* use for common tx-conf queue; see dpni_set_tx_conf_() */ -+#define DPNI_COMMON_TX_CONF (uint16_t)(-1) -+ -+/** -+ * dpni_open() - Open a control session for the specified object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpni_id: DPNI unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpni_create() function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpni_id, -+ uint16_t *token); -+ -+/** -+ * dpni_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise.
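-+ *
-+ * A typical session sketch (illustrative only; 'dpni_id' identifies an
-+ * object declared in the DPL or created with dpni_create(), and no
-+ * command flags are passed):
-+ *
-+ *	uint16_t token;
-+ *	int err = dpni_open(mc_io, 0, dpni_id, &token);
-+ *	if (err)
-+ *		return err;
-+ *	... configure, enable and use the DPNI here ...
-+ *	err = dpni_close(mc_io, 0, token);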
-+ */ -+int dpni_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/* DPNI configuration options */ -+ -+/** -+ * Allow different distribution key profiles for different traffic classes; -+ * if not set, a single key profile is assumed -+ */ -+#define DPNI_OPT_ALLOW_DIST_KEY_PER_TC 0x00000001 -+ -+/** -+ * Disable all non-error transmit confirmation; error frames are reported -+ * back to a common Tx error queue -+ */ -+#define DPNI_OPT_TX_CONF_DISABLED 0x00000002 -+ -+/** -+ * Disable per-sender private Tx confirmation/error queue -+ */ -+#define DPNI_OPT_PRIVATE_TX_CONF_ERROR_DISABLED 0x00000004 -+ -+/** -+ * Support distribution based on hashed key; -+ * allows statistical distribution over receive queues in a traffic class -+ */ -+#define DPNI_OPT_DIST_HASH 0x00000010 -+ -+/** -+ * DEPRECATED - if this flag is selected and all new 'max_fs_entries' are -+ * '0' then backward compatibility is preserved; -+ * Support distribution based on flow steering; -+ * allows explicit control of distribution over receive queues in a traffic -+ * class -+ */ -+#define DPNI_OPT_DIST_FS 0x00000020 -+ -+/** -+ * Unicast filtering support -+ */ -+#define DPNI_OPT_UNICAST_FILTER 0x00000080 -+/** -+ * Multicast filtering support -+ */ -+#define DPNI_OPT_MULTICAST_FILTER 0x00000100 -+/** -+ * VLAN filtering support -+ */ -+#define DPNI_OPT_VLAN_FILTER 0x00000200 -+/** -+ * Support IP reassembly on received packets -+ */ -+#define DPNI_OPT_IPR 0x00000800 -+/** -+ * Support IP fragmentation on transmitted packets -+ */ -+#define DPNI_OPT_IPF 0x00001000 -+/** -+ * VLAN manipulation support -+ */ -+#define DPNI_OPT_VLAN_MANIPULATION 0x00010000 -+/** -+ * Support masking of QoS lookup keys -+ */ -+#define DPNI_OPT_QOS_MASK_SUPPORT 0x00020000 -+/** -+ * Support masking of Flow Steering lookup keys -+ */ -+#define DPNI_OPT_FS_MASK_SUPPORT 0x00040000 -+ -+/** -+ * struct dpni_extended_cfg - Structure representing extended DPNI configuration -+ * @tc_cfg: TCs configuration -+ * @ipr_cfg: IP reassembly configuration -+ */ -+struct dpni_extended_cfg { -+ /** -+ * struct tc_cfg - TC configuration -+ * @max_dist: Maximum distribution size for Rx traffic class; -+ * supported values: 1,2,3,4,6,7,8,12,14,16,24,28,32,48,56,64,96, -+ * 112,128,192,224,256,384,448,512,768,896,1024; -+ * value '0' will be treated as '1'. -+ * Other unsupported values will be rounded down to the nearest -+ * supported value.
-+ * @max_fs_entries: Maximum FS entries for Rx traffic class; -+ * '0' means no support for this TC; -+ */ -+ struct { -+ uint16_t max_dist; -+ uint16_t max_fs_entries; -+ } tc_cfg[DPNI_MAX_TC]; -+ /** -+ * struct ipr_cfg - Structure representing IP reassembly configuration -+ * @max_reass_frm_size: Maximum size of the reassembled frame -+ * @min_frag_size_ipv4: Minimum fragment size of IPv4 fragments -+ * @min_frag_size_ipv6: Minimum fragment size of IPv6 fragments -+ * @max_open_frames_ipv4: Maximum concurrent IPv4 packets in reassembly -+ * process -+ * @max_open_frames_ipv6: Maximum concurrent IPv6 packets in reassembly -+ * process -+ */ -+ struct { -+ uint16_t max_reass_frm_size; -+ uint16_t min_frag_size_ipv4; -+ uint16_t min_frag_size_ipv6; -+ uint16_t max_open_frames_ipv4; -+ uint16_t max_open_frames_ipv6; -+ } ipr_cfg; -+}; -+ -+/** -+ * dpni_prepare_extended_cfg() - Prepare the extended parameters -+ * @cfg: extended structure -+ * @ext_cfg_buf: Zeroed 256 bytes of memory before mapping it to DMA -+ * -+ * This function has to be called before dpni_create() -+ */ -+int dpni_prepare_extended_cfg(const struct dpni_extended_cfg *cfg, -+ uint8_t *ext_cfg_buf); -+ -+/** -+ * struct dpni_cfg - Structure representing DPNI configuration -+ * @mac_addr: Primary MAC address -+ * @adv: Advanced parameters; default is all zeros; -+ * use this structure to change default settings -+ */ -+struct dpni_cfg { -+ uint8_t mac_addr[6]; -+ /** -+ * struct adv - Advanced parameters -+ * @options: Mask of available options; use 'DPNI_OPT_' values -+ * @start_hdr: Selects the packet starting header for parsing; -+ * 'NET_PROT_NONE' is treated as default: 'NET_PROT_ETH' -+ * @max_senders: Maximum number of different senders; used as the number -+ * of dedicated Tx flows; Non-power-of-2 values are rounded -+ * up to the next power-of-2 value as hardware demands it; -+ * '0' will be treated as '1' -+ * @max_tcs: Maximum number of traffic classes (for both Tx and Rx); -+ * '0' will be treated as '1' -+ * @max_unicast_filters: Maximum number of unicast filters; -+ * '0' is treated as '16' -+ * @max_multicast_filters: Maximum number of multicast filters; -+ * '0' is treated as '64' -+ * @max_vlan_filters: Maximum number of VLAN filters -+ * @max_qos_entries: if 'max_tcs > 1', declares the maximum entries in -+ * the QoS table; '0' is treated as '64' -+ * @max_qos_key_size: Maximum key size for the QoS look-up; -+ * '0' is treated as '24' which is enough for IPv4 -+ * 5-tuple -+ * @max_dist_key_size: Maximum key size for the distribution; -+ * '0' is treated as '24' which is enough for IPv4 5-tuple -+ * @max_policers: Maximum number of policers; -+ * should be between '0' and max_tcs -+ * @max_congestion_ctrl: Maximum number of congestion control groups -+ * (CGs); covers early drop and congestion notification -+ * requirements; -+ * should be between '0' and ('max_tcs' + 'max_senders') -+ * @ext_cfg_iova: I/O virtual address of 256 bytes DMA-able memory -+ * filled with the extended configuration by calling -+ * dpni_prepare_extended_cfg() -+ */ -+ struct { -+ uint32_t options; -+ enum net_prot start_hdr; -+ uint8_t max_senders; -+ uint8_t max_tcs; -+ uint8_t max_unicast_filters; -+ uint8_t max_multicast_filters; -+ uint8_t max_vlan_filters; -+ uint8_t max_qos_entries; -+ uint8_t max_qos_key_size; -+ uint8_t max_dist_key_size; -+ uint8_t max_policers; -+ uint8_t max_congestion_ctrl; -+ uint64_t ext_cfg_iova; -+ } adv; -+}; -+ -+/** -+ * dpni_create() - Create the DPNI object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one
or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPNI object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. For objects that are created using the -+ * DPL file, call the dpni_open() function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpni_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpni_destroy() - Destroy the DPNI object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpni_pools_cfg - Structure representing buffer pools configuration -+ * @num_dpbp: Number of DPBPs -+ * @pools: Array of buffer pools parameters; The number of valid entries -+ * must match 'num_dpbp' value -+ */ -+struct dpni_pools_cfg { -+ uint8_t num_dpbp; -+ /** -+ * struct pools - Buffer pools parameters -+ * @dpbp_id: DPBP object ID -+ * @buffer_size: Buffer size -+ * @backup_pool: Backup pool -+ */ -+ struct { -+ int dpbp_id; -+ uint16_t buffer_size; -+ int backup_pool; -+ } pools[DPNI_MAX_DPBP]; -+}; -+ -+/** -+ * dpni_set_pools() - Set buffer pools configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @cfg: Buffer pools configuration -+ * -+ * Mandatory for DPNI operation. -+ * Warning: Allowed only when DPNI is disabled -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_pools(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_pools_cfg *cfg); -+ -+/** -+ * dpni_enable() - Enable the DPNI, allow sending and receiving frames. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpni_disable() - Disable the DPNI, stop sending and receiving frames. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpni_is_enabled() - Check if the DPNI is enabled. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Returns '1' if object is enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpni_reset() - Reset the DPNI, returns the object to initial state.
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * DPNI IRQ Index and Events -+ */ -+ -+/** -+ * IRQ index -+ */ -+#define DPNI_IRQ_INDEX 0 -+/** -+ * IRQ event - indicates a change in link state -+ */ -+#define DPNI_IRQ_EVENT_LINK_CHANGED 0x00000001 -+ -+/** -+ * struct dpni_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpni_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpni_set_irq() - Set IRQ information for the DPNI to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpni_irq_cfg *irq_cfg); -+ -+/** -+ * dpni_get_irq() - Get IRQ information from the DPNI. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpni_irq_cfg *irq_cfg); -+ -+/** -+ * dpni_set_irq_enable() - Set overall interrupt state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable setting controls -+ * the overall interrupt state. If the interrupt is disabled, no causes will -+ * trigger an interrupt. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpni_get_irq_enable() - Get overall interrupt state -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpni_set_irq_mask() - Set interrupt mask.
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @irq_index: The interrupt index to configure -+ * @mask: event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpni_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpni_get_irq_status() - Get the current status of any pending interrupts. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpni_clear_irq_status() - Clear a pending interrupt's status -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @irq_index: The interrupt index to configure -+ * @status: bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. 
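-+ *
-+ * A poll-and-acknowledge sketch (illustrative only; no command flags),
-+ * pairing dpni_get_irq_status() with dpni_clear_irq_status() below:
-+ *
-+ *	uint32_t status = 0;
-+ *	int err = dpni_get_irq_status(mc_io, 0, token, DPNI_IRQ_INDEX,
-+ *				      &status);
-+ *	if (!err && (status & DPNI_IRQ_EVENT_LINK_CHANGED))
-+ *		err = dpni_clear_irq_status(mc_io, 0, token, DPNI_IRQ_INDEX,
-+ *					    DPNI_IRQ_EVENT_LINK_CHANGED);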
-+ */ -+int dpni_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+ -+/** -+ * struct dpni_attr - Structure representing DPNI attributes -+ * @id: DPNI object ID -+ * @version: DPNI version -+ * @start_hdr: Indicates the packet starting header for parsing -+ * @options: Mask of available options; reflects the value as was given in -+ * object's creation -+ * @max_senders: Maximum number of different senders; used as the number -+ * of dedicated Tx flows; -+ * @max_tcs: Maximum number of traffic classes (for both Tx and Rx) -+ * @max_unicast_filters: Maximum number of unicast filters -+ * @max_multicast_filters: Maximum number of multicast filters -+ * @max_vlan_filters: Maximum number of VLAN filters -+ * @max_qos_entries: if 'max_tcs > 1', declares the maximum entries in QoS table -+ * @max_qos_key_size: Maximum key size for the QoS look-up -+ * @max_dist_key_size: Maximum key size for the distribution look-up -+ * @max_policers: Maximum number of policers; -+ * @max_congestion_ctrl: Maximum number of congestion control groups (CGs); -+ * @ext_cfg_iova: I/O virtual address of 256 bytes DMA-able memory; -+ * call dpni_extract_extended_cfg() to extract the extended configuration -+ */ -+struct dpni_attr { -+ int id; -+ /** -+ * struct version - DPNI version -+ * @major: DPNI major version -+ * @minor: DPNI minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+ enum net_prot start_hdr; -+ uint32_t options; -+ uint8_t max_senders; -+ uint8_t max_tcs; -+ uint8_t max_unicast_filters; -+ uint8_t max_multicast_filters; -+ uint8_t max_vlan_filters; -+ uint8_t max_qos_entries; -+ uint8_t max_qos_key_size; -+ uint8_t max_dist_key_size; -+ uint8_t max_policers; -+ uint8_t max_congestion_ctrl; -+ uint64_t ext_cfg_iova; -+}; -+ -+/** -+ * dpni_get_attributes() - Retrieve DPNI attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @attr: Object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise. 
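-+ *
-+ * A retrieval sketch (illustrative only; 'ext_buf' is assumed to be the
-+ * 256-byte DMA-able buffer whose I/O virtual address was supplied as
-+ * 'ext_cfg_iova' at creation time):
-+ *
-+ *	struct dpni_attr attr;
-+ *	struct dpni_extended_cfg ext;
-+ *	int err = dpni_get_attributes(mc_io, 0, token, &attr);
-+ *	if (!err)
-+ *		err = dpni_extract_extended_cfg(&ext, ext_buf);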
-+ */ -+int dpni_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_attr *attr); -+ -+/** -+ * dpni_extract_extended_cfg() - Extract the extended parameters -+ * @cfg: extended structure -+ * @ext_cfg_buf: 256 bytes of DMA-able memory -+ * -+ * This function has to be called after dpni_get_attributes() -+ */ -+int dpni_extract_extended_cfg(struct dpni_extended_cfg *cfg, -+ const uint8_t *ext_cfg_buf); -+ -+/** -+ * DPNI errors -+ */ -+ -+/** -+ * Extract out of frame header error -+ */ -+#define DPNI_ERROR_EOFHE 0x00020000 -+/** -+ * Frame length error -+ */ -+#define DPNI_ERROR_FLE 0x00002000 -+/** -+ * Frame physical error -+ */ -+#define DPNI_ERROR_FPE 0x00001000 -+/** -+ * Parsing header error -+ */ -+#define DPNI_ERROR_PHE 0x00000020 -+/** -+ * Parser L3 checksum error -+ */ -+#define DPNI_ERROR_L3CE 0x00000004 -+/** -+ * Parser L4 checksum error -+ */ -+#define DPNI_ERROR_L4CE 0x00000001 -+ -+/** -+ * enum dpni_error_action - Defines DPNI behavior for errors -+ * @DPNI_ERROR_ACTION_DISCARD: Discard the frame -+ * @DPNI_ERROR_ACTION_CONTINUE: Continue with the normal flow -+ * @DPNI_ERROR_ACTION_SEND_TO_ERROR_QUEUE: Send the frame to the error queue -+ */ -+enum dpni_error_action { -+ DPNI_ERROR_ACTION_DISCARD = 0, -+ DPNI_ERROR_ACTION_CONTINUE = 1, -+ DPNI_ERROR_ACTION_SEND_TO_ERROR_QUEUE = 2 -+}; -+ -+/** -+ * struct dpni_error_cfg - Structure representing DPNI errors treatment -+ * @errors: Errors mask; use 'DPNI_ERROR_' values -+ * @error_action: The desired action for the errors mask -+ * @set_frame_annotation: Set to '1' to mark the errors in frame annotation -+ * status (FAS); relevant only for the non-discard action -+ */ -+struct dpni_error_cfg { -+ uint32_t errors; -+ enum dpni_error_action error_action; -+ int set_frame_annotation; -+}; -+ -+/** -+ * dpni_set_errors_behavior() - Set errors behavior -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @cfg: Errors configuration -+ * -+ * This function may be called numerous times with different -+ * error masks -+ * -+ * Return: '0' on Success; Error code otherwise.
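-+ *
-+ * A configuration sketch (illustrative only): send frames with L3/L4
-+ * checksum errors to the error queue and mark them in the frame
-+ * annotation status:
-+ *
-+ *	struct dpni_error_cfg err_cfg = {
-+ *		.errors = DPNI_ERROR_L3CE | DPNI_ERROR_L4CE,
-+ *		.error_action = DPNI_ERROR_ACTION_SEND_TO_ERROR_QUEUE,
-+ *		.set_frame_annotation = 1,
-+ *	};
-+ *	int err = dpni_set_errors_behavior(mc_io, 0, token, &err_cfg);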
-+ */ -+int dpni_set_errors_behavior(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_error_cfg *cfg); -+ -+/** -+ * DPNI buffer layout modification options -+ */ -+ -+/** -+ * Select to modify the time-stamp setting -+ */ -+#define DPNI_BUF_LAYOUT_OPT_TIMESTAMP 0x00000001 -+/** -+ * Select to modify the parser-result setting; not applicable for Tx -+ */ -+#define DPNI_BUF_LAYOUT_OPT_PARSER_RESULT 0x00000002 -+/** -+ * Select to modify the frame-status setting -+ */ -+#define DPNI_BUF_LAYOUT_OPT_FRAME_STATUS 0x00000004 -+/** -+ * Select to modify the private-data-size setting -+ */ -+#define DPNI_BUF_LAYOUT_OPT_PRIVATE_DATA_SIZE 0x00000008 -+/** -+ * Select to modify the data-alignment setting -+ */ -+#define DPNI_BUF_LAYOUT_OPT_DATA_ALIGN 0x00000010 -+/** -+ * Select to modify the data-head-room setting -+ */ -+#define DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM 0x00000020 -+/** -+ * Select to modify the data-tail-room setting -+ */ -+#define DPNI_BUF_LAYOUT_OPT_DATA_TAIL_ROOM 0x00000040 -+ -+/** -+ * struct dpni_buffer_layout - Structure representing DPNI buffer layout -+ * @options: Flags representing the suggested modifications to the buffer -+ * layout; Use any combination of 'DPNI_BUF_LAYOUT_OPT_' flags -+ * @pass_timestamp: Pass timestamp value -+ * @pass_parser_result: Pass parser results -+ * @pass_frame_status: Pass frame status -+ * @private_data_size: Size kept for private data (in bytes) -+ * @data_align: Data alignment -+ * @data_head_room: Data head room -+ * @data_tail_room: Data tail room -+ */ -+struct dpni_buffer_layout { -+ uint32_t options; -+ int pass_timestamp; -+ int pass_parser_result; -+ int pass_frame_status; -+ uint16_t private_data_size; -+ uint16_t data_align; -+ uint16_t data_head_room; -+ uint16_t data_tail_room; -+}; -+ -+/** -+ * dpni_get_rx_buffer_layout() - Retrieve Rx buffer layout attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @layout: Returns buffer layout attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_rx_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_buffer_layout *layout); -+ -+/** -+ * dpni_set_rx_buffer_layout() - Set Rx buffer layout configuration. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @layout: Buffer layout configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ * -+ * @warning Allowed only when DPNI is disabled -+ */ -+int dpni_set_rx_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_buffer_layout *layout); -+ -+/** -+ * dpni_get_tx_buffer_layout() - Retrieve Tx buffer layout attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @layout: Returns buffer layout attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_tx_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_buffer_layout *layout); -+ -+/** -+ * dpni_set_tx_buffer_layout() - Set Tx buffer layout configuration. 
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @layout: Buffer layout configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ * -+ * @warning Allowed only when DPNI is disabled -+ */ -+int dpni_set_tx_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_buffer_layout *layout); -+ -+/** -+ * dpni_get_tx_conf_buffer_layout() - Retrieve Tx confirmation buffer layout -+ * attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @layout: Returns buffer layout attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_tx_conf_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_buffer_layout *layout); -+ -+/** -+ * dpni_set_tx_conf_buffer_layout() - Set Tx confirmation buffer layout -+ * configuration. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @layout: Buffer layout configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ * -+ * @warning Allowed only when DPNI is disabled -+ */ -+int dpni_set_tx_conf_buffer_layout(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_buffer_layout *layout); -+ -+/** -+ * dpni_set_l3_chksum_validation() - Enable/disable L3 checksum validation -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Set to '1' to enable; '0' to disable -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_l3_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en); -+ -+/** -+ * dpni_get_l3_chksum_validation() - Get L3 checksum validation mode -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Returns '1' if enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_l3_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpni_set_l4_chksum_validation() - Enable/disable L4 checksum validation -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Set to '1' to enable; '0' to disable -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_l4_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en); -+ -+/** -+ * dpni_get_l4_chksum_validation() - Get L4 checksum validation mode -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Returns '1' if enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. 
-+ */ -+int dpni_get_l4_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpni_get_qdid() - Get the Queuing Destination ID (QDID) that should be used -+ * for enqueue operations -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @qdid: Returned virtual QDID value that should be used as an argument -+ * in all enqueue operations -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_qdid(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *qdid); -+ -+/** -+ * struct dpni_sp_info - Structure representing DPNI storage-profile information -+ * (relevant only for DPNI owned by AIOP) -+ * @spids: array of storage-profiles -+ */ -+struct dpni_sp_info { -+ uint16_t spids[DPNI_MAX_SP]; -+}; -+ -+/** -+ * dpni_get_sp_info() - Get the AIOP storage profile IDs associated with the DPNI -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @sp_info: Returned AIOP storage-profile information -+ * -+ * Return: '0' on Success; Error code otherwise. -+ * -+ * @warning Only relevant for DPNI that belongs to AIOP container. -+ */ -+int dpni_get_sp_info(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_sp_info *sp_info); -+ -+/** -+ * dpni_get_tx_data_offset() - Get the Tx data offset (from start of buffer) -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @data_offset: Tx data offset (from start of buffer) -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_tx_data_offset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *data_offset); -+ -+/** -+ * enum dpni_counter - DPNI counter types -+ * @DPNI_CNT_ING_FRAME: Counts ingress frames -+ * @DPNI_CNT_ING_BYTE: Counts ingress bytes -+ * @DPNI_CNT_ING_FRAME_DROP: Counts ingress frames dropped due to explicit -+ * 'drop' setting -+ * @DPNI_CNT_ING_FRAME_DISCARD: Counts ingress frames discarded due to errors -+ * @DPNI_CNT_ING_MCAST_FRAME: Counts ingress multicast frames -+ * @DPNI_CNT_ING_MCAST_BYTE: Counts ingress multicast bytes -+ * @DPNI_CNT_ING_BCAST_FRAME: Counts ingress broadcast frames -+ * @DPNI_CNT_ING_BCAST_BYTES: Counts ingress broadcast bytes -+ * @DPNI_CNT_EGR_FRAME: Counts egress frames -+ * @DPNI_CNT_EGR_BYTE: Counts egress bytes -+ * @DPNI_CNT_EGR_FRAME_DISCARD: Counts egress frames discarded due to errors -+ */ -+enum dpni_counter { -+ DPNI_CNT_ING_FRAME = 0x0, -+ DPNI_CNT_ING_BYTE = 0x1, -+ DPNI_CNT_ING_FRAME_DROP = 0x2, -+ DPNI_CNT_ING_FRAME_DISCARD = 0x3, -+ DPNI_CNT_ING_MCAST_FRAME = 0x4, -+ DPNI_CNT_ING_MCAST_BYTE = 0x5, -+ DPNI_CNT_ING_BCAST_FRAME = 0x6, -+ DPNI_CNT_ING_BCAST_BYTES = 0x7, -+ DPNI_CNT_EGR_FRAME = 0x8, -+ DPNI_CNT_EGR_BYTE = 0x9, -+ DPNI_CNT_EGR_FRAME_DISCARD = 0xa -+}; -+ -+/** -+ * dpni_get_counter() - Read a specific DPNI counter -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @counter: The requested counter -+ * @value: Returned counter's current value -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ enum dpni_counter counter, -+ uint64_t *value);
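For illustration only, the counter API above can be polled for simple Rx statistics; 'mc_io' and 'token' are assumed to come from the usual open sequence, and printf assumes <stdio.h>:

    uint64_t frames = 0, bytes = 0;
    /* sketch: read ingress frame and byte counts */
    if (dpni_get_counter(mc_io, 0, token, DPNI_CNT_ING_FRAME, &frames) == 0 &&
        dpni_get_counter(mc_io, 0, token, DPNI_CNT_ING_BYTE, &bytes) == 0)
        printf("rx: %llu frames, %llu bytes\n",
               (unsigned long long)frames, (unsigned long long)bytes);
    /* writing '0' resets a counter, per dpni_set_counter() below */
    dpni_set_counter(mc_io, 0, token, DPNI_CNT_ING_FRAME_DISCARD, 0);

A real caller would check every return code; this sketch elides error handling.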
-+ -+/** -+ * dpni_set_counter() - Set (or clear) a specific DPNI counter -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @counter: The requested counter -+ * @value: New counter value; typically pass '0' for resetting -+ * the counter. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ enum dpni_counter counter, -+ uint64_t value); -+ -+/** -+ * Enable auto-negotiation -+ */ -+#define DPNI_LINK_OPT_AUTONEG 0x0000000000000001ULL -+/** -+ * Enable half-duplex mode -+ */ -+#define DPNI_LINK_OPT_HALF_DUPLEX 0x0000000000000002ULL -+/** -+ * Enable pause frames -+ */ -+#define DPNI_LINK_OPT_PAUSE 0x0000000000000004ULL -+/** -+ * Enable asymmetric pause frames -+ */ -+#define DPNI_LINK_OPT_ASYM_PAUSE 0x0000000000000008ULL -+ -+/** -+ * struct dpni_link_cfg - Structure representing DPNI link configuration -+ * @rate: Rate -+ * @options: Mask of available options; use 'DPNI_LINK_OPT_' values -+ */ -+struct dpni_link_cfg { -+ uint32_t rate; -+ uint64_t options; -+}; -+ -+/** -+ * dpni_set_link_cfg() - Set the link configuration. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @cfg: Link configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_link_cfg(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_link_cfg *cfg); -+ -+/** -+ * struct dpni_link_state - Structure representing DPNI link state -+ * @rate: Rate -+ * @options: Mask of available options; use 'DPNI_LINK_OPT_' values -+ * @up: Link state; '0' for down, '1' for up -+ */ -+struct dpni_link_state { -+ uint32_t rate; -+ uint64_t options; -+ int up; -+}; -+ -+/** -+ * dpni_get_link_state() - Return the link state (either up or down) -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @state: Returned link state -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_link_state(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_link_state *state); -+ -+/** -+ * struct dpni_tx_shaping_cfg - Structure representing DPNI tx shaping configuration -+ * @rate_limit: rate in Mbps -+ * @max_burst_size: burst size in bytes (up to 64KB) -+ */ -+struct dpni_tx_shaping_cfg { -+ uint32_t rate_limit; -+ uint16_t max_burst_size; -+}; -+ -+/** -+ * dpni_set_tx_shaping() - Set the transmit shaping -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tx_shaper: tx shaping configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_tx_shaping(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_tx_shaping_cfg *tx_shaper); -+ -+/** -+ * dpni_set_max_frame_length() - Set the maximum received frame length.
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @max_frame_length: Maximum received frame length (in -+ * bytes); frame is discarded if its -+ * length exceeds this value -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_max_frame_length(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t max_frame_length); -+ -+/** -+ * dpni_get_max_frame_length() - Get the maximum received frame length. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @max_frame_length: Maximum received frame length (in -+ * bytes); frame is discarded if its -+ * length exceeds this value -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_max_frame_length(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *max_frame_length); -+ -+/** -+ * dpni_set_mtu() - Set the MTU for the interface. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @mtu: MTU length (in bytes) -+ * -+ * MTU determines the maximum fragment size for performing IP -+ * fragmentation on egress packets. -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_mtu(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t mtu); -+ -+/** -+ * dpni_get_mtu() - Get the MTU. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @mtu: Returned MTU length (in bytes) -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_mtu(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *mtu); -+ -+/** -+ * dpni_set_multicast_promisc() - Enable/disable multicast promiscuous mode -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Set to '1' to enable; '0' to disable -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_multicast_promisc(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en); -+ -+/** -+ * dpni_get_multicast_promisc() - Get multicast promiscuous mode -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Returns '1' if enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_multicast_promisc(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpni_set_unicast_promisc() - Enable/disable unicast promiscuous mode -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Set to '1' to enable; '0' to disable -+ * -+ * Return: '0' on Success; Error code otherwise. 
-+ */ -+int dpni_set_unicast_promisc(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en); -+ -+/** -+ * dpni_get_unicast_promisc() - Get unicast promiscuous mode -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Returns '1' if enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_unicast_promisc(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpni_set_primary_mac_addr() - Set the primary MAC address -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @mac_addr: MAC address to set as primary address -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_primary_mac_addr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const uint8_t mac_addr[6]); -+ -+/** -+ * dpni_get_primary_mac_addr() - Get the primary MAC address -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @mac_addr: Returned MAC address -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_primary_mac_addr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t mac_addr[6]); -+ -+/** -+ * dpni_add_mac_addr() - Add MAC address filter -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @mac_addr: MAC address to add -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_add_mac_addr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const uint8_t mac_addr[6]); -+ -+/** -+ * dpni_remove_mac_addr() - Remove MAC address filter -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @mac_addr: MAC address to remove -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_remove_mac_addr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const uint8_t mac_addr[6]); -+ -+/** -+ * dpni_clear_mac_filters() - Clear all unicast and/or multicast MAC filters -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @unicast: Set to '1' to clear unicast addresses -+ * @multicast: Set to '1' to clear multicast addresses -+ * -+ * The primary MAC address is not cleared by this operation. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_clear_mac_filters(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int unicast, -+ int multicast); -+ -+/** -+ * dpni_set_vlan_filters() - Enable/disable VLAN filtering mode -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Set to '1' to enable; '0' to disable -+ * -+ * Return: '0' on Success; Error code otherwise. 
-+ */ -+int dpni_set_vlan_filters(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en); -+ -+/** -+ * dpni_add_vlan_id() - Add VLAN ID filter -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @vlan_id: VLAN ID to add -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_add_vlan_id(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id); -+ -+/** -+ * dpni_remove_vlan_id() - Remove VLAN ID filter -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @vlan_id: VLAN ID to remove -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_remove_vlan_id(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id); -+ -+/** -+ * dpni_clear_vlan_filters() - Clear all VLAN filters -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_clear_vlan_filters(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * enum dpni_tx_schedule_mode - DPNI Tx scheduling mode -+ * @DPNI_TX_SCHED_STRICT_PRIORITY: strict priority -+ * @DPNI_TX_SCHED_WEIGHTED: weighted based scheduling -+ */ -+enum dpni_tx_schedule_mode { -+ DPNI_TX_SCHED_STRICT_PRIORITY, -+ DPNI_TX_SCHED_WEIGHTED, -+}; -+ -+/** -+ * struct dpni_tx_schedule_cfg - Structure representing Tx -+ * scheduling configuration -+ * @mode: scheduling mode -+ * @delta_bandwidth: Bandwidth represented in weights from 100 to 10000; -+ * not applicable for 'strict-priority' mode; -+ */ -+struct dpni_tx_schedule_cfg { -+ enum dpni_tx_schedule_mode mode; -+ uint16_t delta_bandwidth; -+}; -+ -+/** -+ * struct dpni_tx_selection_cfg - Structure representing transmission -+ * selection configuration -+ * @tc_sched: an array of traffic-classes -+ */ -+struct dpni_tx_selection_cfg { -+ struct dpni_tx_schedule_cfg tc_sched[DPNI_MAX_TC]; -+}; -+ -+/** -+ * dpni_set_tx_selection() - Set transmission selection configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @cfg: transmission selection configuration -+ * -+ * warning: Allowed only when DPNI is disabled -+ * -+ * Return: '0' on Success; Error code otherwise. 
-+ */ -+int dpni_set_tx_selection(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_tx_selection_cfg *cfg); -+ -+/** -+ * enum dpni_dist_mode - DPNI distribution mode -+ * @DPNI_DIST_MODE_NONE: No distribution -+ * @DPNI_DIST_MODE_HASH: Use hash distribution; only relevant if -+ * the 'DPNI_OPT_DIST_HASH' option was set at DPNI creation -+ * @DPNI_DIST_MODE_FS: Use explicit flow steering; only relevant if -+ * the 'DPNI_OPT_DIST_FS' option was set at DPNI creation -+ */ -+enum dpni_dist_mode { -+ DPNI_DIST_MODE_NONE = 0, -+ DPNI_DIST_MODE_HASH = 1, -+ DPNI_DIST_MODE_FS = 2 -+}; -+ -+/** -+ * enum dpni_fs_miss_action - DPNI Flow Steering miss action -+ * @DPNI_FS_MISS_DROP: In case of no-match, drop the frame -+ * @DPNI_FS_MISS_EXPLICIT_FLOWID: In case of no-match, use explicit flow-id -+ * @DPNI_FS_MISS_HASH: In case of no-match, distribute using hash -+ */ -+enum dpni_fs_miss_action { -+ DPNI_FS_MISS_DROP = 0, -+ DPNI_FS_MISS_EXPLICIT_FLOWID = 1, -+ DPNI_FS_MISS_HASH = 2 -+}; -+ -+/** -+ * struct dpni_fs_tbl_cfg - Flow Steering table configuration -+ * @miss_action: Miss action selection -+ * @default_flow_id: Used when 'miss_action = DPNI_FS_MISS_EXPLICIT_FLOWID' -+ */ -+struct dpni_fs_tbl_cfg { -+ enum dpni_fs_miss_action miss_action; -+ uint16_t default_flow_id; -+}; -+ -+/** -+ * dpni_prepare_key_cfg() - prepare the key extraction parameters -+ * @cfg: defining a full Key Generation profile (rule) -+ * @key_cfg_buf: Zeroed 256 bytes of memory before mapping it to DMA -+ * -+ * This function has to be called before the following functions: -+ * - dpni_set_rx_tc_dist() -+ * - dpni_set_qos_table() -+ */ -+int dpni_prepare_key_cfg(const struct dpkg_profile_cfg *cfg, -+ uint8_t *key_cfg_buf); -+ -+/** -+ * struct dpni_rx_tc_dist_cfg - Rx traffic class distribution configuration -+ * @dist_size: Set the distribution size; -+ * supported values: 1,2,3,4,6,7,8,12,14,16,24,28,32,48,56,64,96, -+ * 112,128,192,224,256,384,448,512,768,896,1024 -+ * @dist_mode: Distribution mode -+ * @key_cfg_iova: I/O virtual address of 256 bytes DMA-able memory filled with -+ * the extractions to be used for the distribution key by calling -+ * dpni_prepare_key_cfg(); relevant only when -+ * 'dist_mode != DPNI_DIST_MODE_NONE', otherwise it can be '0' -+ * @fs_cfg: Flow Steering table configuration; only relevant if -+ * 'dist_mode = DPNI_DIST_MODE_FS' -+ */ -+struct dpni_rx_tc_dist_cfg { -+ uint16_t dist_size; -+ enum dpni_dist_mode dist_mode; -+ uint64_t key_cfg_iova; -+ struct dpni_fs_tbl_cfg fs_cfg; -+};
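A sketch of the intended call order, with hypothetical names: the extraction profile is serialized into a DMA-able buffer first, then handed to dpni_set_rx_tc_dist() (documented just below) via its I/O virtual address:

    uint8_t key_buf[256] = { 0 }; /* must really live in DMA-able memory */
    struct dpkg_profile_cfg kg_cfg = { 0 }; /* header fields to hash on, filled elsewhere */
    dpni_prepare_key_cfg(&kg_cfg, key_buf);
    uint64_t key_iova = 0; /* hypothetical: the IOVA of key_buf from your DMA mapping */
    struct dpni_rx_tc_dist_cfg dist = {
        .dist_size = 8, /* one of the supported sizes listed above */
        .dist_mode = DPNI_DIST_MODE_HASH,
        .key_cfg_iova = key_iova,
    };
    dpni_set_rx_tc_dist(mc_io, 0, token, 0 /* tc_id */, &dist);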
-+ -+/** -+ * dpni_set_rx_tc_dist() - Set Rx traffic class distribution configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @cfg: Traffic class distribution configuration -+ * -+ * warning: if 'dist_mode != DPNI_DIST_MODE_NONE', call dpni_prepare_key_cfg() -+ * first to prepare the key_cfg_iova parameter -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpni_set_rx_tc_dist(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_rx_tc_dist_cfg *cfg); -+ -+/** -+ * Set to select color-aware mode (otherwise color-blind) -+ */ -+#define DPNI_POLICER_OPT_COLOR_AWARE 0x00000001 -+/** -+ * Set to discard frame with RED color -+ */ -+#define DPNI_POLICER_OPT_DISCARD_RED 0x00000002 -+ -+/** -+ * enum dpni_policer_mode - selecting the policer mode -+ * @DPNI_POLICER_MODE_NONE: Policer is disabled -+ * @DPNI_POLICER_MODE_PASS_THROUGH: Policer pass through -+ * @DPNI_POLICER_MODE_RFC_2698: Policer algorithm RFC 2698 -+ * @DPNI_POLICER_MODE_RFC_4115: Policer algorithm RFC 4115 -+ */ -+enum dpni_policer_mode { -+ DPNI_POLICER_MODE_NONE = 0, -+ DPNI_POLICER_MODE_PASS_THROUGH, -+ DPNI_POLICER_MODE_RFC_2698, -+ DPNI_POLICER_MODE_RFC_4115 -+}; -+ -+/** -+ * enum dpni_policer_unit - DPNI policer units -+ * @DPNI_POLICER_UNIT_BYTES: bytes units -+ * @DPNI_POLICER_UNIT_FRAMES: frames units -+ */ -+enum dpni_policer_unit { -+ DPNI_POLICER_UNIT_BYTES = 0, -+ DPNI_POLICER_UNIT_FRAMES -+}; -+ -+/** -+ * enum dpni_policer_color - selecting the policer color -+ * @DPNI_POLICER_COLOR_GREEN: Green color -+ * @DPNI_POLICER_COLOR_YELLOW: Yellow color -+ * @DPNI_POLICER_COLOR_RED: Red color -+ */ -+enum dpni_policer_color { -+ DPNI_POLICER_COLOR_GREEN = 0, -+ DPNI_POLICER_COLOR_YELLOW, -+ DPNI_POLICER_COLOR_RED -+}; -+ -+/** -+ * struct dpni_rx_tc_policing_cfg - Policer configuration -+ * @options: Mask of available options; use 'DPNI_POLICER_OPT_' values -+ * @mode: policer mode -+ * @default_color: For pass-through mode the policer re-colors with this -+ * color any incoming packets. For color-aware non-pass-through mode: -+ * policer re-colors with this color all packets with FD[DROPP]>2. -+ * @units: Bytes or Packets -+ * @cir: Committed information rate (CIR) in Kbps or packets/second -+ * @cbs: Committed burst size (CBS) in bytes or packets -+ * @eir: Peak information rate (PIR, rfc2698) in Kbps or packets/second -+ * Excess information rate (EIR, rfc4115) in Kbps or packets/second -+ * @ebs: Peak burst size (PBS, rfc2698) in bytes or packets -+ * Excess burst size (EBS, rfc4115) in bytes or packets -+ */ -+struct dpni_rx_tc_policing_cfg { -+ uint32_t options; -+ enum dpni_policer_mode mode; -+ enum dpni_policer_unit units; -+ enum dpni_policer_color default_color; -+ uint32_t cir; -+ uint32_t cbs; -+ uint32_t eir; -+ uint32_t ebs; -+}; -+ -+/** -+ * dpni_set_rx_tc_policing() - Set Rx traffic class policing configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @cfg: Traffic class policing configuration -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpni_set_rx_tc_policing(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_rx_tc_policing_cfg *cfg); -+ -+/** -+ * dpni_get_rx_tc_policing() - Get Rx traffic class policing configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @cfg: Traffic class policing configuration -+ * -+ * Return: '0' on Success; error code otherwise.
-+ */ -+int dpni_get_rx_tc_policing(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ struct dpni_rx_tc_policing_cfg *cfg); -+ -+/** -+ * enum dpni_congestion_unit - DPNI congestion units -+ * @DPNI_CONGESTION_UNIT_BYTES: bytes units -+ * @DPNI_CONGESTION_UNIT_FRAMES: frames units -+ */ -+enum dpni_congestion_unit { -+ DPNI_CONGESTION_UNIT_BYTES = 0, -+ DPNI_CONGESTION_UNIT_FRAMES -+}; -+ -+/** -+ * enum dpni_early_drop_mode - DPNI early drop mode -+ * @DPNI_EARLY_DROP_MODE_NONE: early drop is disabled -+ * @DPNI_EARLY_DROP_MODE_TAIL: early drop in taildrop mode -+ * @DPNI_EARLY_DROP_MODE_WRED: early drop in WRED mode -+ */ -+enum dpni_early_drop_mode { -+ DPNI_EARLY_DROP_MODE_NONE = 0, -+ DPNI_EARLY_DROP_MODE_TAIL, -+ DPNI_EARLY_DROP_MODE_WRED -+}; -+ -+/** -+ * struct dpni_wred_cfg - WRED configuration -+ * @max_threshold: maximum threshold; above this threshold all packets are -+ * discarded; must be less than 2^39; approximated as (x+256)*2^(y-1) -+ * due to the HW implementation. -+ * @min_threshold: minimum threshold at which packets may start to be discarded -+ * @drop_probability: probability that a packet will be discarded (1-100, -+ * associated with the max_threshold). -+ */ -+struct dpni_wred_cfg { -+ uint64_t max_threshold; -+ uint64_t min_threshold; -+ uint8_t drop_probability; -+}; -+ -+/** -+ * struct dpni_early_drop_cfg - early-drop configuration -+ * @mode: drop mode -+ * @units: units type -+ * @green: WRED - 'green' configuration -+ * @yellow: WRED - 'yellow' configuration -+ * @red: WRED - 'red' configuration -+ * @tail_drop_threshold: tail drop threshold -+ */ -+struct dpni_early_drop_cfg { -+ enum dpni_early_drop_mode mode; -+ enum dpni_congestion_unit units; -+ -+ struct dpni_wred_cfg green; -+ struct dpni_wred_cfg yellow; -+ struct dpni_wred_cfg red; -+ -+ uint32_t tail_drop_threshold; -+}; -+ -+/** -+ * dpni_prepare_early_drop() - prepare an early-drop configuration buffer. -+ * @cfg: Early-drop configuration -+ * @early_drop_buf: Zeroed 256 bytes of memory before mapping it to DMA -+ * -+ * This function has to be called before dpni_set_rx_tc_early_drop() or -+ * dpni_set_tx_tc_early_drop() -+ * -+ */ -+void dpni_prepare_early_drop(const struct dpni_early_drop_cfg *cfg, -+ uint8_t *early_drop_buf); -+ -+/** -+ * dpni_extract_early_drop() - extract the early drop configuration. -+ * @cfg: Early-drop configuration -+ * @early_drop_buf: Zeroed 256 bytes of memory before mapping it to DMA -+ * -+ * This function has to be called after dpni_get_rx_tc_early_drop() or -+ * dpni_get_tx_tc_early_drop() -+ * -+ */ -+void dpni_extract_early_drop(struct dpni_early_drop_cfg *cfg, -+ const uint8_t *early_drop_buf);
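For illustration, a hypothetical WRED setup serialized with dpni_prepare_early_drop(); the resulting buffer's IOVA would then be passed to dpni_set_rx_tc_early_drop(), documented below:

    struct dpni_early_drop_cfg ed = {
        .mode = DPNI_EARLY_DROP_MODE_WRED,
        .units = DPNI_CONGESTION_UNIT_BYTES,
        /* drop green traffic probabilistically between the two thresholds */
        .green = { .min_threshold = 0x10000, .max_threshold = 0x40000,
                   .drop_probability = 10 },
        /* yellow/red left zeroed for brevity; a real profile would set them */
    };
    uint8_t ed_buf[256] = { 0 }; /* zeroed, DMA-able memory */
    dpni_prepare_early_drop(&ed, ed_buf);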
-+ -+/** -+ * dpni_set_rx_tc_early_drop() - Set Rx traffic class early-drop configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @early_drop_iova: I/O virtual address of 256 bytes DMA-able memory filled -+ * with the early-drop configuration by calling dpni_prepare_early_drop() -+ * -+ * warning: Before calling this function, call dpni_prepare_early_drop() to -+ * prepare the early_drop_iova parameter -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpni_set_rx_tc_early_drop(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint64_t early_drop_iova); -+ -+/** -+ * dpni_get_rx_tc_early_drop() - Get Rx traffic class early-drop configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @early_drop_iova: I/O virtual address of 256 bytes DMA-able memory -+ * -+ * warning: After calling this function, call dpni_extract_early_drop() to -+ * get the early drop configuration -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpni_get_rx_tc_early_drop(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint64_t early_drop_iova); -+ -+/** -+ * dpni_set_tx_tc_early_drop() - Set Tx traffic class early-drop configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @early_drop_iova: I/O virtual address of 256 bytes DMA-able memory filled -+ * with the early-drop configuration by calling dpni_prepare_early_drop() -+ * -+ * warning: Before calling this function, call dpni_prepare_early_drop() to -+ * prepare the early_drop_iova parameter -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpni_set_tx_tc_early_drop(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint64_t early_drop_iova); -+ -+/** -+ * dpni_get_tx_tc_early_drop() - Get Tx traffic class early-drop configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @early_drop_iova: I/O virtual address of 256 bytes DMA-able memory -+ * -+ * warning: After calling this function, call dpni_extract_early_drop() to -+ * get the early drop configuration -+ * -+ * Return: '0' on Success; error code otherwise.
-+ */ -+int dpni_get_tx_tc_early_drop(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint64_t early_drop_iova); -+ -+/** -+ * enum dpni_dest - DPNI destination types -+ * @DPNI_DEST_NONE: Unassigned destination; The queue is set in parked mode and -+ * does not generate FQDAN notifications; user is expected to -+ * dequeue from the queue based on polling or other user-defined -+ * method -+ * @DPNI_DEST_DPIO: The queue is set in schedule mode and generates FQDAN -+ * notifications to the specified DPIO; user is expected to dequeue -+ * from the queue only after notification is received -+ * @DPNI_DEST_DPCON: The queue is set in schedule mode and does not generate -+ * FQDAN notifications, but is connected to the specified DPCON -+ * object; user is expected to dequeue from the DPCON channel -+ */ -+enum dpni_dest { -+ DPNI_DEST_NONE = 0, -+ DPNI_DEST_DPIO = 1, -+ DPNI_DEST_DPCON = 2 -+}; -+ -+/** -+ * struct dpni_dest_cfg - Structure representing DPNI destination parameters -+ * @dest_type: Destination type -+ * @dest_id: Either DPIO ID or DPCON ID, depending on the destination type -+ * @priority: Priority selection within the DPIO or DPCON channel; valid values -+ * are 0-1 or 0-7, depending on the number of priorities in that -+ * channel; not relevant for 'DPNI_DEST_NONE' option -+ */ -+struct dpni_dest_cfg { -+ enum dpni_dest dest_type; -+ int dest_id; -+ uint8_t priority; -+}; -+ -+/* DPNI congestion options */ -+ -+/** -+ * CSCN message is written to message_iova once entering a -+ * congestion state (see 'threshold_entry') -+ */ -+#define DPNI_CONG_OPT_WRITE_MEM_ON_ENTER 0x00000001 -+/** -+ * CSCN message is written to message_iova once exiting a -+ * congestion state (see 'threshold_exit') -+ */ -+#define DPNI_CONG_OPT_WRITE_MEM_ON_EXIT 0x00000002 -+/** -+ * CSCN write will attempt to allocate into a cache (coherent write); -+ * valid only if 'DPNI_CONG_OPT_WRITE_MEM_' is selected -+ */ -+#define DPNI_CONG_OPT_COHERENT_WRITE 0x00000004 -+/** -+ * if 'dest_cfg.dest_type != DPNI_DEST_NONE' CSCN message is sent to -+ * DPIO/DPCON's WQ channel once entering a congestion state -+ * (see 'threshold_entry') -+ */ -+#define DPNI_CONG_OPT_NOTIFY_DEST_ON_ENTER 0x00000008 -+/** -+ * if 'dest_cfg.dest_type != DPNI_DEST_NONE' CSCN message is sent to -+ * DPIO/DPCON's WQ channel once exiting a congestion state -+ * (see 'threshold_exit') -+ */ -+#define DPNI_CONG_OPT_NOTIFY_DEST_ON_EXIT 0x00000010 -+/** -+ * if 'dest_cfg.dest_type != DPNI_DEST_NONE' when the CSCN is written to the -+ * sw-portal's DQRR, the DQRI interrupt is asserted immediately (if enabled) -+ */ -+#define DPNI_CONG_OPT_INTR_COALESCING_DISABLED 0x00000020 -+ -+/** -+ * struct dpni_congestion_notification_cfg - congestion notification -+ * configuration -+ * @units: units type -+ * @threshold_entry: above this threshold we enter a congestion state. -+ * set it to '0' to disable it -+ * @threshold_exit: below this threshold we exit the congestion state. 
-+ * @message_ctx: The context that will be part of the CSCN message -+ * @message_iova: I/O virtual address (must be in DMA-able memory), -+ * must be 16B aligned; valid only if 'DPNI_CONG_OPT_WRITE_MEM_' is -+ * contained in 'options' -+ * @dest_cfg: CSCN can be sent to either DPIO or DPCON WQ channel -+ * @options: Mask of available options; use 'DPNI_CONG_OPT_' values -+ */ -+ -+struct dpni_congestion_notification_cfg { -+ enum dpni_congestion_unit units; -+ uint32_t threshold_entry; -+ uint32_t threshold_exit; -+ uint64_t message_ctx; -+ uint64_t message_iova; -+ struct dpni_dest_cfg dest_cfg; -+ uint16_t options; -+}; -+ -+/** -+ * dpni_set_rx_tc_congestion_notification() - Set Rx traffic class congestion -+ * notification configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @cfg: congestion notification configuration -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpni_set_rx_tc_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_congestion_notification_cfg *cfg); -+ -+/** -+ * dpni_get_rx_tc_congestion_notification() - Get Rx traffic class congestion -+ * notification configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @cfg: congestion notification configuration -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpni_get_rx_tc_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ struct dpni_congestion_notification_cfg *cfg); -+ -+/** -+ * dpni_set_tx_tc_congestion_notification() - Set Tx traffic class congestion -+ * notification configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @cfg: congestion notification configuration -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpni_set_tx_tc_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_congestion_notification_cfg *cfg);
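As a sketch, a state-change notification that writes a CSCN record to memory on both entry and exit, without waking any DPIO/DPCON ('cscn_iova' is a hypothetical 16B-aligned DMA-able address):

    uint64_t cscn_iova = 0; /* hypothetical: IOVA of a 16B-aligned CSCN buffer */
    struct dpni_congestion_notification_cfg cn = {
        .units = DPNI_CONGESTION_UNIT_FRAMES,
        .threshold_entry = 1024, /* congested above 1024 queued frames */
        .threshold_exit = 256,   /* uncongested again below 256 */
        .message_iova = cscn_iova,
        .message_ctx = 0,
        .dest_cfg = { .dest_type = DPNI_DEST_NONE },
        .options = DPNI_CONG_OPT_WRITE_MEM_ON_ENTER |
                   DPNI_CONG_OPT_WRITE_MEM_ON_EXIT,
    };
    dpni_set_rx_tc_congestion_notification(mc_io, 0, token, 0 /* tc_id */, &cn);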
-+ -+/** -+ * dpni_get_tx_tc_congestion_notification() - Get Tx traffic class congestion -+ * notification configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @cfg: congestion notification configuration -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpni_get_tx_tc_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ struct dpni_congestion_notification_cfg *cfg); -+ -+/** -+ * enum dpni_flc_type - DPNI FLC types -+ * @DPNI_FLC_USER_DEFINED: select the FLC to be used for user defined value -+ * @DPNI_FLC_STASH: select the FLC to be used for stash control -+ */ -+enum dpni_flc_type { -+ DPNI_FLC_USER_DEFINED = 0, -+ DPNI_FLC_STASH = 1, -+}; -+ -+/** -+ * enum dpni_stash_size - DPNI FLC stashing size -+ * @DPNI_STASH_SIZE_0B: no stash -+ * @DPNI_STASH_SIZE_64B: stashes 64 bytes -+ * @DPNI_STASH_SIZE_128B: stashes 128 bytes -+ * @DPNI_STASH_SIZE_192B: stashes 192 bytes -+ */ -+enum dpni_stash_size { -+ DPNI_STASH_SIZE_0B = 0, -+ DPNI_STASH_SIZE_64B = 1, -+ DPNI_STASH_SIZE_128B = 2, -+ DPNI_STASH_SIZE_192B = 3, -+}; -+ -+/* DPNI FLC stash options */ -+ -+/** -+ * stashes the whole annotation area (up to 192 bytes) -+ */ -+#define DPNI_FLC_STASH_FRAME_ANNOTATION 0x00000001 -+ -+/** -+ * struct dpni_flc_cfg - Structure representing DPNI FLC configuration -+ * @flc_type: FLC type -+ * @options: Mask of available options; -+ * use 'DPNI_FLC_STASH_' values -+ * @frame_data_size: Size of frame data to be stashed -+ * @flow_context_size: Size of flow context to be stashed -+ * @flow_context: 1. In case flc_type is 'DPNI_FLC_USER_DEFINED': -+ * this value will be provided in the frame descriptor -+ * (FD[FLC]) -+ * 2. In case flc_type is 'DPNI_FLC_STASH': -+ * this value will be I/O virtual address of the -+ * flow-context; -+ * Must be cacheline-aligned and DMA-able memory -+ */ -+struct dpni_flc_cfg { -+ enum dpni_flc_type flc_type; -+ uint32_t options; -+ enum dpni_stash_size frame_data_size; -+ enum dpni_stash_size flow_context_size; -+ uint64_t flow_context; -+}; -+ -+/** -+ * DPNI queue modification options -+ */ -+ -+/** -+ * Select to modify the user's context associated with the queue -+ */ -+#define DPNI_QUEUE_OPT_USER_CTX 0x00000001 -+/** -+ * Select to modify the queue's destination -+ */ -+#define DPNI_QUEUE_OPT_DEST 0x00000002 -+/** Select to modify the flow-context parameters; -+ * not applicable for Tx-conf/Err queues as the FD comes from the user -+ */ -+#define DPNI_QUEUE_OPT_FLC 0x00000004 -+/** -+ * Select to modify the queue's order preservation -+ */ -+#define DPNI_QUEUE_OPT_ORDER_PRESERVATION 0x00000008 -+/* Select to modify the queue's tail-drop threshold */ -+#define DPNI_QUEUE_OPT_TAILDROP_THRESHOLD 0x00000010 -+ -+/** -+ * struct dpni_queue_cfg - Structure representing queue configuration -+ * @options: Flags representing the suggested modifications to the queue; -+ * Use any combination of 'DPNI_QUEUE_OPT_' flags -+ * @user_ctx: User context value provided in the frame descriptor of each -+ * dequeued frame; valid only if 'DPNI_QUEUE_OPT_USER_CTX' -+ * is contained in 'options' -+ * @dest_cfg: Queue destination parameters; -+ * valid only if 'DPNI_QUEUE_OPT_DEST' is contained in 'options' -+ * @flc_cfg: Flow context configuration; in case the TC's distribution -+ * is either NONE or HASH the FLC's settings of flow#0 are used. -+ * in the case of FS (flow-steering) the flow's FLC settings -+ * are used.
-+ * valid only if 'DPNI_QUEUE_OPT_FLC' is contained in 'options' -+ * @order_preservation_en: enable/disable order preservation; -+ * valid only if 'DPNI_QUEUE_OPT_ORDER_PRESERVATION' is contained -+ * in 'options' -+ * @tail_drop_threshold: set the queue's tail drop threshold in bytes; -+ * a '0' value disables the threshold; maximum value is 0xE000000; -+ * valid only if 'DPNI_QUEUE_OPT_TAILDROP_THRESHOLD' is contained -+ * in 'options' -+ */ -+struct dpni_queue_cfg { -+ uint32_t options; -+ uint64_t user_ctx; -+ struct dpni_dest_cfg dest_cfg; -+ struct dpni_flc_cfg flc_cfg; -+ int order_preservation_en; -+ uint32_t tail_drop_threshold; -+}; -+ -+/** -+ * struct dpni_queue_attr - Structure representing queue attributes -+ * @user_ctx: User context value provided in the frame descriptor of each -+ * dequeued frame -+ * @dest_cfg: Queue destination configuration -+ * @flc_cfg: Flow context configuration -+ * @order_preservation_en: enable/disable order preservation -+ * @tail_drop_threshold: queue's tail drop threshold in bytes -+ * @fqid: Virtual fqid value to be used for dequeue operations -+ */ -+struct dpni_queue_attr { -+ uint64_t user_ctx; -+ struct dpni_dest_cfg dest_cfg; -+ struct dpni_flc_cfg flc_cfg; -+ int order_preservation_en; -+ uint32_t tail_drop_threshold; -+ -+ uint32_t fqid; -+}; -+ -+/** -+ * DPNI Tx flow modification options -+ */ -+ -+/** -+ * Select to modify the settings for dedicated Tx confirmation/error -+ */ -+#define DPNI_TX_FLOW_OPT_TX_CONF_ERROR 0x00000001 -+/** -+ * Select to modify the L3 checksum generation setting -+ */ -+#define DPNI_TX_FLOW_OPT_L3_CHKSUM_GEN 0x00000010 -+/** -+ * Select to modify the L4 checksum generation setting -+ */ -+#define DPNI_TX_FLOW_OPT_L4_CHKSUM_GEN 0x00000020 -+ -+/** -+ * struct dpni_tx_flow_cfg - Structure representing Tx flow configuration -+ * @options: Flags representing the suggested modifications to the Tx flow; -+ * Use any combination 'DPNI_TX_FLOW_OPT_' flags -+ * @use_common_tx_conf_queue: Set to '1' to use the common (default) Tx -+ * confirmation and error queue; Set to '0' to use the private -+ * Tx confirmation and error queue; valid only if -+ * 'DPNI_OPT_PRIVATE_TX_CONF_ERROR_DISABLED' wasn't set at DPNI creation -+ * and 'DPNI_TX_FLOW_OPT_TX_CONF_ERROR' is contained in 'options' -+ * @l3_chksum_gen: Set to '1' to enable L3 checksum generation; '0' to disable; -+ * valid only if 'DPNI_TX_FLOW_OPT_L3_CHKSUM_GEN' is contained in 'options' -+ * @l4_chksum_gen: Set to '1' to enable L4 checksum generation; '0' to disable; -+ * valid only if 'DPNI_TX_FLOW_OPT_L4_CHKSUM_GEN' is contained in 'options' -+ */ -+struct dpni_tx_flow_cfg { -+ uint32_t options; -+ int use_common_tx_conf_queue; -+ int l3_chksum_gen; -+ int l4_chksum_gen; -+};
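To illustrate the flow-allocation convention described below for dpni_set_tx_flow(): initializing the ID to DPNI_NEW_FLOW_ID (defined elsewhere in this header) asks the MC to pick one, and the value written back is the QDBIN for enqueues. A hedged sketch, not an authoritative sequence:

    uint16_t flow_id = DPNI_NEW_FLOW_ID; /* request a fresh flow */
    struct dpni_tx_flow_cfg fl = {
        .options = DPNI_TX_FLOW_OPT_L3_CHKSUM_GEN | DPNI_TX_FLOW_OPT_L4_CHKSUM_GEN,
        .l3_chksum_gen = 1, /* offload L3 checksum generation */
        .l4_chksum_gen = 1, /* offload L4 checksum generation */
    };
    if (dpni_set_tx_flow(mc_io, 0, token, &flow_id, &fl) == 0) {
        /* flow_id now holds the QDBIN to use in enqueue operations */
    }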
-+ -+/** -+ * dpni_set_tx_flow() - Set Tx flow configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @flow_id: Provides (or returns) the sender's flow ID; -+ * for each new sender set (*flow_id) to 'DPNI_NEW_FLOW_ID' to generate -+ * a new flow_id; this ID should be used as the QDBIN argument -+ * in enqueue operations -+ * @cfg: Tx flow configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_tx_flow(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *flow_id, -+ const struct dpni_tx_flow_cfg *cfg); -+ -+/** -+ * struct dpni_tx_flow_attr - Structure representing Tx flow attributes -+ * @use_common_tx_conf_queue: '1' if using common (default) Tx confirmation and -+ * error queue; '0' if using private Tx confirmation and error queue -+ * @l3_chksum_gen: '1' if L3 checksum generation is enabled; '0' if disabled -+ * @l4_chksum_gen: '1' if L4 checksum generation is enabled; '0' if disabled -+ */ -+struct dpni_tx_flow_attr { -+ int use_common_tx_conf_queue; -+ int l3_chksum_gen; -+ int l4_chksum_gen; -+}; -+ -+/** -+ * dpni_get_tx_flow() - Get Tx flow attributes -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @flow_id: The sender's flow ID, as returned by the -+ * dpni_set_tx_flow() function -+ * @attr: Returned Tx flow attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_tx_flow(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t flow_id, -+ struct dpni_tx_flow_attr *attr); -+ -+/** -+ * struct dpni_tx_conf_cfg - Structure representing Tx conf configuration -+ * @errors_only: Set to '1' to report back only error frames; -+ * Set to '0' to confirm transmission/error for all transmitted frames; -+ * @queue_cfg: Queue configuration -+ */ -+struct dpni_tx_conf_cfg { -+ int errors_only; -+ struct dpni_queue_cfg queue_cfg; -+}; -+ -+/** -+ * dpni_set_tx_conf() - Set Tx confirmation and error queue configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @flow_id: The sender's flow ID, as returned by the -+ * dpni_set_tx_flow() function; -+ * use 'DPNI_COMMON_TX_CONF' for common tx-conf -+ * @cfg: Queue configuration -+ * -+ * If either 'DPNI_OPT_TX_CONF_DISABLED' or -+ * 'DPNI_OPT_PRIVATE_TX_CONF_ERROR_DISABLED' were selected at DPNI creation, -+ * this function can ONLY be used with 'flow_id == DPNI_COMMON_TX_CONF'; -+ * i.e. only serve the common tx-conf-err queue; -+ * if 'DPNI_OPT_TX_CONF_DISABLED' was selected, only error frames are reported -+ * back - successfully transmitted frames are not confirmed. Otherwise, all -+ * transmitted frames are sent for confirmation. -+ * -+ * Return: '0' on Success; Error code otherwise.
-+ */ -+int dpni_set_tx_conf(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t flow_id, -+ const struct dpni_tx_conf_cfg *cfg); -+ -+/** -+ * struct dpni_tx_conf_attr - Structure representing Tx conf attributes -+ * @errors_only: '1' if only error frames are reported back; '0' if all -+ * transmitted frames are confirmed -+ * @queue_attr: Queue attributes -+ */ -+struct dpni_tx_conf_attr { -+ int errors_only; -+ struct dpni_queue_attr queue_attr; -+}; -+ -+/** -+ * dpni_get_tx_conf() - Get Tx confirmation and error queue attributes -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @flow_id: The sender's flow ID, as returned by the -+ * dpni_set_tx_flow() function; -+ * use 'DPNI_COMMON_TX_CONF' for common tx-conf -+ * @attr: Returned tx-conf attributes -+ * -+ * If either 'DPNI_OPT_TX_CONF_DISABLED' or -+ * 'DPNI_OPT_PRIVATE_TX_CONF_ERROR_DISABLED' were selected at DPNI creation, -+ * this function can ONLY be used with 'flow_id == DPNI_COMMON_TX_CONF'; -+ * i.e. only serve the common tx-conf-err queue; -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_tx_conf(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t flow_id, -+ struct dpni_tx_conf_attr *attr); -+ -+/** -+ * dpni_set_tx_conf_congestion_notification() - Set Tx conf congestion -+ * notification configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @flow_id: The sender's flow ID, as returned by the -+ * dpni_set_tx_flow() function; -+ * use 'DPNI_COMMON_TX_CONF' for common tx-conf -+ * @cfg: congestion notification configuration -+ * -+ * If either 'DPNI_OPT_TX_CONF_DISABLED' or -+ * 'DPNI_OPT_PRIVATE_TX_CONF_ERROR_DISABLED' were selected at DPNI creation, -+ * this function can ONLY be used with 'flow_id == DPNI_COMMON_TX_CONF'; -+ * i.e. only serve the common tx-conf-err queue; -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpni_set_tx_conf_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t flow_id, -+ const struct dpni_congestion_notification_cfg *cfg); -+ -+/** -+ * dpni_get_tx_conf_congestion_notification() - Get Tx conf congestion -+ * notification configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @flow_id: The sender's flow ID, as returned by the -+ * dpni_set_tx_flow() function; -+ * use 'DPNI_COMMON_TX_CONF' for common tx-conf -+ * @cfg: congestion notification -+ * -+ * If either 'DPNI_OPT_TX_CONF_DISABLED' or -+ * 'DPNI_OPT_PRIVATE_TX_CONF_ERROR_DISABLED' were selected at DPNI creation, -+ * this function can ONLY be used with 'flow_id == DPNI_COMMON_TX_CONF'; -+ * i.e. only serve the common tx-conf-err queue; -+ * -+ * Return: '0' on Success; error code otherwise. 
-+ */ -+int dpni_get_tx_conf_congestion_notification(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t flow_id, -+ struct dpni_congestion_notification_cfg *cfg); -+ -+/** -+ * dpni_set_tx_conf_revoke() - Tx confirmation revocation -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @revoke: revoke or not -+ * -+ * This function is useful only when 'DPNI_OPT_TX_CONF_DISABLED' is not -+ * selected at DPNI creation. -+ * Calling this function with 'revoke' set to '1' disables all transmit -+ * confirmation (including the private confirmation queues), regardless of -+ * previous settings; Note that in this case, Tx error frames are still -+ * enqueued to the general transmit errors queue. -+ * Calling this function with 'revoke' set to '0' restores the previous -+ * settings for both general and private transmit confirmation. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_tx_conf_revoke(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int revoke); -+ -+/** -+ * dpni_set_rx_flow() - Set Rx flow configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7); -+ * use 'DPNI_ALL_TCS' to set all TCs and all flows -+ * @flow_id: Rx flow id within the traffic class; use -+ * 'DPNI_ALL_TC_FLOWS' to set all flows within -+ * this tc_id; ignored if tc_id is set to -+ * 'DPNI_ALL_TCS'; -+ * @cfg: Rx flow configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_rx_flow(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint16_t flow_id, -+ const struct dpni_queue_cfg *cfg); -+ -+/** -+ * dpni_get_rx_flow() - Get Rx flow attributes -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @flow_id: Rx flow id within the traffic class -+ * @attr: Returned Rx flow attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_get_rx_flow(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ uint16_t flow_id, -+ struct dpni_queue_attr *attr); -+ -+/** -+ * dpni_set_rx_err_queue() - Set Rx error queue configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @cfg: Queue configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_rx_err_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_queue_cfg *cfg); -+ -+/** -+ * dpni_get_rx_err_queue() - Get Rx error queue attributes -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @attr: Returned Queue attributes -+ * -+ * Return: '0' on Success; Error code otherwise. 
-+ */ -+int dpni_get_rx_err_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpni_queue_attr *attr); -+ -+/** -+ * struct dpni_qos_tbl_cfg - Structure representing QOS table configuration -+ * @key_cfg_iova: I/O virtual address of 256 bytes DMA-able memory filled with -+ * key extractions to be used as the QoS criteria by calling -+ * dpni_prepare_key_cfg() -+ * @discard_on_miss: Set to '1' to discard frames in case of no match (miss); -+ * '0' to use the 'default_tc' in such cases -+ * @default_tc: Used in case of no-match and 'discard_on_miss'= 0 -+ */ -+struct dpni_qos_tbl_cfg { -+ uint64_t key_cfg_iova; -+ int discard_on_miss; -+ uint8_t default_tc; -+}; -+ -+/** -+ * dpni_set_qos_table() - Set QoS mapping table -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @cfg: QoS table configuration -+ * -+ * This function and all QoS-related functions require that -+ * 'max_tcs > 1' was set at DPNI creation. -+ * -+ * warning: Before calling this function, call dpni_prepare_key_cfg() to -+ * prepare the key_cfg_iova parameter -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_qos_table(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_qos_tbl_cfg *cfg); -+ -+/** -+ * struct dpni_rule_cfg - Rule configuration for table lookup -+ * @key_iova: I/O virtual address of the key (must be in DMA-able memory) -+ * @mask_iova: I/O virtual address of the mask (must be in DMA-able memory) -+ * @key_size: key and mask size (in bytes) -+ */ -+struct dpni_rule_cfg { -+ uint64_t key_iova; -+ uint64_t mask_iova; -+ uint8_t key_size; -+}; -+ -+/** -+ * dpni_add_qos_entry() - Add QoS mapping entry (to select a traffic class) -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @cfg: QoS rule to add -+ * @tc_id: Traffic class selection (0-7) -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_add_qos_entry(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_rule_cfg *cfg, -+ uint8_t tc_id); -+ -+/** -+ * dpni_remove_qos_entry() - Remove QoS mapping entry -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @cfg: QoS rule to remove -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_remove_qos_entry(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpni_rule_cfg *cfg);
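For illustration (with hypothetical 'key_iova'/'mask_iova' pointing at DMA-able key material, prepared per dpni_prepare_key_cfg() and dpni_set_qos_table() above), a rule steering matching frames to traffic class 3 might look like:

    uint64_t key_iova = 0, mask_iova = 0; /* hypothetical IOVAs of key and mask */
    struct dpni_rule_cfg rule = {
        .key_iova = key_iova,
        .mask_iova = mask_iova,
        .key_size = 8, /* bytes of key and mask */
    };
    dpni_add_qos_entry(mc_io, 0, token, &rule, 3 /* tc_id */);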
-+ -+/** -+ * dpni_clear_qos_table() - Clear all QoS mapping entries -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * -+ * Following this function call, all frames are directed to -+ * the default traffic class (0) -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_clear_qos_table(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpni_add_fs_entry() - Add Flow Steering entry for a specific traffic class -+ * (to select a flow ID) -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @cfg: Flow steering rule to add -+ * @flow_id: Flow id selection (must be smaller than the -+ * distribution size of the traffic class) -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_add_fs_entry(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_rule_cfg *cfg, -+ uint16_t flow_id); -+ -+/** -+ * dpni_remove_fs_entry() - Remove Flow Steering entry from a specific -+ * traffic class -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * @cfg: Flow steering rule to remove -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_remove_fs_entry(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id, -+ const struct dpni_rule_cfg *cfg); -+ -+/** -+ * dpni_clear_fs_entries() - Clear all Flow Steering entries of a specific -+ * traffic class -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @tc_id: Traffic class selection (0-7) -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_clear_fs_entries(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t tc_id); -+ -+/** -+ * dpni_set_vlan_insertion() - Enable/disable VLAN insertion for egress frames -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Set to '1' to enable; '0' to disable -+ * -+ * Requires that the 'DPNI_OPT_VLAN_MANIPULATION' option is set -+ * at DPNI creation. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_vlan_insertion(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en); -+ -+/** -+ * dpni_set_vlan_removal() - Enable/disable VLAN removal for ingress frames -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Set to '1' to enable; '0' to disable -+ * -+ * Requires that the 'DPNI_OPT_VLAN_MANIPULATION' option is set -+ * at DPNI creation. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_vlan_removal(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en); -+ -+/** -+ * dpni_set_ipr() - Enable/disable IP reassembly of ingress frames -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Set to '1' to enable; '0' to disable -+ * -+ * Requires that the 'DPNI_OPT_IPR' option is set at DPNI creation. -+ * -+ * Return: '0' on Success; Error code otherwise.
-+ */ -+int dpni_set_ipr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en); -+ -+/** -+ * dpni_set_ipf() - Enable/disable IP fragmentation of egress frames -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPNI object -+ * @en: Set to '1' to enable; '0' to disable -+ * -+ * Requires that the 'DPNI_OPT_IPF' option is set at DPNI -+ * creation. Fragmentation is performed according to MTU value -+ * set by dpni_set_mtu() function -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpni_set_ipf(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en); -+ -+#endif /* __FSL_DPNI_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpni_cmd.h b/drivers/net/dpaa2/mc/fsl_dpni_cmd.h -new file mode 100644 -index 0000000..c0f8af0 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpni_cmd.h -@@ -0,0 +1,1058 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
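The command header that follows is built entirely from field-packing macros. As a mental model (an assumption about the MC_CMD_OP/MC_RSP_OP helpers, whose real definitions live in the MC command layer rather than in this file), each macro masks a value to its bit width and shifts it into one of the 64-bit command parameter words:

#include <stdint.h>

/* sketch of the presumed encoding behind MC_CMD_OP(cmd, param, offset,
 * width, type, arg): mask 'val' to 'width' bits, shift it to 'offset' */
static inline uint64_t mc_enc_sketch(int offset, int width, uint64_t val)
{
	uint64_t mask = (width < 64) ? ((1ULL << width) - 1) : ~0ULL;

	return (val & mask) << offset;
}

/* e.g. DPNI_CMD_SET_MTU(cmd, mtu) below places 'mtu' in bits 0..15 of
 * parameter word 0: params[0] |= mc_enc_sketch(0, 16, mtu); */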
-+ */ -+#ifndef _FSL_DPNI_CMD_H -+#define _FSL_DPNI_CMD_H -+ -+/* DPNI Version */ -+#define DPNI_VER_MAJOR 6 -+#define DPNI_VER_MINOR 0 -+ -+/* Command IDs */ -+#define DPNI_CMDID_OPEN 0x801 -+#define DPNI_CMDID_CLOSE 0x800 -+#define DPNI_CMDID_CREATE 0x901 -+#define DPNI_CMDID_DESTROY 0x900 -+ -+#define DPNI_CMDID_ENABLE 0x002 -+#define DPNI_CMDID_DISABLE 0x003 -+#define DPNI_CMDID_GET_ATTR 0x004 -+#define DPNI_CMDID_RESET 0x005 -+#define DPNI_CMDID_IS_ENABLED 0x006 -+ -+#define DPNI_CMDID_SET_IRQ 0x010 -+#define DPNI_CMDID_GET_IRQ 0x011 -+#define DPNI_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPNI_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPNI_CMDID_SET_IRQ_MASK 0x014 -+#define DPNI_CMDID_GET_IRQ_MASK 0x015 -+#define DPNI_CMDID_GET_IRQ_STATUS 0x016 -+#define DPNI_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPNI_CMDID_SET_POOLS 0x200 -+#define DPNI_CMDID_GET_RX_BUFFER_LAYOUT 0x201 -+#define DPNI_CMDID_SET_RX_BUFFER_LAYOUT 0x202 -+#define DPNI_CMDID_GET_TX_BUFFER_LAYOUT 0x203 -+#define DPNI_CMDID_SET_TX_BUFFER_LAYOUT 0x204 -+#define DPNI_CMDID_SET_TX_CONF_BUFFER_LAYOUT 0x205 -+#define DPNI_CMDID_GET_TX_CONF_BUFFER_LAYOUT 0x206 -+#define DPNI_CMDID_SET_L3_CHKSUM_VALIDATION 0x207 -+#define DPNI_CMDID_GET_L3_CHKSUM_VALIDATION 0x208 -+#define DPNI_CMDID_SET_L4_CHKSUM_VALIDATION 0x209 -+#define DPNI_CMDID_GET_L4_CHKSUM_VALIDATION 0x20A -+#define DPNI_CMDID_SET_ERRORS_BEHAVIOR 0x20B -+#define DPNI_CMDID_SET_TX_CONF_REVOKE 0x20C -+ -+#define DPNI_CMDID_GET_QDID 0x210 -+#define DPNI_CMDID_GET_SP_INFO 0x211 -+#define DPNI_CMDID_GET_TX_DATA_OFFSET 0x212 -+#define DPNI_CMDID_GET_COUNTER 0x213 -+#define DPNI_CMDID_SET_COUNTER 0x214 -+#define DPNI_CMDID_GET_LINK_STATE 0x215 -+#define DPNI_CMDID_SET_MAX_FRAME_LENGTH 0x216 -+#define DPNI_CMDID_GET_MAX_FRAME_LENGTH 0x217 -+#define DPNI_CMDID_SET_MTU 0x218 -+#define DPNI_CMDID_GET_MTU 0x219 -+#define DPNI_CMDID_SET_LINK_CFG 0x21A -+#define DPNI_CMDID_SET_TX_SHAPING 0x21B -+ -+#define DPNI_CMDID_SET_MCAST_PROMISC 0x220 -+#define DPNI_CMDID_GET_MCAST_PROMISC 0x221 -+#define DPNI_CMDID_SET_UNICAST_PROMISC 0x222 -+#define DPNI_CMDID_GET_UNICAST_PROMISC 0x223 -+#define DPNI_CMDID_SET_PRIM_MAC 0x224 -+#define DPNI_CMDID_GET_PRIM_MAC 0x225 -+#define DPNI_CMDID_ADD_MAC_ADDR 0x226 -+#define DPNI_CMDID_REMOVE_MAC_ADDR 0x227 -+#define DPNI_CMDID_CLR_MAC_FILTERS 0x228 -+ -+#define DPNI_CMDID_SET_VLAN_FILTERS 0x230 -+#define DPNI_CMDID_ADD_VLAN_ID 0x231 -+#define DPNI_CMDID_REMOVE_VLAN_ID 0x232 -+#define DPNI_CMDID_CLR_VLAN_FILTERS 0x233 -+ -+#define DPNI_CMDID_SET_RX_TC_DIST 0x235 -+#define DPNI_CMDID_SET_TX_FLOW 0x236 -+#define DPNI_CMDID_GET_TX_FLOW 0x237 -+#define DPNI_CMDID_SET_RX_FLOW 0x238 -+#define DPNI_CMDID_GET_RX_FLOW 0x239 -+#define DPNI_CMDID_SET_RX_ERR_QUEUE 0x23A -+#define DPNI_CMDID_GET_RX_ERR_QUEUE 0x23B -+ -+#define DPNI_CMDID_SET_RX_TC_POLICING 0x23E -+#define DPNI_CMDID_SET_RX_TC_EARLY_DROP 0x23F -+ -+#define DPNI_CMDID_SET_QOS_TBL 0x240 -+#define DPNI_CMDID_ADD_QOS_ENT 0x241 -+#define DPNI_CMDID_REMOVE_QOS_ENT 0x242 -+#define DPNI_CMDID_CLR_QOS_TBL 0x243 -+#define DPNI_CMDID_ADD_FS_ENT 0x244 -+#define DPNI_CMDID_REMOVE_FS_ENT 0x245 -+#define DPNI_CMDID_CLR_FS_ENT 0x246 -+#define DPNI_CMDID_SET_VLAN_INSERTION 0x247 -+#define DPNI_CMDID_SET_VLAN_REMOVAL 0x248 -+#define DPNI_CMDID_SET_IPR 0x249 -+#define DPNI_CMDID_SET_IPF 0x24A -+ -+#define DPNI_CMDID_SET_TX_SELECTION 0x250 -+#define DPNI_CMDID_GET_RX_TC_POLICING 0x251 -+#define DPNI_CMDID_GET_RX_TC_EARLY_DROP 0x252 -+#define DPNI_CMDID_SET_RX_TC_CONGESTION_NOTIFICATION 0x253 -+#define 
DPNI_CMDID_GET_RX_TC_CONGESTION_NOTIFICATION 0x254 -+#define DPNI_CMDID_SET_TX_TC_CONGESTION_NOTIFICATION 0x255 -+#define DPNI_CMDID_GET_TX_TC_CONGESTION_NOTIFICATION 0x256 -+#define DPNI_CMDID_SET_TX_CONF 0x257 -+#define DPNI_CMDID_GET_TX_CONF 0x258 -+#define DPNI_CMDID_SET_TX_CONF_CONGESTION_NOTIFICATION 0x259 -+#define DPNI_CMDID_GET_TX_CONF_CONGESTION_NOTIFICATION 0x25A -+#define DPNI_CMDID_SET_TX_TC_EARLY_DROP 0x25B -+#define DPNI_CMDID_GET_TX_TC_EARLY_DROP 0x25C -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_OPEN(cmd, dpni_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpni_id) -+ -+#define DPNI_PREP_EXTENDED_CFG(ext, cfg) \ -+do { \ -+ MC_PREP_OP(ext, 0, 0, 16, uint16_t, cfg->tc_cfg[0].max_dist); \ -+ MC_PREP_OP(ext, 0, 16, 16, uint16_t, cfg->tc_cfg[0].max_fs_entries); \ -+ MC_PREP_OP(ext, 0, 32, 16, uint16_t, cfg->tc_cfg[1].max_dist); \ -+ MC_PREP_OP(ext, 0, 48, 16, uint16_t, cfg->tc_cfg[1].max_fs_entries); \ -+ MC_PREP_OP(ext, 1, 0, 16, uint16_t, cfg->tc_cfg[2].max_dist); \ -+ MC_PREP_OP(ext, 1, 16, 16, uint16_t, cfg->tc_cfg[2].max_fs_entries); \ -+ MC_PREP_OP(ext, 1, 32, 16, uint16_t, cfg->tc_cfg[3].max_dist); \ -+ MC_PREP_OP(ext, 1, 48, 16, uint16_t, cfg->tc_cfg[3].max_fs_entries); \ -+ MC_PREP_OP(ext, 2, 0, 16, uint16_t, cfg->tc_cfg[4].max_dist); \ -+ MC_PREP_OP(ext, 2, 16, 16, uint16_t, cfg->tc_cfg[4].max_fs_entries); \ -+ MC_PREP_OP(ext, 2, 32, 16, uint16_t, cfg->tc_cfg[5].max_dist); \ -+ MC_PREP_OP(ext, 2, 48, 16, uint16_t, cfg->tc_cfg[5].max_fs_entries); \ -+ MC_PREP_OP(ext, 3, 0, 16, uint16_t, cfg->tc_cfg[6].max_dist); \ -+ MC_PREP_OP(ext, 3, 16, 16, uint16_t, cfg->tc_cfg[6].max_fs_entries); \ -+ MC_PREP_OP(ext, 3, 32, 16, uint16_t, cfg->tc_cfg[7].max_dist); \ -+ MC_PREP_OP(ext, 3, 48, 16, uint16_t, cfg->tc_cfg[7].max_fs_entries); \ -+ MC_PREP_OP(ext, 4, 0, 16, uint16_t, \ -+ cfg->ipr_cfg.max_open_frames_ipv4); \ -+ MC_PREP_OP(ext, 4, 16, 16, uint16_t, \ -+ cfg->ipr_cfg.max_open_frames_ipv6); \ -+ MC_PREP_OP(ext, 4, 32, 16, uint16_t, \ -+ cfg->ipr_cfg.max_reass_frm_size); \ -+ MC_PREP_OP(ext, 5, 0, 16, uint16_t, \ -+ cfg->ipr_cfg.min_frag_size_ipv4); \ -+ MC_PREP_OP(ext, 5, 16, 16, uint16_t, \ -+ cfg->ipr_cfg.min_frag_size_ipv6); \ -+} while (0) -+ -+#define DPNI_EXT_EXTENDED_CFG(ext, cfg) \ -+do { \ -+ MC_EXT_OP(ext, 0, 0, 16, uint16_t, cfg->tc_cfg[0].max_dist); \ -+ MC_EXT_OP(ext, 0, 16, 16, uint16_t, cfg->tc_cfg[0].max_fs_entries); \ -+ MC_EXT_OP(ext, 0, 32, 16, uint16_t, cfg->tc_cfg[1].max_dist); \ -+ MC_EXT_OP(ext, 0, 48, 16, uint16_t, cfg->tc_cfg[1].max_fs_entries); \ -+ MC_EXT_OP(ext, 1, 0, 16, uint16_t, cfg->tc_cfg[2].max_dist); \ -+ MC_EXT_OP(ext, 1, 16, 16, uint16_t, cfg->tc_cfg[2].max_fs_entries); \ -+ MC_EXT_OP(ext, 1, 32, 16, uint16_t, cfg->tc_cfg[3].max_dist); \ -+ MC_EXT_OP(ext, 1, 48, 16, uint16_t, cfg->tc_cfg[3].max_fs_entries); \ -+ MC_EXT_OP(ext, 2, 0, 16, uint16_t, cfg->tc_cfg[4].max_dist); \ -+ MC_EXT_OP(ext, 2, 16, 16, uint16_t, cfg->tc_cfg[4].max_fs_entries); \ -+ MC_EXT_OP(ext, 2, 32, 16, uint16_t, cfg->tc_cfg[5].max_dist); \ -+ MC_EXT_OP(ext, 2, 48, 16, uint16_t, cfg->tc_cfg[5].max_fs_entries); \ -+ MC_EXT_OP(ext, 3, 0, 16, uint16_t, cfg->tc_cfg[6].max_dist); \ -+ MC_EXT_OP(ext, 3, 16, 16, uint16_t, cfg->tc_cfg[6].max_fs_entries); \ -+ MC_EXT_OP(ext, 3, 32, 16, uint16_t, cfg->tc_cfg[7].max_dist); \ -+ MC_EXT_OP(ext, 3, 48, 16, uint16_t, cfg->tc_cfg[7].max_fs_entries); \ -+ MC_EXT_OP(ext, 4, 0, 16, uint16_t, \ -+ cfg->ipr_cfg.max_open_frames_ipv4); \ -+ MC_EXT_OP(ext, 4, 16, 16, uint16_t, \ -+ cfg->ipr_cfg.max_open_frames_ipv6); \ 
-+ MC_EXT_OP(ext, 4, 32, 16, uint16_t, \ -+ cfg->ipr_cfg.max_reass_frm_size); \ -+ MC_EXT_OP(ext, 5, 0, 16, uint16_t, \ -+ cfg->ipr_cfg.min_frag_size_ipv4); \ -+ MC_EXT_OP(ext, 5, 16, 16, uint16_t, \ -+ cfg->ipr_cfg.min_frag_size_ipv6); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_CREATE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, cfg->adv.max_tcs); \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, cfg->adv.max_senders); \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->mac_addr[5]); \ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, cfg->mac_addr[4]); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->mac_addr[3]); \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, cfg->mac_addr[2]); \ -+ MC_CMD_OP(cmd, 0, 48, 8, uint8_t, cfg->mac_addr[1]); \ -+ MC_CMD_OP(cmd, 0, 56, 8, uint8_t, cfg->mac_addr[0]); \ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->adv.options); \ -+ MC_CMD_OP(cmd, 2, 0, 8, uint8_t, cfg->adv.max_unicast_filters); \ -+ MC_CMD_OP(cmd, 2, 8, 8, uint8_t, cfg->adv.max_multicast_filters); \ -+ MC_CMD_OP(cmd, 2, 16, 8, uint8_t, cfg->adv.max_vlan_filters); \ -+ MC_CMD_OP(cmd, 2, 24, 8, uint8_t, cfg->adv.max_qos_entries); \ -+ MC_CMD_OP(cmd, 2, 32, 8, uint8_t, cfg->adv.max_qos_key_size); \ -+ MC_CMD_OP(cmd, 2, 48, 8, uint8_t, cfg->adv.max_dist_key_size); \ -+ MC_CMD_OP(cmd, 2, 56, 8, enum net_prot, cfg->adv.start_hdr); \ -+ MC_CMD_OP(cmd, 4, 48, 8, uint8_t, cfg->adv.max_policers); \ -+ MC_CMD_OP(cmd, 4, 56, 8, uint8_t, cfg->adv.max_congestion_ctrl); \ -+ MC_CMD_OP(cmd, 5, 0, 64, uint64_t, cfg->adv.ext_cfg_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_POOLS(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, cfg->num_dpbp); \ -+ MC_CMD_OP(cmd, 0, 8, 1, int, cfg->pools[0].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 9, 1, int, cfg->pools[1].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 10, 1, int, cfg->pools[2].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 11, 1, int, cfg->pools[3].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 12, 1, int, cfg->pools[4].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 13, 1, int, cfg->pools[5].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 14, 1, int, cfg->pools[6].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 15, 1, int, cfg->pools[7].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 32, 32, int, cfg->pools[0].dpbp_id); \ -+ MC_CMD_OP(cmd, 4, 32, 16, uint16_t, cfg->pools[0].buffer_size);\ -+ MC_CMD_OP(cmd, 1, 0, 32, int, cfg->pools[1].dpbp_id); \ -+ MC_CMD_OP(cmd, 4, 48, 16, uint16_t, cfg->pools[1].buffer_size);\ -+ MC_CMD_OP(cmd, 1, 32, 32, int, cfg->pools[2].dpbp_id); \ -+ MC_CMD_OP(cmd, 5, 0, 16, uint16_t, cfg->pools[2].buffer_size);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, cfg->pools[3].dpbp_id); \ -+ MC_CMD_OP(cmd, 5, 16, 16, uint16_t, cfg->pools[3].buffer_size);\ -+ MC_CMD_OP(cmd, 2, 32, 32, int, cfg->pools[4].dpbp_id); \ -+ MC_CMD_OP(cmd, 5, 32, 16, uint16_t, cfg->pools[4].buffer_size);\ -+ MC_CMD_OP(cmd, 3, 0, 32, int, cfg->pools[5].dpbp_id); \ -+ MC_CMD_OP(cmd, 5, 48, 16, uint16_t, cfg->pools[5].buffer_size);\ -+ MC_CMD_OP(cmd, 3, 32, 32, int, cfg->pools[6].dpbp_id); \ -+ MC_CMD_OP(cmd, 6, 0, 16, uint16_t, cfg->pools[6].buffer_size);\ -+ MC_CMD_OP(cmd, 4, 0, 32, int, cfg->pools[7].dpbp_id); \ -+ MC_CMD_OP(cmd, 6, 16, 16, uint16_t, cfg->pools[7].buffer_size);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_IS_ENABLED(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ 
-+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_IRQ_ENABLE(cmd, irq_index, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, en); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_IRQ_ENABLE(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_GET_ATTR(cmd, attr) \ -+ MC_CMD_OP(cmd, 6, 0, 64, uint64_t, attr->ext_cfg_iova) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, attr->max_tcs); \ -+ MC_RSP_OP(cmd, 0, 40, 8, uint8_t, attr->max_senders); \ -+ MC_RSP_OP(cmd, 0, 48, 8, enum net_prot, attr->start_hdr); \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, attr->options); \ -+ MC_RSP_OP(cmd, 2, 0, 8, uint8_t, attr->max_unicast_filters); \ -+ MC_RSP_OP(cmd, 2, 8, 8, uint8_t, attr->max_multicast_filters);\ -+ MC_RSP_OP(cmd, 2, 16, 8, uint8_t, attr->max_vlan_filters); \ -+ MC_RSP_OP(cmd, 2, 24, 8, uint8_t, attr->max_qos_entries); \ -+ MC_RSP_OP(cmd, 2, 32, 8, uint8_t, attr->max_qos_key_size); \ -+ MC_RSP_OP(cmd, 2, 40, 8, uint8_t, attr->max_dist_key_size); \ -+ MC_RSP_OP(cmd, 4, 48, 8, uint8_t, attr->max_policers); \ -+ MC_RSP_OP(cmd, 4, 56, 8, uint8_t, attr->max_congestion_ctrl); \ -+ MC_RSP_OP(cmd, 5, 32, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 5, 48, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ 
-+#define DPNI_CMD_SET_ERRORS_BEHAVIOR(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, cfg->errors); \ -+ MC_CMD_OP(cmd, 0, 32, 4, enum dpni_error_action, cfg->error_action); \ -+ MC_CMD_OP(cmd, 0, 36, 1, int, cfg->set_frame_annotation); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_RX_BUFFER_LAYOUT(cmd, layout) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 16, uint16_t, layout->private_data_size); \ -+ MC_RSP_OP(cmd, 0, 16, 16, uint16_t, layout->data_align); \ -+ MC_RSP_OP(cmd, 1, 0, 1, int, layout->pass_timestamp); \ -+ MC_RSP_OP(cmd, 1, 1, 1, int, layout->pass_parser_result); \ -+ MC_RSP_OP(cmd, 1, 2, 1, int, layout->pass_frame_status); \ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, layout->data_head_room); \ -+ MC_RSP_OP(cmd, 1, 32, 16, uint16_t, layout->data_tail_room); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_RX_BUFFER_LAYOUT(cmd, layout) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, layout->private_data_size); \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, layout->data_align); \ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, layout->options); \ -+ MC_CMD_OP(cmd, 1, 0, 1, int, layout->pass_timestamp); \ -+ MC_CMD_OP(cmd, 1, 1, 1, int, layout->pass_parser_result); \ -+ MC_CMD_OP(cmd, 1, 2, 1, int, layout->pass_frame_status); \ -+ MC_CMD_OP(cmd, 1, 16, 16, uint16_t, layout->data_head_room); \ -+ MC_CMD_OP(cmd, 1, 32, 16, uint16_t, layout->data_tail_room); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_TX_BUFFER_LAYOUT(cmd, layout) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 16, uint16_t, layout->private_data_size); \ -+ MC_RSP_OP(cmd, 0, 16, 16, uint16_t, layout->data_align); \ -+ MC_RSP_OP(cmd, 1, 0, 1, int, layout->pass_timestamp); \ -+ MC_RSP_OP(cmd, 1, 1, 1, int, layout->pass_parser_result); \ -+ MC_RSP_OP(cmd, 1, 2, 1, int, layout->pass_frame_status); \ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, layout->data_head_room); \ -+ MC_RSP_OP(cmd, 1, 32, 16, uint16_t, layout->data_tail_room); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_TX_BUFFER_LAYOUT(cmd, layout) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, layout->private_data_size); \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, layout->data_align); \ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, layout->options); \ -+ MC_CMD_OP(cmd, 1, 0, 1, int, layout->pass_timestamp); \ -+ MC_CMD_OP(cmd, 1, 1, 1, int, layout->pass_parser_result); \ -+ MC_CMD_OP(cmd, 1, 2, 1, int, layout->pass_frame_status); \ -+ MC_CMD_OP(cmd, 1, 16, 16, uint16_t, layout->data_head_room); \ -+ MC_CMD_OP(cmd, 1, 32, 16, uint16_t, layout->data_tail_room); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_TX_CONF_BUFFER_LAYOUT(cmd, layout) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 16, uint16_t, layout->private_data_size); \ -+ MC_RSP_OP(cmd, 0, 16, 16, uint16_t, layout->data_align); \ -+ MC_RSP_OP(cmd, 1, 0, 1, int, layout->pass_timestamp); \ -+ MC_RSP_OP(cmd, 1, 1, 1, int, layout->pass_parser_result); \ -+ MC_RSP_OP(cmd, 1, 2, 1, int, layout->pass_frame_status); \ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, layout->data_head_room); \ -+ MC_RSP_OP(cmd, 1, 32, 16, uint16_t, layout->data_tail_room); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_TX_CONF_BUFFER_LAYOUT(cmd, layout) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, layout->private_data_size); \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, layout->data_align); \ -+ MC_CMD_OP(cmd, 0, 32, 32, 
uint32_t, layout->options); \ -+ MC_CMD_OP(cmd, 1, 0, 1, int, layout->pass_timestamp); \ -+ MC_CMD_OP(cmd, 1, 1, 1, int, layout->pass_parser_result); \ -+ MC_CMD_OP(cmd, 1, 2, 1, int, layout->pass_frame_status); \ -+ MC_CMD_OP(cmd, 1, 16, 16, uint16_t, layout->data_head_room); \ -+ MC_CMD_OP(cmd, 1, 32, 16, uint16_t, layout->data_tail_room); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_L3_CHKSUM_VALIDATION(cmd, en) \ -+ MC_CMD_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_L3_CHKSUM_VALIDATION(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_L4_CHKSUM_VALIDATION(cmd, en) \ -+ MC_CMD_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_L4_CHKSUM_VALIDATION(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_QDID(cmd, qdid) \ -+ MC_RSP_OP(cmd, 0, 0, 16, uint16_t, qdid) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_SP_INFO(cmd, sp_info) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 16, uint16_t, sp_info->spids[0]); \ -+ MC_RSP_OP(cmd, 0, 16, 16, uint16_t, sp_info->spids[1]); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_TX_DATA_OFFSET(cmd, data_offset) \ -+ MC_RSP_OP(cmd, 0, 0, 16, uint16_t, data_offset) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_GET_COUNTER(cmd, counter) \ -+ MC_CMD_OP(cmd, 0, 0, 16, enum dpni_counter, counter) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_COUNTER(cmd, value) \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, value) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_COUNTER(cmd, counter, value) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, enum dpni_counter, counter); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, value); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_LINK_CFG(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->rate);\ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->options);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_LINK_STATE(cmd, state) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 32, 1, int, state->up);\ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, state->rate);\ -+ MC_RSP_OP(cmd, 2, 0, 64, uint64_t, state->options);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_TX_SHAPING(cmd, tx_shaper) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, tx_shaper->max_burst_size);\ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, tx_shaper->rate_limit);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_MAX_FRAME_LENGTH(cmd, max_frame_length) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, max_frame_length) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_MAX_FRAME_LENGTH(cmd, max_frame_length) \ -+ MC_RSP_OP(cmd, 0, 0, 16, uint16_t, max_frame_length) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_MTU(cmd, mtu) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, mtu) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_MTU(cmd, mtu) \ -+ MC_RSP_OP(cmd, 0, 0, 16, uint16_t, mtu) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_MULTICAST_PROMISC(cmd, en) \ -+ MC_CMD_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, 
param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_MULTICAST_PROMISC(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_UNICAST_PROMISC(cmd, en) \ -+ MC_CMD_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_UNICAST_PROMISC(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_PRIMARY_MAC_ADDR(cmd, mac_addr) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, mac_addr[5]); \ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, mac_addr[4]); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, mac_addr[3]); \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, mac_addr[2]); \ -+ MC_CMD_OP(cmd, 0, 48, 8, uint8_t, mac_addr[1]); \ -+ MC_CMD_OP(cmd, 0, 56, 8, uint8_t, mac_addr[0]); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_PRIMARY_MAC_ADDR(cmd, mac_addr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 16, 8, uint8_t, mac_addr[5]); \ -+ MC_RSP_OP(cmd, 0, 24, 8, uint8_t, mac_addr[4]); \ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, mac_addr[3]); \ -+ MC_RSP_OP(cmd, 0, 40, 8, uint8_t, mac_addr[2]); \ -+ MC_RSP_OP(cmd, 0, 48, 8, uint8_t, mac_addr[1]); \ -+ MC_RSP_OP(cmd, 0, 56, 8, uint8_t, mac_addr[0]); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_ADD_MAC_ADDR(cmd, mac_addr) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, mac_addr[5]); \ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, mac_addr[4]); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, mac_addr[3]); \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, mac_addr[2]); \ -+ MC_CMD_OP(cmd, 0, 48, 8, uint8_t, mac_addr[1]); \ -+ MC_CMD_OP(cmd, 0, 56, 8, uint8_t, mac_addr[0]); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_REMOVE_MAC_ADDR(cmd, mac_addr) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, mac_addr[5]); \ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, mac_addr[4]); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, mac_addr[3]); \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, mac_addr[2]); \ -+ MC_CMD_OP(cmd, 0, 48, 8, uint8_t, mac_addr[1]); \ -+ MC_CMD_OP(cmd, 0, 56, 8, uint8_t, mac_addr[0]); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_CLEAR_MAC_FILTERS(cmd, unicast, multicast) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 1, int, unicast); \ -+ MC_CMD_OP(cmd, 0, 1, 1, int, multicast); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_VLAN_FILTERS(cmd, en) \ -+ MC_CMD_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_ADD_VLAN_ID(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 32, 16, uint16_t, vlan_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_REMOVE_VLAN_ID(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 32, 16, uint16_t, vlan_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_TX_SELECTION(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, cfg->tc_sched[0].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 0, 16, 4, enum dpni_tx_schedule_mode, \ -+ cfg->tc_sched[0].mode); \ -+ MC_CMD_OP(cmd, 0, 32, 16, uint16_t, cfg->tc_sched[1].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 0, 48, 4, enum dpni_tx_schedule_mode, \ -+ cfg->tc_sched[1].mode); \ -+ MC_CMD_OP(cmd, 1, 0, 16, uint16_t, cfg->tc_sched[2].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 1, 16, 4, enum dpni_tx_schedule_mode, \ -+ cfg->tc_sched[2].mode); \ -+ MC_CMD_OP(cmd, 1, 32, 16, uint16_t, cfg->tc_sched[3].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 1, 
48, 4, enum dpni_tx_schedule_mode, \ -+ cfg->tc_sched[3].mode); \ -+ MC_CMD_OP(cmd, 2, 0, 16, uint16_t, cfg->tc_sched[4].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 2, 16, 4, enum dpni_tx_schedule_mode, \ -+ cfg->tc_sched[4].mode); \ -+ MC_CMD_OP(cmd, 2, 32, 16, uint16_t, cfg->tc_sched[5].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 2, 48, 4, enum dpni_tx_schedule_mode, \ -+ cfg->tc_sched[5].mode); \ -+ MC_CMD_OP(cmd, 3, 0, 16, uint16_t, cfg->tc_sched[6].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 3, 16, 4, enum dpni_tx_schedule_mode, \ -+ cfg->tc_sched[6].mode); \ -+ MC_CMD_OP(cmd, 3, 32, 16, uint16_t, cfg->tc_sched[7].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 3, 48, 4, enum dpni_tx_schedule_mode, \ -+ cfg->tc_sched[7].mode); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_RX_TC_DIST(cmd, tc_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, cfg->dist_size); \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 0, 24, 4, enum dpni_dist_mode, cfg->dist_mode); \ -+ MC_CMD_OP(cmd, 0, 28, 4, enum dpni_fs_miss_action, \ -+ cfg->fs_cfg.miss_action); \ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, cfg->fs_cfg.default_flow_id); \ -+ MC_CMD_OP(cmd, 6, 0, 64, uint64_t, cfg->key_cfg_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_TX_FLOW(cmd, flow_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 43, 1, int, cfg->l3_chksum_gen);\ -+ MC_CMD_OP(cmd, 0, 44, 1, int, cfg->l4_chksum_gen);\ -+ MC_CMD_OP(cmd, 0, 45, 1, int, cfg->use_common_tx_conf_queue);\ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, flow_id);\ -+ MC_CMD_OP(cmd, 2, 0, 32, uint32_t, cfg->options);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_SET_TX_FLOW(cmd, flow_id) \ -+ MC_RSP_OP(cmd, 0, 48, 16, uint16_t, flow_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_GET_TX_FLOW(cmd, flow_id) \ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, flow_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_TX_FLOW(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 43, 1, int, attr->l3_chksum_gen);\ -+ MC_RSP_OP(cmd, 0, 44, 1, int, attr->l4_chksum_gen);\ -+ MC_RSP_OP(cmd, 0, 45, 1, int, attr->use_common_tx_conf_queue);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_RX_FLOW(cmd, tc_id, flow_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, cfg->dest_cfg.dest_id); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->dest_cfg.priority);\ -+ MC_CMD_OP(cmd, 0, 40, 2, enum dpni_dest, cfg->dest_cfg.dest_type);\ -+ MC_CMD_OP(cmd, 0, 42, 1, int, cfg->order_preservation_en);\ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, flow_id); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->user_ctx); \ -+ MC_CMD_OP(cmd, 2, 16, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 2, 32, 32, uint32_t, cfg->options); \ -+ MC_CMD_OP(cmd, 3, 0, 4, enum dpni_flc_type, cfg->flc_cfg.flc_type); \ -+ MC_CMD_OP(cmd, 3, 4, 4, enum dpni_stash_size, \ -+ cfg->flc_cfg.frame_data_size);\ -+ MC_CMD_OP(cmd, 3, 8, 4, enum dpni_stash_size, \ -+ cfg->flc_cfg.flow_context_size);\ -+ MC_CMD_OP(cmd, 3, 32, 32, uint32_t, cfg->flc_cfg.options);\ -+ MC_CMD_OP(cmd, 4, 0, 64, uint64_t, cfg->flc_cfg.flow_context);\ -+ MC_CMD_OP(cmd, 5, 0, 32, uint32_t, cfg->tail_drop_threshold); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_GET_RX_FLOW(cmd, tc_id, flow_id) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, flow_id); \ -+} while (0) -+ -+/* cmd, param, offset, 
width, type, arg_name */ -+#define DPNI_RSP_GET_RX_FLOW(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->dest_cfg.dest_id); \ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, attr->dest_cfg.priority);\ -+ MC_RSP_OP(cmd, 0, 40, 2, enum dpni_dest, attr->dest_cfg.dest_type); \ -+ MC_RSP_OP(cmd, 0, 42, 1, int, attr->order_preservation_en);\ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, attr->user_ctx); \ -+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, attr->tail_drop_threshold); \ -+ MC_RSP_OP(cmd, 2, 32, 32, uint32_t, attr->fqid); \ -+ MC_RSP_OP(cmd, 3, 0, 4, enum dpni_flc_type, attr->flc_cfg.flc_type); \ -+ MC_RSP_OP(cmd, 3, 4, 4, enum dpni_stash_size, \ -+ attr->flc_cfg.frame_data_size);\ -+ MC_RSP_OP(cmd, 3, 8, 4, enum dpni_stash_size, \ -+ attr->flc_cfg.flow_context_size);\ -+ MC_RSP_OP(cmd, 3, 32, 32, uint32_t, attr->flc_cfg.options);\ -+ MC_RSP_OP(cmd, 4, 0, 64, uint64_t, attr->flc_cfg.flow_context);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_RX_ERR_QUEUE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, cfg->dest_cfg.dest_id); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->dest_cfg.priority);\ -+ MC_CMD_OP(cmd, 0, 40, 2, enum dpni_dest, cfg->dest_cfg.dest_type);\ -+ MC_CMD_OP(cmd, 0, 42, 1, int, cfg->order_preservation_en);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->user_ctx); \ -+ MC_CMD_OP(cmd, 2, 0, 32, uint32_t, cfg->options); \ -+ MC_CMD_OP(cmd, 2, 32, 32, uint32_t, cfg->tail_drop_threshold); \ -+ MC_CMD_OP(cmd, 3, 0, 4, enum dpni_flc_type, cfg->flc_cfg.flc_type); \ -+ MC_CMD_OP(cmd, 3, 4, 4, enum dpni_stash_size, \ -+ cfg->flc_cfg.frame_data_size);\ -+ MC_CMD_OP(cmd, 3, 8, 4, enum dpni_stash_size, \ -+ cfg->flc_cfg.flow_context_size);\ -+ MC_CMD_OP(cmd, 3, 32, 32, uint32_t, cfg->flc_cfg.options);\ -+ MC_CMD_OP(cmd, 4, 0, 64, uint64_t, cfg->flc_cfg.flow_context);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_RX_ERR_QUEUE(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->dest_cfg.dest_id); \ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, attr->dest_cfg.priority);\ -+ MC_RSP_OP(cmd, 0, 40, 2, enum dpni_dest, attr->dest_cfg.dest_type);\ -+ MC_RSP_OP(cmd, 0, 42, 1, int, attr->order_preservation_en);\ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, attr->user_ctx); \ -+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, attr->tail_drop_threshold); \ -+ MC_RSP_OP(cmd, 2, 32, 32, uint32_t, attr->fqid); \ -+ MC_RSP_OP(cmd, 3, 0, 4, enum dpni_flc_type, attr->flc_cfg.flc_type); \ -+ MC_RSP_OP(cmd, 3, 4, 4, enum dpni_stash_size, \ -+ attr->flc_cfg.frame_data_size);\ -+ MC_RSP_OP(cmd, 3, 8, 4, enum dpni_stash_size, \ -+ attr->flc_cfg.flow_context_size);\ -+ MC_RSP_OP(cmd, 3, 32, 32, uint32_t, attr->flc_cfg.options);\ -+ MC_RSP_OP(cmd, 4, 0, 64, uint64_t, attr->flc_cfg.flow_context);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_TX_CONF_REVOKE(cmd, revoke) \ -+ MC_CMD_OP(cmd, 0, 0, 1, int, revoke) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_QOS_TABLE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->default_tc); \ -+ MC_CMD_OP(cmd, 0, 40, 1, int, cfg->discard_on_miss); \ -+ MC_CMD_OP(cmd, 6, 0, 64, uint64_t, cfg->key_cfg_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_ADD_QOS_ENTRY(cmd, cfg, tc_id) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, cfg->key_size); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->key_iova); \ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, 
cfg->mask_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_REMOVE_QOS_ENTRY(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, cfg->key_size); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->key_iova); \ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->mask_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_ADD_FS_ENTRY(cmd, tc_id, cfg, flow_id) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, flow_id); \ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, cfg->key_size); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->key_iova); \ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->mask_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_REMOVE_FS_ENTRY(cmd, tc_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, cfg->key_size); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->key_iova); \ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->mask_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_CLEAR_FS_ENTRIES(cmd, tc_id) \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_VLAN_INSERTION(cmd, en) \ -+ MC_CMD_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_VLAN_REMOVAL(cmd, en) \ -+ MC_CMD_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_IPR(cmd, en) \ -+ MC_CMD_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_IPF(cmd, en) \ -+ MC_CMD_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_RX_TC_POLICING(cmd, tc_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 4, enum dpni_policer_mode, cfg->mode); \ -+ MC_CMD_OP(cmd, 0, 4, 4, enum dpni_policer_color, cfg->default_color); \ -+ MC_CMD_OP(cmd, 0, 8, 4, enum dpni_policer_unit, cfg->units); \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, cfg->options); \ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->cir); \ -+ MC_CMD_OP(cmd, 1, 32, 32, uint32_t, cfg->cbs); \ -+ MC_CMD_OP(cmd, 2, 0, 32, uint32_t, cfg->eir); \ -+ MC_CMD_OP(cmd, 2, 32, 32, uint32_t, cfg->ebs);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_GET_RX_TC_POLICING(cmd, tc_id) \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_RSP_GET_RX_TC_POLICING(cmd, cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 4, enum dpni_policer_mode, cfg->mode); \ -+ MC_RSP_OP(cmd, 0, 4, 4, enum dpni_policer_color, cfg->default_color); \ -+ MC_RSP_OP(cmd, 0, 8, 4, enum dpni_policer_unit, cfg->units); \ -+ MC_RSP_OP(cmd, 0, 32, 32, uint32_t, cfg->options); \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, cfg->cir); \ -+ MC_RSP_OP(cmd, 1, 32, 32, uint32_t, cfg->cbs); \ -+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, cfg->eir); \ -+ MC_RSP_OP(cmd, 2, 32, 32, uint32_t, cfg->ebs);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_PREP_EARLY_DROP(ext, cfg) \ -+do { \ -+ MC_PREP_OP(ext, 0, 0, 2, enum dpni_early_drop_mode, cfg->mode); \ -+ MC_PREP_OP(ext, 0, 2, 2, \ -+ enum dpni_congestion_unit, cfg->units); \ -+ MC_PREP_OP(ext, 0, 32, 32, uint32_t, cfg->tail_drop_threshold); \ -+ MC_PREP_OP(ext, 1, 0, 8, uint8_t, cfg->green.drop_probability); \ -+ MC_PREP_OP(ext, 2, 0, 64, 
uint64_t, cfg->green.max_threshold); \ -+ MC_PREP_OP(ext, 3, 0, 64, uint64_t, cfg->green.min_threshold); \ -+ MC_PREP_OP(ext, 5, 0, 8, uint8_t, cfg->yellow.drop_probability);\ -+ MC_PREP_OP(ext, 6, 0, 64, uint64_t, cfg->yellow.max_threshold); \ -+ MC_PREP_OP(ext, 7, 0, 64, uint64_t, cfg->yellow.min_threshold); \ -+ MC_PREP_OP(ext, 9, 0, 8, uint8_t, cfg->red.drop_probability); \ -+ MC_PREP_OP(ext, 10, 0, 64, uint64_t, cfg->red.max_threshold); \ -+ MC_PREP_OP(ext, 11, 0, 64, uint64_t, cfg->red.min_threshold); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_EXT_EARLY_DROP(ext, cfg) \ -+do { \ -+ MC_EXT_OP(ext, 0, 0, 2, enum dpni_early_drop_mode, cfg->mode); \ -+ MC_EXT_OP(ext, 0, 2, 2, \ -+ enum dpni_congestion_unit, cfg->units); \ -+ MC_EXT_OP(ext, 0, 32, 32, uint32_t, cfg->tail_drop_threshold); \ -+ MC_EXT_OP(ext, 1, 0, 8, uint8_t, cfg->green.drop_probability); \ -+ MC_EXT_OP(ext, 2, 0, 64, uint64_t, cfg->green.max_threshold); \ -+ MC_EXT_OP(ext, 3, 0, 64, uint64_t, cfg->green.min_threshold); \ -+ MC_EXT_OP(ext, 5, 0, 8, uint8_t, cfg->yellow.drop_probability);\ -+ MC_EXT_OP(ext, 6, 0, 64, uint64_t, cfg->yellow.max_threshold); \ -+ MC_EXT_OP(ext, 7, 0, 64, uint64_t, cfg->yellow.min_threshold); \ -+ MC_EXT_OP(ext, 9, 0, 8, uint8_t, cfg->red.drop_probability); \ -+ MC_EXT_OP(ext, 10, 0, 64, uint64_t, cfg->red.max_threshold); \ -+ MC_EXT_OP(ext, 11, 0, 64, uint64_t, cfg->red.min_threshold); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_RX_TC_EARLY_DROP(cmd, tc_id, early_drop_iova) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, early_drop_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_GET_RX_TC_EARLY_DROP(cmd, tc_id, early_drop_iova) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, early_drop_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_SET_TX_TC_EARLY_DROP(cmd, tc_id, early_drop_iova) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, early_drop_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPNI_CMD_GET_TX_TC_EARLY_DROP(cmd, tc_id, early_drop_iova) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, early_drop_iova); \ -+} while (0) -+ -+#define DPNI_CMD_SET_RX_TC_CONGESTION_NOTIFICATION(cmd, tc_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 2, enum dpni_congestion_unit, cfg->units); \ -+ MC_CMD_OP(cmd, 0, 4, 4, enum dpni_dest, cfg->dest_cfg.dest_type); \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->dest_cfg.priority); \ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->threshold_entry); \ -+ MC_CMD_OP(cmd, 1, 32, 32, uint32_t, cfg->threshold_exit); \ -+ MC_CMD_OP(cmd, 2, 0, 16, uint16_t, cfg->options); \ -+ MC_CMD_OP(cmd, 2, 32, 32, int, cfg->dest_cfg.dest_id); \ -+ MC_CMD_OP(cmd, 3, 0, 64, uint64_t, cfg->message_ctx); \ -+ MC_CMD_OP(cmd, 4, 0, 64, uint64_t, cfg->message_iova); \ -+} while (0) -+ -+#define DPNI_CMD_GET_RX_TC_CONGESTION_NOTIFICATION(cmd, tc_id) \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc_id) -+ -+#define DPNI_RSP_GET_RX_TC_CONGESTION_NOTIFICATION(cmd, cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 2, enum dpni_congestion_unit, cfg->units); \ -+ MC_RSP_OP(cmd, 0, 4, 4, enum dpni_dest, cfg->dest_cfg.dest_type); \ -+ MC_RSP_OP(cmd, 0, 16, 8, uint8_t, 
cfg->dest_cfg.priority); \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, cfg->threshold_entry); \ -+ MC_RSP_OP(cmd, 1, 32, 32, uint32_t, cfg->threshold_exit); \ -+ MC_RSP_OP(cmd, 2, 0, 16, uint16_t, cfg->options); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, cfg->dest_cfg.dest_id); \ -+ MC_RSP_OP(cmd, 3, 0, 64, uint64_t, cfg->message_ctx); \ -+ MC_RSP_OP(cmd, 4, 0, 64, uint64_t, cfg->message_iova); \ -+} while (0) -+ -+#define DPNI_CMD_SET_TX_TC_CONGESTION_NOTIFICATION(cmd, tc_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 2, enum dpni_congestion_unit, cfg->units); \ -+ MC_CMD_OP(cmd, 0, 4, 4, enum dpni_dest, cfg->dest_cfg.dest_type); \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->dest_cfg.priority); \ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->threshold_entry); \ -+ MC_CMD_OP(cmd, 1, 32, 32, uint32_t, cfg->threshold_exit); \ -+ MC_CMD_OP(cmd, 2, 0, 16, uint16_t, cfg->options); \ -+ MC_CMD_OP(cmd, 2, 32, 32, int, cfg->dest_cfg.dest_id); \ -+ MC_CMD_OP(cmd, 3, 0, 64, uint64_t, cfg->message_ctx); \ -+ MC_CMD_OP(cmd, 4, 0, 64, uint64_t, cfg->message_iova); \ -+} while (0) -+ -+#define DPNI_CMD_GET_TX_TC_CONGESTION_NOTIFICATION(cmd, tc_id) \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc_id) -+ -+#define DPNI_RSP_GET_TX_TC_CONGESTION_NOTIFICATION(cmd, cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 2, enum dpni_congestion_unit, cfg->units); \ -+ MC_RSP_OP(cmd, 0, 4, 4, enum dpni_dest, cfg->dest_cfg.dest_type); \ -+ MC_RSP_OP(cmd, 0, 16, 8, uint8_t, cfg->dest_cfg.priority); \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, cfg->threshold_entry); \ -+ MC_RSP_OP(cmd, 1, 32, 32, uint32_t, cfg->threshold_exit); \ -+ MC_RSP_OP(cmd, 2, 0, 16, uint16_t, cfg->options); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, cfg->dest_cfg.dest_id); \ -+ MC_RSP_OP(cmd, 3, 0, 64, uint64_t, cfg->message_ctx); \ -+ MC_RSP_OP(cmd, 4, 0, 64, uint64_t, cfg->message_iova); \ -+} while (0) -+ -+#define DPNI_CMD_SET_TX_CONF(cmd, flow_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->queue_cfg.dest_cfg.priority); \ -+ MC_CMD_OP(cmd, 0, 40, 2, enum dpni_dest, \ -+ cfg->queue_cfg.dest_cfg.dest_type); \ -+ MC_CMD_OP(cmd, 0, 42, 1, int, cfg->errors_only); \ -+ MC_CMD_OP(cmd, 0, 46, 1, int, cfg->queue_cfg.order_preservation_en); \ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, flow_id); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->queue_cfg.user_ctx); \ -+ MC_CMD_OP(cmd, 2, 0, 32, uint32_t, cfg->queue_cfg.options); \ -+ MC_CMD_OP(cmd, 2, 32, 32, int, cfg->queue_cfg.dest_cfg.dest_id); \ -+ MC_CMD_OP(cmd, 3, 0, 32, uint32_t, \ -+ cfg->queue_cfg.tail_drop_threshold); \ -+ MC_CMD_OP(cmd, 4, 0, 4, enum dpni_flc_type, \ -+ cfg->queue_cfg.flc_cfg.flc_type); \ -+ MC_CMD_OP(cmd, 4, 4, 4, enum dpni_stash_size, \ -+ cfg->queue_cfg.flc_cfg.frame_data_size); \ -+ MC_CMD_OP(cmd, 4, 8, 4, enum dpni_stash_size, \ -+ cfg->queue_cfg.flc_cfg.flow_context_size); \ -+ MC_CMD_OP(cmd, 4, 32, 32, uint32_t, cfg->queue_cfg.flc_cfg.options); \ -+ MC_CMD_OP(cmd, 5, 0, 64, uint64_t, \ -+ cfg->queue_cfg.flc_cfg.flow_context); \ -+} while (0) -+ -+#define DPNI_CMD_GET_TX_CONF(cmd, flow_id) \ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, flow_id) -+ -+#define DPNI_RSP_GET_TX_CONF(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, \ -+ attr->queue_attr.dest_cfg.priority); \ -+ MC_RSP_OP(cmd, 0, 40, 2, enum dpni_dest, \ -+ attr->queue_attr.dest_cfg.dest_type); \ -+ MC_RSP_OP(cmd, 0, 42, 1, int, attr->errors_only); \ -+ MC_RSP_OP(cmd, 0, 46, 1, int, \ -+ attr->queue_attr.order_preservation_en); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, attr->queue_attr.user_ctx); \ -+ 
MC_RSP_OP(cmd, 2, 32, 32, int, attr->queue_attr.dest_cfg.dest_id); \ -+ MC_RSP_OP(cmd, 3, 0, 32, uint32_t, \ -+ attr->queue_attr.tail_drop_threshold); \ -+ MC_RSP_OP(cmd, 3, 32, 32, uint32_t, attr->queue_attr.fqid); \ -+ MC_RSP_OP(cmd, 4, 0, 4, enum dpni_flc_type, \ -+ attr->queue_attr.flc_cfg.flc_type); \ -+ MC_RSP_OP(cmd, 4, 4, 4, enum dpni_stash_size, \ -+ attr->queue_attr.flc_cfg.frame_data_size); \ -+ MC_RSP_OP(cmd, 4, 8, 4, enum dpni_stash_size, \ -+ attr->queue_attr.flc_cfg.flow_context_size); \ -+ MC_RSP_OP(cmd, 4, 32, 32, uint32_t, attr->queue_attr.flc_cfg.options); \ -+ MC_RSP_OP(cmd, 5, 0, 64, uint64_t, \ -+ attr->queue_attr.flc_cfg.flow_context); \ -+} while (0) -+ -+#define DPNI_CMD_SET_TX_CONF_CONGESTION_NOTIFICATION(cmd, flow_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 2, enum dpni_congestion_unit, cfg->units); \ -+ MC_CMD_OP(cmd, 0, 4, 4, enum dpni_dest, cfg->dest_cfg.dest_type); \ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->dest_cfg.priority); \ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, flow_id); \ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->threshold_entry); \ -+ MC_CMD_OP(cmd, 1, 32, 32, uint32_t, cfg->threshold_exit); \ -+ MC_CMD_OP(cmd, 2, 0, 16, uint16_t, cfg->options); \ -+ MC_CMD_OP(cmd, 2, 32, 32, int, cfg->dest_cfg.dest_id); \ -+ MC_CMD_OP(cmd, 3, 0, 64, uint64_t, cfg->message_ctx); \ -+ MC_CMD_OP(cmd, 4, 0, 64, uint64_t, cfg->message_iova); \ -+} while (0) -+ -+#define DPNI_CMD_GET_TX_CONF_CONGESTION_NOTIFICATION(cmd, flow_id) \ -+ MC_CMD_OP(cmd, 0, 48, 16, uint16_t, flow_id) -+ -+#define DPNI_RSP_GET_TX_CONF_CONGESTION_NOTIFICATION(cmd, cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 2, enum dpni_congestion_unit, cfg->units); \ -+ MC_RSP_OP(cmd, 0, 4, 4, enum dpni_dest, cfg->dest_cfg.dest_type); \ -+ MC_RSP_OP(cmd, 0, 16, 8, uint8_t, cfg->dest_cfg.priority); \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, cfg->threshold_entry); \ -+ MC_RSP_OP(cmd, 1, 32, 32, uint32_t, cfg->threshold_exit); \ -+ MC_RSP_OP(cmd, 2, 0, 16, uint16_t, cfg->options); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, cfg->dest_cfg.dest_id); \ -+ MC_RSP_OP(cmd, 3, 0, 64, uint64_t, cfg->message_ctx); \ -+ MC_RSP_OP(cmd, 4, 0, 64, uint64_t, cfg->message_iova); \ -+} while (0) -+ -+#endif /* _FSL_DPNI_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dprc.h b/drivers/net/dpaa2/mc/fsl_dprc.h -new file mode 100644 -index 0000000..c831f46 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dprc.h -@@ -0,0 +1,1032 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. 
-+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef _FSL_DPRC_H -+#define _FSL_DPRC_H -+ -+/* Data Path Resource Container API -+ * Contains DPRC API for managing and querying DPAA resources -+ */ -+ -+struct fsl_mc_io; -+ -+/** -+ * Set this value as the icid value in dprc_cfg structure when creating a -+ * container, in case the ICID is not selected by the user and should be -+ * allocated by the DPRC from the pool of ICIDs. -+ */ -+#define DPRC_GET_ICID_FROM_POOL (uint16_t)(~(0)) -+ -+/** -+ * Set this value as the portal_id value in dprc_cfg structure when creating a -+ * container, in case the portal ID is not specifically selected by the -+ * user and should be allocated by the DPRC from the pool of portal ids. -+ */ -+#define DPRC_GET_PORTAL_ID_FROM_POOL (int)(~(0)) -+ -+/** -+ * dprc_get_container_id() - Get container ID associated with a given portal. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @container_id: Requested container ID -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprc_get_container_id(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int *container_id); -+ -+/** -+ * dprc_open() - Open DPRC object for use -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @container_id: Container ID to open -+ * @token: Returned token of DPRC object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ * -+ * @warning Required before any operation on the object. -+ */ -+int dprc_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int container_id, -+ uint16_t *token); -+ -+/** -+ * dprc_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRC object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprc_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * Container general options -+ * -+ * These options may be selected at container creation by the container creator -+ * and can be retrieved using dprc_get_attributes() -+ */ -+ -+/** -+ * Spawn Policy Option allowed - Indicates that the new container is allowed -+ * to spawn and have its own child containers. 
-+ */ -+#define DPRC_CFG_OPT_SPAWN_ALLOWED 0x00000001 -+ -+/** -+ * General Container allocation policy - Indicates that the new container is -+ * allowed to allocate requested resources from its parent container; if not -+ * set, the container is only allowed to use resources in its own pools. Note -+ * that this is a container's global policy, but the parent container may -+ * override it and set specific quota per resource type. -+ */ -+#define DPRC_CFG_OPT_ALLOC_ALLOWED 0x00000002 -+ -+/** -+ * Object initialization allowed - software context associated with this -+ * container is allowed to invoke object initialization operations. -+ */ -+#define DPRC_CFG_OPT_OBJ_CREATE_ALLOWED 0x00000004 -+ -+/** -+ * Topology change allowed - software context associated with this -+ * container is allowed to invoke topology operations, such as attach/detach -+ * of network objects. -+ */ -+#define DPRC_CFG_OPT_TOPOLOGY_CHANGES_ALLOWED 0x00000008 -+ -+/** -+ * AIOP - Indicates that the container belongs to AIOP. -+ */ -+#define DPRC_CFG_OPT_AIOP 0x00000020 -+ -+/** -+ * IRQ Config - Indicates that the container is allowed to configure its IRQs. -+ */ -+#define DPRC_CFG_OPT_IRQ_CFG_ALLOWED 0x00000040 -+ -+/** -+ * struct dprc_cfg - Container configuration options -+ * @icid: Container's ICID; if set to 'DPRC_GET_ICID_FROM_POOL', a free -+ * ICID value is allocated by the DPRC -+ * @portal_id: Portal ID; if set to 'DPRC_GET_PORTAL_ID_FROM_POOL', a free -+ * portal ID is allocated by the DPRC -+ * @options: Combination of 'DPRC_CFG_OPT_' options -+ * @label: Object's label -+ */ -+struct dprc_cfg { -+ uint16_t icid; -+ int portal_id; -+ uint64_t options; -+ char label[16]; -+}; -+ -+/** -+ * dprc_create_container() - Create child container -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRC object -+ * @cfg: Child container configuration -+ * @child_container_id: Returned child container ID -+ * @child_portal_offset: Returned child portal offset from MC portal base -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprc_create_container(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dprc_cfg *cfg, -+ int *child_container_id, -+ uint64_t *child_portal_offset); -+ -+/** -+ * dprc_destroy_container() - Destroy child container. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRC object -+ * @child_container_id: ID of the container to destroy -+ * -+ * This function terminates the child container, so following this call the -+ * child container ID becomes invalid. -+ * -+ * Notes: -+ * - All resources and objects of the destroyed container are returned to the -+ * parent container, or destroyed if they were created by the destroyed -+ * container. -+ * - This function destroys all the child containers of the specified -+ * container prior to destroying the container itself. -+ * -+ * warning: Only the parent container is allowed to destroy a child container; -+ * Container 0 can't be destroyed -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprc_destroy_container(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int child_container_id); -+ -+/** -+ * dprc_reset_container() - Reset child container.
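A sketch of the create/destroy round trip above; the opened parent 'token' and the zero 'cmd_flags' are hypothetical placeholders.

static int child_container_sketch(struct fsl_mc_io *mc_io, uint16_t token)
{
	struct dprc_cfg cfg = {
		.icid = DPRC_GET_ICID_FROM_POOL,	/* let the DPRC pick */
		.portal_id = DPRC_GET_PORTAL_ID_FROM_POOL,
		.options = DPRC_CFG_OPT_ALLOC_ALLOWED |
			   DPRC_CFG_OPT_OBJ_CREATE_ALLOWED,
		.label = "child0",
	};
	int child_id;
	uint64_t portal_offset;
	int err;

	err = dprc_create_container(mc_io, 0, token, &cfg,
				    &child_id, &portal_offset);
	if (err)
		return err;
	/* ... the child lives here; only the parent may destroy it ... */
	return dprc_destroy_container(mc_io, 0, token, child_id);
}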
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRC object -+ * @child_container_id: ID of the container to reset -+ * -+ * In case a software context crashes or becomes non-responsive, the parent -+ * may wish to reset its resource container before the software context is -+ * restarted. -+ * -+ * This routine informs all objects assigned to the child container that the -+ * container is being reset, so they may perform any cleanup operations that are -+ * needed. All object handles that were owned by the child container shall be -+ * closed. -+ * -+ * Note that such a request may be submitted even if the child software context -+ * has not crashed, but the resulting object cleanup operations will not be -+ * aware of that. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprc_reset_container(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int child_container_id); -+ -+/** -+ * DPRC IRQ Index and Events -+ */ -+ -+/** -+ * IRQ index -+ */ -+#define DPRC_IRQ_INDEX 0 -+ -+/** -+ * Number of dprc's IRQs -+ */ -+#define DPRC_NUM_OF_IRQS 1 -+ -+/* DPRC IRQ events */ -+/** -+ * IRQ event - Indicates that a new object was added to the container -+ */ -+#define DPRC_IRQ_EVENT_OBJ_ADDED 0x00000001 -+/** -+ * IRQ event - Indicates that an object was removed from the container -+ */ -+#define DPRC_IRQ_EVENT_OBJ_REMOVED 0x00000002 -+/** -+ * IRQ event - Indicates that resources were added to the container -+ */ -+#define DPRC_IRQ_EVENT_RES_ADDED 0x00000004 -+/** -+ * IRQ event - Indicates that resources were removed from the container -+ */ -+#define DPRC_IRQ_EVENT_RES_REMOVED 0x00000008 -+/** -+ * IRQ event - Indicates that one of the descendant containers opened by -+ * this container was destroyed -+ */ -+#define DPRC_IRQ_EVENT_CONTAINER_DESTROYED 0x00000010 -+/** -+ * IRQ event - Indicates that one of the objects opened by this container was -+ * destroyed -+ */ -+#define DPRC_IRQ_EVENT_OBJ_DESTROYED 0x00000020 -+/** -+ * IRQ event - Indicates that an object was created in the container -+ */ -+#define DPRC_IRQ_EVENT_OBJ_CREATED 0x00000040 -+ -+/** -+ * struct dprc_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user-defined number associated with this IRQ -+ */ -+struct dprc_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dprc_set_irq() - Set IRQ information for the DPRC to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRC object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprc_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dprc_irq_cfg *irq_cfg); -+ -+/** -+ * dprc_get_irq() - Get IRQ information from the DPRC. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRC object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise.
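A sketch of wiring up the message-based DPRC interrupt with dprc_set_irq() above and the enable/mask helpers declared just below; the MSI address/value pair is a hypothetical placeholder supplied by interrupt-controller setup, and 'cmd_flags' of 0 again stands in for the caller's real flags.

static int dprc_irq_setup_sketch(struct fsl_mc_io *mc_io, uint16_t token,
				 uint64_t msi_addr, uint32_t msi_val)
{
	struct dprc_irq_cfg irq_cfg = {
		.addr = msi_addr,	/* MC writes 'val' here to signal */
		.val = msi_val,
		.irq_num = 0,
	};
	int err;

	err = dprc_set_irq(mc_io, 0, token, DPRC_IRQ_INDEX, &irq_cfg);
	if (err)
		return err;
	/* only object add/remove events should assert the IRQ */
	err = dprc_set_irq_mask(mc_io, 0, token, DPRC_IRQ_INDEX,
				DPRC_IRQ_EVENT_OBJ_ADDED |
				DPRC_IRQ_EVENT_OBJ_REMOVED);
	if (err)
		return err;
	return dprc_set_irq_enable(mc_io, 0, token, DPRC_IRQ_INDEX, 1);
}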
-+ */
-+int dprc_get_irq(struct fsl_mc_io *mc_io,
-+		 uint32_t cmd_flags,
-+		 uint16_t token,
-+		 uint8_t irq_index,
-+		 int *type,
-+		 struct dprc_irq_cfg *irq_cfg);
-+
-+/**
-+ * dprc_set_irq_enable() - Set overall interrupt state.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @irq_index: The interrupt index to configure
-+ * @en: Interrupt state - enable = 1, disable = 0
-+ *
-+ * Allows GPP software to control when interrupts are generated.
-+ * Each interrupt can have up to 32 causes. The enable/disable controls the
-+ * overall interrupt state. If the interrupt is disabled, no causes will cause
-+ * an interrupt.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_set_irq_enable(struct fsl_mc_io *mc_io,
-+			uint32_t cmd_flags,
-+			uint16_t token,
-+			uint8_t irq_index,
-+			uint8_t en);
-+
-+/**
-+ * dprc_get_irq_enable() - Get overall interrupt state.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @irq_index: The interrupt index to configure
-+ * @en: Returned interrupt state - enable = 1, disable = 0
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_get_irq_enable(struct fsl_mc_io *mc_io,
-+			uint32_t cmd_flags,
-+			uint16_t token,
-+			uint8_t irq_index,
-+			uint8_t *en);
-+
-+/**
-+ * dprc_set_irq_mask() - Set interrupt mask.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @irq_index: The interrupt index to configure
-+ * @mask: Event mask to trigger interrupt;
-+ *	each bit:
-+ *		0 = ignore event
-+ *		1 = consider event for asserting IRQ
-+ *
-+ * Every interrupt can have up to 32 causes and the interrupt model supports
-+ * masking/unmasking each cause independently.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_set_irq_mask(struct fsl_mc_io *mc_io,
-+		      uint32_t cmd_flags,
-+		      uint16_t token,
-+		      uint8_t irq_index,
-+		      uint32_t mask);
-+
-+/**
-+ * dprc_get_irq_mask() - Get interrupt mask.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @irq_index: The interrupt index to configure
-+ * @mask: Returned event mask to trigger interrupt
-+ *
-+ * Every interrupt can have up to 32 causes and the interrupt model supports
-+ * masking/unmasking each cause independently.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_get_irq_mask(struct fsl_mc_io *mc_io,
-+		      uint32_t cmd_flags,
-+		      uint16_t token,
-+		      uint8_t irq_index,
-+		      uint32_t *mask);
-+
-+/**
-+ * dprc_get_irq_status() - Get the current status of any pending interrupts.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @irq_index: The interrupt index to configure
-+ * @status: Returned interrupts status - one bit per cause:
-+ *	0 = no interrupt pending
-+ *	1 = interrupt pending
-+ *
-+ * Return: '0' on Success; Error code otherwise.
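-+ *
-+ * A short sketch of the poll-and-clear pattern (illustrative only;
-+ * rescan_objects() is a hypothetical application callback, and 'mc_io'
-+ * and 'token' come from an earlier dprc_open() call):
-+ *
-+ *	uint32_t status = 0;
-+ *	int err;
-+ *
-+ *	err = dprc_get_irq_status(mc_io, 0, token, DPRC_IRQ_INDEX, &status);
-+ *	if (err)
-+ *		return err;
-+ *	if (status & (DPRC_IRQ_EVENT_OBJ_ADDED | DPRC_IRQ_EVENT_OBJ_REMOVED))
-+ *		rescan_objects();
-+ *	err = dprc_clear_irq_status(mc_io, 0, token, DPRC_IRQ_INDEX, status);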
-+ */
-+int dprc_get_irq_status(struct fsl_mc_io *mc_io,
-+			uint32_t cmd_flags,
-+			uint16_t token,
-+			uint8_t irq_index,
-+			uint32_t *status);
-+
-+/**
-+ * dprc_clear_irq_status() - Clear a pending interrupt's status
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @irq_index: The interrupt index to configure
-+ * @status: bits to clear (W1C) - one bit per cause:
-+ *	0 = don't change
-+ *	1 = clear status bit
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_clear_irq_status(struct fsl_mc_io *mc_io,
-+			  uint32_t cmd_flags,
-+			  uint16_t token,
-+			  uint8_t irq_index,
-+			  uint32_t status);
-+
-+/**
-+ * struct dprc_attributes - Container attributes
-+ * @container_id: Container's ID
-+ * @icid: Container's ICID
-+ * @portal_id: Container's portal ID
-+ * @options: Container's options as set at container's creation
-+ * @version: DPRC version
-+ */
-+struct dprc_attributes {
-+	int container_id;
-+	uint16_t icid;
-+	int portal_id;
-+	uint64_t options;
-+	/**
-+	 * struct version - DPRC version
-+	 * @major: DPRC major version
-+	 * @minor: DPRC minor version
-+	 */
-+	struct {
-+		uint16_t major;
-+		uint16_t minor;
-+	} version;
-+};
-+
-+/**
-+ * dprc_get_attributes() - Obtains container attributes
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @attributes: Returned container attributes
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_get_attributes(struct fsl_mc_io *mc_io,
-+			uint32_t cmd_flags,
-+			uint16_t token,
-+			struct dprc_attributes *attributes);
-+
-+/**
-+ * dprc_set_res_quota() - Set allocation policy for a specific resource/object
-+ * type in a child container
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @child_container_id: ID of the child container
-+ * @type: Resource/object type
-+ * @quota: Sets the maximum number of resources of the selected type
-+ *	that the child container is allowed to allocate from its parent;
-+ *	when quota is set to -1, the policy is the same as the container's
-+ *	general policy.
-+ *
-+ * Allocation policy determines whether or not a container may allocate
-+ * resources from its parent. Each container has a 'global' allocation policy
-+ * that is set when the container is created.
-+ *
-+ * This function sets allocation policy for a specific resource type.
-+ * The default policy for all resource types matches the container's 'global'
-+ * allocation policy.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ *
-+ * @warning Only the parent container is allowed to change a child policy.
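-+ *
-+ * Example sketch (illustrative only; the child container ID and token are
-+ * assumptions): capping a child at eight objects of type "dpmcp":
-+ *
-+ *	char type[16] = "dpmcp";
-+ *	int err;
-+ *
-+ *	err = dprc_set_res_quota(mc_io, 0, token, child_container_id,
-+ *				 type, 8);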
-+ */
-+int dprc_set_res_quota(struct fsl_mc_io *mc_io,
-+		       uint32_t cmd_flags,
-+		       uint16_t token,
-+		       int child_container_id,
-+		       char *type,
-+		       uint16_t quota);
-+
-+/**
-+ * dprc_get_res_quota() - Gets the allocation policy of a specific
-+ * resource/object type in a child container
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @child_container_id: ID of the child container
-+ * @type: Resource/object type
-+ * @quota: Returns the maximum number of resources of the selected type
-+ *	that the child container is allowed to allocate from the parent;
-+ *	when quota is set to -1, the policy is the same as the container's
-+ *	general policy.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_get_res_quota(struct fsl_mc_io *mc_io,
-+		       uint32_t cmd_flags,
-+		       uint16_t token,
-+		       int child_container_id,
-+		       char *type,
-+		       uint16_t *quota);
-+
-+/* Resource request options */
-+
-+/**
-+ * Explicit resource ID request - The requested objects/resources
-+ * are explicit and sequential (in case of resources).
-+ * The base ID is given in the res_req id_base_align field
-+ */
-+#define DPRC_RES_REQ_OPT_EXPLICIT 0x00000001
-+
-+/**
-+ * Aligned resources request - Relevant only for resources
-+ * request (and not objects). Indicates that the resources base ID should be
-+ * sequential and aligned to the value given in the dprc_res_req
-+ * id_base_align field
-+ */
-+#define DPRC_RES_REQ_OPT_ALIGNED 0x00000002
-+
-+/**
-+ * Plugged Flag - Relevant only for object assignment request.
-+ * Indicates that after all objects are assigned, an interrupt will be
-+ * invoked at the relevant GPP. The assigned object will be marked as
-+ * plugged; plugged objects can't be assigned from their container.
-+ */
-+#define DPRC_RES_REQ_OPT_PLUGGED 0x00000004
-+
-+/**
-+ * struct dprc_res_req - Resource request descriptor, to be used in assignment
-+ * or un-assignment of resources and objects.
-+ * @type: Resource/object type: Represented as a NULL terminated string.
-+ *	This string may be received by using dprc_get_pool() to get resource
-+ *	type and dprc_get_obj() to get object type;
-+ *	Note: it is not possible to assign/un-assign DPRC objects
-+ * @num: Number of resources
-+ * @options: Request options: combination of DPRC_RES_REQ_OPT_ options
-+ * @id_base_align: In case of explicit assignment (DPRC_RES_REQ_OPT_EXPLICIT
-+ *	is set at option), this field represents the required base ID
-+ *	for resource allocation; In case of aligned assignment
-+ *	(DPRC_RES_REQ_OPT_ALIGNED is set at option), this field
-+ *	indicates the required alignment for the resource ID(s) -
-+ *	use 0 if there is no alignment or explicit ID requirements
-+ */
-+struct dprc_res_req {
-+	char type[16];
-+	uint32_t num;
-+	uint32_t options;
-+	int id_base_align;
-+};
-+
-+/**
-+ * dprc_assign() - Assigns objects or resources to a child container.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @container_id: ID of the child container
-+ * @res_req: Describes the type and amount of resources to
-+ *	assign to the given container
-+ *
-+ * Assignment is usually done by a parent (this DPRC) to one of its child
-+ * containers.
-+ *
-+ * According to the DPRC allocation policy, the assigned resources may be taken
-+ * (allocated) from the container's ancestors, if not enough resources are
-+ * available in the container itself.
-+ *
-+ * The type of assignment depends on the dprc_res_req options, as follows:
-+ * - DPRC_RES_REQ_OPT_EXPLICIT: indicates that assigned resources should have
-+ *   the explicit base ID specified at the id_base_align field of res_req.
-+ * - DPRC_RES_REQ_OPT_ALIGNED: indicates that the assigned resources should be
-+ *   aligned to the value given at the id_base_align field of res_req.
-+ * - DPRC_RES_REQ_OPT_PLUGGED: Relevant only for object assignment,
-+ *   and indicates that the object must be set to the plugged state.
-+ *
-+ * A container may use this function with its own ID in order to change an
-+ * object's state to plugged or unplugged.
-+ *
-+ * If IRQ information has been set in the child DPRC, it will signal an
-+ * interrupt following every change in its object assignment.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_assign(struct fsl_mc_io *mc_io,
-+		uint32_t cmd_flags,
-+		uint16_t token,
-+		int container_id,
-+		struct dprc_res_req *res_req);
-+
-+/**
-+ * dprc_unassign() - Un-assigns objects or resources from a child container
-+ * and moves them into this (parent) DPRC.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @child_container_id: ID of the child container
-+ * @res_req: Describes the type and amount of resources to un-assign from
-+ *	the child container
-+ *
-+ * Un-assignment of objects can succeed only if the object is not in the
-+ * plugged or opened state.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_unassign(struct fsl_mc_io *mc_io,
-+		  uint32_t cmd_flags,
-+		  uint16_t token,
-+		  int child_container_id,
-+		  struct dprc_res_req *res_req);
-+
-+/**
-+ * dprc_get_pool_count() - Get the number of dprc's pools
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @pool_count: Returned number of resource pools in the dprc
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_get_pool_count(struct fsl_mc_io *mc_io,
-+			uint32_t cmd_flags,
-+			uint16_t token,
-+			int *pool_count);
-+
-+/**
-+ * dprc_get_pool() - Get the type (string) of a certain dprc's pool
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @pool_index: Index of the pool to be queried (< pool_count)
-+ * @type: The type of the pool
-+ *
-+ * The pool types are retrieved one by one by incrementing
-+ * pool_index up to (not including) the value of pool_count returned
-+ * from dprc_get_pool_count(). dprc_get_pool_count() must
-+ * be called prior to dprc_get_pool().
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_get_pool(struct fsl_mc_io *mc_io,
-+		  uint32_t cmd_flags,
-+		  uint16_t token,
-+		  int pool_index,
-+		  char *type);
-+
-+/**
-+ * dprc_get_obj_count() - Obtains the number of objects in the DPRC
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @obj_count: Number of objects assigned to the DPRC
-+ *
-+ * Return: '0' on Success; Error code otherwise.
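-+ *
-+ * A sketch of the count-then-iterate pattern this pairs with (illustrative
-+ * only; 'mc_io' and 'token' are assumptions):
-+ *
-+ *	struct dprc_obj_desc desc;
-+ *	int obj_count, i, err;
-+ *
-+ *	err = dprc_get_obj_count(mc_io, 0, token, &obj_count);
-+ *	if (err)
-+ *		return err;
-+ *	for (i = 0; i < obj_count; i++) {
-+ *		err = dprc_get_obj(mc_io, 0, token, i, &desc);
-+ *		if (!err)
-+ *			printf("%s.%d\n", desc.type, desc.id);
-+ *	}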
-+ */
-+int dprc_get_obj_count(struct fsl_mc_io *mc_io,
-+		       uint32_t cmd_flags,
-+		       uint16_t token,
-+		       int *obj_count);
-+
-+/**
-+ * Objects Attributes Flags
-+ */
-+
-+/**
-+ * Opened state - Indicates that an object is open by at least one owner
-+ */
-+#define DPRC_OBJ_STATE_OPEN 0x00000001
-+/**
-+ * Plugged state - Indicates that the object is plugged
-+ */
-+#define DPRC_OBJ_STATE_PLUGGED 0x00000002
-+
-+/**
-+ * Shareability flag - Object flag indicating no memory shareability;
-+ * the object generates memory accesses that are non-coherent with other
-+ * masters.
-+ * The user is responsible for proper memory handling through IOMMU
-+ * configuration.
-+ */
-+#define DPRC_OBJ_FLAG_NO_MEM_SHAREABILITY 0x0001
-+
-+/**
-+ * struct dprc_obj_desc - Object descriptor, returned from dprc_get_obj()
-+ * @type: Type of object: NULL terminated string
-+ * @id: ID of logical object resource
-+ * @vendor: Object vendor identifier
-+ * @ver_major: Major version number
-+ * @ver_minor: Minor version number
-+ * @irq_count: Number of interrupts supported by the object
-+ * @region_count: Number of mappable regions supported by the object
-+ * @state: Object state: combination of DPRC_OBJ_STATE_ states
-+ * @label: Object label
-+ * @flags: Object's flags
-+ */
-+struct dprc_obj_desc {
-+	char type[16];
-+	int id;
-+	uint16_t vendor;
-+	uint16_t ver_major;
-+	uint16_t ver_minor;
-+	uint8_t irq_count;
-+	uint8_t region_count;
-+	uint32_t state;
-+	char label[16];
-+	uint16_t flags;
-+};
-+
-+/**
-+ * dprc_get_obj() - Get general information on an object
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @obj_index: Index of the object to be queried (< obj_count)
-+ * @obj_desc: Returns the requested object descriptor
-+ *
-+ * The object descriptors are retrieved one by one by incrementing
-+ * obj_index up to (not including) the value of obj_count returned
-+ * from dprc_get_obj_count(). dprc_get_obj_count() must
-+ * be called prior to dprc_get_obj().
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_get_obj(struct fsl_mc_io *mc_io,
-+		 uint32_t cmd_flags,
-+		 uint16_t token,
-+		 int obj_index,
-+		 struct dprc_obj_desc *obj_desc);
-+
-+/**
-+ * dprc_get_obj_desc() - Get object descriptor.
-+ *
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @obj_type: The type of the object to get its descriptor.
-+ * @obj_id: The ID of the object to get its descriptor
-+ * @obj_desc: The returned descriptor to fill and return to the user
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ *
-+ */
-+int dprc_get_obj_desc(struct fsl_mc_io *mc_io,
-+		      uint32_t cmd_flags,
-+		      uint16_t token,
-+		      char *obj_type,
-+		      int obj_id,
-+		      struct dprc_obj_desc *obj_desc);
-+
-+/**
-+ * dprc_set_obj_irq() - Set IRQ information for an object to trigger an
-+ * interrupt.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @obj_type: Type of the object to set its IRQ
-+ * @obj_id: ID of the object to set its IRQ
-+ * @irq_index: The interrupt index to configure
-+ * @irq_cfg: IRQ configuration
-+ *
-+ * Return: '0' on Success; Error code otherwise.
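-+ *
-+ * Example sketch (illustrative only; 'msi_addr' and 'msi_data' are
-+ * assumed to come from the platform's interrupt controller, and dpio.0
-+ * is a hypothetical child object):
-+ *
-+ *	struct dprc_irq_cfg cfg = {
-+ *		.addr = msi_addr,
-+ *		.val = msi_data,
-+ *		.irq_num = 0,
-+ *	};
-+ *	char obj_type[16] = "dpio";
-+ *	int err;
-+ *
-+ *	err = dprc_set_obj_irq(mc_io, 0, token, obj_type, 0, 0, &cfg);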
-+ */
-+int dprc_set_obj_irq(struct fsl_mc_io *mc_io,
-+		     uint32_t cmd_flags,
-+		     uint16_t token,
-+		     char *obj_type,
-+		     int obj_id,
-+		     uint8_t irq_index,
-+		     struct dprc_irq_cfg *irq_cfg);
-+
-+/**
-+ * dprc_get_obj_irq() - Get IRQ information from an object.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @obj_type: Type of the object to get its IRQ
-+ * @obj_id: ID of the object to get its IRQ
-+ * @irq_index: The interrupt index to configure
-+ * @type: Interrupt type: 0 represents message interrupt
-+ *	type (both irq_addr and irq_val are valid)
-+ * @irq_cfg: The returned IRQ attributes
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_get_obj_irq(struct fsl_mc_io *mc_io,
-+		     uint32_t cmd_flags,
-+		     uint16_t token,
-+		     char *obj_type,
-+		     int obj_id,
-+		     uint8_t irq_index,
-+		     int *type,
-+		     struct dprc_irq_cfg *irq_cfg);
-+
-+/**
-+ * dprc_get_res_count() - Obtains the number of free resources that are
-+ * assigned to this container, by pool type
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @type: Pool type
-+ * @res_count: Returned number of free resources of the given
-+ *	resource type that are assigned to this DPRC
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_get_res_count(struct fsl_mc_io *mc_io,
-+		       uint32_t cmd_flags,
-+		       uint16_t token,
-+		       char *type,
-+		       int *res_count);
-+
-+/**
-+ * enum dprc_iter_status - Iteration status
-+ * @DPRC_ITER_STATUS_FIRST: Perform first iteration
-+ * @DPRC_ITER_STATUS_MORE: Indicates more/next iteration is needed
-+ * @DPRC_ITER_STATUS_LAST: Indicates last iteration
-+ */
-+enum dprc_iter_status {
-+	DPRC_ITER_STATUS_FIRST = 0,
-+	DPRC_ITER_STATUS_MORE = 1,
-+	DPRC_ITER_STATUS_LAST = 2
-+};
-+
-+/**
-+ * struct dprc_res_ids_range_desc - Resource ID range descriptor
-+ * @base_id: Base resource ID of this range
-+ * @last_id: Last resource ID of this range
-+ * @iter_status: Iteration status - should be set to DPRC_ITER_STATUS_FIRST at
-+ *	first iteration; while the returned marker is DPRC_ITER_STATUS_MORE,
-+ *	additional iterations are needed, until the returned marker is
-+ *	DPRC_ITER_STATUS_LAST
-+ */
-+struct dprc_res_ids_range_desc {
-+	int base_id;
-+	int last_id;
-+	enum dprc_iter_status iter_status;
-+};
-+
-+/**
-+ * dprc_get_res_ids() - Obtains IDs of free resources in the container
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRC object
-+ * @type: Pool type
-+ * @range_desc: Range descriptor
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprc_get_res_ids(struct fsl_mc_io *mc_io,
-+		     uint32_t cmd_flags,
-+		     uint16_t token,
-+		     char *type,
-+		     struct dprc_res_ids_range_desc *range_desc);
-+
-+/**
-+ * Region flags
-+ */
-+/**
-+ * Cacheable - Indicates that the region should be mapped as cacheable
-+ */
-+#define DPRC_REGION_CACHEABLE 0x00000001
-+
-+/**
-+ * enum dprc_region_type - Region type
-+ * @DPRC_REGION_TYPE_MC_PORTAL: MC portal region
-+ * @DPRC_REGION_TYPE_QBMAN_PORTAL: Qbman portal region
-+ */
-+enum dprc_region_type {
-+	DPRC_REGION_TYPE_MC_PORTAL,
-+	DPRC_REGION_TYPE_QBMAN_PORTAL
-+};
-+
-+/**
-+ * struct dprc_region_desc - Mappable region descriptor
-+ * @base_offset: Region offset from region's base address.
-+ * For DPMCP and DPRC objects, region base is offset from SoC MC portals -+ * base address; For DPIO, region base is offset from SoC QMan portals -+ * base address -+ * @size: Region size (in bytes) -+ * @flags: Region attributes -+ * @type: Portal region type -+ */ -+struct dprc_region_desc { -+ uint32_t base_offset; -+ uint32_t size; -+ uint32_t flags; -+ enum dprc_region_type type; -+}; -+ -+/** -+ * dprc_get_obj_region() - Get region information for a specified object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRC object -+ * @obj_type: Object type as returned in dprc_get_obj() -+ * @obj_id: Unique object instance as returned in dprc_get_obj() -+ * @region_index: The specific region to query -+ * @region_desc: Returns the requested region descriptor -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprc_get_obj_region(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ char *obj_type, -+ int obj_id, -+ uint8_t region_index, -+ struct dprc_region_desc *region_desc); -+ -+/** -+ * dprc_set_obj_label() - Set object label. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRC object -+ * @obj_type: Object's type -+ * @obj_id: Object's ID -+ * @label: The required label. The maximum length is 16 chars. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprc_set_obj_label(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ char *obj_type, -+ int obj_id, -+ char *label); -+ -+/** -+ * struct dprc_endpoint - Endpoint description for link connect/disconnect -+ * operations -+ * @type: Endpoint object type: NULL terminated string -+ * @id: Endpoint object ID -+ * @if_id: Interface ID; should be set for endpoints with multiple -+ * interfaces ("dpsw", "dpdmux"); for others, always set to 0 -+ */ -+struct dprc_endpoint { -+ char type[16]; -+ int id; -+ uint16_t if_id; -+}; -+ -+/** -+ * struct dprc_connection_cfg - Connection configuration. -+ * Used for virtual connections only -+ * @committed_rate: Committed rate (Mbits/s) -+ * @max_rate: Maximum rate (Mbits/s) -+ */ -+struct dprc_connection_cfg { -+ uint32_t committed_rate; -+ uint32_t max_rate; -+}; -+ -+/** -+ * dprc_connect() - Connect two endpoints to create a network link between them -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRC object -+ * @endpoint1: Endpoint 1 configuration parameters -+ * @endpoint2: Endpoint 2 configuration parameters -+ * @cfg: Connection configuration. The connection configuration is ignored for -+ * connections made to DPMAC objects, where rate is retrieved from the -+ * MAC configuration. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprc_connect(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dprc_endpoint *endpoint1, -+ const struct dprc_endpoint *endpoint2, -+ const struct dprc_connection_cfg *cfg); -+ -+/** -+ * dprc_disconnect() - Disconnect one endpoint to remove its network connection -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRC object -+ * @endpoint: Endpoint configuration parameters -+ * -+ * Return: '0' on Success; Error code otherwise. 
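-+ *
-+ * A connect/disconnect sketch (illustrative only; dpni.0 and dpmac.0 are
-+ * hypothetical endpoints, and a zeroed dprc_connection_cfg is used since
-+ * the rate settings are ignored for DPMAC connections):
-+ *
-+ *	struct dprc_endpoint ep1 = { .type = "dpni", .id = 0, .if_id = 0 };
-+ *	struct dprc_endpoint ep2 = { .type = "dpmac", .id = 0, .if_id = 0 };
-+ *	struct dprc_connection_cfg ccfg = { 0 };
-+ *	int err;
-+ *
-+ *	err = dprc_connect(mc_io, 0, token, &ep1, &ep2, &ccfg);
-+ *	...
-+ *	err = dprc_disconnect(mc_io, 0, token, &ep1);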
-+ */ -+int dprc_disconnect(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dprc_endpoint *endpoint); -+ -+/** -+* dprc_get_connection() - Get connected endpoint and link status if connection -+* exists. -+* @mc_io: Pointer to MC portal's I/O object -+* @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+* @token: Token of DPRC object -+* @endpoint1: Endpoint 1 configuration parameters -+* @endpoint2: Returned endpoint 2 configuration parameters -+* @state: Returned link state: -+* 1 - link is up; -+* 0 - link is down; -+* -1 - no connection (endpoint2 information is irrelevant) -+* -+* Return: '0' on Success; -ENAVAIL if connection does not exist. -+*/ -+int dprc_get_connection(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dprc_endpoint *endpoint1, -+ struct dprc_endpoint *endpoint2, -+ int *state); -+ -+#endif /* _FSL_DPRC_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dprc_cmd.h b/drivers/net/dpaa2/mc/fsl_dprc_cmd.h -new file mode 100644 -index 0000000..469e286 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dprc_cmd.h -@@ -0,0 +1,755 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+#ifndef _FSL_DPRC_CMD_H -+#define _FSL_DPRC_CMD_H -+ -+/* DPRC Version */ -+#define DPRC_VER_MAJOR 5 -+#define DPRC_VER_MINOR 1 -+ -+/* Command IDs */ -+#define DPRC_CMDID_CLOSE 0x800 -+#define DPRC_CMDID_OPEN 0x805 -+#define DPRC_CMDID_CREATE 0x905 -+ -+#define DPRC_CMDID_GET_ATTR 0x004 -+#define DPRC_CMDID_RESET_CONT 0x005 -+ -+#define DPRC_CMDID_SET_IRQ 0x010 -+#define DPRC_CMDID_GET_IRQ 0x011 -+#define DPRC_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPRC_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPRC_CMDID_SET_IRQ_MASK 0x014 -+#define DPRC_CMDID_GET_IRQ_MASK 0x015 -+#define DPRC_CMDID_GET_IRQ_STATUS 0x016 -+#define DPRC_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPRC_CMDID_CREATE_CONT 0x151 -+#define DPRC_CMDID_DESTROY_CONT 0x152 -+#define DPRC_CMDID_GET_CONT_ID 0x830 -+#define DPRC_CMDID_SET_RES_QUOTA 0x155 -+#define DPRC_CMDID_GET_RES_QUOTA 0x156 -+#define DPRC_CMDID_ASSIGN 0x157 -+#define DPRC_CMDID_UNASSIGN 0x158 -+#define DPRC_CMDID_GET_OBJ_COUNT 0x159 -+#define DPRC_CMDID_GET_OBJ 0x15A -+#define DPRC_CMDID_GET_RES_COUNT 0x15B -+#define DPRC_CMDID_GET_RES_IDS 0x15C -+#define DPRC_CMDID_GET_OBJ_REG 0x15E -+#define DPRC_CMDID_SET_OBJ_IRQ 0x15F -+#define DPRC_CMDID_GET_OBJ_IRQ 0x160 -+#define DPRC_CMDID_SET_OBJ_LABEL 0x161 -+#define DPRC_CMDID_GET_OBJ_DESC 0x162 -+ -+#define DPRC_CMDID_CONNECT 0x167 -+#define DPRC_CMDID_DISCONNECT 0x168 -+#define DPRC_CMDID_GET_POOL 0x169 -+#define DPRC_CMDID_GET_POOL_COUNT 0x16A -+ -+#define DPRC_CMDID_GET_CONNECTION 0x16C -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_CONTAINER_ID(cmd, container_id) \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, container_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_OPEN(cmd, container_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, container_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_CREATE_CONTAINER(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 32, 16, uint16_t, cfg->icid); \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, cfg->options); \ -+ MC_CMD_OP(cmd, 1, 32, 32, int, cfg->portal_id); \ -+ MC_CMD_OP(cmd, 2, 0, 8, char, cfg->label[0]);\ -+ MC_CMD_OP(cmd, 2, 8, 8, char, cfg->label[1]);\ -+ MC_CMD_OP(cmd, 2, 16, 8, char, cfg->label[2]);\ -+ MC_CMD_OP(cmd, 2, 24, 8, char, cfg->label[3]);\ -+ MC_CMD_OP(cmd, 2, 32, 8, char, cfg->label[4]);\ -+ MC_CMD_OP(cmd, 2, 40, 8, char, cfg->label[5]);\ -+ MC_CMD_OP(cmd, 2, 48, 8, char, cfg->label[6]);\ -+ MC_CMD_OP(cmd, 2, 56, 8, char, cfg->label[7]);\ -+ MC_CMD_OP(cmd, 3, 0, 8, char, cfg->label[8]);\ -+ MC_CMD_OP(cmd, 3, 8, 8, char, cfg->label[9]);\ -+ MC_CMD_OP(cmd, 3, 16, 8, char, cfg->label[10]);\ -+ MC_CMD_OP(cmd, 3, 24, 8, char, cfg->label[11]);\ -+ MC_CMD_OP(cmd, 3, 32, 8, char, cfg->label[12]);\ -+ MC_CMD_OP(cmd, 3, 40, 8, char, cfg->label[13]);\ -+ MC_CMD_OP(cmd, 3, 48, 8, char, cfg->label[14]);\ -+ MC_CMD_OP(cmd, 3, 56, 8, char, cfg->label[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_CREATE_CONTAINER(cmd, child_container_id, child_portal_offset)\ -+do { \ -+ MC_RSP_OP(cmd, 1, 0, 32, int, child_container_id); \ -+ MC_RSP_OP(cmd, 2, 0, 64, uint64_t, child_portal_offset);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_DESTROY_CONTAINER(cmd, child_container_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, child_container_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_RESET_CONTAINER(cmd, child_container_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, child_container_id) -+ -+/* cmd, param, offset, width, type, arg_name 
*/ -+#define DPRC_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_SET_IRQ_ENABLE(cmd, irq_index, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, en); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_IRQ_ENABLE(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_ATTRIBUTES(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->container_id); \ -+ MC_RSP_OP(cmd, 0, 32, 16, uint16_t, attr->icid); \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, attr->options);\ -+ MC_RSP_OP(cmd, 1, 32, 32, int, attr->portal_id); \ -+ MC_RSP_OP(cmd, 2, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 2, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_SET_RES_QUOTA(cmd, child_container_id, type, quota) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, child_container_id); \ -+ MC_CMD_OP(cmd, 0, 32, 16, uint16_t, quota);\ -+ MC_CMD_OP(cmd, 1, 0, 8, char, type[0]);\ -+ MC_CMD_OP(cmd, 1, 8, 8, char, type[1]);\ -+ MC_CMD_OP(cmd, 1, 16, 8, char, type[2]);\ -+ MC_CMD_OP(cmd, 1, 24, 8, char, type[3]);\ -+ MC_CMD_OP(cmd, 1, 32, 8, char, type[4]);\ -+ MC_CMD_OP(cmd, 1, 40, 8, char, type[5]);\ -+ MC_CMD_OP(cmd, 1, 48, 8, char, type[6]);\ -+ MC_CMD_OP(cmd, 1, 56, 8, char, type[7]);\ -+ MC_CMD_OP(cmd, 2, 0, 8, char, type[8]);\ -+ MC_CMD_OP(cmd, 
2, 8, 8, char, type[9]);\ -+ MC_CMD_OP(cmd, 2, 16, 8, char, type[10]);\ -+ MC_CMD_OP(cmd, 2, 24, 8, char, type[11]);\ -+ MC_CMD_OP(cmd, 2, 32, 8, char, type[12]);\ -+ MC_CMD_OP(cmd, 2, 40, 8, char, type[13]);\ -+ MC_CMD_OP(cmd, 2, 48, 8, char, type[14]);\ -+ MC_CMD_OP(cmd, 2, 56, 8, char, type[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_RES_QUOTA(cmd, child_container_id, type) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, child_container_id); \ -+ MC_CMD_OP(cmd, 1, 0, 8, char, type[0]);\ -+ MC_CMD_OP(cmd, 1, 8, 8, char, type[1]);\ -+ MC_CMD_OP(cmd, 1, 16, 8, char, type[2]);\ -+ MC_CMD_OP(cmd, 1, 24, 8, char, type[3]);\ -+ MC_CMD_OP(cmd, 1, 32, 8, char, type[4]);\ -+ MC_CMD_OP(cmd, 1, 40, 8, char, type[5]);\ -+ MC_CMD_OP(cmd, 1, 48, 8, char, type[6]);\ -+ MC_CMD_OP(cmd, 1, 56, 8, char, type[7]);\ -+ MC_CMD_OP(cmd, 2, 0, 8, char, type[8]);\ -+ MC_CMD_OP(cmd, 2, 8, 8, char, type[9]);\ -+ MC_CMD_OP(cmd, 2, 16, 8, char, type[10]);\ -+ MC_CMD_OP(cmd, 2, 24, 8, char, type[11]);\ -+ MC_CMD_OP(cmd, 2, 32, 8, char, type[12]);\ -+ MC_CMD_OP(cmd, 2, 40, 8, char, type[13]);\ -+ MC_CMD_OP(cmd, 2, 48, 8, char, type[14]);\ -+ MC_CMD_OP(cmd, 2, 56, 8, char, type[15]);\ -+} while (0) -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_RES_QUOTA(cmd, quota) \ -+ MC_RSP_OP(cmd, 0, 32, 16, uint16_t, quota) -+ -+/* param, offset, width, type, arg_name */ -+#define DPRC_CMD_ASSIGN(cmd, container_id, res_req) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, container_id); \ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, res_req->options);\ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, res_req->num); \ -+ MC_CMD_OP(cmd, 1, 32, 32, int, res_req->id_base_align); \ -+ MC_CMD_OP(cmd, 2, 0, 8, char, res_req->type[0]);\ -+ MC_CMD_OP(cmd, 2, 8, 8, char, res_req->type[1]);\ -+ MC_CMD_OP(cmd, 2, 16, 8, char, res_req->type[2]);\ -+ MC_CMD_OP(cmd, 2, 24, 8, char, res_req->type[3]);\ -+ MC_CMD_OP(cmd, 2, 32, 8, char, res_req->type[4]);\ -+ MC_CMD_OP(cmd, 2, 40, 8, char, res_req->type[5]);\ -+ MC_CMD_OP(cmd, 2, 48, 8, char, res_req->type[6]);\ -+ MC_CMD_OP(cmd, 2, 56, 8, char, res_req->type[7]);\ -+ MC_CMD_OP(cmd, 3, 0, 8, char, res_req->type[8]);\ -+ MC_CMD_OP(cmd, 3, 8, 8, char, res_req->type[9]);\ -+ MC_CMD_OP(cmd, 3, 16, 8, char, res_req->type[10]);\ -+ MC_CMD_OP(cmd, 3, 24, 8, char, res_req->type[11]);\ -+ MC_CMD_OP(cmd, 3, 32, 8, char, res_req->type[12]);\ -+ MC_CMD_OP(cmd, 3, 40, 8, char, res_req->type[13]);\ -+ MC_CMD_OP(cmd, 3, 48, 8, char, res_req->type[14]);\ -+ MC_CMD_OP(cmd, 3, 56, 8, char, res_req->type[15]);\ -+} while (0) -+ -+/* param, offset, width, type, arg_name */ -+#define DPRC_CMD_UNASSIGN(cmd, child_container_id, res_req) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, child_container_id); \ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, res_req->options);\ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, res_req->num); \ -+ MC_CMD_OP(cmd, 1, 32, 32, int, res_req->id_base_align); \ -+ MC_CMD_OP(cmd, 2, 0, 8, char, res_req->type[0]);\ -+ MC_CMD_OP(cmd, 2, 8, 8, char, res_req->type[1]);\ -+ MC_CMD_OP(cmd, 2, 16, 8, char, res_req->type[2]);\ -+ MC_CMD_OP(cmd, 2, 24, 8, char, res_req->type[3]);\ -+ MC_CMD_OP(cmd, 2, 32, 8, char, res_req->type[4]);\ -+ MC_CMD_OP(cmd, 2, 40, 8, char, res_req->type[5]);\ -+ MC_CMD_OP(cmd, 2, 48, 8, char, res_req->type[6]);\ -+ MC_CMD_OP(cmd, 2, 56, 8, char, res_req->type[7]);\ -+ MC_CMD_OP(cmd, 3, 0, 8, char, res_req->type[8]);\ -+ MC_CMD_OP(cmd, 3, 8, 8, char, res_req->type[9]);\ -+ MC_CMD_OP(cmd, 3, 16, 8, char, res_req->type[10]);\ -+ 
MC_CMD_OP(cmd, 3, 24, 8, char, res_req->type[11]);\ -+ MC_CMD_OP(cmd, 3, 32, 8, char, res_req->type[12]);\ -+ MC_CMD_OP(cmd, 3, 40, 8, char, res_req->type[13]);\ -+ MC_CMD_OP(cmd, 3, 48, 8, char, res_req->type[14]);\ -+ MC_CMD_OP(cmd, 3, 56, 8, char, res_req->type[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_POOL_COUNT(cmd, pool_count) \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, pool_count) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_POOL(cmd, pool_index) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, pool_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_POOL(cmd, type) \ -+do { \ -+ MC_RSP_OP(cmd, 1, 0, 8, char, type[0]);\ -+ MC_RSP_OP(cmd, 1, 8, 8, char, type[1]);\ -+ MC_RSP_OP(cmd, 1, 16, 8, char, type[2]);\ -+ MC_RSP_OP(cmd, 1, 24, 8, char, type[3]);\ -+ MC_RSP_OP(cmd, 1, 32, 8, char, type[4]);\ -+ MC_RSP_OP(cmd, 1, 40, 8, char, type[5]);\ -+ MC_RSP_OP(cmd, 1, 48, 8, char, type[6]);\ -+ MC_RSP_OP(cmd, 1, 56, 8, char, type[7]);\ -+ MC_RSP_OP(cmd, 2, 0, 8, char, type[8]);\ -+ MC_RSP_OP(cmd, 2, 8, 8, char, type[9]);\ -+ MC_RSP_OP(cmd, 2, 16, 8, char, type[10]);\ -+ MC_RSP_OP(cmd, 2, 24, 8, char, type[11]);\ -+ MC_RSP_OP(cmd, 2, 32, 8, char, type[12]);\ -+ MC_RSP_OP(cmd, 2, 40, 8, char, type[13]);\ -+ MC_RSP_OP(cmd, 2, 48, 8, char, type[14]);\ -+ MC_RSP_OP(cmd, 2, 56, 8, char, type[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_OBJ_COUNT(cmd, obj_count) \ -+ MC_RSP_OP(cmd, 0, 32, 32, int, obj_count) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_OBJ(cmd, obj_index) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, obj_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_OBJ(cmd, obj_desc) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 32, 32, int, obj_desc->id); \ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, obj_desc->vendor); \ -+ MC_RSP_OP(cmd, 1, 16, 8, uint8_t, obj_desc->irq_count); \ -+ MC_RSP_OP(cmd, 1, 24, 8, uint8_t, obj_desc->region_count); \ -+ MC_RSP_OP(cmd, 1, 32, 32, uint32_t, obj_desc->state);\ -+ MC_RSP_OP(cmd, 2, 0, 16, uint16_t, obj_desc->ver_major);\ -+ MC_RSP_OP(cmd, 2, 16, 16, uint16_t, obj_desc->ver_minor);\ -+ MC_RSP_OP(cmd, 2, 32, 16, uint16_t, obj_desc->flags); \ -+ MC_RSP_OP(cmd, 3, 0, 8, char, obj_desc->type[0]);\ -+ MC_RSP_OP(cmd, 3, 8, 8, char, obj_desc->type[1]);\ -+ MC_RSP_OP(cmd, 3, 16, 8, char, obj_desc->type[2]);\ -+ MC_RSP_OP(cmd, 3, 24, 8, char, obj_desc->type[3]);\ -+ MC_RSP_OP(cmd, 3, 32, 8, char, obj_desc->type[4]);\ -+ MC_RSP_OP(cmd, 3, 40, 8, char, obj_desc->type[5]);\ -+ MC_RSP_OP(cmd, 3, 48, 8, char, obj_desc->type[6]);\ -+ MC_RSP_OP(cmd, 3, 56, 8, char, obj_desc->type[7]);\ -+ MC_RSP_OP(cmd, 4, 0, 8, char, obj_desc->type[8]);\ -+ MC_RSP_OP(cmd, 4, 8, 8, char, obj_desc->type[9]);\ -+ MC_RSP_OP(cmd, 4, 16, 8, char, obj_desc->type[10]);\ -+ MC_RSP_OP(cmd, 4, 24, 8, char, obj_desc->type[11]);\ -+ MC_RSP_OP(cmd, 4, 32, 8, char, obj_desc->type[12]);\ -+ MC_RSP_OP(cmd, 4, 40, 8, char, obj_desc->type[13]);\ -+ MC_RSP_OP(cmd, 4, 48, 8, char, obj_desc->type[14]);\ -+ MC_RSP_OP(cmd, 4, 56, 8, char, obj_desc->type[15]);\ -+ MC_RSP_OP(cmd, 5, 0, 8, char, obj_desc->label[0]);\ -+ MC_RSP_OP(cmd, 5, 8, 8, char, obj_desc->label[1]);\ -+ MC_RSP_OP(cmd, 5, 16, 8, char, obj_desc->label[2]);\ -+ MC_RSP_OP(cmd, 5, 24, 8, char, obj_desc->label[3]);\ -+ MC_RSP_OP(cmd, 5, 32, 8, char, obj_desc->label[4]);\ -+ MC_RSP_OP(cmd, 5, 40, 8, char, obj_desc->label[5]);\ -+ MC_RSP_OP(cmd, 5, 48, 8, char, 
obj_desc->label[6]);\ -+ MC_RSP_OP(cmd, 5, 56, 8, char, obj_desc->label[7]);\ -+ MC_RSP_OP(cmd, 6, 0, 8, char, obj_desc->label[8]);\ -+ MC_RSP_OP(cmd, 6, 8, 8, char, obj_desc->label[9]);\ -+ MC_RSP_OP(cmd, 6, 16, 8, char, obj_desc->label[10]);\ -+ MC_RSP_OP(cmd, 6, 24, 8, char, obj_desc->label[11]);\ -+ MC_RSP_OP(cmd, 6, 32, 8, char, obj_desc->label[12]);\ -+ MC_RSP_OP(cmd, 6, 40, 8, char, obj_desc->label[13]);\ -+ MC_RSP_OP(cmd, 6, 48, 8, char, obj_desc->label[14]);\ -+ MC_RSP_OP(cmd, 6, 56, 8, char, obj_desc->label[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_OBJ_DESC(cmd, obj_type, obj_id) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, obj_id);\ -+ MC_CMD_OP(cmd, 1, 0, 8, char, obj_type[0]);\ -+ MC_CMD_OP(cmd, 1, 8, 8, char, obj_type[1]);\ -+ MC_CMD_OP(cmd, 1, 16, 8, char, obj_type[2]);\ -+ MC_CMD_OP(cmd, 1, 24, 8, char, obj_type[3]);\ -+ MC_CMD_OP(cmd, 1, 32, 8, char, obj_type[4]);\ -+ MC_CMD_OP(cmd, 1, 40, 8, char, obj_type[5]);\ -+ MC_CMD_OP(cmd, 1, 48, 8, char, obj_type[6]);\ -+ MC_CMD_OP(cmd, 1, 56, 8, char, obj_type[7]);\ -+ MC_CMD_OP(cmd, 2, 0, 8, char, obj_type[8]);\ -+ MC_CMD_OP(cmd, 2, 8, 8, char, obj_type[9]);\ -+ MC_CMD_OP(cmd, 2, 16, 8, char, obj_type[10]);\ -+ MC_CMD_OP(cmd, 2, 24, 8, char, obj_type[11]);\ -+ MC_CMD_OP(cmd, 2, 32, 8, char, obj_type[12]);\ -+ MC_CMD_OP(cmd, 2, 40, 8, char, obj_type[13]);\ -+ MC_CMD_OP(cmd, 2, 48, 8, char, obj_type[14]);\ -+ MC_CMD_OP(cmd, 2, 56, 8, char, obj_type[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_OBJ_DESC(cmd, obj_desc) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 32, 32, int, obj_desc->id); \ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, obj_desc->vendor); \ -+ MC_RSP_OP(cmd, 1, 16, 8, uint8_t, obj_desc->irq_count); \ -+ MC_RSP_OP(cmd, 1, 24, 8, uint8_t, obj_desc->region_count); \ -+ MC_RSP_OP(cmd, 1, 32, 32, uint32_t, obj_desc->state);\ -+ MC_RSP_OP(cmd, 2, 0, 16, uint16_t, obj_desc->ver_major);\ -+ MC_RSP_OP(cmd, 2, 16, 16, uint16_t, obj_desc->ver_minor);\ -+ MC_RSP_OP(cmd, 2, 32, 16, uint16_t, obj_desc->flags); \ -+ MC_RSP_OP(cmd, 3, 0, 8, char, obj_desc->type[0]);\ -+ MC_RSP_OP(cmd, 3, 8, 8, char, obj_desc->type[1]);\ -+ MC_RSP_OP(cmd, 3, 16, 8, char, obj_desc->type[2]);\ -+ MC_RSP_OP(cmd, 3, 24, 8, char, obj_desc->type[3]);\ -+ MC_RSP_OP(cmd, 3, 32, 8, char, obj_desc->type[4]);\ -+ MC_RSP_OP(cmd, 3, 40, 8, char, obj_desc->type[5]);\ -+ MC_RSP_OP(cmd, 3, 48, 8, char, obj_desc->type[6]);\ -+ MC_RSP_OP(cmd, 3, 56, 8, char, obj_desc->type[7]);\ -+ MC_RSP_OP(cmd, 4, 0, 8, char, obj_desc->type[8]);\ -+ MC_RSP_OP(cmd, 4, 8, 8, char, obj_desc->type[9]);\ -+ MC_RSP_OP(cmd, 4, 16, 8, char, obj_desc->type[10]);\ -+ MC_RSP_OP(cmd, 4, 24, 8, char, obj_desc->type[11]);\ -+ MC_RSP_OP(cmd, 4, 32, 8, char, obj_desc->type[12]);\ -+ MC_RSP_OP(cmd, 4, 40, 8, char, obj_desc->type[13]);\ -+ MC_RSP_OP(cmd, 4, 48, 8, char, obj_desc->type[14]);\ -+ MC_RSP_OP(cmd, 4, 56, 8, char, obj_desc->type[15]);\ -+ MC_RSP_OP(cmd, 5, 0, 8, char, obj_desc->label[0]);\ -+ MC_RSP_OP(cmd, 5, 8, 8, char, obj_desc->label[1]);\ -+ MC_RSP_OP(cmd, 5, 16, 8, char, obj_desc->label[2]);\ -+ MC_RSP_OP(cmd, 5, 24, 8, char, obj_desc->label[3]);\ -+ MC_RSP_OP(cmd, 5, 32, 8, char, obj_desc->label[4]);\ -+ MC_RSP_OP(cmd, 5, 40, 8, char, obj_desc->label[5]);\ -+ MC_RSP_OP(cmd, 5, 48, 8, char, obj_desc->label[6]);\ -+ MC_RSP_OP(cmd, 5, 56, 8, char, obj_desc->label[7]);\ -+ MC_RSP_OP(cmd, 6, 0, 8, char, obj_desc->label[8]);\ -+ MC_RSP_OP(cmd, 6, 8, 8, char, obj_desc->label[9]);\ -+ MC_RSP_OP(cmd, 6, 16, 
8, char, obj_desc->label[10]);\ -+ MC_RSP_OP(cmd, 6, 24, 8, char, obj_desc->label[11]);\ -+ MC_RSP_OP(cmd, 6, 32, 8, char, obj_desc->label[12]);\ -+ MC_RSP_OP(cmd, 6, 40, 8, char, obj_desc->label[13]);\ -+ MC_RSP_OP(cmd, 6, 48, 8, char, obj_desc->label[14]);\ -+ MC_RSP_OP(cmd, 6, 56, 8, char, obj_desc->label[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_RES_COUNT(cmd, type) \ -+do { \ -+ MC_CMD_OP(cmd, 1, 0, 8, char, type[0]);\ -+ MC_CMD_OP(cmd, 1, 8, 8, char, type[1]);\ -+ MC_CMD_OP(cmd, 1, 16, 8, char, type[2]);\ -+ MC_CMD_OP(cmd, 1, 24, 8, char, type[3]);\ -+ MC_CMD_OP(cmd, 1, 32, 8, char, type[4]);\ -+ MC_CMD_OP(cmd, 1, 40, 8, char, type[5]);\ -+ MC_CMD_OP(cmd, 1, 48, 8, char, type[6]);\ -+ MC_CMD_OP(cmd, 1, 56, 8, char, type[7]);\ -+ MC_CMD_OP(cmd, 2, 0, 8, char, type[8]);\ -+ MC_CMD_OP(cmd, 2, 8, 8, char, type[9]);\ -+ MC_CMD_OP(cmd, 2, 16, 8, char, type[10]);\ -+ MC_CMD_OP(cmd, 2, 24, 8, char, type[11]);\ -+ MC_CMD_OP(cmd, 2, 32, 8, char, type[12]);\ -+ MC_CMD_OP(cmd, 2, 40, 8, char, type[13]);\ -+ MC_CMD_OP(cmd, 2, 48, 8, char, type[14]);\ -+ MC_CMD_OP(cmd, 2, 56, 8, char, type[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_RES_COUNT(cmd, res_count) \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, res_count) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_RES_IDS(cmd, range_desc, type) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 42, 7, enum dprc_iter_status, \ -+ range_desc->iter_status); \ -+ MC_CMD_OP(cmd, 1, 0, 32, int, range_desc->base_id); \ -+ MC_CMD_OP(cmd, 1, 32, 32, int, range_desc->last_id);\ -+ MC_CMD_OP(cmd, 2, 0, 8, char, type[0]);\ -+ MC_CMD_OP(cmd, 2, 8, 8, char, type[1]);\ -+ MC_CMD_OP(cmd, 2, 16, 8, char, type[2]);\ -+ MC_CMD_OP(cmd, 2, 24, 8, char, type[3]);\ -+ MC_CMD_OP(cmd, 2, 32, 8, char, type[4]);\ -+ MC_CMD_OP(cmd, 2, 40, 8, char, type[5]);\ -+ MC_CMD_OP(cmd, 2, 48, 8, char, type[6]);\ -+ MC_CMD_OP(cmd, 2, 56, 8, char, type[7]);\ -+ MC_CMD_OP(cmd, 3, 0, 8, char, type[8]);\ -+ MC_CMD_OP(cmd, 3, 8, 8, char, type[9]);\ -+ MC_CMD_OP(cmd, 3, 16, 8, char, type[10]);\ -+ MC_CMD_OP(cmd, 3, 24, 8, char, type[11]);\ -+ MC_CMD_OP(cmd, 3, 32, 8, char, type[12]);\ -+ MC_CMD_OP(cmd, 3, 40, 8, char, type[13]);\ -+ MC_CMD_OP(cmd, 3, 48, 8, char, type[14]);\ -+ MC_CMD_OP(cmd, 3, 56, 8, char, type[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_RES_IDS(cmd, range_desc) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 42, 7, enum dprc_iter_status, \ -+ range_desc->iter_status);\ -+ MC_RSP_OP(cmd, 1, 0, 32, int, range_desc->base_id); \ -+ MC_RSP_OP(cmd, 1, 32, 32, int, range_desc->last_id);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_OBJ_REGION(cmd, obj_type, obj_id, region_index) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, obj_id); \ -+ MC_CMD_OP(cmd, 0, 48, 8, uint8_t, region_index);\ -+ MC_CMD_OP(cmd, 3, 0, 8, char, obj_type[0]);\ -+ MC_CMD_OP(cmd, 3, 8, 8, char, obj_type[1]);\ -+ MC_CMD_OP(cmd, 3, 16, 8, char, obj_type[2]);\ -+ MC_CMD_OP(cmd, 3, 24, 8, char, obj_type[3]);\ -+ MC_CMD_OP(cmd, 3, 32, 8, char, obj_type[4]);\ -+ MC_CMD_OP(cmd, 3, 40, 8, char, obj_type[5]);\ -+ MC_CMD_OP(cmd, 3, 48, 8, char, obj_type[6]);\ -+ MC_CMD_OP(cmd, 3, 56, 8, char, obj_type[7]);\ -+ MC_CMD_OP(cmd, 4, 0, 8, char, obj_type[8]);\ -+ MC_CMD_OP(cmd, 4, 8, 8, char, obj_type[9]);\ -+ MC_CMD_OP(cmd, 4, 16, 8, char, obj_type[10]);\ -+ MC_CMD_OP(cmd, 4, 24, 8, char, obj_type[11]);\ -+ MC_CMD_OP(cmd, 4, 32, 8, char, 
obj_type[12]);\ -+ MC_CMD_OP(cmd, 4, 40, 8, char, obj_type[13]);\ -+ MC_CMD_OP(cmd, 4, 48, 8, char, obj_type[14]);\ -+ MC_CMD_OP(cmd, 4, 56, 8, char, obj_type[15]);\ -+} while (0) -+ -+/* param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_OBJ_REGION(cmd, region_desc) \ -+do { \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, region_desc->base_offset);\ -+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, region_desc->size); \ -+ MC_RSP_OP(cmd, 2, 32, 4, enum dprc_region_type, region_desc->type);\ -+ MC_RSP_OP(cmd, 3, 0, 32, uint32_t, region_desc->flags);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_SET_OBJ_LABEL(cmd, obj_type, obj_id, label) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, obj_id); \ -+ MC_CMD_OP(cmd, 1, 0, 8, char, label[0]);\ -+ MC_CMD_OP(cmd, 1, 8, 8, char, label[1]);\ -+ MC_CMD_OP(cmd, 1, 16, 8, char, label[2]);\ -+ MC_CMD_OP(cmd, 1, 24, 8, char, label[3]);\ -+ MC_CMD_OP(cmd, 1, 32, 8, char, label[4]);\ -+ MC_CMD_OP(cmd, 1, 40, 8, char, label[5]);\ -+ MC_CMD_OP(cmd, 1, 48, 8, char, label[6]);\ -+ MC_CMD_OP(cmd, 1, 56, 8, char, label[7]);\ -+ MC_CMD_OP(cmd, 2, 0, 8, char, label[8]);\ -+ MC_CMD_OP(cmd, 2, 8, 8, char, label[9]);\ -+ MC_CMD_OP(cmd, 2, 16, 8, char, label[10]);\ -+ MC_CMD_OP(cmd, 2, 24, 8, char, label[11]);\ -+ MC_CMD_OP(cmd, 2, 32, 8, char, label[12]);\ -+ MC_CMD_OP(cmd, 2, 40, 8, char, label[13]);\ -+ MC_CMD_OP(cmd, 2, 48, 8, char, label[14]);\ -+ MC_CMD_OP(cmd, 2, 56, 8, char, label[15]);\ -+ MC_CMD_OP(cmd, 3, 0, 8, char, obj_type[0]);\ -+ MC_CMD_OP(cmd, 3, 8, 8, char, obj_type[1]);\ -+ MC_CMD_OP(cmd, 3, 16, 8, char, obj_type[2]);\ -+ MC_CMD_OP(cmd, 3, 24, 8, char, obj_type[3]);\ -+ MC_CMD_OP(cmd, 3, 32, 8, char, obj_type[4]);\ -+ MC_CMD_OP(cmd, 3, 40, 8, char, obj_type[5]);\ -+ MC_CMD_OP(cmd, 3, 48, 8, char, obj_type[6]);\ -+ MC_CMD_OP(cmd, 3, 56, 8, char, obj_type[7]);\ -+ MC_CMD_OP(cmd, 4, 0, 8, char, obj_type[8]);\ -+ MC_CMD_OP(cmd, 4, 8, 8, char, obj_type[9]);\ -+ MC_CMD_OP(cmd, 4, 16, 8, char, obj_type[10]);\ -+ MC_CMD_OP(cmd, 4, 24, 8, char, obj_type[11]);\ -+ MC_CMD_OP(cmd, 4, 32, 8, char, obj_type[12]);\ -+ MC_CMD_OP(cmd, 4, 40, 8, char, obj_type[13]);\ -+ MC_CMD_OP(cmd, 4, 48, 8, char, obj_type[14]);\ -+ MC_CMD_OP(cmd, 4, 56, 8, char, obj_type[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_SET_OBJ_IRQ(cmd, obj_type, obj_id, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_CMD_OP(cmd, 2, 32, 32, int, obj_id); \ -+ MC_CMD_OP(cmd, 3, 0, 8, char, obj_type[0]);\ -+ MC_CMD_OP(cmd, 3, 8, 8, char, obj_type[1]);\ -+ MC_CMD_OP(cmd, 3, 16, 8, char, obj_type[2]);\ -+ MC_CMD_OP(cmd, 3, 24, 8, char, obj_type[3]);\ -+ MC_CMD_OP(cmd, 3, 32, 8, char, obj_type[4]);\ -+ MC_CMD_OP(cmd, 3, 40, 8, char, obj_type[5]);\ -+ MC_CMD_OP(cmd, 3, 48, 8, char, obj_type[6]);\ -+ MC_CMD_OP(cmd, 3, 56, 8, char, obj_type[7]);\ -+ MC_CMD_OP(cmd, 4, 0, 8, char, obj_type[8]);\ -+ MC_CMD_OP(cmd, 4, 8, 8, char, obj_type[9]);\ -+ MC_CMD_OP(cmd, 4, 16, 8, char, obj_type[10]);\ -+ MC_CMD_OP(cmd, 4, 24, 8, char, obj_type[11]);\ -+ MC_CMD_OP(cmd, 4, 32, 8, char, obj_type[12]);\ -+ MC_CMD_OP(cmd, 4, 40, 8, char, obj_type[13]);\ -+ MC_CMD_OP(cmd, 4, 48, 8, char, obj_type[14]);\ -+ MC_CMD_OP(cmd, 4, 56, 8, char, obj_type[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define 
DPRC_CMD_GET_OBJ_IRQ(cmd, obj_type, obj_id, irq_index) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, obj_id); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+ MC_CMD_OP(cmd, 1, 0, 8, char, obj_type[0]);\ -+ MC_CMD_OP(cmd, 1, 8, 8, char, obj_type[1]);\ -+ MC_CMD_OP(cmd, 1, 16, 8, char, obj_type[2]);\ -+ MC_CMD_OP(cmd, 1, 24, 8, char, obj_type[3]);\ -+ MC_CMD_OP(cmd, 1, 32, 8, char, obj_type[4]);\ -+ MC_CMD_OP(cmd, 1, 40, 8, char, obj_type[5]);\ -+ MC_CMD_OP(cmd, 1, 48, 8, char, obj_type[6]);\ -+ MC_CMD_OP(cmd, 1, 56, 8, char, obj_type[7]);\ -+ MC_CMD_OP(cmd, 2, 0, 8, char, obj_type[8]);\ -+ MC_CMD_OP(cmd, 2, 8, 8, char, obj_type[9]);\ -+ MC_CMD_OP(cmd, 2, 16, 8, char, obj_type[10]);\ -+ MC_CMD_OP(cmd, 2, 24, 8, char, obj_type[11]);\ -+ MC_CMD_OP(cmd, 2, 32, 8, char, obj_type[12]);\ -+ MC_CMD_OP(cmd, 2, 40, 8, char, obj_type[13]);\ -+ MC_CMD_OP(cmd, 2, 48, 8, char, obj_type[14]);\ -+ MC_CMD_OP(cmd, 2, 56, 8, char, obj_type[15]);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_OBJ_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_CONNECT(cmd, endpoint1, endpoint2, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, endpoint1->id); \ -+ MC_CMD_OP(cmd, 0, 32, 16, uint16_t, endpoint1->if_id); \ -+ MC_CMD_OP(cmd, 1, 0, 32, int, endpoint2->id); \ -+ MC_CMD_OP(cmd, 1, 32, 16, uint16_t, endpoint2->if_id); \ -+ MC_CMD_OP(cmd, 2, 0, 8, char, endpoint1->type[0]); \ -+ MC_CMD_OP(cmd, 2, 8, 8, char, endpoint1->type[1]); \ -+ MC_CMD_OP(cmd, 2, 16, 8, char, endpoint1->type[2]); \ -+ MC_CMD_OP(cmd, 2, 24, 8, char, endpoint1->type[3]); \ -+ MC_CMD_OP(cmd, 2, 32, 8, char, endpoint1->type[4]); \ -+ MC_CMD_OP(cmd, 2, 40, 8, char, endpoint1->type[5]); \ -+ MC_CMD_OP(cmd, 2, 48, 8, char, endpoint1->type[6]); \ -+ MC_CMD_OP(cmd, 2, 56, 8, char, endpoint1->type[7]); \ -+ MC_CMD_OP(cmd, 3, 0, 8, char, endpoint1->type[8]); \ -+ MC_CMD_OP(cmd, 3, 8, 8, char, endpoint1->type[9]); \ -+ MC_CMD_OP(cmd, 3, 16, 8, char, endpoint1->type[10]); \ -+ MC_CMD_OP(cmd, 3, 24, 8, char, endpoint1->type[11]); \ -+ MC_CMD_OP(cmd, 3, 32, 8, char, endpoint1->type[12]); \ -+ MC_CMD_OP(cmd, 3, 40, 8, char, endpoint1->type[13]); \ -+ MC_CMD_OP(cmd, 3, 48, 8, char, endpoint1->type[14]); \ -+ MC_CMD_OP(cmd, 3, 56, 8, char, endpoint1->type[15]); \ -+ MC_CMD_OP(cmd, 4, 0, 32, uint32_t, cfg->max_rate); \ -+ MC_CMD_OP(cmd, 4, 32, 32, uint32_t, cfg->committed_rate); \ -+ MC_CMD_OP(cmd, 5, 0, 8, char, endpoint2->type[0]); \ -+ MC_CMD_OP(cmd, 5, 8, 8, char, endpoint2->type[1]); \ -+ MC_CMD_OP(cmd, 5, 16, 8, char, endpoint2->type[2]); \ -+ MC_CMD_OP(cmd, 5, 24, 8, char, endpoint2->type[3]); \ -+ MC_CMD_OP(cmd, 5, 32, 8, char, endpoint2->type[4]); \ -+ MC_CMD_OP(cmd, 5, 40, 8, char, endpoint2->type[5]); \ -+ MC_CMD_OP(cmd, 5, 48, 8, char, endpoint2->type[6]); \ -+ MC_CMD_OP(cmd, 5, 56, 8, char, endpoint2->type[7]); \ -+ MC_CMD_OP(cmd, 6, 0, 8, char, endpoint2->type[8]); \ -+ MC_CMD_OP(cmd, 6, 8, 8, char, endpoint2->type[9]); \ -+ MC_CMD_OP(cmd, 6, 16, 8, char, endpoint2->type[10]); \ -+ MC_CMD_OP(cmd, 6, 24, 8, char, endpoint2->type[11]); \ -+ MC_CMD_OP(cmd, 6, 32, 8, char, endpoint2->type[12]); \ -+ MC_CMD_OP(cmd, 6, 40, 8, char, endpoint2->type[13]); \ -+ MC_CMD_OP(cmd, 6, 48, 8, char, endpoint2->type[14]); \ -+ MC_CMD_OP(cmd, 6, 
56, 8, char, endpoint2->type[15]); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_DISCONNECT(cmd, endpoint) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, endpoint->id); \ -+ MC_CMD_OP(cmd, 0, 32, 16, uint16_t, endpoint->if_id); \ -+ MC_CMD_OP(cmd, 1, 0, 8, char, endpoint->type[0]); \ -+ MC_CMD_OP(cmd, 1, 8, 8, char, endpoint->type[1]); \ -+ MC_CMD_OP(cmd, 1, 16, 8, char, endpoint->type[2]); \ -+ MC_CMD_OP(cmd, 1, 24, 8, char, endpoint->type[3]); \ -+ MC_CMD_OP(cmd, 1, 32, 8, char, endpoint->type[4]); \ -+ MC_CMD_OP(cmd, 1, 40, 8, char, endpoint->type[5]); \ -+ MC_CMD_OP(cmd, 1, 48, 8, char, endpoint->type[6]); \ -+ MC_CMD_OP(cmd, 1, 56, 8, char, endpoint->type[7]); \ -+ MC_CMD_OP(cmd, 2, 0, 8, char, endpoint->type[8]); \ -+ MC_CMD_OP(cmd, 2, 8, 8, char, endpoint->type[9]); \ -+ MC_CMD_OP(cmd, 2, 16, 8, char, endpoint->type[10]); \ -+ MC_CMD_OP(cmd, 2, 24, 8, char, endpoint->type[11]); \ -+ MC_CMD_OP(cmd, 2, 32, 8, char, endpoint->type[12]); \ -+ MC_CMD_OP(cmd, 2, 40, 8, char, endpoint->type[13]); \ -+ MC_CMD_OP(cmd, 2, 48, 8, char, endpoint->type[14]); \ -+ MC_CMD_OP(cmd, 2, 56, 8, char, endpoint->type[15]); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_CMD_GET_CONNECTION(cmd, endpoint1) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, endpoint1->id); \ -+ MC_CMD_OP(cmd, 0, 32, 16, uint16_t, endpoint1->if_id); \ -+ MC_CMD_OP(cmd, 1, 0, 8, char, endpoint1->type[0]); \ -+ MC_CMD_OP(cmd, 1, 8, 8, char, endpoint1->type[1]); \ -+ MC_CMD_OP(cmd, 1, 16, 8, char, endpoint1->type[2]); \ -+ MC_CMD_OP(cmd, 1, 24, 8, char, endpoint1->type[3]); \ -+ MC_CMD_OP(cmd, 1, 32, 8, char, endpoint1->type[4]); \ -+ MC_CMD_OP(cmd, 1, 40, 8, char, endpoint1->type[5]); \ -+ MC_CMD_OP(cmd, 1, 48, 8, char, endpoint1->type[6]); \ -+ MC_CMD_OP(cmd, 1, 56, 8, char, endpoint1->type[7]); \ -+ MC_CMD_OP(cmd, 2, 0, 8, char, endpoint1->type[8]); \ -+ MC_CMD_OP(cmd, 2, 8, 8, char, endpoint1->type[9]); \ -+ MC_CMD_OP(cmd, 2, 16, 8, char, endpoint1->type[10]); \ -+ MC_CMD_OP(cmd, 2, 24, 8, char, endpoint1->type[11]); \ -+ MC_CMD_OP(cmd, 2, 32, 8, char, endpoint1->type[12]); \ -+ MC_CMD_OP(cmd, 2, 40, 8, char, endpoint1->type[13]); \ -+ MC_CMD_OP(cmd, 2, 48, 8, char, endpoint1->type[14]); \ -+ MC_CMD_OP(cmd, 2, 56, 8, char, endpoint1->type[15]); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRC_RSP_GET_CONNECTION(cmd, endpoint2, state) \ -+do { \ -+ MC_RSP_OP(cmd, 3, 0, 32, int, endpoint2->id); \ -+ MC_RSP_OP(cmd, 3, 32, 16, uint16_t, endpoint2->if_id); \ -+ MC_RSP_OP(cmd, 4, 0, 8, char, endpoint2->type[0]); \ -+ MC_RSP_OP(cmd, 4, 8, 8, char, endpoint2->type[1]); \ -+ MC_RSP_OP(cmd, 4, 16, 8, char, endpoint2->type[2]); \ -+ MC_RSP_OP(cmd, 4, 24, 8, char, endpoint2->type[3]); \ -+ MC_RSP_OP(cmd, 4, 32, 8, char, endpoint2->type[4]); \ -+ MC_RSP_OP(cmd, 4, 40, 8, char, endpoint2->type[5]); \ -+ MC_RSP_OP(cmd, 4, 48, 8, char, endpoint2->type[6]); \ -+ MC_RSP_OP(cmd, 4, 56, 8, char, endpoint2->type[7]); \ -+ MC_RSP_OP(cmd, 5, 0, 8, char, endpoint2->type[8]); \ -+ MC_RSP_OP(cmd, 5, 8, 8, char, endpoint2->type[9]); \ -+ MC_RSP_OP(cmd, 5, 16, 8, char, endpoint2->type[10]); \ -+ MC_RSP_OP(cmd, 5, 24, 8, char, endpoint2->type[11]); \ -+ MC_RSP_OP(cmd, 5, 32, 8, char, endpoint2->type[12]); \ -+ MC_RSP_OP(cmd, 5, 40, 8, char, endpoint2->type[13]); \ -+ MC_RSP_OP(cmd, 5, 48, 8, char, endpoint2->type[14]); \ -+ MC_RSP_OP(cmd, 5, 56, 8, char, endpoint2->type[15]); \ -+ MC_RSP_OP(cmd, 6, 0, 32, int, state); \ -+} while (0) -+ -+#endif /* 
_FSL_DPRC_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dprtc.h b/drivers/net/dpaa2/mc/fsl_dprtc.h -new file mode 100644 -index 0000000..cad0693 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dprtc.h -@@ -0,0 +1,434 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPRTC_H -+#define __FSL_DPRTC_H -+ -+/* Data Path Real Time Counter API -+ * Contains initialization APIs and runtime control APIs for RTC -+ */ -+ -+struct fsl_mc_io; -+ -+/** -+ * Number of irq's -+ */ -+#define DPRTC_MAX_IRQ_NUM 1 -+#define DPRTC_IRQ_INDEX 0 -+ -+/** -+ * Interrupt event masks: -+ */ -+ -+/** -+ * Interrupt event mask indicating alarm event had occurred -+ */ -+#define DPRTC_EVENT_ALARM 0x40000000 -+/** -+ * Interrupt event mask indicating periodic pulse event had occurred -+ */ -+#define DPRTC_EVENT_PPS 0x08000000 -+ -+/** -+ * dprtc_open() - Open a control session for the specified object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dprtc_id: DPRTC unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dprtc_create function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object -+ * -+ * Return: '0' on Success; Error code otherwise. 
-+ */ -+int dprtc_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dprtc_id, -+ uint16_t *token); -+ -+/** -+ * dprtc_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRTC object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprtc_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dprtc_cfg - Structure representing DPRTC configuration -+ * @options: place holder -+ */ -+struct dprtc_cfg { -+ uint32_t options; -+}; -+ -+/** -+ * dprtc_create() - Create the DPRTC object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPRTC object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. For objects that are created using the -+ * DPL file, call dprtc_open function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprtc_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dprtc_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dprtc_destroy() - Destroy the DPRTC object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRTC object -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dprtc_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dprtc_set_clock_offset() - Sets the clock's offset -+ * (usually relative to another clock). -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRTC object -+ * @offset: New clock offset (in nanoseconds). -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprtc_set_clock_offset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int64_t offset); -+ -+/** -+ * dprtc_set_freq_compensation() - Sets a new frequency compensation value. -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRTC object -+ * @freq_compensation: -+ * The new frequency compensation value to set. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprtc_set_freq_compensation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint32_t freq_compensation); -+ -+/** -+ * dprtc_get_freq_compensation() - Retrieves the frequency compensation value -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRTC object -+ * @freq_compensation: -+ * Frequency compensation value -+ * -+ * Return: '0' on Success; Error code otherwise. 
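-+ *
-+ * Example: a clock-servo step that reads the current trim and nudges it
-+ * (an illustrative sketch only; 'mc_io' and 'token' are as above, a plain
-+ * blocking cmd_flags of 0 is assumed, and the encoding of the compensation
-+ * word is defined by the MC firmware):
-+ *
-+ *	uint32_t comp;
-+ *
-+ *	if (!dprtc_get_freq_compensation(mc_io, 0, token, &comp)) {
-+ *		comp += trim;	// 'trim' computed by the caller's servo
-+ *		dprtc_set_freq_compensation(mc_io, 0, token, comp);
-+ *	}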
-+ */
-+int dprtc_get_freq_compensation(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint32_t *freq_compensation);
-+
-+/**
-+ * dprtc_get_time() - Returns the current RTC time.
-+ *
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRTC object
-+ * @time: Current RTC time.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprtc_get_time(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint64_t *time);
-+
-+/**
-+ * dprtc_set_time() - Updates current RTC time.
-+ *
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRTC object
-+ * @time: New RTC time.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprtc_set_time(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint64_t time);
-+
-+/**
-+ * dprtc_set_alarm() - Defines and sets alarm.
-+ *
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRTC object
-+ * @time: In nanoseconds, the time when the alarm
-+ * should go off - must be a multiple of
-+ * 1 microsecond
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprtc_set_alarm(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint64_t time);
-+
-+/**
-+ * struct dprtc_irq_cfg - IRQ configuration
-+ * @addr: Address that must be written to signal a message-based interrupt
-+ * @val: Value to write into irq_addr address
-+ * @irq_num: A user defined number associated with this IRQ
-+ */
-+struct dprtc_irq_cfg {
-+ uint64_t addr;
-+ uint32_t val;
-+ int irq_num;
-+};
-+
-+/**
-+ * dprtc_set_irq() - Set IRQ information for the DPRTC to trigger an interrupt.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRTC object
-+ * @irq_index: Identifies the interrupt index to configure
-+ * @irq_cfg: IRQ configuration
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprtc_set_irq(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint8_t irq_index,
-+ struct dprtc_irq_cfg *irq_cfg);
-+
-+/**
-+ * dprtc_get_irq() - Get IRQ information from the DPRTC.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRTC object
-+ * @irq_index: The interrupt index to configure
-+ * @type: Interrupt type: 0 represents message interrupt
-+ * type (both irq_addr and irq_val are valid)
-+ * @irq_cfg: IRQ attributes
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dprtc_get_irq(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint8_t irq_index,
-+ int *type,
-+ struct dprtc_irq_cfg *irq_cfg);
-+
-+/**
-+ * dprtc_set_irq_enable() - Set overall interrupt state.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPRTC object
-+ * @irq_index: The interrupt index to configure
-+ * @en: Interrupt state - enable = 1, disable = 0
-+ *
-+ * Allows GPP software to control when interrupts are generated.
-+ * Each interrupt can have up to 32 causes. The enable/disable controls the
-+ * overall interrupt state. If the interrupt is disabled no causes will cause
-+ * an interrupt.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
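-+ *
-+ * Example: typical bring-up order is dprtc_set_irq(), then unmask the
-+ * wanted causes, then enable the index (a sketch; the 'msi_addr' and
-+ * 'msi_data' values are hypothetical and would normally come from the
-+ * bus layer, and a blocking cmd_flags of 0 is assumed):
-+ *
-+ *	struct dprtc_irq_cfg cfg = {
-+ *		.addr = msi_addr,	// where the MC writes the MSI
-+ *		.val = msi_data,	// the value it writes there
-+ *		.irq_num = 0,
-+ *	};
-+ *
-+ *	dprtc_set_irq(mc_io, 0, token, DPRTC_IRQ_INDEX, &cfg);
-+ *	dprtc_set_irq_mask(mc_io, 0, token, DPRTC_IRQ_INDEX,
-+ *			   DPRTC_EVENT_ALARM);
-+ *	dprtc_set_irq_enable(mc_io, 0, token, DPRTC_IRQ_INDEX, 1);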
-+ */ -+int dprtc_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dprtc_get_irq_enable() - Get overall interrupt state -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRTC object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprtc_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dprtc_set_irq_mask() - Set interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRTC object -+ * @irq_index: The interrupt index to configure -+ * @mask: Event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprtc_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dprtc_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRTC object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprtc_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dprtc_get_irq_status() - Get the current status of any pending interrupts. -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRTC object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprtc_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dprtc_clear_irq_status() - Clear a pending interrupt's status -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRTC object -+ * @irq_index: The interrupt index to configure -+ * @status: Bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. 
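-+ *
-+ * Example: a handler would read the pending causes, service them, and
-+ * acknowledge with a write-1-to-clear (a sketch; blocking cmd_flags of 0
-+ * assumed):
-+ *
-+ *	uint32_t status = 0;
-+ *
-+ *	dprtc_get_irq_status(mc_io, 0, token, DPRTC_IRQ_INDEX, &status);
-+ *	if (status & DPRTC_EVENT_ALARM) {
-+ *		// ... service the alarm ...
-+ *		dprtc_clear_irq_status(mc_io, 0, token, DPRTC_IRQ_INDEX,
-+ *				       DPRTC_EVENT_ALARM);
-+ *	}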
-+ */ -+int dprtc_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+ -+/** -+ * struct dprtc_attr - Structure representing DPRTC attributes -+ * @id: DPRTC object ID -+ * @version: DPRTC version -+ */ -+struct dprtc_attr { -+ int id; -+ /** -+ * struct version - Structure representing DPRTC version -+ * @major: DPRTC major version -+ * @minor: DPRTC minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+}; -+ -+/** -+ * dprtc_get_attributes - Retrieve DPRTC attributes. -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPRTC object -+ * @attr: Returned object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dprtc_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dprtc_attr *attr); -+ -+#endif /* __FSL_DPRTC_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dprtc_cmd.h b/drivers/net/dpaa2/mc/fsl_dprtc_cmd.h -new file mode 100644 -index 0000000..aeccece ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dprtc_cmd.h -@@ -0,0 +1,181 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+#ifndef _FSL_DPRTC_CMD_H -+#define _FSL_DPRTC_CMD_H -+ -+/* DPRTC Version */ -+#define DPRTC_VER_MAJOR 1 -+#define DPRTC_VER_MINOR 0 -+ -+/* Command IDs */ -+#define DPRTC_CMDID_CLOSE 0x800 -+#define DPRTC_CMDID_OPEN 0x810 -+#define DPRTC_CMDID_CREATE 0x910 -+#define DPRTC_CMDID_DESTROY 0x900 -+ -+#define DPRTC_CMDID_ENABLE 0x002 -+#define DPRTC_CMDID_DISABLE 0x003 -+#define DPRTC_CMDID_GET_ATTR 0x004 -+#define DPRTC_CMDID_RESET 0x005 -+#define DPRTC_CMDID_IS_ENABLED 0x006 -+ -+#define DPRTC_CMDID_SET_IRQ 0x010 -+#define DPRTC_CMDID_GET_IRQ 0x011 -+#define DPRTC_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPRTC_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPRTC_CMDID_SET_IRQ_MASK 0x014 -+#define DPRTC_CMDID_GET_IRQ_MASK 0x015 -+#define DPRTC_CMDID_GET_IRQ_STATUS 0x016 -+#define DPRTC_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPRTC_CMDID_SET_CLOCK_OFFSET 0x1d0 -+#define DPRTC_CMDID_SET_FREQ_COMPENSATION 0x1d1 -+#define DPRTC_CMDID_GET_FREQ_COMPENSATION 0x1d2 -+#define DPRTC_CMDID_GET_TIME 0x1d3 -+#define DPRTC_CMDID_SET_TIME 0x1d4 -+#define DPRTC_CMDID_SET_ALARM 0x1d5 -+#define DPRTC_CMDID_SET_PERIODIC_PULSE 0x1d6 -+#define DPRTC_CMDID_CLEAR_PERIODIC_PULSE 0x1d7 -+#define DPRTC_CMDID_SET_EXT_TRIGGER 0x1d8 -+#define DPRTC_CMDID_CLEAR_EXT_TRIGGER 0x1d9 -+#define DPRTC_CMDID_GET_EXT_TRIGGER_TIMESTAMP 0x1dA -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_OPEN(cmd, dpbp_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpbp_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_RSP_IS_ENABLED(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr); \ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_SET_IRQ_ENABLE(cmd, irq_index, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, en); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_RSP_GET_IRQ_ENABLE(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_GET_IRQ_STATUS(cmd, 
irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_RSP_GET_ATTRIBUTES(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 32, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_SET_CLOCK_OFFSET(cmd, offset) \ -+ MC_CMD_OP(cmd, 0, 0, 64, int64_t, offset) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_SET_FREQ_COMPENSATION(cmd, freq_compensation) \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, freq_compensation) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_RSP_GET_FREQ_COMPENSATION(cmd, freq_compensation) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, freq_compensation) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_RSP_GET_TIME(cmd, time) \ -+ MC_RSP_OP(cmd, 0, 0, 64, uint64_t, time) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_SET_TIME(cmd, time) \ -+ MC_CMD_OP(cmd, 0, 0, 64, uint64_t, time) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPRTC_CMD_SET_ALARM(cmd, time) \ -+ MC_CMD_OP(cmd, 0, 0, 64, uint64_t, time) -+ -+#endif /* _FSL_DPRTC_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpseci.h b/drivers/net/dpaa2/mc/fsl_dpseci.h -new file mode 100644 -index 0000000..1dd7215 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpseci.h -@@ -0,0 +1,647 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_DPSECI_H -+#define __FSL_DPSECI_H -+ -+/* Data Path SEC Interface API -+ * Contains initialization APIs and runtime control APIs for DPSECI -+ */ -+ -+struct fsl_mc_io; -+ -+/** -+ * General DPSECI macros -+ */ -+ -+/** -+ * Maximum number of Tx/Rx priorities per DPSECI object -+ */ -+#define DPSECI_PRIO_NUM 8 -+ -+/** -+ * All queues considered; see dpseci_set_rx_queue() -+ */ -+#define DPSECI_ALL_QUEUES (uint8_t)(-1) -+ -+/** -+ * dpseci_open() - Open a control session for the specified object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @dpseci_id: DPSECI unique ID -+ * @token: Returned token; use in subsequent API calls -+ * -+ * This function can be used to open a control session for an -+ * already created object; an object may have been declared in -+ * the DPL or by calling the dpseci_create() function. -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent commands for -+ * this specific object. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_open(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ int dpseci_id, -+ uint16_t *token); -+ -+/** -+ * dpseci_close() - Close the control session of the object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * -+ * After this function is called, no further operations are -+ * allowed on the object without opening a new control session. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpseci_cfg - Structure representing DPSECI configuration -+ * @num_tx_queues: num of queues towards the SEC -+ * @num_rx_queues: num of queues back from the SEC -+ * @priorities: Priorities for the SEC hardware processing; -+ * each place in the array is the priority of the tx queue -+ * towards the SEC, -+ * valid priorities are configured with values 1-8; -+ */ -+struct dpseci_cfg { -+ uint8_t num_tx_queues; -+ uint8_t num_rx_queues; -+ uint8_t priorities[DPSECI_PRIO_NUM]; -+}; -+ -+/** -+ * dpseci_create() - Create the DPSECI object -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPSECI object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. 
For objects that are created using the -+ * DPL file, call dpseci_open() function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpseci_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpseci_destroy() - Destroy the DPSECI object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * -+ * Return: '0' on Success; error code otherwise. -+ */ -+int dpseci_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpseci_enable() - Enable the DPSECI, allow sending and receiving frames. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpseci_disable() - Disable the DPSECI, stop sending and receiving frames. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpseci_is_enabled() - Check if the DPSECI is enabled. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * @en: Returns '1' if object is enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpseci_reset() - Reset the DPSECI, returns the object to initial state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * struct dpseci_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpseci_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpseci_set_irq() - Set IRQ information for the DPSECI to trigger an interrupt -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. 
-+ */
-+int dpseci_set_irq(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint8_t irq_index,
-+ struct dpseci_irq_cfg *irq_cfg);
-+
-+/**
-+ * dpseci_get_irq() - Get IRQ information from the DPSECI
-+ *
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPSECI object
-+ * @irq_index: The interrupt index to configure
-+ * @type: Interrupt type: 0 represents message interrupt
-+ * type (both irq_addr and irq_val are valid)
-+ * @irq_cfg: IRQ attributes
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpseci_get_irq(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint8_t irq_index,
-+ int *type,
-+ struct dpseci_irq_cfg *irq_cfg);
-+
-+/**
-+ * dpseci_set_irq_enable() - Set overall interrupt state.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPSECI object
-+ * @irq_index: The interrupt index to configure
-+ * @en: Interrupt state - enable = 1, disable = 0
-+ *
-+ * Allows GPP software to control when interrupts are generated.
-+ * Each interrupt can have up to 32 causes. The enable/disable controls the
-+ * overall interrupt state. If the interrupt is disabled no causes will cause
-+ * an interrupt.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpseci_set_irq_enable(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint8_t irq_index,
-+ uint8_t en);
-+
-+/**
-+ * dpseci_get_irq_enable() - Get overall interrupt state
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPSECI object
-+ * @irq_index: The interrupt index to configure
-+ * @en: Returned interrupt state - enable = 1, disable = 0
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpseci_get_irq_enable(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint8_t irq_index,
-+ uint8_t *en);
-+
-+/**
-+ * dpseci_set_irq_mask() - Set interrupt mask.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPSECI object
-+ * @irq_index: The interrupt index to configure
-+ * @mask: Event mask to trigger interrupt;
-+ * each bit:
-+ * 0 = ignore event
-+ * 1 = consider event for asserting IRQ
-+ *
-+ * Every interrupt can have up to 32 causes and the interrupt model supports
-+ * masking/unmasking each cause independently
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpseci_set_irq_mask(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ uint16_t token,
-+ uint8_t irq_index,
-+ uint32_t mask);
-+
-+/**
-+ * dpseci_get_irq_mask() - Get interrupt mask.
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPSECI object
-+ * @irq_index: The interrupt index to configure
-+ * @mask: Returned event mask to trigger interrupt
-+ *
-+ * Every interrupt can have up to 32 causes and the interrupt model supports
-+ * masking/unmasking each cause independently
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */ -+int dpseci_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpseci_get_irq_status() - Get the current status of any pending interrupts -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpseci_clear_irq_status() - Clear a pending interrupt's status -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * @irq_index: The interrupt index to configure -+ * @status: bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+ -+/** -+ * struct dpseci_attr - Structure representing DPSECI attributes -+ * @id: DPSECI object ID -+ * @version: DPSECI version -+ * @num_tx_queues: number of queues towards the SEC -+ * @num_rx_queues: number of queues back from the SEC -+ */ -+struct dpseci_attr { -+ int id; -+ /** -+ * struct version - DPSECI version -+ * @major: DPSECI major version -+ * @minor: DPSECI minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+ uint8_t num_tx_queues; -+ uint8_t num_rx_queues; -+}; -+ -+/** -+ * dpseci_get_attributes() - Retrieve DPSECI attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * @attr: Returned object's attributes -+ * -+ * Return: '0' on Success; Error code otherwise. 
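-+ *
-+ * Example: queue counts are fixed at creation time, so a driver would
-+ * typically read them back once before configuring individual queues
-+ * (a sketch; blocking cmd_flags of 0 assumed, and setup_queues() is a
-+ * hypothetical caller-side helper):
-+ *
-+ *	struct dpseci_attr attr;
-+ *
-+ *	if (!dpseci_get_attributes(mc_io, 0, token, &attr))
-+ *		setup_queues(attr.num_tx_queues, attr.num_rx_queues);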
-+ */ -+int dpseci_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpseci_attr *attr); -+ -+/** -+ * enum dpseci_dest - DPSECI destination types -+ * @DPSECI_DEST_NONE: Unassigned destination; The queue is set in parked mode -+ * and does not generate FQDAN notifications; user is expected to -+ * dequeue from the queue based on polling or other user-defined -+ * method -+ * @DPSECI_DEST_DPIO: The queue is set in schedule mode and generates FQDAN -+ * notifications to the specified DPIO; user is expected to dequeue -+ * from the queue only after notification is received -+ * @DPSECI_DEST_DPCON: The queue is set in schedule mode and does not generate -+ * FQDAN notifications, but is connected to the specified DPCON -+ * object; user is expected to dequeue from the DPCON channel -+ */ -+enum dpseci_dest { -+ DPSECI_DEST_NONE = 0, -+ DPSECI_DEST_DPIO = 1, -+ DPSECI_DEST_DPCON = 2 -+}; -+ -+/** -+ * struct dpseci_dest_cfg - Structure representing DPSECI destination parameters -+ * @dest_type: Destination type -+ * @dest_id: Either DPIO ID or DPCON ID, depending on the destination type -+ * @priority: Priority selection within the DPIO or DPCON channel; valid values -+ * are 0-1 or 0-7, depending on the number of priorities in that -+ * channel; not relevant for 'DPSECI_DEST_NONE' option -+ */ -+struct dpseci_dest_cfg { -+ enum dpseci_dest dest_type; -+ int dest_id; -+ uint8_t priority; -+}; -+ -+/** -+ * DPSECI queue modification options -+ */ -+ -+/** -+ * Select to modify the user's context associated with the queue -+ */ -+#define DPSECI_QUEUE_OPT_USER_CTX 0x00000001 -+ -+/** -+ * Select to modify the queue's destination -+ */ -+#define DPSECI_QUEUE_OPT_DEST 0x00000002 -+ -+/** -+ * Select to modify the queue's order preservation -+ */ -+#define DPSECI_QUEUE_OPT_ORDER_PRESERVATION 0x00000004 -+ -+/** -+ * struct dpseci_rx_queue_cfg - DPSECI RX queue configuration -+ * @options: Flags representing the suggested modifications to the queue; -+ * Use any combination of 'DPSECI_QUEUE_OPT_' flags -+ * @order_preservation_en: order preservation configuration for the rx queue -+ * valid only if 'DPSECI_QUEUE_OPT_ORDER_PRESERVATION' is contained in 'options' -+ * @user_ctx: User context value provided in the frame descriptor of each -+ * dequeued frame; -+ * valid only if 'DPSECI_QUEUE_OPT_USER_CTX' is contained in 'options' -+ * @dest_cfg: Queue destination parameters; -+ * valid only if 'DPSECI_QUEUE_OPT_DEST' is contained in 'options' -+ */ -+struct dpseci_rx_queue_cfg { -+ uint32_t options; -+ int order_preservation_en; -+ uint64_t user_ctx; -+ struct dpseci_dest_cfg dest_cfg; -+}; -+ -+/** -+ * dpseci_set_rx_queue() - Set Rx queue configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * @queue: Select the queue relative to number of -+ * priorities configured at DPSECI creation; use -+ * DPSECI_ALL_QUEUES to configure all Rx queues identically. -+ * @cfg: Rx queue configuration -+ * -+ * Return: '0' on Success; Error code otherwise. 
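-+ *
-+ * Example: point every Rx queue at one DPIO so dequeues are driven by
-+ * FQDAN notifications (a sketch; 'dpio_id' is a hypothetical identifier
-+ * obtained from the DPIO driver, and blocking cmd_flags of 0 is assumed):
-+ *
-+ *	struct dpseci_rx_queue_cfg cfg = {
-+ *		.options = DPSECI_QUEUE_OPT_DEST,  // touch only the destination
-+ *		.dest_cfg = {
-+ *			.dest_type = DPSECI_DEST_DPIO,
-+ *			.dest_id = dpio_id,
-+ *			.priority = 0,
-+ *		},
-+ *	};
-+ *
-+ *	dpseci_set_rx_queue(mc_io, 0, token, DPSECI_ALL_QUEUES, &cfg);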
-+ */ -+int dpseci_set_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t queue, -+ const struct dpseci_rx_queue_cfg *cfg); -+ -+/** -+ * struct dpseci_rx_queue_attr - Structure representing attributes of Rx queues -+ * @user_ctx: User context value provided in the frame descriptor of each -+ * dequeued frame -+ * @order_preservation_en: Status of the order preservation configuration -+ * on the queue -+ * @dest_cfg: Queue destination configuration -+ * @fqid: Virtual FQID value to be used for dequeue operations -+ */ -+struct dpseci_rx_queue_attr { -+ uint64_t user_ctx; -+ int order_preservation_en; -+ struct dpseci_dest_cfg dest_cfg; -+ uint32_t fqid; -+}; -+ -+/** -+ * dpseci_get_rx_queue() - Retrieve Rx queue attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * @queue: Select the queue relative to number of -+ * priorities configured at DPSECI creation -+ * @attr: Returned Rx queue attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_get_rx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t queue, -+ struct dpseci_rx_queue_attr *attr); -+ -+/** -+ * struct dpseci_tx_queue_attr - Structure representing attributes of Tx queues -+ * @fqid: Virtual FQID to be used for sending frames to SEC hardware -+ * @priority: SEC hardware processing priority for the queue -+ */ -+struct dpseci_tx_queue_attr { -+ uint32_t fqid; -+ uint8_t priority; -+}; -+ -+/** -+ * dpseci_get_tx_queue() - Retrieve Tx queue attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * @queue: Select the queue relative to number of -+ * priorities configured at DPSECI creation -+ * @attr: Returned Tx queue attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_get_tx_queue(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t queue, -+ struct dpseci_tx_queue_attr *attr); -+ -+/** -+ * struct dpseci_sec_attr - Structure representing attributes of the SEC -+ * hardware accelerator -+ * @ip_id: ID for SEC. -+ * @major_rev: Major revision number for SEC. -+ * @minor_rev: Minor revision number for SEC. -+ * @era: SEC Era. -+ * @deco_num: The number of copies of the DECO that are implemented in -+ * this version of SEC. -+ * @zuc_auth_acc_num: The number of copies of ZUCA that are implemented -+ * in this version of SEC. -+ * @zuc_enc_acc_num: The number of copies of ZUCE that are implemented -+ * in this version of SEC. -+ * @snow_f8_acc_num: The number of copies of the SNOW-f8 module that are -+ * implemented in this version of SEC. -+ * @snow_f9_acc_num: The number of copies of the SNOW-f9 module that are -+ * implemented in this version of SEC. -+ * @crc_acc_num: The number of copies of the CRC module that are implemented -+ * in this version of SEC. -+ * @pk_acc_num: The number of copies of the Public Key module that are -+ * implemented in this version of SEC. -+ * @kasumi_acc_num: The number of copies of the Kasumi module that are -+ * implemented in this version of SEC. -+ * @rng_acc_num: The number of copies of the Random Number Generator that are -+ * implemented in this version of SEC. -+ * @md_acc_num: The number of copies of the MDHA (Hashing module) that are -+ * implemented in this version of SEC. 
-+ * @arc4_acc_num: The number of copies of the ARC4 module that are implemented -+ * in this version of SEC. -+ * @des_acc_num: The number of copies of the DES module that are implemented -+ * in this version of SEC. -+ * @aes_acc_num: The number of copies of the AES module that are implemented -+ * in this version of SEC. -+ **/ -+ -+struct dpseci_sec_attr { -+ uint16_t ip_id; -+ uint8_t major_rev; -+ uint8_t minor_rev; -+ uint8_t era; -+ uint8_t deco_num; -+ uint8_t zuc_auth_acc_num; -+ uint8_t zuc_enc_acc_num; -+ uint8_t snow_f8_acc_num; -+ uint8_t snow_f9_acc_num; -+ uint8_t crc_acc_num; -+ uint8_t pk_acc_num; -+ uint8_t kasumi_acc_num; -+ uint8_t rng_acc_num; -+ uint8_t md_acc_num; -+ uint8_t arc4_acc_num; -+ uint8_t des_acc_num; -+ uint8_t aes_acc_num; -+}; -+ -+/** -+ * dpseci_get_sec_attr() - Retrieve SEC accelerator attributes. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * @attr: Returned SEC attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_get_sec_attr(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpseci_sec_attr *attr); -+ -+/** -+ * struct dpseci_sec_counters - Structure representing global SEC counters and -+ * not per dpseci counters -+ * @dequeued_requests: Number of Requests Dequeued -+ * @ob_enc_requests: Number of Outbound Encrypt Requests -+ * @ib_dec_requests: Number of Inbound Decrypt Requests -+ * @ob_enc_bytes: Number of Outbound Bytes Encrypted -+ * @ob_prot_bytes: Number of Outbound Bytes Protected -+ * @ib_dec_bytes: Number of Inbound Bytes Decrypted -+ * @ib_valid_bytes: Number of Inbound Bytes Validated -+ */ -+struct dpseci_sec_counters { -+ uint64_t dequeued_requests; -+ uint64_t ob_enc_requests; -+ uint64_t ib_dec_requests; -+ uint64_t ob_enc_bytes; -+ uint64_t ob_prot_bytes; -+ uint64_t ib_dec_bytes; -+ uint64_t ib_valid_bytes; -+}; -+ -+/** -+ * dpseci_get_sec_counters() - Retrieve SEC accelerator counters. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSECI object -+ * @counters: Returned SEC counters -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpseci_get_sec_counters(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpseci_sec_counters *counters); -+ -+#endif /* __FSL_DPSECI_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpseci_cmd.h b/drivers/net/dpaa2/mc/fsl_dpseci_cmd.h -new file mode 100644 -index 0000000..6c0b96e ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpseci_cmd.h -@@ -0,0 +1,241 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. 
-+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef _FSL_DPSECI_CMD_H -+#define _FSL_DPSECI_CMD_H -+ -+/* DPSECI Version */ -+#define DPSECI_VER_MAJOR 3 -+#define DPSECI_VER_MINOR 1 -+ -+/* Command IDs */ -+#define DPSECI_CMDID_CLOSE 0x800 -+#define DPSECI_CMDID_OPEN 0x809 -+#define DPSECI_CMDID_CREATE 0x909 -+#define DPSECI_CMDID_DESTROY 0x900 -+ -+#define DPSECI_CMDID_ENABLE 0x002 -+#define DPSECI_CMDID_DISABLE 0x003 -+#define DPSECI_CMDID_GET_ATTR 0x004 -+#define DPSECI_CMDID_RESET 0x005 -+#define DPSECI_CMDID_IS_ENABLED 0x006 -+ -+#define DPSECI_CMDID_SET_IRQ 0x010 -+#define DPSECI_CMDID_GET_IRQ 0x011 -+#define DPSECI_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPSECI_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPSECI_CMDID_SET_IRQ_MASK 0x014 -+#define DPSECI_CMDID_GET_IRQ_MASK 0x015 -+#define DPSECI_CMDID_GET_IRQ_STATUS 0x016 -+#define DPSECI_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPSECI_CMDID_SET_RX_QUEUE 0x194 -+#define DPSECI_CMDID_GET_RX_QUEUE 0x196 -+#define DPSECI_CMDID_GET_TX_QUEUE 0x197 -+#define DPSECI_CMDID_GET_SEC_ATTR 0x198 -+#define DPSECI_CMDID_GET_SEC_COUNTERS 0x199 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_OPEN(cmd, dpseci_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpseci_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_CREATE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, cfg->priorities[0]);\ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, cfg->priorities[1]);\ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->priorities[2]);\ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, cfg->priorities[3]);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->priorities[4]);\ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, cfg->priorities[5]);\ -+ MC_CMD_OP(cmd, 0, 48, 8, uint8_t, cfg->priorities[6]);\ -+ MC_CMD_OP(cmd, 0, 56, 8, uint8_t, cfg->priorities[7]);\ -+ MC_CMD_OP(cmd, 1, 0, 8, uint8_t, cfg->num_tx_queues);\ -+ MC_CMD_OP(cmd, 1, 8, 8, uint8_t, cfg->num_rx_queues);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_RSP_IS_ENABLED(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_GET_IRQ(cmd, irq_index) \ -+ 
MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_SET_IRQ_ENABLE(cmd, irq_index, enable_state) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, enable_state); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_RSP_GET_IRQ_ENABLE(cmd, enable_state) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, enable_state) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_RSP_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->id); \ -+ MC_RSP_OP(cmd, 1, 0, 8, uint8_t, attr->num_tx_queues); \ -+ MC_RSP_OP(cmd, 1, 8, 8, uint8_t, attr->num_rx_queues); \ -+ MC_RSP_OP(cmd, 5, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 5, 16, 16, uint16_t, attr->version.minor);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_SET_RX_QUEUE(cmd, queue, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, cfg->dest_cfg.dest_id); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->dest_cfg.priority); \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, queue); \ -+ MC_CMD_OP(cmd, 0, 48, 4, enum dpseci_dest, cfg->dest_cfg.dest_type); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, cfg->user_ctx); \ -+ MC_CMD_OP(cmd, 2, 0, 32, uint32_t, cfg->options);\ -+ MC_CMD_OP(cmd, 2, 32, 1, int, cfg->order_preservation_en);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_GET_RX_QUEUE(cmd, queue) \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, queue) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_RSP_GET_RX_QUEUE(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, int, attr->dest_cfg.dest_id);\ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, attr->dest_cfg.priority);\ -+ MC_RSP_OP(cmd, 0, 48, 4, enum dpseci_dest, attr->dest_cfg.dest_type);\ -+ MC_RSP_OP(cmd, 1, 0, 8, uint64_t, 
attr->user_ctx);\ -+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, attr->fqid);\ -+ MC_RSP_OP(cmd, 2, 32, 1, int, attr->order_preservation_en);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_CMD_GET_TX_QUEUE(cmd, queue) \ -+ MC_CMD_OP(cmd, 0, 40, 8, uint8_t, queue) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_RSP_GET_TX_QUEUE(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 32, 32, uint32_t, attr->fqid);\ -+ MC_RSP_OP(cmd, 1, 0, 8, uint8_t, attr->priority);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_RSP_GET_SEC_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 16, uint16_t, attr->ip_id);\ -+ MC_RSP_OP(cmd, 0, 16, 8, uint8_t, attr->major_rev);\ -+ MC_RSP_OP(cmd, 0, 24, 8, uint8_t, attr->minor_rev);\ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, attr->era);\ -+ MC_RSP_OP(cmd, 1, 0, 8, uint8_t, attr->deco_num);\ -+ MC_RSP_OP(cmd, 1, 8, 8, uint8_t, attr->zuc_auth_acc_num);\ -+ MC_RSP_OP(cmd, 1, 16, 8, uint8_t, attr->zuc_enc_acc_num);\ -+ MC_RSP_OP(cmd, 1, 32, 8, uint8_t, attr->snow_f8_acc_num);\ -+ MC_RSP_OP(cmd, 1, 40, 8, uint8_t, attr->snow_f9_acc_num);\ -+ MC_RSP_OP(cmd, 1, 48, 8, uint8_t, attr->crc_acc_num);\ -+ MC_RSP_OP(cmd, 2, 0, 8, uint8_t, attr->pk_acc_num);\ -+ MC_RSP_OP(cmd, 2, 8, 8, uint8_t, attr->kasumi_acc_num);\ -+ MC_RSP_OP(cmd, 2, 16, 8, uint8_t, attr->rng_acc_num);\ -+ MC_RSP_OP(cmd, 2, 32, 8, uint8_t, attr->md_acc_num);\ -+ MC_RSP_OP(cmd, 2, 40, 8, uint8_t, attr->arc4_acc_num);\ -+ MC_RSP_OP(cmd, 2, 48, 8, uint8_t, attr->des_acc_num);\ -+ MC_RSP_OP(cmd, 2, 56, 8, uint8_t, attr->aes_acc_num);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSECI_RSP_GET_SEC_COUNTERS(cmd, counters) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 64, uint64_t, counters->dequeued_requests);\ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, counters->ob_enc_requests);\ -+ MC_RSP_OP(cmd, 2, 0, 64, uint64_t, counters->ib_dec_requests);\ -+ MC_RSP_OP(cmd, 3, 0, 64, uint64_t, counters->ob_enc_bytes);\ -+ MC_RSP_OP(cmd, 4, 0, 64, uint64_t, counters->ob_prot_bytes);\ -+ MC_RSP_OP(cmd, 5, 0, 64, uint64_t, counters->ib_dec_bytes);\ -+ MC_RSP_OP(cmd, 6, 0, 64, uint64_t, counters->ib_valid_bytes);\ -+} while (0) -+ -+#endif /* _FSL_DPSECI_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpsw.h b/drivers/net/dpaa2/mc/fsl_dpsw.h -new file mode 100644 -index 0000000..9c1bd9d ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpsw.h -@@ -0,0 +1,2164 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. 
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
-+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-+ * POSSIBILITY OF SUCH DAMAGE.
-+ */
-+#ifndef __FSL_DPSW_H
-+#define __FSL_DPSW_H
-+
-+#include <fsl_net.h>
-+
-+/* Data Path L2-Switch API
-+ * Contains API for handling DPSW topology and functionality
-+ */
-+
-+struct fsl_mc_io;
-+
-+/**
-+ * DPSW general definitions
-+ */
-+
-+/**
-+ * Maximum number of traffic class priorities
-+ */
-+#define DPSW_MAX_PRIORITIES 8
-+/**
-+ * Maximum number of interfaces
-+ */
-+#define DPSW_MAX_IF 64
-+
-+/**
-+ * dpsw_open() - Open a control session for the specified object
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @dpsw_id: DPSW unique ID
-+ * @token: Returned token; use in subsequent API calls
-+ *
-+ * This function can be used to open a control session for an
-+ * already created object; an object may have been declared in
-+ * the DPL or by calling the dpsw_create() function.
-+ * This function returns a unique authentication token,
-+ * associated with the specific object ID and the specific MC
-+ * portal; this token must be used in all subsequent commands for
-+ * this specific object.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpsw_open(struct fsl_mc_io *mc_io,
-+ uint32_t cmd_flags,
-+ int dpsw_id,
-+ uint16_t *token);
-+
-+/**
-+ * dpsw_close() - Close the control session of the object
-+ * @mc_io: Pointer to MC portal's I/O object
-+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token: Token of DPSW object
-+ *
-+ * After this function is called, no further operations are
-+ * allowed on the object without opening a new control session.
-+ *
-+ * Return: '0' on Success; Error code otherwise.
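-+ *
-+ * Example: the usual session lifecycle around an existing DPSW object
-+ * (a sketch; 'dpsw_id' comes from the DPL or from dpsw_create(), and a
-+ * blocking cmd_flags of 0 is assumed):
-+ *
-+ *	uint16_t token;
-+ *
-+ *	if (!dpsw_open(mc_io, 0, dpsw_id, &token)) {
-+ *		// ... issue dpsw_* commands using 'token' ...
-+ *		dpsw_close(mc_io, 0, token);
-+ *	}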
-+ */ -+int dpsw_close(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * DPSW options -+ */ -+ -+/** -+ * Disable flooding -+ */ -+#define DPSW_OPT_FLOODING_DIS 0x0000000000000001ULL -+/** -+ * Disable Multicast -+ */ -+#define DPSW_OPT_MULTICAST_DIS 0x0000000000000004ULL -+/** -+ * Disable control interface support -+ */ -+#define DPSW_OPT_CTRL_IF_DIS 0x0000000000000010ULL -+/** -+ * Disable flooding metering -+ */ -+#define DPSW_OPT_FLOODING_METERING_DIS 0x0000000000000020ULL -+/** -+ * Enable metering -+ */ -+#define DPSW_OPT_METERING_EN 0x0000000000000040ULL -+ -+/** -+ * enum dpsw_component_type - component type of a bridge -+ * @DPSW_COMPONENT_TYPE_C_VLAN: A C-VLAN component of an -+ * enterprise VLAN bridge or of a Provider Bridge used -+ * to process C-tagged frames -+ * @DPSW_COMPONENT_TYPE_S_VLAN: An S-VLAN component of a -+ * Provider Bridge -+ * -+ */ -+enum dpsw_component_type { -+ DPSW_COMPONENT_TYPE_C_VLAN = 0, -+ DPSW_COMPONENT_TYPE_S_VLAN -+}; -+ -+/** -+ * struct dpsw_cfg - DPSW configuration -+ * @num_ifs: Number of external and internal interfaces -+ * @adv: Advanced parameters; default is all zeros; -+ * use this structure to change default settings -+ */ -+struct dpsw_cfg { -+ uint16_t num_ifs; -+ /** -+ * struct adv - Advanced parameters -+ * @options: Enable/Disable DPSW features (bitmap) -+ * @max_vlans: Maximum number of VLANs; 0 - indicates default 16 -+ * @max_meters_per_if: Number of meters per interface -+ * @max_fdbs: Maximum number of FDBs; 0 - indicates default 16 -+ * @max_fdb_entries: Number of FDB entries for default FDB table; -+ * 0 - indicates default 1024 entries. -+ * @fdb_aging_time: Default FDB aging time for default FDB table; -+ * 0 - indicates default 300 seconds -+ * @max_fdb_mc_groups: Number of multicast groups in each FDB table; -+ * 0 - indicates default 32 -+ * @component_type: Indicates the component type of this bridge -+ */ -+ struct { -+ uint64_t options; -+ uint16_t max_vlans; -+ uint8_t max_meters_per_if; -+ uint8_t max_fdbs; -+ uint16_t max_fdb_entries; -+ uint16_t fdb_aging_time; -+ uint16_t max_fdb_mc_groups; -+ enum dpsw_component_type component_type; -+ } adv; -+}; -+ -+/** -+ * dpsw_create() - Create the DPSW object. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @cfg: Configuration structure -+ * @token: Returned token; use in subsequent API calls -+ * -+ * Create the DPSW object, allocate required resources and -+ * perform required initialization. -+ * -+ * The object can be created either by declaring it in the -+ * DPL file, or by calling this function. -+ * -+ * This function returns a unique authentication token, -+ * associated with the specific object ID and the specific MC -+ * portal; this token must be used in all subsequent calls to -+ * this specific object. For objects that are created using the -+ * DPL file, call the dpsw_open() function to get an authentication -+ * token first. -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpsw_create(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ const struct dpsw_cfg *cfg, -+ uint16_t *token); -+ -+/** -+ * dpsw_destroy() - Destroy the DPSW object and release all its resources. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * -+ * Return: '0' on Success; Error code otherwise.
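A minimal sketch (editorial, not part of the patch) of the control-session lifecycle these calls imply. It assumes a cmd_flags value of 0 is acceptable and that the struct fsl_mc_io portal is obtained elsewhere from the platform's MC bus layer; both are assumptions, not guarantees from this header:

/* Hypothetical helper: open a session, enable the switch, release the token. */
static int dpsw_bring_up(struct fsl_mc_io *mc_io, int dpsw_id)
{
	uint16_t token;
	int err;

	err = dpsw_open(mc_io, 0, dpsw_id, &token);	/* 0: no command flags */
	if (err)
		return err;

	err = dpsw_enable(mc_io, 0, token);		/* start switching */

	dpsw_close(mc_io, 0, token);			/* always release the token */
	return err;
}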
-+ */ -+int dpsw_destroy(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpsw_enable() - Enable DPSW functionality -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpsw_disable() - Disable DPSW functionality -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * dpsw_is_enabled() - Check if the DPSW is enabled -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @en: Returns '1' if object is enabled; '0' otherwise -+ * -+ * Return: '0' on Success; Error code otherwise -+ */ -+int dpsw_is_enabled(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en); -+ -+/** -+ * dpsw_reset() - Reset the DPSW, returns the object to initial state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpsw_reset(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+/** -+ * DPSW IRQ Index and Events -+ */ -+ -+#define DPSW_IRQ_INDEX_IF 0x0000 -+#define DPSW_IRQ_INDEX_L2SW 0x0001 -+ -+/** -+ * IRQ event - Indicates that the link state changed -+ */ -+#define DPSW_IRQ_EVENT_LINK_CHANGED 0x0001 -+ -+/** -+ * struct dpsw_irq_cfg - IRQ configuration -+ * @addr: Address that must be written to signal a message-based interrupt -+ * @val: Value to write into irq_addr address -+ * @irq_num: A user defined number associated with this IRQ -+ */ -+struct dpsw_irq_cfg { -+ uint64_t addr; -+ uint32_t val; -+ int irq_num; -+}; -+ -+/** -+ * dpsw_set_irq() - Set IRQ information for the DPSW to trigger an interrupt. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @irq_index: Identifies the interrupt index to configure -+ * @irq_cfg: IRQ configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpsw_set_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ struct dpsw_irq_cfg *irq_cfg); -+ -+/** -+ * dpsw_get_irq() - Get IRQ information from the DPSW -+ * -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @irq_index: The interrupt index to configure -+ * @type: Interrupt type: 0 represents message interrupt -+ * type (both irq_addr and irq_val are valid) -+ * @irq_cfg: IRQ attributes -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpsw_get_irq(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ int *type, -+ struct dpsw_irq_cfg *irq_cfg); -+ -+/** -+ * dpsw_set_irq_enable() - Set overall interrupt state. 
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @irq_index: The interrupt index to configure -+ * @en: Interrupt state - enable = 1, disable = 0 -+ * -+ * Allows GPP software to control when interrupts are generated. -+ * Each interrupt can have up to 32 causes. The enable/disable controls the -+ * overall interrupt state. If the interrupt is disabled, no cause will trigger -+ * an interrupt -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpsw_set_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t en); -+ -+/** -+ * dpsw_get_irq_enable() - Get overall interrupt state -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @irq_index: The interrupt index to configure -+ * @en: Returned interrupt state - enable = 1, disable = 0 -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpsw_get_irq_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint8_t *en); -+ -+/** -+ * dpsw_set_irq_mask() - Set interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @irq_index: The interrupt index to configure -+ * @mask: event mask to trigger interrupt; -+ * each bit: -+ * 0 = ignore event -+ * 1 = consider event for asserting IRQ -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpsw_set_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t mask); -+ -+/** -+ * dpsw_get_irq_mask() - Get interrupt mask. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @irq_index: The interrupt index to configure -+ * @mask: Returned event mask to trigger interrupt -+ * -+ * Every interrupt can have up to 32 causes and the interrupt model supports -+ * masking/unmasking each cause independently -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpsw_get_irq_mask(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *mask); -+ -+/** -+ * dpsw_get_irq_status() - Get the current status of any pending interrupts -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @irq_index: The interrupt index to configure -+ * @status: Returned interrupts status - one bit per cause: -+ * 0 = no interrupt pending -+ * 1 = interrupt pending -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpsw_get_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t *status); -+ -+/** -+ * dpsw_clear_irq_status() - Clear a pending interrupt's status -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @irq_index: The interrupt index to configure -+ * @status: bits to clear (W1C) - one bit per cause: -+ * 0 = don't change -+ * 1 = clear status bit -+ * -+ * Return: '0' on Success; Error code otherwise.
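Taken together, the IRQ calls above suggest the following arm-and-acknowledge pattern. This is an editorial sketch only; cmd_flags of 0 is assumed permissible:

static int dpsw_arm_link_irq(struct fsl_mc_io *mc_io, uint16_t token)
{
	uint32_t status;
	int err;

	/* Consider only link-change events on the interface IRQ line. */
	err = dpsw_set_irq_mask(mc_io, 0, token, DPSW_IRQ_INDEX_IF,
				DPSW_IRQ_EVENT_LINK_CHANGED);
	if (err)
		return err;

	err = dpsw_set_irq_enable(mc_io, 0, token, DPSW_IRQ_INDEX_IF, 1);
	if (err)
		return err;

	/* Later, in the handler: read and acknowledge (W1C) the cause. */
	err = dpsw_get_irq_status(mc_io, 0, token, DPSW_IRQ_INDEX_IF, &status);
	if (!err && (status & DPSW_IRQ_EVENT_LINK_CHANGED))
		err = dpsw_clear_irq_status(mc_io, 0, token,
					    DPSW_IRQ_INDEX_IF, status);
	return err;
}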
-+ */ -+int dpsw_clear_irq_status(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint8_t irq_index, -+ uint32_t status); -+/** -+ * struct dpsw_attr - Structure representing DPSW attributes -+ * @id: DPSW object ID -+ * @version: DPSW version -+ * @options: Enable/Disable DPSW features -+ * @max_vlans: Maximum number of VLANs -+ * @max_meters_per_if: Number of meters per interface -+ * @max_fdbs: Maximum number of FDBs -+ * @max_fdb_entries: Number of FDB entries for default FDB table; -+ * 0 - indicates default 1024 entries. -+ * @fdb_aging_time: Default FDB aging time for default FDB table; -+ * 0 - indicates default 300 seconds -+ * @max_fdb_mc_groups: Number of multicast groups in each FDB table; -+ * 0 - indicates default 32 -+ * @mem_size: DPSW frame storage memory size -+ * @num_ifs: Number of interfaces -+ * @num_vlans: Current number of VLANs -+ * @num_fdbs: Current number of FDBs -+ * @component_type: Component type of this bridge -+ */ -+struct dpsw_attr { -+ int id; -+ /** -+ * struct version - DPSW version -+ * @major: DPSW major version -+ * @minor: DPSW minor version -+ */ -+ struct { -+ uint16_t major; -+ uint16_t minor; -+ } version; -+ uint64_t options; -+ uint16_t max_vlans; -+ uint8_t max_meters_per_if; -+ uint8_t max_fdbs; -+ uint16_t max_fdb_entries; -+ uint16_t fdb_aging_time; -+ uint16_t max_fdb_mc_groups; -+ uint16_t num_ifs; -+ uint16_t mem_size; -+ uint16_t num_vlans; -+ uint8_t num_fdbs; -+ enum dpsw_component_type component_type; -+}; -+ -+/** -+ * dpsw_get_attributes() - Retrieve DPSW attributes -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @attr: Returned DPSW attributes -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ struct dpsw_attr *attr); -+ -+/** -+ * dpsw_set_reflection_if() - Set target interface for reflected interfaces. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Id -+ * -+ * Only one reflection receive interface is allowed per switch -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_set_reflection_if(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id); -+ -+/** -+ * enum dpsw_action - Action selection for special/control frames -+ * @DPSW_ACTION_DROP: Drop frame -+ * @DPSW_ACTION_REDIRECT: Redirect frame to control port -+ */ -+enum dpsw_action { -+ DPSW_ACTION_DROP = 0, -+ DPSW_ACTION_REDIRECT = 1 -+}; -+ -+/** -+ * Enable auto-negotiation -+ */ -+#define DPSW_LINK_OPT_AUTONEG 0x0000000000000001ULL -+/** -+ * Enable half-duplex mode -+ */ -+#define DPSW_LINK_OPT_HALF_DUPLEX 0x0000000000000002ULL -+/** -+ * Enable pause frames -+ */ -+#define DPSW_LINK_OPT_PAUSE 0x0000000000000004ULL -+/** -+ * Enable asymmetric pause frames -+ */ -+#define DPSW_LINK_OPT_ASYM_PAUSE 0x0000000000000008ULL -+ -+/** -+ * struct dpsw_link_cfg - Structure representing DPSW link configuration -+ * @rate: Rate -+ * @options: Mask of available options; use 'DPSW_LINK_OPT_' values -+ */ -+struct dpsw_link_cfg { -+ uint32_t rate; -+ uint64_t options; -+}; -+ -+/** -+ * dpsw_if_set_link_cfg() - Set the link configuration.
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface ID -+ * @cfg: Link configuration -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpsw_link_cfg *cfg); -+/** -+ * struct dpsw_link_state - Structure representing DPSW link state -+ * @rate: Rate -+ * @options: Mask of available options; use 'DPSW_LINK_OPT_' values -+ * @up: 0 - covers two cases: down and disconnected, 1 - up -+ */ -+struct dpsw_link_state { -+ uint32_t rate; -+ uint64_t options; -+ int up; -+}; -+ -+/** -+ * dpsw_if_get_link_state() - Return the link state -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface ID -+ * @state: link state 1 - link up, 0 - link down or disconnected -+ * -+ * Return: '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_get_link_state(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpsw_link_state *state); -+ -+/** -+ * dpsw_if_set_flooding() - Enable/disable flooding for a particular interface -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @en: 1 - enable, 0 - disable -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_set_flooding(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ int en); -+ -+/** -+ * dpsw_if_set_broadcast() - Enable/disable broadcast for a particular interface -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @en: 1 - enable, 0 - disable -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_set_broadcast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ int en); -+ -+/** -+ * dpsw_if_set_multicast() - Enable/disable multicast for a particular interface -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @en: 1 - enable, 0 - disable -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_set_multicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ int en); -+ -+/** -+ * struct dpsw_tci_cfg - Tag Control Information (TCI) configuration -+ * @pcp: Priority Code Point (PCP): a 3-bit field which refers -+ * to the IEEE 802.1p priority -+ * @dei: Drop Eligible Indicator (DEI): a 1-bit field. May be used -+ * separately or in conjunction with PCP to indicate frames -+ * eligible to be dropped in the presence of congestion -+ * @vlan_id: VLAN Identifier (VID): a 12-bit field specifying the VLAN -+ * to which the frame belongs.
The hexadecimal values -+ * of 0x000 and 0xFFF are reserved; -+ * all other values may be used as VLAN identifiers, -+ * allowing up to 4,094 VLANs -+ */ -+struct dpsw_tci_cfg { -+ uint8_t pcp; -+ uint8_t dei; -+ uint16_t vlan_id; -+}; -+ -+/** -+ * dpsw_if_set_tci() - Set default VLAN Tag Control Information (TCI) -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @cfg: Tag Control Information Configuration -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_set_tci(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpsw_tci_cfg *cfg); -+ -+/** -+ * dpsw_if_get_tci() - Get default VLAN Tag Control Information (TCI) -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @cfg: Tag Control Information Configuration -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_get_tci(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpsw_tci_cfg *cfg); -+ -+/** -+ * enum dpsw_stp_state - Spanning Tree Protocol (STP) states -+ * @DPSW_STP_STATE_BLOCKING: Blocking state -+ * @DPSW_STP_STATE_LISTENING: Listening state -+ * @DPSW_STP_STATE_LEARNING: Learning state -+ * @DPSW_STP_STATE_FORWARDING: Forwarding state -+ * -+ */ -+enum dpsw_stp_state { -+ DPSW_STP_STATE_BLOCKING = 0, -+ DPSW_STP_STATE_LISTENING = 1, -+ DPSW_STP_STATE_LEARNING = 2, -+ DPSW_STP_STATE_FORWARDING = 3 -+}; -+ -+/** -+ * struct dpsw_stp_cfg - Spanning Tree Protocol (STP) Configuration -+ * @vlan_id: VLAN ID STP state -+ * @state: STP state -+ */ -+struct dpsw_stp_cfg { -+ uint16_t vlan_id; -+ enum dpsw_stp_state state; -+}; -+ -+/** -+ * dpsw_if_set_stp() - Function sets Spanning Tree Protocol (STP) state. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @cfg: STP State configuration parameters -+ * -+ * The following STP states are supported - -+ * blocking, listening, learning, forwarding and disabled. -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_set_stp(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpsw_stp_cfg *cfg); -+ -+/** -+ * enum dpsw_accepted_frames - Types of frames to accept -+ * @DPSW_ADMIT_ALL: The device accepts VLAN tagged, untagged and -+ * priority tagged frames -+ * @DPSW_ADMIT_ONLY_VLAN_TAGGED: The device discards untagged frames or -+ * Priority-Tagged frames received on this interface. 
-+ * -+ */ -+enum dpsw_accepted_frames { -+ DPSW_ADMIT_ALL = 1, -+ DPSW_ADMIT_ONLY_VLAN_TAGGED = 3 -+}; -+ -+/** -+ * struct dpsw_accepted_frames_cfg - Types of frames to accept configuration -+ * @type: Defines ingress accepted frames -+ * @unaccept_act: When a frame is not accepted, it may be discarded or -+ * redirected to control interface depending on this mode -+ */ -+struct dpsw_accepted_frames_cfg { -+ enum dpsw_accepted_frames type; -+ enum dpsw_action unaccept_act; -+}; -+ -+/** -+ * dpsw_if_set_accepted_frames() -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @cfg: Frame types configuration -+ * -+ * When admit_only_vlan_tagged is set, the device discards untagged -+ * frames or priority-tagged frames received on this interface. -+ * When admit_only_untagged is set, untagged frames or priority-tagged -+ * frames received on this interface are accepted and assigned -+ * to a VID based on the PVID and VID set for this interface. -+ * When admit_all is set, the device accepts VLAN-tagged, untagged -+ * and priority-tagged frames. -+ * The default is admit_all -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_set_accepted_frames(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpsw_accepted_frames_cfg *cfg); -+ -+/** -+ * dpsw_if_set_accept_all_vlan() -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @accept_all: Accept or drop frames having different VLAN -+ * -+ * When accept_all is FALSE, the device discards incoming -+ * frames for VLANs that do not include this interface in their -+ * member set. When accept_all is TRUE, the interface accepts all incoming frames -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise.
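An editorial sketch of a per-interface ingress policy built from the calls above; the interface ID, VLAN ID and PCP values are illustrative only, and cmd_flags of 0 is assumed acceptable:

static int dpsw_if_ingress_policy(struct fsl_mc_io *mc_io, uint16_t token,
				  uint16_t if_id)
{
	/* Default TCI: untagged frames are classified to VLAN 100, PCP 0. */
	const struct dpsw_tci_cfg tci = { .pcp = 0, .dei = 0, .vlan_id = 100 };
	/* Accept only VLAN-tagged frames; drop everything else. */
	const struct dpsw_accepted_frames_cfg af = {
		.type = DPSW_ADMIT_ONLY_VLAN_TAGGED,
		.unaccept_act = DPSW_ACTION_DROP,
	};
	int err;

	err = dpsw_if_set_tci(mc_io, 0, token, if_id, &tci);
	if (err)
		return err;
	return dpsw_if_set_accepted_frames(mc_io, 0, token, if_id, &af);
}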
-+ */ -+int dpsw_if_set_accept_all_vlan(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ int accept_all); -+ -+/** -+ * enum dpsw_counter - Counter types -+ * @DPSW_CNT_ING_FRAME: Counts ingress frames -+ * @DPSW_CNT_ING_BYTE: Counts ingress bytes -+ * @DPSW_CNT_ING_FLTR_FRAME: Counts filtered ingress frames -+ * @DPSW_CNT_ING_FRAME_DISCARD: Counts discarded ingress frames -+ * @DPSW_CNT_ING_MCAST_FRAME: Counts ingress multicast frames -+ * @DPSW_CNT_ING_MCAST_BYTE: Counts ingress multicast bytes -+ * @DPSW_CNT_ING_BCAST_FRAME: Counts ingress broadcast frames -+ * @DPSW_CNT_ING_BCAST_BYTES: Counts ingress broadcast bytes -+ * @DPSW_CNT_EGR_FRAME: Counts egress frames -+ * @DPSW_CNT_EGR_BYTE: Counts egress bytes -+ * @DPSW_CNT_EGR_FRAME_DISCARD: Counts discarded egress frames -+ * @DPSW_CNT_EGR_STP_FRAME_DISCARD: Counts egress STP discarded frames -+ */ -+enum dpsw_counter { -+ DPSW_CNT_ING_FRAME = 0x0, -+ DPSW_CNT_ING_BYTE = 0x1, -+ DPSW_CNT_ING_FLTR_FRAME = 0x2, -+ DPSW_CNT_ING_FRAME_DISCARD = 0x3, -+ DPSW_CNT_ING_MCAST_FRAME = 0x4, -+ DPSW_CNT_ING_MCAST_BYTE = 0x5, -+ DPSW_CNT_ING_BCAST_FRAME = 0x6, -+ DPSW_CNT_ING_BCAST_BYTES = 0x7, -+ DPSW_CNT_EGR_FRAME = 0x8, -+ DPSW_CNT_EGR_BYTE = 0x9, -+ DPSW_CNT_EGR_FRAME_DISCARD = 0xa, -+ DPSW_CNT_EGR_STP_FRAME_DISCARD = 0xb -+}; -+ -+/** -+ * dpsw_if_get_counter() - Get a specific counter of a particular interface -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @type: Counter type -+ * @counter: Returned counter value -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_get_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ enum dpsw_counter type, -+ uint64_t *counter); -+ -+/** -+ * dpsw_if_set_counter() - Set a specific counter of a particular interface -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @type: Counter type -+ * @counter: New counter value -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_set_counter(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ enum dpsw_counter type, -+ uint64_t counter); -+ -+/** -+ * Maximum number of TCs -+ */ -+#define DPSW_MAX_TC 8 -+ -+/** -+ * enum dpsw_priority_selector - User priority -+ * @DPSW_UP_PCP: Priority Code Point (PCP): a 3-bit field which -+ * refers to the IEEE 802.1p priority.
-+ * @DPSW_UP_DSCP: Differentiated Services Code Point (DSCP): a 6-bit -+ * field from the IP header -+ * -+ */ -+enum dpsw_priority_selector { -+ DPSW_UP_PCP = 0, -+ DPSW_UP_DSCP = 1 -+}; -+ -+/** -+ * enum dpsw_schedule_mode - Traffic classes scheduling -+ * @DPSW_SCHED_STRICT_PRIORITY: strict priority scheduling -+ * @DPSW_SCHED_WEIGHTED: weighted scheduling based on a token-bucket algorithm -+ */ -+enum dpsw_schedule_mode { -+ DPSW_SCHED_STRICT_PRIORITY, -+ DPSW_SCHED_WEIGHTED -+}; -+ -+/** -+ * struct dpsw_tx_schedule_cfg - traffic class configuration -+ * @mode: Strict or weight-based scheduling -+ * @delta_bandwidth: Weighted bandwidth, in the range from 100 to 10000 -+ */ -+struct dpsw_tx_schedule_cfg { -+ enum dpsw_schedule_mode mode; -+ uint16_t delta_bandwidth; -+}; -+ -+/** -+ * struct dpsw_tx_selection_cfg - Mapping user priority into traffic -+ * class configuration -+ * @priority_selector: Source for user priority regeneration -+ * @tc_id: The regenerated user priority that the incoming -+ * user priority is mapped to for this interface -+ * @tc_sched: Traffic classes configuration -+ */ -+struct dpsw_tx_selection_cfg { -+ enum dpsw_priority_selector priority_selector; -+ uint8_t tc_id[DPSW_MAX_PRIORITIES]; -+ struct dpsw_tx_schedule_cfg tc_sched[DPSW_MAX_TC]; -+}; -+ -+/** -+ * dpsw_if_set_tx_selection() - Map frame priority fields to traffic -+ * classes -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @cfg: Traffic class mapping configuration -+ * -+ * The function is used for mapping frame fields (DSCP, PCP) -+ * to a traffic class. A traffic class is a number -+ * in the range from 0 to 7 -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_set_tx_selection(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpsw_tx_selection_cfg *cfg); -+ -+/** -+ * enum dpsw_reflection_filter - Filter type for frames to reflect -+ * @DPSW_REFLECTION_FILTER_INGRESS_ALL: Reflect all frames -+ * @DPSW_REFLECTION_FILTER_INGRESS_VLAN: Reflect only frames belonging to -+ * a particular VLAN defined by the vid parameter -+ * -+ */ -+enum dpsw_reflection_filter { -+ DPSW_REFLECTION_FILTER_INGRESS_ALL = 0, -+ DPSW_REFLECTION_FILTER_INGRESS_VLAN = 1 -+}; -+ -+/** -+ * struct dpsw_reflection_cfg - Structure representing reflection information -+ * @filter: Filter type for frames to reflect -+ * @vlan_id: VLAN ID to reflect; valid only when filter type is -+ * DPSW_REFLECTION_FILTER_INGRESS_VLAN -+ */ -+struct dpsw_reflection_cfg { -+ enum dpsw_reflection_filter filter; -+ uint16_t vlan_id; -+}; -+ -+/** -+ * dpsw_if_add_reflection() - Identify interface to be reflected or mirrored -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @cfg: Reflection configuration -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise.
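A sketch (editorial, not from the patch) of how the reflection calls compose; interface 5 as the mirror target and VLAN 100 as the filter are illustrative values:

static int dpsw_mirror_vlan(struct fsl_mc_io *mc_io, uint16_t token)
{
	const struct dpsw_reflection_cfg rcfg = {
		.filter = DPSW_REFLECTION_FILTER_INGRESS_VLAN,
		.vlan_id = 100,	/* mirror only frames of this VLAN */
	};
	int err;

	/* Only one reflection receive interface is allowed per switch. */
	err = dpsw_set_reflection_if(mc_io, 0, token, 5);
	if (err)
		return err;
	/* Mirror matching ingress traffic of interface 1 to the target. */
	return dpsw_if_add_reflection(mc_io, 0, token, 1, &rcfg);
}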
-+ */ -+int dpsw_if_add_reflection(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpsw_reflection_cfg *cfg); -+ -+/** -+ * dpsw_if_remove_reflection() - Remove interface to be reflected or mirrored -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @cfg: Reflection configuration -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpsw_reflection_cfg *cfg); -+ -+/** -+ * enum dpsw_metering_mode - Metering modes -+ * @DPSW_METERING_MODE_NONE: metering disabled -+ * @DPSW_METERING_MODE_RFC2698: RFC 2698 -+ * @DPSW_METERING_MODE_RFC4115: RFC 4115 -+ */ -+enum dpsw_metering_mode { -+ DPSW_METERING_MODE_NONE = 0, -+ DPSW_METERING_MODE_RFC2698, -+ DPSW_METERING_MODE_RFC4115 -+}; -+ -+/** -+ * enum dpsw_metering_unit - Metering count -+ * @DPSW_METERING_UNIT_BYTES: count bytes -+ * @DPSW_METERING_UNIT_FRAMES: count frames -+ */ -+enum dpsw_metering_unit { -+ DPSW_METERING_UNIT_BYTES = 0, -+ DPSW_METERING_UNIT_FRAMES -+}; -+ -+/** -+ * struct dpsw_metering_cfg - Metering configuration -+ * @mode: metering modes -+ * @units: Bytes or frame units -+ * @cir: Committed information rate (CIR) in Kbits/s -+ * @eir: Peak information rate (PIR) in Kbit/s for rfc2698; -+ * Excess information rate (EIR) in Kbit/s for rfc4115 -+ * @cbs: Committed burst size (CBS) in bytes -+ * @ebs: Peak burst size (PBS) in bytes for rfc2698; -+ * Excess burst size (EBS) in bytes for rfc4115 -+ * -+ */ -+struct dpsw_metering_cfg { -+ enum dpsw_metering_mode mode; -+ enum dpsw_metering_unit units; -+ uint32_t cir; -+ uint32_t eir; -+ uint32_t cbs; -+ uint32_t ebs; -+}; -+ -+/** -+ * dpsw_if_set_flooding_metering() - Set flooding metering -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @cfg: Metering parameters -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_set_flooding_metering(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ const struct dpsw_metering_cfg *cfg); -+ -+/** -+ * dpsw_if_set_metering() - Set interface metering for flooding -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @tc_id: Traffic class ID -+ * @cfg: Metering parameters -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise.
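For instance, an RFC 2698 byte-based flooding meter might be configured as in this editorial sketch; the rates and burst sizes are arbitrary example numbers:

static int dpsw_limit_flooding(struct fsl_mc_io *mc_io, uint16_t token,
			       uint16_t if_id)
{
	const struct dpsw_metering_cfg m = {
		.mode = DPSW_METERING_MODE_RFC2698,
		.units = DPSW_METERING_UNIT_BYTES,
		.cir = 10000,	/* committed rate, Kbit/s */
		.eir = 20000,	/* peak rate (PIR for rfc2698), Kbit/s */
		.cbs = 2048,	/* committed burst, bytes */
		.ebs = 4096,	/* peak burst, bytes */
	};

	return dpsw_if_set_flooding_metering(mc_io, 0, token, if_id, &m);
}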
-+ */ -+int dpsw_if_set_metering(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ uint8_t tc_id, -+ const struct dpsw_metering_cfg *cfg); -+ -+/** -+ * enum dpsw_early_drop_unit - DPSW early drop unit -+ * @DPSW_EARLY_DROP_UNIT_BYTE: count bytes -+ * @DPSW_EARLY_DROP_UNIT_FRAMES: count frames -+ */ -+enum dpsw_early_drop_unit { -+ DPSW_EARLY_DROP_UNIT_BYTE = 0, -+ DPSW_EARLY_DROP_UNIT_FRAMES -+}; -+ -+/** -+ * enum dpsw_early_drop_mode - DPSW early drop mode -+ * @DPSW_EARLY_DROP_MODE_NONE: early drop is disabled -+ * @DPSW_EARLY_DROP_MODE_TAIL: early drop in taildrop mode -+ * @DPSW_EARLY_DROP_MODE_WRED: early drop in WRED mode -+ */ -+enum dpsw_early_drop_mode { -+ DPSW_EARLY_DROP_MODE_NONE = 0, -+ DPSW_EARLY_DROP_MODE_TAIL, -+ DPSW_EARLY_DROP_MODE_WRED -+}; -+ -+/** -+ * struct dpsw_wred_cfg - WRED configuration -+ * @max_threshold: maximum threshold at which packets may be discarded; above -+ * this threshold all packets are discarded; must be less than 2^39; -+ * approximated to be expressed as (x+256)*2^(y-1) due to HW -+ * implementation. -+ * @min_threshold: minimum threshold at which packets may be discarded -+ * @drop_probability: probability that a packet will be discarded (1-100, -+ * associated with the maximum threshold) -+ */ -+struct dpsw_wred_cfg { -+ uint64_t min_threshold; -+ uint64_t max_threshold; -+ uint8_t drop_probability; -+}; -+ -+/** -+ * struct dpsw_early_drop_cfg - early-drop configuration -+ * @drop_mode: drop mode -+ * @units: count units -+ * @yellow: WRED - 'yellow' configuration -+ * @green: WRED - 'green' configuration -+ * @tail_drop_threshold: tail drop threshold -+ */ -+struct dpsw_early_drop_cfg { -+ enum dpsw_early_drop_mode drop_mode; -+ enum dpsw_early_drop_unit units; -+ struct dpsw_wred_cfg yellow; -+ struct dpsw_wred_cfg green; -+ uint32_t tail_drop_threshold; -+}; -+ -+/** -+ * dpsw_prepare_early_drop() - Prepare an early-drop configuration for setting -+ * on an interface -+ * @cfg: Early-drop configuration -+ * @early_drop_buf: Zeroed 256 bytes of memory before mapping it to DMA -+ * -+ * This function has to be called before dpsw_if_set_early_drop() -+ * -+ */ -+void dpsw_prepare_early_drop(const struct dpsw_early_drop_cfg *cfg, -+ uint8_t *early_drop_buf); -+ -+/** -+ * dpsw_if_set_early_drop() - Set interface traffic class early-drop -+ * configuration -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @tc_id: Traffic class selection (0-7) -+ * @early_drop_iova: I/O virtual address of 64 bytes; -+ * Must be cacheline-aligned and DMA-able memory -+ * -+ * warning: Before calling this function, call dpsw_prepare_early_drop() -+ * to prepare the early_drop_iova parameter -+ * -+ * Return: '0' on Success; Error code otherwise.
-+ */ -+int dpsw_if_set_early_drop(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ uint8_t tc_id, -+ uint64_t early_drop_iova); -+ -+/** -+ * struct dpsw_custom_tpid_cfg - Structure representing a tag protocol identifier -+ * @tpid: An additional tag protocol identifier -+ */ -+struct dpsw_custom_tpid_cfg { -+ uint16_t tpid; -+}; -+ -+/** -+ * dpsw_add_custom_tpid() - Configure a distinct Ethernet type value -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @cfg: Tag Protocol identifier -+ * -+ * Configures a distinct Ethernet type value (or TPID value) -+ * to indicate a VLAN tag in addition to the common -+ * TPID values 0x8100 and 0x88A8. -+ * Two additional TPIDs are supported -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_add_custom_tpid(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpsw_custom_tpid_cfg *cfg); -+ -+/** -+ * dpsw_remove_custom_tpid() - Remove a distinct Ethernet type value -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @cfg: Tag Protocol identifier -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_remove_custom_tpid(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ const struct dpsw_custom_tpid_cfg *cfg); -+ -+/** -+ * dpsw_if_enable() - Enable Interface -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_enable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id); -+ -+/** -+ * dpsw_if_disable() - Disable Interface -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise.
-+ */ -+int dpsw_if_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id); -+ -+/** -+ * struct dpsw_if_attr - Structure representing DPSW interface attributes -+ * @num_tcs: Number of traffic classes -+ * @rate: Transmit rate in bits per second -+ * @options: Interface configuration options (bitmap) -+ * @enabled: Indicates if interface is enabled -+ * @accept_all_vlan: The device discards/accepts incoming frames -+ * for VLANs that do not include this interface -+ * @admit_untagged: When set to 'DPSW_ADMIT_ONLY_VLAN_TAGGED', the device -+ * discards untagged frames or priority-tagged frames received on -+ * this interface; -+ * When set to 'DPSW_ADMIT_ALL', untagged frames or priority- -+ * tagged frames received on this interface are accepted -+ * @qdid: control frames transmit qdid -+ */ -+struct dpsw_if_attr { -+ uint8_t num_tcs; -+ uint32_t rate; -+ uint32_t options; -+ int enabled; -+ int accept_all_vlan; -+ enum dpsw_accepted_frames admit_untagged; -+ uint16_t qdid; -+}; -+ -+/** -+ * dpsw_if_get_attributes() - Function obtains attributes of interface -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @attr: Returned interface attributes -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ struct dpsw_if_attr *attr); -+ -+/** -+ * dpsw_if_set_max_frame_length() - Set Maximum Receive frame length. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @frame_length: Maximum Frame Length -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ uint16_t frame_length); -+ -+/** -+ * dpsw_if_get_max_frame_length() - Get Maximum Receive frame length. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @if_id: Interface Identifier -+ * @frame_length: Returned maximum Frame Length -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_if_get_max_frame_length(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t if_id, -+ uint16_t *frame_length); -+ -+/** -+ * struct dpsw_vlan_cfg - VLAN Configuration -+ * @fdb_id: Forwarding Data Base -+ */ -+struct dpsw_vlan_cfg { -+ uint16_t fdb_id; -+}; -+ -+/** -+ * dpsw_vlan_add() - Adding new VLAN to DPSW. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * @cfg: VLAN configuration -+ * -+ * Only VLAN ID and FDB ID are required parameters here. -+ * 12 bit VLAN ID is defined in IEEE802.1Q. -+ * Adding a duplicate VLAN ID is not allowed. -+ * FDB ID can be shared across multiple VLANs. Shared learning -+ * is obtained by calling dpsw_vlan_add for multiple VLAN IDs -+ * with same fdb_id -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. 
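A sketch of the VLAN/FDB relationship described above: one FDB shared by a VLAN with two member interfaces. This is editorial, with illustrative IDs and cmd_flags of 0 assumed acceptable:

static int dpsw_vlan_example(struct fsl_mc_io *mc_io, uint16_t token)
{
	const struct dpsw_fdb_cfg fdb_cfg = {
		.num_fdb_entries = 1024,
		.fdb_aging_time = 300,		/* seconds */
	};
	const struct dpsw_vlan_if_cfg members = {
		.num_ifs = 2,
		.if_id = { 1, 2 },
	};
	struct dpsw_vlan_cfg vlan_cfg;
	uint16_t fdb_id;
	int err;

	err = dpsw_fdb_add(mc_io, 0, token, &fdb_id, &fdb_cfg);
	if (err)
		return err;

	/* VLAN 100 uses the FDB created above; reusing the same fdb_id
	 * across several dpsw_vlan_add() calls gives shared learning. */
	vlan_cfg.fdb_id = fdb_id;
	err = dpsw_vlan_add(mc_io, 0, token, 100, &vlan_cfg);
	if (err)
		return err;

	return dpsw_vlan_add_if(mc_io, 0, token, 100, &members);
}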
-+ */ -+int dpsw_vlan_add(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_cfg *cfg); -+ -+/** -+ * struct dpsw_vlan_if_cfg - Set of VLAN Interfaces -+ * @num_ifs: The number of interfaces that are assigned to the egress -+ * list for this VLAN -+ * @if_id: The set of interfaces that are -+ * assigned to the egress list for this VLAN -+ */ -+struct dpsw_vlan_if_cfg { -+ uint16_t num_ifs; -+ uint16_t if_id[DPSW_MAX_IF]; -+}; -+ -+/** -+ * dpsw_vlan_add_if() - Add a set of interfaces to an existing VLAN. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * @cfg: Set of interfaces to add -+ * -+ * It adds only interfaces that do not yet belong to this VLAN; -+ * otherwise an error is generated and the entire command is -+ * ignored. This function can be called numerous times, each time -+ * providing the required interface delta. -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_vlan_add_if(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg); -+ -+/** -+ * dpsw_vlan_add_if_untagged() - Define a set of interfaces that should be -+ * transmitted as untagged. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * @cfg: set of interfaces that should be transmitted as untagged -+ * -+ * These interfaces should already belong to this VLAN. -+ * By default all interfaces are transmitted as tagged. -+ * Providing a nonexistent interface, or an interface that is -+ * already configured as untagged, generates an error and the entire -+ * command is ignored. -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg); -+ -+/** -+ * dpsw_vlan_add_if_flooding() - Define a set of interfaces that should be -+ * included in flooding when a frame with an unknown destination -+ * unicast MAC address arrives. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * @cfg: Set of interfaces that should be used for flooding -+ * -+ * These interfaces should belong to this VLAN. By default all -+ * interfaces are included in the flooding list. Providing -+ * a nonexistent interface, or an interface that is already in the -+ * flooding list, generates an error and the entire command is -+ * ignored. -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_vlan_add_if_flooding(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg); -+ -+/** -+ * dpsw_vlan_remove_if() - Remove interfaces from an existing VLAN. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * @cfg: Set of interfaces that should be removed -+ * -+ * Interfaces must belong to this VLAN, otherwise an error -+ * is returned and the command is ignored -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise.
-+ */ -+int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg); -+ -+/** -+ * dpsw_vlan_remove_if_untagged() - Define a set of interfaces that should be -+ * converted from being transmitted untagged to being transmitted tagged. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * @cfg: set of interfaces that should be removed -+ * -+ * Interfaces provided to the API have to belong to this VLAN and -+ * be configured as untagged, otherwise an error is returned and the -+ * command is ignored -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg); -+ -+/** -+ * dpsw_vlan_remove_if_flooding() - Define a set of interfaces that should be -+ * removed from the flooding list. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * @cfg: set of interfaces used for flooding -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_vlan_remove_if_flooding(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ const struct dpsw_vlan_if_cfg *cfg); -+ -+/** -+ * dpsw_vlan_remove() - Remove an entire VLAN -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_vlan_remove(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id); -+ -+/** -+ * struct dpsw_vlan_attr - VLAN attributes -+ * @fdb_id: Associated FDB ID -+ * @num_ifs: Number of interfaces -+ * @num_untagged_ifs: Number of untagged interfaces -+ * @num_flooding_ifs: Number of flooding interfaces -+ */ -+struct dpsw_vlan_attr { -+ uint16_t fdb_id; -+ uint16_t num_ifs; -+ uint16_t num_untagged_ifs; -+ uint16_t num_flooding_ifs; -+}; -+ -+/** -+ * dpsw_vlan_get_attributes() - Get VLAN attributes -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * @attr: Returned DPSW attributes -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_vlan_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ struct dpsw_vlan_attr *attr); -+ -+/** -+ * dpsw_vlan_get_if() - Get interfaces belonging to this VLAN -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * @cfg: Returned set of interfaces belonging to this VLAN -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise.
-+ */ -+int dpsw_vlan_get_if(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ struct dpsw_vlan_if_cfg *cfg); -+ -+/** -+ * dpsw_vlan_get_if_flooding() - Get interfaces used in flooding for this VLAN -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * @cfg: Returned set of flooding interfaces -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_vlan_get_if_flooding(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ struct dpsw_vlan_if_cfg *cfg); -+ -+/** -+ * dpsw_vlan_get_if_untagged() - Get interfaces that should be transmitted as -+ * untagged -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @vlan_id: VLAN Identifier -+ * @cfg: Returned set of untagged interfaces -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_vlan_get_if_untagged(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t vlan_id, -+ struct dpsw_vlan_if_cfg *cfg); -+ -+/** -+ * struct dpsw_fdb_cfg - FDB Configuration -+ * @num_fdb_entries: Number of FDB entries -+ * @fdb_aging_time: Aging time in seconds -+ */ -+struct dpsw_fdb_cfg { -+ uint16_t num_fdb_entries; -+ uint16_t fdb_aging_time; -+}; -+ -+/** -+ * dpsw_fdb_add() - Add an FDB to the switch and return a handle to the FDB -+ * table for future reference -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @fdb_id: Returned Forwarding Database Identifier -+ * @cfg: FDB Configuration -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_fdb_add(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t *fdb_id, -+ const struct dpsw_fdb_cfg *cfg); -+ -+/** -+ * dpsw_fdb_remove() - Remove an FDB from the switch -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @fdb_id: Forwarding Database Identifier -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_fdb_remove(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id); -+ -+/** -+ * enum dpsw_fdb_entry_type - FDB Entry type - Static/Dynamic -+ * @DPSW_FDB_ENTRY_STATIC: Static entry -+ * @DPSW_FDB_ENTRY_DINAMIC: Dynamic entry -+ */ -+enum dpsw_fdb_entry_type { -+ DPSW_FDB_ENTRY_STATIC = 0, -+ DPSW_FDB_ENTRY_DINAMIC = 1 -+}; -+ -+/** -+ * struct dpsw_fdb_unicast_cfg - Unicast entry configuration -+ * @type: Select static or dynamic entry -+ * @mac_addr: MAC address -+ * @if_egress: Egress interface ID -+ */ -+struct dpsw_fdb_unicast_cfg { -+ enum dpsw_fdb_entry_type type; -+ uint8_t mac_addr[6]; -+ uint16_t if_egress; -+}; -+ -+/** -+ * dpsw_fdb_add_unicast() - Add a unicast entry to the MAC lookup table -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @fdb_id: Forwarding Database Identifier -+ * @cfg: Unicast entry configuration -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise.
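A static unicast entry, as the structure above suggests; an editorial sketch with an illustrative MAC address and egress interface:

static int dpsw_pin_station(struct fsl_mc_io *mc_io, uint16_t token,
			    uint16_t fdb_id)
{
	const struct dpsw_fdb_unicast_cfg entry = {
		.type = DPSW_FDB_ENTRY_STATIC,
		.mac_addr = { 0x00, 0x04, 0x9f, 0x01, 0x02, 0x03 },
		.if_egress = 3,	/* frames to this MAC leave via interface 3 */
	};

	return dpsw_fdb_add_unicast(mc_io, 0, token, fdb_id, &entry);
}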
-+ */ -+int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ const struct dpsw_fdb_unicast_cfg *cfg); -+ -+/** -+ * dpsw_fdb_get_unicast() - Get unicast entry from MAC lookup table by -+ * unicast Ethernet address -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @fdb_id: Forwarding Database Identifier -+ * @cfg: Returned unicast entry configuration -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_fdb_get_unicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ struct dpsw_fdb_unicast_cfg *cfg); -+ -+/** -+ * dpsw_fdb_remove_unicast() - removes an entry from MAC lookup table -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @fdb_id: Forwarding Database Identifier -+ * @cfg: Unicast entry configuration -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ const struct dpsw_fdb_unicast_cfg *cfg); -+ -+/** -+ * struct dpsw_fdb_multicast_cfg - Multi-cast entry configuration -+ * @type: Select static or dynamic entry -+ * @mac_addr: MAC address -+ * @num_ifs: Number of external and internal interfaces -+ * @if_id: Egress interface IDs -+ */ -+struct dpsw_fdb_multicast_cfg { -+ enum dpsw_fdb_entry_type type; -+ uint8_t mac_addr[6]; -+ uint16_t num_ifs; -+ uint16_t if_id[DPSW_MAX_IF]; -+}; -+ -+/** -+ * dpsw_fdb_add_multicast() - Add a set of egress interfaces to multi-cast group -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @fdb_id: Forwarding Database Identifier -+ * @cfg: Multicast entry configuration -+ * -+ * If group doesn't exist, it will be created. -+ * It adds only interfaces not belonging to this multicast group -+ * yet, otherwise error will be generated and the command is -+ * ignored. -+ * This function may be called numerous times always providing -+ * required interfaces delta. -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ const struct dpsw_fdb_multicast_cfg *cfg); -+ -+/** -+ * dpsw_fdb_get_multicast() - Reading multi-cast group by multi-cast Ethernet -+ * address. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @fdb_id: Forwarding Database Identifier -+ * @cfg: Returned multicast entry configuration -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_fdb_get_multicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ struct dpsw_fdb_multicast_cfg *cfg); -+ -+/** -+ * dpsw_fdb_remove_multicast() - Removing interfaces from an existing multicast -+ * group. 
-+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @fdb_id: Forwarding Database Identifier -+ * @cfg: Multicast entry configuration -+ * -+ * Interfaces provided by this API have to exist in the group, -+ * otherwise an error is returned and the entire command -+ * is ignored. If no interface is left in the group, -+ * the entire group is deleted -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ const struct dpsw_fdb_multicast_cfg *cfg); -+ -+/** -+ * enum dpsw_fdb_learning_mode - Auto-learning modes -+ * @DPSW_FDB_LEARNING_MODE_DIS: Disable Auto-learning -+ * @DPSW_FDB_LEARNING_MODE_HW: Enable HW auto-learning -+ * @DPSW_FDB_LEARNING_MODE_NON_SECURE: Enable non-secure learning by the CPU -+ * @DPSW_FDB_LEARNING_MODE_SECURE: Enable secure learning by the CPU -+ * -+ * NON-SECURE LEARNING -+ * SMAC found DMAC found CTLU Action -+ * v v Forward frame to -+ * 1. DMAC destination -+ * - v Forward frame to -+ * 1. DMAC destination -+ * 2. Control interface -+ * v - Forward frame to -+ * 1. Flooding list of interfaces -+ * - - Forward frame to -+ * 1. Flooding list of interfaces -+ * 2. Control interface -+ * SECURE LEARNING -+ * SMAC found DMAC found CTLU Action -+ * v v Forward frame to -+ * 1. DMAC destination -+ * - v Forward frame to -+ * 1. Control interface -+ * v - Forward frame to -+ * 1. Flooding list of interfaces -+ * - - Forward frame to -+ * 1. Control interface -+ */ -+enum dpsw_fdb_learning_mode { -+ DPSW_FDB_LEARNING_MODE_DIS = 0, -+ DPSW_FDB_LEARNING_MODE_HW = 1, -+ DPSW_FDB_LEARNING_MODE_NON_SECURE = 2, -+ DPSW_FDB_LEARNING_MODE_SECURE = 3 -+}; -+ -+/** -+ * dpsw_fdb_set_learning_mode() - Define FDB learning mode -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @fdb_id: Forwarding Database Identifier -+ * @mode: learning mode -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise. -+ */ -+int dpsw_fdb_set_learning_mode(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ enum dpsw_fdb_learning_mode mode); -+ -+/** -+ * struct dpsw_fdb_attr - FDB Attributes -+ * @max_fdb_entries: Number of FDB entries -+ * @fdb_aging_time: Aging time in seconds -+ * @learning_mode: Learning mode -+ * @num_fdb_mc_groups: Current number of multicast groups -+ * @max_fdb_mc_groups: Maximum number of multicast groups -+ */ -+struct dpsw_fdb_attr { -+ uint16_t max_fdb_entries; -+ uint16_t fdb_aging_time; -+ enum dpsw_fdb_learning_mode learning_mode; -+ uint16_t num_fdb_mc_groups; -+ uint16_t max_fdb_mc_groups; -+}; -+ -+/** -+ * dpsw_fdb_get_attributes() - Get FDB attributes -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @fdb_id: Forwarding Database Identifier -+ * @attr: Returned FDB attributes -+ * -+ * Return: Completion status. '0' on Success; Error code otherwise.
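Selecting a learning mode is a single call; secure learning is shown here, per the table above (an editorial sketch, cmd_flags of 0 assumed):

static int dpsw_use_secure_learning(struct fsl_mc_io *mc_io, uint16_t token,
				    uint16_t fdb_id)
{
	/* CPU-managed secure learning: unknown SMACs go to the control
	 * interface instead of being learned in hardware. */
	return dpsw_fdb_set_learning_mode(mc_io, 0, token, fdb_id,
					  DPSW_FDB_LEARNING_MODE_SECURE);
}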
-+ */ -+int dpsw_fdb_get_attributes(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ uint16_t fdb_id, -+ struct dpsw_fdb_attr *attr); -+ -+/** -+ * struct dpsw_acl_cfg - ACL Configuration -+ * @max_entries: Maximum number of ACL entries -+ */ -+struct dpsw_acl_cfg { -+ uint16_t max_entries; -+}; -+ -+/** -+ * struct dpsw_acl_fields - ACL fields. -+ * @l2_dest_mac: Destination MAC address: BPDU, Multicast, Broadcast, Unicast, -+ * slow protocols, MVRP, STP -+ * @l2_source_mac: Source MAC address -+ * @l2_tpid: Layer 2 (Ethernet) protocol type, used to identify the following -+ * protocols: MPLS, PTP, PFC, ARP, Jumbo frames, LLDP, IEEE802.1ae, -+ * Q-in-Q, IPv4, IPv6, PPPoE -+ * @l2_pcp_dei: Layer 2 Priority Code Point (PCP) and Drop Eligible -+ * Indicator (DEI) fields of the VLAN tag -+ * @l2_vlan_id: layer 2 VLAN ID -+ * @l2_ether_type: layer 2 Ethernet type -+ * @l3_dscp: Layer 3 differentiated services code point -+ * @l3_protocol: Tells the network layer at the destination host which -+ * protocol this packet belongs to. The following protocols are -+ * supported: ICMP, IGMP, IPv4 (encapsulation), TCP, IPv6 -+ * (encapsulation), GRE, PTP -+ * @l3_source_ip: Source IPv4 address -+ * @l3_dest_ip: Destination IPv4 address -+ * @l4_source_port: Source TCP/UDP Port -+ * @l4_dest_port: Destination TCP/UDP Port -+ */ -+struct dpsw_acl_fields { -+ uint8_t l2_dest_mac[6]; -+ uint8_t l2_source_mac[6]; -+ uint16_t l2_tpid; -+ uint8_t l2_pcp_dei; -+ uint16_t l2_vlan_id; -+ uint16_t l2_ether_type; -+ uint8_t l3_dscp; -+ uint8_t l3_protocol; -+ uint32_t l3_source_ip; -+ uint32_t l3_dest_ip; -+ uint16_t l4_source_port; -+ uint16_t l4_dest_port; -+}; -+ -+/** -+ * struct dpsw_acl_key - ACL key -+ * @match: Match fields -+ * @mask: Mask: b'1 - valid, b'0 - don't care -+ */ -+struct dpsw_acl_key { -+ struct dpsw_acl_fields match; -+ struct dpsw_acl_fields mask; -+}; -+ -+/** -+ * enum dpsw_acl_action -+ * @DPSW_ACL_ACTION_DROP: Drop frame -+ * @DPSW_ACL_ACTION_REDIRECT: Redirect to certain port -+ * @DPSW_ACL_ACTION_ACCEPT: Accept frame -+ * @DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF: Redirect to control interface -+ */ -+enum dpsw_acl_action { -+ DPSW_ACL_ACTION_DROP, -+ DPSW_ACL_ACTION_REDIRECT, -+ DPSW_ACL_ACTION_ACCEPT, -+ DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF -+}; -+ -+/** -+ * struct dpsw_acl_result - ACL action -+ * @action: Action to be taken when an ACL entry is hit -+ * @if_id: Interface ID to redirect the frame to; valid only if a redirect -+ * action is selected -+ */ -+struct dpsw_acl_result { -+ enum dpsw_acl_action action; -+ uint16_t if_id; -+}; -+ -+/** -+ * struct dpsw_acl_entry_cfg - ACL entry -+ * @key_iova: I/O virtual address of DMA-able memory filled with key after call -+ * to dpsw_acl_prepare_entry_cfg() -+ * @result: Required action when entry hit occurs -+ * @precedence: Precedence inside the ACL; 0 is lowest. This priority cannot -+ * change during the lifetime of a policy. It is the user's -+ * responsibility to space the priorities to allow for subsequent -+ * rule additions. -+ */ -+struct dpsw_acl_entry_cfg { -+ uint64_t key_iova; -+ struct dpsw_acl_result result; -+ int precedence; -+}; -+ -+/** -+ * dpsw_acl_add() - Add an ACL to the L2 switch. -+ * @mc_io: Pointer to MC portal's I/O object -+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' -+ * @token: Token of DPSW object -+ * @acl_id: Returned ACL ID, for future reference -+ * @cfg: ACL configuration -+ * -+ * Creates an Access Control List. Multiple ACLs can be created and -+ * coexist in the L2 switch -+ * -+ * Return: '0' on Success; Error code otherwise.
-+ */
-+int dpsw_acl_add(struct fsl_mc_io *mc_io,
-+		 uint32_t cmd_flags,
-+		 uint16_t token,
-+		 uint16_t *acl_id,
-+		 const struct dpsw_acl_cfg *cfg);
-+
-+/**
-+ * dpsw_acl_remove() - Removes ACL from L2 switch.
-+ * @mc_io:	Pointer to MC portal's I/O object
-+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token:	Token of DPSW object
-+ * @acl_id:	ACL ID
-+ *
-+ * Return:	'0' on Success; Error code otherwise.
-+ */
-+int dpsw_acl_remove(struct fsl_mc_io *mc_io,
-+		    uint32_t cmd_flags,
-+		    uint16_t token,
-+		    uint16_t acl_id);
-+
-+/**
-+ * dpsw_acl_prepare_entry_cfg() - Prepare an ACL entry configuration.
-+ * @key:		ACL key
-+ * @entry_cfg_buf:	Zeroed 256-byte buffer, to be mapped for DMA
-+ *
-+ * This function has to be called before dpsw_acl_add_entry() or
-+ * dpsw_acl_remove_entry().
-+ */
-+void dpsw_acl_prepare_entry_cfg(const struct dpsw_acl_key *key,
-+				uint8_t *entry_cfg_buf);
-+
-+/**
-+ * dpsw_acl_add_entry() - Adds an entry to ACL.
-+ * @mc_io:	Pointer to MC portal's I/O object
-+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token:	Token of DPSW object
-+ * @acl_id:	ACL ID
-+ * @cfg:	Entry configuration
-+ *
-+ * warning: This function has to be called after dpsw_acl_prepare_entry_cfg()
-+ *
-+ * Return:	'0' on Success; Error code otherwise.
-+ */
-+int dpsw_acl_add_entry(struct fsl_mc_io *mc_io,
-+		       uint32_t cmd_flags,
-+		       uint16_t token,
-+		       uint16_t acl_id,
-+		       const struct dpsw_acl_entry_cfg *cfg);
-+
-+/**
-+ * dpsw_acl_remove_entry() - Removes an entry from ACL.
-+ * @mc_io:	Pointer to MC portal's I/O object
-+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token:	Token of DPSW object
-+ * @acl_id:	ACL ID
-+ * @cfg:	Entry configuration
-+ *
-+ * warning: This function has to be called after dpsw_acl_prepare_entry_cfg()
-+ *
-+ * Return:	'0' on Success; Error code otherwise.
-+ */
-+int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io,
-+			  uint32_t cmd_flags,
-+			  uint16_t token,
-+			  uint16_t acl_id,
-+			  const struct dpsw_acl_entry_cfg *cfg);
-+
-+/**
-+ * struct dpsw_acl_if_cfg - List of interfaces to associate with ACL
-+ * @num_ifs: Number of interfaces
-+ * @if_id: List of interfaces
-+ */
-+struct dpsw_acl_if_cfg {
-+	uint16_t num_ifs;
-+	uint16_t if_id[DPSW_MAX_IF];
-+};
-+
-+/**
-+ * dpsw_acl_add_if() - Associate interface/interfaces with ACL.
-+ * @mc_io:	Pointer to MC portal's I/O object
-+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token:	Token of DPSW object
-+ * @acl_id:	ACL ID
-+ * @cfg:	Interfaces list
-+ *
-+ * Return:	'0' on Success; Error code otherwise.
-+ */
-+int dpsw_acl_add_if(struct fsl_mc_io *mc_io,
-+		    uint32_t cmd_flags,
-+		    uint16_t token,
-+		    uint16_t acl_id,
-+		    const struct dpsw_acl_if_cfg *cfg);
-+
-+/**
-+ * dpsw_acl_remove_if() - Disassociate interface/interfaces from ACL.
-+ * @mc_io:	Pointer to MC portal's I/O object
-+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
-+ * @token:	Token of DPSW object
-+ * @acl_id:	ACL ID
-+ * @cfg:	Interfaces list
-+ *
-+ * Return:	'0' on Success; Error code otherwise.
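-+ *
-+ * Illustrative usage (editor's sketch, not part of the original API
-+ * documentation): associating two interfaces with an ACL, and removing
-+ * them again with the same configuration:
-+ *
-+ *	struct dpsw_acl_if_cfg if_cfg = {
-+ *		.num_ifs = 2,
-+ *		.if_id = { 0, 1 },
-+ *	};
-+ *
-+ *	err = dpsw_acl_add_if(mc_io, CMD_PRI_LOW, token, acl_id, &if_cfg);
-+ *	...
-+ *	err = dpsw_acl_remove_if(mc_io, CMD_PRI_LOW, token, acl_id, &if_cfg);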
-+ */
-+int dpsw_acl_remove_if(struct fsl_mc_io *mc_io,
-+		       uint32_t cmd_flags,
-+		       uint16_t token,
-+		       uint16_t acl_id,
-+		       const struct dpsw_acl_if_cfg *cfg);
-+
-+/**
-+ * struct dpsw_acl_attr - ACL Attributes
-+ * @max_entries: Max number of ACL entries
-+ * @num_entries: Number of used ACL entries
-+ * @num_ifs: Number of interfaces associated with ACL
-+ */
-+struct dpsw_acl_attr {
-+	uint16_t max_entries;
-+	uint16_t num_entries;
-+	uint16_t num_ifs;
-+};
-+
-+/**
-+* dpsw_acl_get_attributes() - Get ACL attributes
-+* @mc_io:	Pointer to MC portal's I/O object
-+* @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
-+* @token:	Token of DPSW object
-+* @acl_id:	ACL Identifier
-+* @attr:	Returned ACL attributes
-+*
-+* Return:	'0' on Success; Error code otherwise.
-+*/
-+int dpsw_acl_get_attributes(struct fsl_mc_io *mc_io,
-+			    uint32_t cmd_flags,
-+			    uint16_t token,
-+			    uint16_t acl_id,
-+			    struct dpsw_acl_attr *attr);
-+
-+/**
-+* struct dpsw_ctrl_if_attr - Control interface attributes
-+* @rx_fqid:		Receive FQID
-+* @rx_err_fqid:		Receive error FQID
-+* @tx_err_conf_fqid:	Transmit error and confirmation FQID
-+*/
-+struct dpsw_ctrl_if_attr {
-+	uint32_t rx_fqid;
-+	uint32_t rx_err_fqid;
-+	uint32_t tx_err_conf_fqid;
-+};
-+
-+/**
-+* dpsw_ctrl_if_get_attributes() - Obtain control interface attributes
-+* @mc_io:	Pointer to MC portal's I/O object
-+* @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
-+* @token:	Token of DPSW object
-+* @attr:	Returned control interface attributes
-+*
-+* Return:	'0' on Success; Error code otherwise.
-+*/
-+int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io,
-+				uint32_t cmd_flags,
-+				uint16_t token,
-+				struct dpsw_ctrl_if_attr *attr);
-+
-+/**
-+ * Maximum number of DPBP
-+ */
-+#define DPSW_MAX_DPBP 8
-+
-+/**
-+ * struct dpsw_ctrl_if_pools_cfg - Control interface buffer pools configuration
-+ * @num_dpbp: Number of DPBPs
-+ * @pools: Array of buffer pools parameters; the number of valid entries
-+ *	   must match the 'num_dpbp' value
-+ */
-+struct dpsw_ctrl_if_pools_cfg {
-+	uint8_t num_dpbp;
-+	/**
-+	 * struct pools - Buffer pools parameters
-+	 * @dpbp_id: DPBP object ID
-+	 * @buffer_size: Buffer size
-+	 * @backup_pool: Backup pool
-+	 */
-+	struct {
-+		int dpbp_id;
-+		uint16_t buffer_size;
-+		int backup_pool;
-+	} pools[DPSW_MAX_DPBP];
-+};
-+
-+/**
-+* dpsw_ctrl_if_set_pools() - Set control interface buffer pools
-+* @mc_io:	Pointer to MC portal's I/O object
-+* @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
-+* @token:	Token of DPSW object
-+* @cfg:	Buffer pools configuration
-+*
-+* Return:	'0' on Success; Error code otherwise.
-+*/
-+int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io,
-+			   uint32_t cmd_flags,
-+			   uint16_t token,
-+			   const struct dpsw_ctrl_if_pools_cfg *cfg);
-+
-+/**
-+* dpsw_ctrl_if_enable() - Enable control interface
-+* @mc_io:	Pointer to MC portal's I/O object
-+* @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
-+* @token:	Token of DPSW object
-+*
-+* Return:	'0' on Success; Error code otherwise.
-+*/
-+int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io,
-+			uint32_t cmd_flags,
-+			uint16_t token);
-+
-+/**
-+* dpsw_ctrl_if_disable() - Disable the control interface
-+* @mc_io:	Pointer to MC portal's I/O object
-+* @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
-+* @token:	Token of DPSW object
-+*
-+* Return:	'0' on Success; Error code otherwise.
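-+*
-+* Illustrative sequence (editor's sketch, not part of the original API
-+* documentation; 'dpbp_id' stands for a previously obtained DPBP object
-+* ID): configure buffer pools, enable, and later disable the control
-+* interface:
-+*
-+*	struct dpsw_ctrl_if_pools_cfg pools_cfg = {
-+*		.num_dpbp = 1,
-+*		.pools = { { .dpbp_id = dpbp_id,
-+*			     .buffer_size = 2048,
-+*			     .backup_pool = 0 } },
-+*	};
-+*
-+*	err = dpsw_ctrl_if_set_pools(mc_io, CMD_PRI_LOW, token, &pools_cfg);
-+*	err = dpsw_ctrl_if_enable(mc_io, CMD_PRI_LOW, token);
-+*	...
-+*	err = dpsw_ctrl_if_disable(mc_io, CMD_PRI_LOW, token);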
-+*/ -+int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token); -+ -+#endif /* __FSL_DPSW_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_dpsw_cmd.h b/drivers/net/dpaa2/mc/fsl_dpsw_cmd.h -new file mode 100644 -index 0000000..c65fe38 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_dpsw_cmd.h -@@ -0,0 +1,916 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+#ifndef __FSL_DPSW_CMD_H -+#define __FSL_DPSW_CMD_H -+ -+/* DPSW Version */ -+#define DPSW_VER_MAJOR 7 -+#define DPSW_VER_MINOR 0 -+ -+/* Command IDs */ -+#define DPSW_CMDID_CLOSE 0x800 -+#define DPSW_CMDID_OPEN 0x802 -+#define DPSW_CMDID_CREATE 0x902 -+#define DPSW_CMDID_DESTROY 0x900 -+ -+#define DPSW_CMDID_ENABLE 0x002 -+#define DPSW_CMDID_DISABLE 0x003 -+#define DPSW_CMDID_GET_ATTR 0x004 -+#define DPSW_CMDID_RESET 0x005 -+#define DPSW_CMDID_IS_ENABLED 0x006 -+ -+#define DPSW_CMDID_SET_IRQ 0x010 -+#define DPSW_CMDID_GET_IRQ 0x011 -+#define DPSW_CMDID_SET_IRQ_ENABLE 0x012 -+#define DPSW_CMDID_GET_IRQ_ENABLE 0x013 -+#define DPSW_CMDID_SET_IRQ_MASK 0x014 -+#define DPSW_CMDID_GET_IRQ_MASK 0x015 -+#define DPSW_CMDID_GET_IRQ_STATUS 0x016 -+#define DPSW_CMDID_CLEAR_IRQ_STATUS 0x017 -+ -+#define DPSW_CMDID_SET_REFLECTION_IF 0x022 -+ -+#define DPSW_CMDID_ADD_CUSTOM_TPID 0x024 -+ -+#define DPSW_CMDID_REMOVE_CUSTOM_TPID 0x026 -+ -+#define DPSW_CMDID_IF_SET_TCI 0x030 -+#define DPSW_CMDID_IF_SET_STP 0x031 -+#define DPSW_CMDID_IF_SET_ACCEPTED_FRAMES 0x032 -+#define DPSW_CMDID_SET_IF_ACCEPT_ALL_VLAN 0x033 -+#define DPSW_CMDID_IF_GET_COUNTER 0x034 -+#define DPSW_CMDID_IF_SET_COUNTER 0x035 -+#define DPSW_CMDID_IF_SET_TX_SELECTION 0x036 -+#define DPSW_CMDID_IF_ADD_REFLECTION 0x037 -+#define DPSW_CMDID_IF_REMOVE_REFLECTION 0x038 -+#define DPSW_CMDID_IF_SET_FLOODING_METERING 0x039 -+#define DPSW_CMDID_IF_SET_METERING 0x03A -+#define DPSW_CMDID_IF_SET_EARLY_DROP 0x03B -+ -+#define DPSW_CMDID_IF_ENABLE 0x03D -+#define DPSW_CMDID_IF_DISABLE 0x03E -+ -+#define DPSW_CMDID_IF_GET_ATTR 0x042 -+ -+#define DPSW_CMDID_IF_SET_MAX_FRAME_LENGTH 0x044 -+#define DPSW_CMDID_IF_GET_MAX_FRAME_LENGTH 0x045 -+#define DPSW_CMDID_IF_GET_LINK_STATE 0x046 -+#define DPSW_CMDID_IF_SET_FLOODING 0x047 -+#define DPSW_CMDID_IF_SET_BROADCAST 0x048 -+#define DPSW_CMDID_IF_SET_MULTICAST 0x049 -+#define DPSW_CMDID_IF_GET_TCI 0x04A -+ -+#define DPSW_CMDID_IF_SET_LINK_CFG 0x04C -+ -+#define DPSW_CMDID_VLAN_ADD 0x060 -+#define DPSW_CMDID_VLAN_ADD_IF 0x061 -+#define DPSW_CMDID_VLAN_ADD_IF_UNTAGGED 0x062 -+#define DPSW_CMDID_VLAN_ADD_IF_FLOODING 0x063 -+#define DPSW_CMDID_VLAN_REMOVE_IF 0x064 -+#define DPSW_CMDID_VLAN_REMOVE_IF_UNTAGGED 0x065 -+#define DPSW_CMDID_VLAN_REMOVE_IF_FLOODING 0x066 -+#define DPSW_CMDID_VLAN_REMOVE 0x067 -+#define DPSW_CMDID_VLAN_GET_IF 0x068 -+#define DPSW_CMDID_VLAN_GET_IF_FLOODING 0x069 -+#define DPSW_CMDID_VLAN_GET_IF_UNTAGGED 0x06A -+#define DPSW_CMDID_VLAN_GET_ATTRIBUTES 0x06B -+ -+#define DPSW_CMDID_FDB_GET_MULTICAST 0x080 -+#define DPSW_CMDID_FDB_GET_UNICAST 0x081 -+#define DPSW_CMDID_FDB_ADD 0x082 -+#define DPSW_CMDID_FDB_REMOVE 0x083 -+#define DPSW_CMDID_FDB_ADD_UNICAST 0x084 -+#define DPSW_CMDID_FDB_REMOVE_UNICAST 0x085 -+#define DPSW_CMDID_FDB_ADD_MULTICAST 0x086 -+#define DPSW_CMDID_FDB_REMOVE_MULTICAST 0x087 -+#define DPSW_CMDID_FDB_SET_LEARNING_MODE 0x088 -+#define DPSW_CMDID_FDB_GET_ATTR 0x089 -+ -+#define DPSW_CMDID_ACL_ADD 0x090 -+#define DPSW_CMDID_ACL_REMOVE 0x091 -+#define DPSW_CMDID_ACL_ADD_ENTRY 0x092 -+#define DPSW_CMDID_ACL_REMOVE_ENTRY 0x093 -+#define DPSW_CMDID_ACL_ADD_IF 0x094 -+#define DPSW_CMDID_ACL_REMOVE_IF 0x095 -+#define DPSW_CMDID_ACL_GET_ATTR 0x096 -+ -+#define DPSW_CMDID_CTRL_IF_GET_ATTR 0x0A0 -+#define DPSW_CMDID_CTRL_IF_SET_POOLS 0x0A1 -+#define DPSW_CMDID_CTRL_IF_ENABLE 0x0A2 -+#define DPSW_CMDID_CTRL_IF_DISABLE 0x0A3 -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_OPEN(cmd, dpsw_id) \ -+ MC_CMD_OP(cmd, 0, 0, 32, int, dpsw_id) -+ -+/* cmd, param, 
offset, width, type, arg_name */ -+#define DPSW_CMD_CREATE(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, cfg->num_ifs);\ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->adv.max_fdbs);\ -+ MC_CMD_OP(cmd, 0, 24, 8, uint8_t, cfg->adv.max_meters_per_if);\ -+ MC_CMD_OP(cmd, 0, 32, 4, enum dpsw_component_type, \ -+ cfg->adv.component_type);\ -+ MC_CMD_OP(cmd, 1, 0, 16, uint16_t, cfg->adv.max_vlans);\ -+ MC_CMD_OP(cmd, 1, 16, 16, uint16_t, cfg->adv.max_fdb_entries);\ -+ MC_CMD_OP(cmd, 1, 32, 16, uint16_t, cfg->adv.fdb_aging_time);\ -+ MC_CMD_OP(cmd, 1, 48, 16, uint16_t, cfg->adv.max_fdb_mc_groups);\ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->adv.options);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_IS_ENABLED(cmd, en) \ -+ MC_RSP_OP(cmd, 0, 0, 1, int, en) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_SET_IRQ(cmd, irq_index, irq_cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, irq_index);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, irq_cfg->val);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_GET_IRQ(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_GET_IRQ(cmd, type, irq_cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, irq_cfg->val); \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, irq_cfg->addr);\ -+ MC_RSP_OP(cmd, 2, 0, 32, int, irq_cfg->irq_num); \ -+ MC_RSP_OP(cmd, 2, 32, 32, int, type); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_SET_IRQ_ENABLE(cmd, irq_index, enable_state) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, enable_state); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_GET_IRQ_ENABLE(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_GET_IRQ_ENABLE(cmd, enable_state) \ -+ MC_RSP_OP(cmd, 0, 0, 8, uint8_t, enable_state) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_SET_IRQ_MASK(cmd, irq_index, mask) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, mask); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_GET_IRQ_MASK(cmd, irq_index) \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_GET_IRQ_MASK(cmd, mask) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, mask) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_GET_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status);\ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_GET_IRQ_STATUS(cmd, status) \ -+ MC_RSP_OP(cmd, 0, 0, 32, uint32_t, status) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_CLEAR_IRQ_STATUS(cmd, irq_index, status) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 32, uint32_t, status); \ -+ MC_CMD_OP(cmd, 0, 32, 8, uint8_t, irq_index);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 16, uint16_t, attr->num_ifs);\ -+ MC_RSP_OP(cmd, 0, 16, 8, uint8_t, attr->max_fdbs);\ -+ 
MC_RSP_OP(cmd, 0, 24, 8, uint8_t, attr->num_fdbs);\ -+ MC_RSP_OP(cmd, 0, 32, 16, uint16_t, attr->max_vlans);\ -+ MC_RSP_OP(cmd, 0, 48, 16, uint16_t, attr->num_vlans);\ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->version.major);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->version.minor);\ -+ MC_RSP_OP(cmd, 1, 32, 16, uint16_t, attr->max_fdb_entries);\ -+ MC_RSP_OP(cmd, 1, 48, 16, uint16_t, attr->fdb_aging_time);\ -+ MC_RSP_OP(cmd, 2, 0, 32, int, attr->id);\ -+ MC_RSP_OP(cmd, 2, 32, 16, uint16_t, attr->mem_size);\ -+ MC_RSP_OP(cmd, 2, 48, 16, uint16_t, attr->max_fdb_mc_groups);\ -+ MC_RSP_OP(cmd, 3, 0, 64, uint64_t, attr->options);\ -+ MC_RSP_OP(cmd, 4, 0, 8, uint8_t, attr->max_meters_per_if);\ -+ MC_RSP_OP(cmd, 4, 8, 4, enum dpsw_component_type, \ -+ attr->component_type);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_SET_REFLECTION_IF(cmd, if_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_FLOODING(cmd, if_id, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 1, int, en);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_BROADCAST(cmd, if_id, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 1, int, en);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_MULTICAST(cmd, if_id, en) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 1, int, en);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_TCI(cmd, if_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 12, uint16_t, cfg->vlan_id);\ -+ MC_CMD_OP(cmd, 0, 28, 1, uint8_t, cfg->dei);\ -+ MC_CMD_OP(cmd, 0, 29, 3, uint8_t, cfg->pcp);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_GET_TCI(cmd, if_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_IF_GET_TCI(cmd, cfg) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 16, 16, uint16_t, cfg->vlan_id);\ -+ MC_RSP_OP(cmd, 0, 32, 8, uint8_t, cfg->dei);\ -+ MC_RSP_OP(cmd, 0, 40, 8, uint8_t, cfg->pcp);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_STP(cmd, if_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->vlan_id);\ -+ MC_CMD_OP(cmd, 0, 32, 4, enum dpsw_stp_state, cfg->state);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_ACCEPTED_FRAMES(cmd, if_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 4, enum dpsw_accepted_frames, cfg->type);\ -+ MC_CMD_OP(cmd, 0, 20, 4, enum dpsw_action, cfg->unaccept_act);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_ACCEPT_ALL_VLAN(cmd, if_id, accept_all) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 1, int, accept_all);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_GET_COUNTER(cmd, if_id, type) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 5, enum dpsw_counter, type);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_IF_GET_COUNTER(cmd, counter) \ -+ MC_RSP_OP(cmd, 1, 0, 64, uint64_t, 
counter) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_COUNTER(cmd, if_id, type, counter) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 5, enum dpsw_counter, type);\ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, counter);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_TX_SELECTION(cmd, if_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 3, enum dpsw_priority_selector, \ -+ cfg->priority_selector);\ -+ MC_CMD_OP(cmd, 1, 0, 8, uint8_t, cfg->tc_id[0]);\ -+ MC_CMD_OP(cmd, 1, 8, 8, uint8_t, cfg->tc_id[1]);\ -+ MC_CMD_OP(cmd, 1, 16, 8, uint8_t, cfg->tc_id[2]);\ -+ MC_CMD_OP(cmd, 1, 24, 8, uint8_t, cfg->tc_id[3]);\ -+ MC_CMD_OP(cmd, 1, 32, 8, uint8_t, cfg->tc_id[4]);\ -+ MC_CMD_OP(cmd, 1, 40, 8, uint8_t, cfg->tc_id[5]);\ -+ MC_CMD_OP(cmd, 1, 48, 8, uint8_t, cfg->tc_id[6]);\ -+ MC_CMD_OP(cmd, 1, 56, 8, uint8_t, cfg->tc_id[7]);\ -+ MC_CMD_OP(cmd, 2, 0, 16, uint16_t, cfg->tc_sched[0].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 2, 16, 4, enum dpsw_schedule_mode, \ -+ cfg->tc_sched[0].mode);\ -+ MC_CMD_OP(cmd, 2, 32, 16, uint16_t, cfg->tc_sched[1].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 2, 48, 4, enum dpsw_schedule_mode, \ -+ cfg->tc_sched[1].mode);\ -+ MC_CMD_OP(cmd, 3, 0, 16, uint16_t, cfg->tc_sched[2].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 3, 16, 4, enum dpsw_schedule_mode, \ -+ cfg->tc_sched[2].mode);\ -+ MC_CMD_OP(cmd, 3, 32, 16, uint16_t, cfg->tc_sched[3].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 3, 48, 4, enum dpsw_schedule_mode, \ -+ cfg->tc_sched[3].mode);\ -+ MC_CMD_OP(cmd, 4, 0, 16, uint16_t, cfg->tc_sched[4].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 4, 16, 4, enum dpsw_schedule_mode, \ -+ cfg->tc_sched[4].mode);\ -+ MC_CMD_OP(cmd, 4, 32, 16, uint16_t, cfg->tc_sched[5].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 4, 48, 4, enum dpsw_schedule_mode, \ -+ cfg->tc_sched[5].mode);\ -+ MC_CMD_OP(cmd, 5, 0, 16, uint16_t, cfg->tc_sched[6].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 5, 16, 4, enum dpsw_schedule_mode, \ -+ cfg->tc_sched[6].mode);\ -+ MC_CMD_OP(cmd, 5, 32, 16, uint16_t, cfg->tc_sched[7].delta_bandwidth);\ -+ MC_CMD_OP(cmd, 5, 48, 4, enum dpsw_schedule_mode, \ -+ cfg->tc_sched[7].mode);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_ADD_REFLECTION(cmd, if_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->vlan_id);\ -+ MC_CMD_OP(cmd, 0, 32, 2, enum dpsw_reflection_filter, cfg->filter);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_REMOVE_REFLECTION(cmd, if_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->vlan_id);\ -+ MC_CMD_OP(cmd, 0, 32, 2, enum dpsw_reflection_filter, cfg->filter);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_FLOODING_METERING(cmd, if_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 24, 4, enum dpsw_metering_mode, cfg->mode);\ -+ MC_CMD_OP(cmd, 0, 28, 4, enum dpsw_metering_unit, cfg->units);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, cfg->cir);\ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->eir);\ -+ MC_CMD_OP(cmd, 1, 32, 32, uint32_t, cfg->cbs);\ -+ MC_CMD_OP(cmd, 2, 0, 32, uint32_t, cfg->ebs);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_METERING(cmd, if_id, tc_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, 
if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc_id);\ -+ MC_CMD_OP(cmd, 0, 24, 4, enum dpsw_metering_mode, cfg->mode);\ -+ MC_CMD_OP(cmd, 0, 28, 4, enum dpsw_metering_unit, cfg->units);\ -+ MC_CMD_OP(cmd, 0, 32, 32, uint32_t, cfg->cir);\ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->eir);\ -+ MC_CMD_OP(cmd, 1, 32, 32, uint32_t, cfg->cbs);\ -+ MC_CMD_OP(cmd, 2, 0, 32, uint32_t, cfg->ebs);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_PREP_EARLY_DROP(ext, cfg) \ -+do { \ -+ MC_PREP_OP(ext, 0, 0, 2, enum dpsw_early_drop_mode, cfg->drop_mode); \ -+ MC_PREP_OP(ext, 0, 2, 2, \ -+ enum dpsw_early_drop_unit, cfg->units); \ -+ MC_PREP_OP(ext, 0, 32, 32, uint32_t, cfg->tail_drop_threshold); \ -+ MC_PREP_OP(ext, 1, 0, 8, uint8_t, cfg->green.drop_probability); \ -+ MC_PREP_OP(ext, 2, 0, 64, uint64_t, cfg->green.max_threshold); \ -+ MC_PREP_OP(ext, 3, 0, 64, uint64_t, cfg->green.min_threshold); \ -+ MC_PREP_OP(ext, 5, 0, 8, uint8_t, cfg->yellow.drop_probability);\ -+ MC_PREP_OP(ext, 6, 0, 64, uint64_t, cfg->yellow.max_threshold); \ -+ MC_PREP_OP(ext, 7, 0, 64, uint64_t, cfg->yellow.min_threshold); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_EXT_EARLY_DROP(ext, cfg) \ -+do { \ -+ MC_EXT_OP(ext, 0, 0, 2, enum dpsw_early_drop_mode, cfg->drop_mode); \ -+ MC_EXT_OP(ext, 0, 2, 2, \ -+ enum dpsw_early_drop_unit, cfg->units); \ -+ MC_EXT_OP(ext, 0, 32, 32, uint32_t, cfg->tail_drop_threshold); \ -+ MC_EXT_OP(ext, 1, 0, 8, uint8_t, cfg->green.drop_probability); \ -+ MC_EXT_OP(ext, 2, 0, 64, uint64_t, cfg->green.max_threshold); \ -+ MC_EXT_OP(ext, 3, 0, 64, uint64_t, cfg->green.min_threshold); \ -+ MC_EXT_OP(ext, 5, 0, 8, uint8_t, cfg->yellow.drop_probability);\ -+ MC_EXT_OP(ext, 6, 0, 64, uint64_t, cfg->yellow.max_threshold); \ -+ MC_EXT_OP(ext, 7, 0, 64, uint64_t, cfg->yellow.min_threshold); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_EARLY_DROP(cmd, if_id, tc_id, early_drop_iova) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc_id); \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, if_id); \ -+ MC_CMD_OP(cmd, 1, 0, 64, uint64_t, early_drop_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_ADD_CUSTOM_TPID(cmd, cfg) \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->tpid) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_REMOVE_CUSTOM_TPID(cmd, cfg) \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->tpid) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_ENABLE(cmd, if_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_DISABLE(cmd, if_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_GET_ATTR(cmd, if_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_IF_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 0, 4, enum dpsw_accepted_frames, \ -+ attr->admit_untagged);\ -+ MC_RSP_OP(cmd, 0, 5, 1, int, attr->enabled);\ -+ MC_RSP_OP(cmd, 0, 6, 1, int, attr->accept_all_vlan);\ -+ MC_RSP_OP(cmd, 0, 16, 8, uint8_t, attr->num_tcs);\ -+ MC_RSP_OP(cmd, 0, 32, 16, uint16_t, attr->qdid);\ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, attr->options);\ -+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, attr->rate);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_MAX_FRAME_LENGTH(cmd, 
if_id, frame_length) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, frame_length);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_GET_MAX_FRAME_LENGTH(cmd, if_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_IF_GET_MAX_FRAME_LENGTH(cmd, frame_length) \ -+ MC_RSP_OP(cmd, 0, 16, 16, uint16_t, frame_length) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_SET_LINK_CFG(cmd, if_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id);\ -+ MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->rate);\ -+ MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->options);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_IF_GET_LINK_STATE(cmd, if_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, if_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_IF_GET_LINK_STATE(cmd, state) \ -+do { \ -+ MC_RSP_OP(cmd, 0, 32, 1, int, state->up);\ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, state->rate);\ -+ MC_RSP_OP(cmd, 2, 0, 64, uint64_t, state->options);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_VLAN_ADD(cmd, vlan_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, cfg->fdb_id);\ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, vlan_id);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_VLAN_ADD_IF(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, vlan_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_VLAN_ADD_IF_UNTAGGED(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, vlan_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_VLAN_ADD_IF_FLOODING(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, vlan_id) -+ -+#define DPSW_CMD_VLAN_REMOVE_IF(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, vlan_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_VLAN_REMOVE_IF_UNTAGGED(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, vlan_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_VLAN_REMOVE_IF_FLOODING(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, vlan_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_VLAN_REMOVE(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, vlan_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_VLAN_GET_ATTR(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, vlan_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_VLAN_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->fdb_id); \ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->num_ifs); \ -+ MC_RSP_OP(cmd, 1, 32, 16, uint16_t, attr->num_untagged_ifs); \ -+ MC_RSP_OP(cmd, 1, 48, 16, uint16_t, attr->num_flooding_ifs); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_VLAN_GET_IF(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, vlan_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_VLAN_GET_IF(cmd, cfg) \ -+ MC_RSP_OP(cmd, 0, 16, 16, uint16_t, cfg->num_ifs) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_VLAN_GET_IF_FLOODING(cmd, vlan_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, vlan_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_VLAN_GET_IF_FLOODING(cmd, cfg) \ -+ MC_RSP_OP(cmd, 0, 16, 16, 
uint16_t, cfg->num_ifs)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_VLAN_GET_IF_UNTAGGED(cmd, vlan_id) \
-+	MC_CMD_OP(cmd, 0, 0, 16, uint16_t, vlan_id)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_RSP_VLAN_GET_IF_UNTAGGED(cmd, cfg) \
-+	MC_RSP_OP(cmd, 0, 16, 16, uint16_t, cfg->num_ifs)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_FDB_ADD(cmd, cfg) \
-+do { \
-+	MC_CMD_OP(cmd, 0, 32, 16, uint16_t, cfg->fdb_aging_time);\
-+	MC_CMD_OP(cmd, 0, 48, 16, uint16_t, cfg->num_fdb_entries);\
-+} while (0)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_RSP_FDB_ADD(cmd, fdb_id) \
-+	MC_RSP_OP(cmd, 0, 0, 16, uint16_t, fdb_id)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_FDB_REMOVE(cmd, fdb_id) \
-+	MC_CMD_OP(cmd, 0, 0, 16, uint16_t, fdb_id)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_FDB_ADD_UNICAST(cmd, fdb_id, cfg) \
-+do { \
-+	MC_CMD_OP(cmd, 0, 0, 16, uint16_t, fdb_id);\
-+	MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->mac_addr[5]);\
-+	MC_CMD_OP(cmd, 0, 24, 8, uint8_t, cfg->mac_addr[4]);\
-+	MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->mac_addr[3]);\
-+	MC_CMD_OP(cmd, 0, 40, 8, uint8_t, cfg->mac_addr[2]);\
-+	MC_CMD_OP(cmd, 0, 48, 8, uint8_t, cfg->mac_addr[1]);\
-+	MC_CMD_OP(cmd, 0, 56, 8, uint8_t, cfg->mac_addr[0]);\
-+	MC_CMD_OP(cmd, 1, 0, 8, uint16_t, cfg->if_egress);\
-+	MC_CMD_OP(cmd, 1, 16, 4, enum dpsw_fdb_entry_type, cfg->type);\
-+} while (0)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_FDB_GET_UNICAST(cmd, fdb_id, cfg) \
-+do { \
-+	MC_CMD_OP(cmd, 0, 0, 16, uint16_t, fdb_id);\
-+	MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->mac_addr[5]);\
-+	MC_CMD_OP(cmd, 0, 24, 8, uint8_t, cfg->mac_addr[4]);\
-+	MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->mac_addr[3]);\
-+	MC_CMD_OP(cmd, 0, 40, 8, uint8_t, cfg->mac_addr[2]);\
-+	MC_CMD_OP(cmd, 0, 48, 8, uint8_t, cfg->mac_addr[1]);\
-+	MC_CMD_OP(cmd, 0, 56, 8, uint8_t, cfg->mac_addr[0]);\
-+} while (0)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_RSP_FDB_GET_UNICAST(cmd, cfg) \
-+do { \
-+	MC_RSP_OP(cmd, 1, 0, 16, uint16_t, cfg->if_egress);\
-+	MC_RSP_OP(cmd, 1, 16, 4, enum dpsw_fdb_entry_type, cfg->type);\
-+} while (0)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_FDB_REMOVE_UNICAST(cmd, fdb_id, cfg) \
-+do { \
-+	MC_CMD_OP(cmd, 0, 0, 16, uint16_t, fdb_id);\
-+	MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->mac_addr[5]);\
-+	MC_CMD_OP(cmd, 0, 24, 8, uint8_t, cfg->mac_addr[4]);\
-+	MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->mac_addr[3]);\
-+	MC_CMD_OP(cmd, 0, 40, 8, uint8_t, cfg->mac_addr[2]);\
-+	MC_CMD_OP(cmd, 0, 48, 8, uint8_t, cfg->mac_addr[1]);\
-+	MC_CMD_OP(cmd, 0, 56, 8, uint8_t, cfg->mac_addr[0]);\
-+	MC_CMD_OP(cmd, 1, 0, 16, uint16_t, cfg->if_egress);\
-+	MC_CMD_OP(cmd, 1, 16, 4, enum dpsw_fdb_entry_type, cfg->type);\
-+} while (0)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_FDB_ADD_MULTICAST(cmd, fdb_id, cfg) \
-+do { \
-+	MC_CMD_OP(cmd, 0, 0, 16, uint16_t, fdb_id);\
-+	MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->num_ifs);\
-+	MC_CMD_OP(cmd, 0, 32, 4, enum dpsw_fdb_entry_type, cfg->type);\
-+	MC_CMD_OP(cmd, 1, 0, 8, uint8_t, cfg->mac_addr[5]);\
-+	MC_CMD_OP(cmd, 1, 8, 8, uint8_t, cfg->mac_addr[4]);\
-+	MC_CMD_OP(cmd, 1, 16, 8, uint8_t, cfg->mac_addr[3]);\
-+	MC_CMD_OP(cmd, 1, 24, 8, uint8_t, cfg->mac_addr[2]);\
-+	MC_CMD_OP(cmd, 1, 32, 8, uint8_t, cfg->mac_addr[1]);\
-+	MC_CMD_OP(cmd, 1, 40, 8, uint8_t, cfg->mac_addr[0]);\
-+} while (0)
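-+
-+/* Editor's illustrative note (not part of the original patch): each
-+ * MC_CMD_OP() invocation above builds one field of a 64-bit command
-+ * parameter by masking the value to 'width' bits, shifting it to
-+ * 'offset' and OR-ing it into cmd.params[param]. Expanded by hand for
-+ * the first parameter of DPSW_CMD_FDB_ADD_MULTICAST:
-+ *
-+ *	cmd.params[0] |= mc_enc(0, 16, fdb_id);        // bits 0..15
-+ *	cmd.params[0] |= mc_enc(16, 16, cfg->num_ifs); // bits 16..31
-+ *	cmd.params[0] |= mc_enc(32, 4, cfg->type);     // bits 32..35
-+ *
-+ * where mc_enc(lsoffset, width, val) evaluates to
-+ * (val & ((1ULL << width) - 1)) << lsoffset, as defined in fsl_mc_cmd.h.
-+ */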
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_FDB_GET_MULTICAST(cmd, fdb_id, cfg) \
-+do { \
-+	MC_CMD_OP(cmd, 0, 0, 16, uint16_t, fdb_id);\
-+	MC_CMD_OP(cmd, 0, 16, 8, uint8_t, cfg->mac_addr[5]);\
-+	MC_CMD_OP(cmd, 0, 24, 8, uint8_t, cfg->mac_addr[4]);\
-+	MC_CMD_OP(cmd, 0, 32, 8, uint8_t, cfg->mac_addr[3]);\
-+	MC_CMD_OP(cmd, 0, 40, 8, uint8_t, cfg->mac_addr[2]);\
-+	MC_CMD_OP(cmd, 0, 48, 8, uint8_t, cfg->mac_addr[1]);\
-+	MC_CMD_OP(cmd, 0, 56, 8, uint8_t, cfg->mac_addr[0]);\
-+} while (0)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_RSP_FDB_GET_MULTICAST(cmd, cfg) \
-+do { \
-+	MC_RSP_OP(cmd, 1, 0, 16, uint16_t, cfg->num_ifs);\
-+	MC_RSP_OP(cmd, 1, 16, 4, enum dpsw_fdb_entry_type, cfg->type);\
-+} while (0)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_FDB_REMOVE_MULTICAST(cmd, fdb_id, cfg) \
-+do { \
-+	MC_CMD_OP(cmd, 0, 0, 16, uint16_t, fdb_id);\
-+	MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->num_ifs);\
-+	MC_CMD_OP(cmd, 0, 32, 4, enum dpsw_fdb_entry_type, cfg->type);\
-+	MC_CMD_OP(cmd, 1, 0, 8, uint8_t, cfg->mac_addr[5]);\
-+	MC_CMD_OP(cmd, 1, 8, 8, uint8_t, cfg->mac_addr[4]);\
-+	MC_CMD_OP(cmd, 1, 16, 8, uint8_t, cfg->mac_addr[3]);\
-+	MC_CMD_OP(cmd, 1, 24, 8, uint8_t, cfg->mac_addr[2]);\
-+	MC_CMD_OP(cmd, 1, 32, 8, uint8_t, cfg->mac_addr[1]);\
-+	MC_CMD_OP(cmd, 1, 40, 8, uint8_t, cfg->mac_addr[0]);\
-+} while (0)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_FDB_SET_LEARNING_MODE(cmd, fdb_id, mode) \
-+do { \
-+	MC_CMD_OP(cmd, 0, 0, 16, uint16_t, fdb_id);\
-+	MC_CMD_OP(cmd, 0, 16, 4, enum dpsw_fdb_learning_mode, mode);\
-+} while (0)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_FDB_GET_ATTR(cmd, fdb_id) \
-+	MC_CMD_OP(cmd, 0, 0, 16, uint16_t, fdb_id)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_RSP_FDB_GET_ATTR(cmd, attr) \
-+do { \
-+	MC_RSP_OP(cmd, 0, 16, 16, uint16_t, attr->max_fdb_entries);\
-+	MC_RSP_OP(cmd, 0, 32, 16, uint16_t, attr->fdb_aging_time);\
-+	MC_RSP_OP(cmd, 0, 48, 16, uint16_t, attr->num_fdb_mc_groups);\
-+	MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->max_fdb_mc_groups);\
-+	MC_RSP_OP(cmd, 1, 16, 4, enum dpsw_fdb_learning_mode, \
-+		attr->learning_mode);\
-+} while (0)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_ACL_ADD(cmd, cfg) \
-+	MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->max_entries)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_RSP_ACL_ADD(cmd, acl_id) \
-+	MC_RSP_OP(cmd, 0, 0, 16, uint16_t, acl_id)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_CMD_ACL_REMOVE(cmd, acl_id) \
-+	MC_CMD_OP(cmd, 0, 0, 16, uint16_t, acl_id)
-+
-+/* cmd, param, offset, width, type, arg_name */
-+#define DPSW_PREP_ACL_ENTRY(ext, key) \
-+do { \
-+	MC_PREP_OP(ext, 0, 0, 8, uint8_t, key->match.l2_dest_mac[5]);\
-+	MC_PREP_OP(ext, 0, 8, 8, uint8_t, key->match.l2_dest_mac[4]);\
-+	MC_PREP_OP(ext, 0, 16, 8, uint8_t, key->match.l2_dest_mac[3]);\
-+	MC_PREP_OP(ext, 0, 24, 8, uint8_t, key->match.l2_dest_mac[2]);\
-+	MC_PREP_OP(ext, 0, 32, 8, uint8_t, key->match.l2_dest_mac[1]);\
-+	MC_PREP_OP(ext, 0, 40, 8, uint8_t, key->match.l2_dest_mac[0]);\
-+	MC_PREP_OP(ext, 0, 48, 16, uint16_t, key->match.l2_tpid);\
-+	MC_PREP_OP(ext, 1, 0, 8, uint8_t, key->match.l2_source_mac[5]);\
-+	MC_PREP_OP(ext, 1, 8, 8, uint8_t, key->match.l2_source_mac[4]);\
-+	MC_PREP_OP(ext, 1, 16, 8, uint8_t, key->match.l2_source_mac[3]);\
-+	MC_PREP_OP(ext, 1, 24, 8, uint8_t, key->match.l2_source_mac[2]);\
-+
MC_PREP_OP(ext, 1, 32, 8, uint8_t, key->match.l2_source_mac[1]);\ -+ MC_PREP_OP(ext, 1, 40, 8, uint8_t, key->match.l2_source_mac[0]);\ -+ MC_PREP_OP(ext, 1, 48, 16, uint16_t, key->match.l2_vlan_id);\ -+ MC_PREP_OP(ext, 2, 0, 32, uint32_t, key->match.l3_dest_ip);\ -+ MC_PREP_OP(ext, 2, 32, 32, uint32_t, key->match.l3_source_ip);\ -+ MC_PREP_OP(ext, 3, 0, 16, uint16_t, key->match.l4_dest_port);\ -+ MC_PREP_OP(ext, 3, 16, 16, uint16_t, key->match.l4_source_port);\ -+ MC_PREP_OP(ext, 3, 32, 16, uint16_t, key->match.l2_ether_type);\ -+ MC_PREP_OP(ext, 3, 48, 8, uint8_t, key->match.l2_pcp_dei);\ -+ MC_PREP_OP(ext, 3, 56, 8, uint8_t, key->match.l3_dscp);\ -+ MC_PREP_OP(ext, 4, 0, 8, uint8_t, key->mask.l2_dest_mac[5]);\ -+ MC_PREP_OP(ext, 4, 8, 8, uint8_t, key->mask.l2_dest_mac[4]);\ -+ MC_PREP_OP(ext, 4, 16, 8, uint8_t, key->mask.l2_dest_mac[3]);\ -+ MC_PREP_OP(ext, 4, 24, 8, uint8_t, key->mask.l2_dest_mac[2]);\ -+ MC_PREP_OP(ext, 4, 32, 8, uint8_t, key->mask.l2_dest_mac[1]);\ -+ MC_PREP_OP(ext, 4, 40, 8, uint8_t, key->mask.l2_dest_mac[0]);\ -+ MC_PREP_OP(ext, 4, 48, 16, uint16_t, key->mask.l2_tpid);\ -+ MC_PREP_OP(ext, 5, 0, 8, uint8_t, key->mask.l2_source_mac[5]);\ -+ MC_PREP_OP(ext, 5, 8, 8, uint8_t, key->mask.l2_source_mac[4]);\ -+ MC_PREP_OP(ext, 5, 16, 8, uint8_t, key->mask.l2_source_mac[3]);\ -+ MC_PREP_OP(ext, 5, 24, 8, uint8_t, key->mask.l2_source_mac[2]);\ -+ MC_PREP_OP(ext, 5, 32, 8, uint8_t, key->mask.l2_source_mac[1]);\ -+ MC_PREP_OP(ext, 5, 40, 8, uint8_t, key->mask.l2_source_mac[0]);\ -+ MC_PREP_OP(ext, 5, 48, 16, uint16_t, key->mask.l2_vlan_id);\ -+ MC_PREP_OP(ext, 6, 0, 32, uint32_t, key->mask.l3_dest_ip);\ -+ MC_PREP_OP(ext, 6, 32, 32, uint32_t, key->mask.l3_source_ip);\ -+ MC_PREP_OP(ext, 7, 0, 16, uint16_t, key->mask.l4_dest_port);\ -+ MC_PREP_OP(ext, 7, 16, 16, uint16_t, key->mask.l4_source_port);\ -+ MC_PREP_OP(ext, 7, 32, 16, uint16_t, key->mask.l2_ether_type);\ -+ MC_PREP_OP(ext, 7, 48, 8, uint8_t, key->mask.l2_pcp_dei);\ -+ MC_PREP_OP(ext, 7, 56, 8, uint8_t, key->mask.l3_dscp);\ -+ MC_PREP_OP(ext, 8, 0, 8, uint8_t, key->match.l3_protocol);\ -+ MC_PREP_OP(ext, 8, 8, 8, uint8_t, key->mask.l3_protocol);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_EXT_ACL_ENTRY(ext, key) \ -+do { \ -+ MC_EXT_OP(ext, 0, 0, 8, uint8_t, key->match.l2_dest_mac[5]);\ -+ MC_EXT_OP(ext, 0, 8, 8, uint8_t, key->match.l2_dest_mac[4]);\ -+ MC_EXT_OP(ext, 0, 16, 8, uint8_t, key->match.l2_dest_mac[3]);\ -+ MC_EXT_OP(ext, 0, 24, 8, uint8_t, key->match.l2_dest_mac[2]);\ -+ MC_EXT_OP(ext, 0, 32, 8, uint8_t, key->match.l2_dest_mac[1]);\ -+ MC_EXT_OP(ext, 0, 40, 8, uint8_t, key->match.l2_dest_mac[0]);\ -+ MC_EXT_OP(ext, 0, 48, 16, uint16_t, key->match.l2_tpid);\ -+ MC_EXT_OP(ext, 1, 0, 8, uint8_t, key->match.l2_source_mac[5]);\ -+ MC_EXT_OP(ext, 1, 8, 8, uint8_t, key->match.l2_source_mac[4]);\ -+ MC_EXT_OP(ext, 1, 16, 8, uint8_t, key->match.l2_source_mac[3]);\ -+ MC_EXT_OP(ext, 1, 24, 8, uint8_t, key->match.l2_source_mac[2]);\ -+ MC_EXT_OP(ext, 1, 32, 8, uint8_t, key->match.l2_source_mac[1]);\ -+ MC_EXT_OP(ext, 1, 40, 8, uint8_t, key->match.l2_source_mac[0]);\ -+ MC_EXT_OP(ext, 1, 48, 16, uint16_t, key->match.l2_vlan_id);\ -+ MC_EXT_OP(ext, 2, 0, 32, uint32_t, key->match.l3_dest_ip);\ -+ MC_EXT_OP(ext, 2, 32, 32, uint32_t, key->match.l3_source_ip);\ -+ MC_EXT_OP(ext, 3, 0, 16, uint16_t, key->match.l4_dest_port);\ -+ MC_EXT_OP(ext, 3, 16, 16, uint16_t, key->match.l4_source_port);\ -+ MC_EXT_OP(ext, 3, 32, 16, uint16_t, key->match.l2_ether_type);\ -+ MC_EXT_OP(ext, 3, 48, 
8, uint8_t, key->match.l2_pcp_dei);\ -+ MC_EXT_OP(ext, 3, 56, 8, uint8_t, key->match.l3_dscp);\ -+ MC_EXT_OP(ext, 4, 0, 8, uint8_t, key->mask.l2_dest_mac[5]);\ -+ MC_EXT_OP(ext, 4, 8, 8, uint8_t, key->mask.l2_dest_mac[4]);\ -+ MC_EXT_OP(ext, 4, 16, 8, uint8_t, key->mask.l2_dest_mac[3]);\ -+ MC_EXT_OP(ext, 4, 24, 8, uint8_t, key->mask.l2_dest_mac[2]);\ -+ MC_EXT_OP(ext, 4, 32, 8, uint8_t, key->mask.l2_dest_mac[1]);\ -+ MC_EXT_OP(ext, 4, 40, 8, uint8_t, key->mask.l2_dest_mac[0]);\ -+ MC_EXT_OP(ext, 4, 48, 16, uint16_t, key->mask.l2_tpid);\ -+ MC_EXT_OP(ext, 5, 0, 8, uint8_t, key->mask.l2_source_mac[5]);\ -+ MC_EXT_OP(ext, 5, 8, 8, uint8_t, key->mask.l2_source_mac[4]);\ -+ MC_EXT_OP(ext, 5, 16, 8, uint8_t, key->mask.l2_source_mac[3]);\ -+ MC_EXT_OP(ext, 5, 24, 8, uint8_t, key->mask.l2_source_mac[2]);\ -+ MC_EXT_OP(ext, 5, 32, 8, uint8_t, key->mask.l2_source_mac[1]);\ -+ MC_EXT_OP(ext, 5, 40, 8, uint8_t, key->mask.l2_source_mac[0]);\ -+ MC_EXT_OP(ext, 5, 48, 16, uint16_t, key->mask.l2_vlan_id);\ -+ MC_EXT_OP(ext, 6, 0, 32, uint32_t, key->mask.l3_dest_ip);\ -+ MC_EXT_OP(ext, 6, 32, 32, uint32_t, key->mask.l3_source_ip);\ -+ MC_EXT_OP(ext, 7, 0, 16, uint16_t, key->mask.l4_dest_port);\ -+ MC_EXT_OP(ext, 7, 16, 16, uint16_t, key->mask.l4_source_port);\ -+ MC_EXT_OP(ext, 7, 32, 16, uint16_t, key->mask.l2_ether_type);\ -+ MC_EXT_OP(ext, 7, 48, 8, uint8_t, key->mask.l2_pcp_dei);\ -+ MC_EXT_OP(ext, 7, 56, 8, uint8_t, key->mask.l3_dscp);\ -+ MC_EXT_OP(ext, 8, 0, 8, uint8_t, key->match.l3_protocol);\ -+ MC_EXT_OP(ext, 8, 8, 8, uint8_t, key->mask.l3_protocol);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_ACL_ADD_ENTRY(cmd, acl_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, acl_id);\ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->result.if_id);\ -+ MC_CMD_OP(cmd, 0, 32, 32, int, cfg->precedence);\ -+ MC_CMD_OP(cmd, 1, 0, 4, enum dpsw_acl_action, cfg->result.action);\ -+ MC_CMD_OP(cmd, 6, 0, 64, uint64_t, cfg->key_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_ACL_REMOVE_ENTRY(cmd, acl_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, acl_id);\ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->result.if_id);\ -+ MC_CMD_OP(cmd, 0, 32, 32, int, cfg->precedence);\ -+ MC_CMD_OP(cmd, 1, 0, 4, enum dpsw_acl_action, cfg->result.action);\ -+ MC_CMD_OP(cmd, 6, 0, 64, uint64_t, cfg->key_iova); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_ACL_ADD_IF(cmd, acl_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, acl_id);\ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->num_ifs); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_ACL_REMOVE_IF(cmd, acl_id, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, acl_id);\ -+ MC_CMD_OP(cmd, 0, 16, 16, uint16_t, cfg->num_ifs); \ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_ACL_GET_ATTR(cmd, acl_id) \ -+ MC_CMD_OP(cmd, 0, 0, 16, uint16_t, acl_id) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_ACL_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 1, 0, 16, uint16_t, attr->max_entries);\ -+ MC_RSP_OP(cmd, 1, 16, 16, uint16_t, attr->num_entries);\ -+ MC_RSP_OP(cmd, 1, 32, 16, uint16_t, attr->num_ifs);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_RSP_CTRL_IF_GET_ATTR(cmd, attr) \ -+do { \ -+ MC_RSP_OP(cmd, 1, 0, 32, uint32_t, attr->rx_fqid);\ -+ MC_RSP_OP(cmd, 1, 32, 32, uint32_t, attr->rx_err_fqid);\ 
-+ MC_RSP_OP(cmd, 2, 0, 32, uint32_t, attr->tx_err_conf_fqid);\ -+} while (0) -+ -+/* cmd, param, offset, width, type, arg_name */ -+#define DPSW_CMD_CTRL_IF_SET_POOLS(cmd, cfg) \ -+do { \ -+ MC_CMD_OP(cmd, 0, 0, 8, uint8_t, cfg->num_dpbp); \ -+ MC_CMD_OP(cmd, 0, 8, 1, int, cfg->pools[0].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 9, 1, int, cfg->pools[1].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 10, 1, int, cfg->pools[2].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 11, 1, int, cfg->pools[3].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 12, 1, int, cfg->pools[4].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 13, 1, int, cfg->pools[5].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 14, 1, int, cfg->pools[6].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 15, 1, int, cfg->pools[7].backup_pool); \ -+ MC_CMD_OP(cmd, 0, 32, 32, int, cfg->pools[0].dpbp_id); \ -+ MC_CMD_OP(cmd, 4, 32, 16, uint16_t, cfg->pools[0].buffer_size);\ -+ MC_CMD_OP(cmd, 1, 0, 32, int, cfg->pools[1].dpbp_id); \ -+ MC_CMD_OP(cmd, 4, 48, 16, uint16_t, cfg->pools[1].buffer_size);\ -+ MC_CMD_OP(cmd, 1, 32, 32, int, cfg->pools[2].dpbp_id); \ -+ MC_CMD_OP(cmd, 5, 0, 16, uint16_t, cfg->pools[2].buffer_size);\ -+ MC_CMD_OP(cmd, 2, 0, 32, int, cfg->pools[3].dpbp_id); \ -+ MC_CMD_OP(cmd, 5, 16, 16, uint16_t, cfg->pools[3].buffer_size);\ -+ MC_CMD_OP(cmd, 2, 32, 32, int, cfg->pools[4].dpbp_id); \ -+ MC_CMD_OP(cmd, 5, 32, 16, uint16_t, cfg->pools[4].buffer_size);\ -+ MC_CMD_OP(cmd, 3, 0, 32, int, cfg->pools[5].dpbp_id); \ -+ MC_CMD_OP(cmd, 5, 48, 16, uint16_t, cfg->pools[5].buffer_size);\ -+ MC_CMD_OP(cmd, 3, 32, 32, int, cfg->pools[6].dpbp_id); \ -+ MC_CMD_OP(cmd, 6, 0, 16, uint16_t, cfg->pools[6].buffer_size);\ -+ MC_CMD_OP(cmd, 4, 0, 32, int, cfg->pools[7].dpbp_id); \ -+ MC_CMD_OP(cmd, 6, 16, 16, uint16_t, cfg->pools[7].buffer_size);\ -+} while (0) -+ -+#endif /* __FSL_DPSW_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_mc_cmd.h b/drivers/net/dpaa2/mc/fsl_mc_cmd.h -new file mode 100644 -index 0000000..ca4fb64 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_mc_cmd.h -@@ -0,0 +1,221 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_MC_CMD_H -+#define __FSL_MC_CMD_H -+ -+#define MC_CMD_NUM_OF_PARAMS 7 -+ -+#define MAKE_UMASK64(_width) \ -+ ((uint64_t)((_width) < 64 ? ((uint64_t)1 << (_width)) - 1 :\ -+ (uint64_t)-1)) -+static inline uint64_t mc_enc(int lsoffset, int width, uint64_t val) -+{ -+ return (uint64_t)(((uint64_t)val & MAKE_UMASK64(width)) << lsoffset); -+} -+ -+static inline uint64_t mc_dec(uint64_t val, int lsoffset, int width) -+{ -+ return (uint64_t)((val >> lsoffset) & MAKE_UMASK64(width)); -+} -+ -+struct mc_command { -+ uint64_t header; -+ uint64_t params[MC_CMD_NUM_OF_PARAMS]; -+}; -+ -+/** -+ * enum mc_cmd_status - indicates MC status at command response -+ * @MC_CMD_STATUS_OK: Completed successfully -+ * @MC_CMD_STATUS_READY: Ready to be processed -+ * @MC_CMD_STATUS_AUTH_ERR: Authentication error -+ * @MC_CMD_STATUS_NO_PRIVILEGE: No privilege -+ * @MC_CMD_STATUS_DMA_ERR: DMA or I/O error -+ * @MC_CMD_STATUS_CONFIG_ERR: Configuration error -+ * @MC_CMD_STATUS_TIMEOUT: Operation timed out -+ * @MC_CMD_STATUS_NO_RESOURCE: No resources -+ * @MC_CMD_STATUS_NO_MEMORY: No memory available -+ * @MC_CMD_STATUS_BUSY: Device is busy -+ * @MC_CMD_STATUS_UNSUPPORTED_OP: Unsupported operation -+ * @MC_CMD_STATUS_INVALID_STATE: Invalid state -+ */ -+enum mc_cmd_status { -+ MC_CMD_STATUS_OK = 0x0, -+ MC_CMD_STATUS_READY = 0x1, -+ MC_CMD_STATUS_AUTH_ERR = 0x3, -+ MC_CMD_STATUS_NO_PRIVILEGE = 0x4, -+ MC_CMD_STATUS_DMA_ERR = 0x5, -+ MC_CMD_STATUS_CONFIG_ERR = 0x6, -+ MC_CMD_STATUS_TIMEOUT = 0x7, -+ MC_CMD_STATUS_NO_RESOURCE = 0x8, -+ MC_CMD_STATUS_NO_MEMORY = 0x9, -+ MC_CMD_STATUS_BUSY = 0xA, -+ MC_CMD_STATUS_UNSUPPORTED_OP = 0xB, -+ MC_CMD_STATUS_INVALID_STATE = 0xC -+}; -+ -+/* MC command flags */ -+ -+/** -+ * High priority flag -+ */ -+#define MC_CMD_FLAG_PRI 0x00008000 -+/** -+ * Command completion flag -+ */ -+#define MC_CMD_FLAG_INTR_DIS 0x01000000 -+ -+/** -+ * Command ID field offset -+ */ -+#define MC_CMD_HDR_CMDID_O 52 -+/** -+ * Command ID field size -+ */ -+#define MC_CMD_HDR_CMDID_S 12 -+/** -+ * Token field offset -+ */ -+#define MC_CMD_HDR_TOKEN_O 38 -+/** -+ * Token field size -+ */ -+#define MC_CMD_HDR_TOKEN_S 10 -+/** -+ * Status field offset -+ */ -+#define MC_CMD_HDR_STATUS_O 16 -+/** -+ * Status field size -+ */ -+#define MC_CMD_HDR_STATUS_S 8 -+/** -+ * Flags field offset -+ */ -+#define MC_CMD_HDR_FLAGS_O 0 -+/** -+ * Flags field size -+ */ -+#define MC_CMD_HDR_FLAGS_S 32 -+/** -+ * Command flags mask -+ */ -+#define MC_CMD_HDR_FLAGS_MASK 0xFF00FF00 -+ -+#define MC_CMD_HDR_READ_STATUS(_hdr) \ -+ ((enum mc_cmd_status)mc_dec((_hdr), \ -+ MC_CMD_HDR_STATUS_O, MC_CMD_HDR_STATUS_S)) -+ -+#define MC_CMD_HDR_READ_TOKEN(_hdr) \ -+ ((uint16_t)mc_dec((_hdr), MC_CMD_HDR_TOKEN_O, MC_CMD_HDR_TOKEN_S)) -+ -+#define MC_PREP_OP(_ext, _param, _offset, _width, _type, _arg) \ -+ ((_ext)[_param] |= cpu_to_le64(mc_enc((_offset), (_width), _arg))) -+ -+#define MC_EXT_OP(_ext, _param, _offset, _width, _type, _arg) \ -+ (_arg = 
(_type)mc_dec(cpu_to_le64(_ext[_param]), (_offset), (_width))) -+ -+#define MC_CMD_OP(_cmd, _param, _offset, _width, _type, _arg) \ -+ ((_cmd).params[_param] |= mc_enc((_offset), (_width), _arg)) -+ -+#define MC_RSP_OP(_cmd, _param, _offset, _width, _type, _arg) \ -+ (_arg = (_type)mc_dec(_cmd.params[_param], (_offset), (_width))) -+ -+static inline uint64_t mc_encode_cmd_header(uint16_t cmd_id, -+ uint32_t cmd_flags, -+ uint16_t token) -+{ -+ uint64_t hdr; -+ -+ hdr = mc_enc(MC_CMD_HDR_CMDID_O, MC_CMD_HDR_CMDID_S, cmd_id); -+ hdr |= mc_enc(MC_CMD_HDR_FLAGS_O, MC_CMD_HDR_FLAGS_S, -+ (cmd_flags & MC_CMD_HDR_FLAGS_MASK)); -+ hdr |= mc_enc(MC_CMD_HDR_TOKEN_O, MC_CMD_HDR_TOKEN_S, token); -+ hdr |= mc_enc(MC_CMD_HDR_STATUS_O, MC_CMD_HDR_STATUS_S, -+ MC_CMD_STATUS_READY); -+ -+ return hdr; -+} -+ -+/** -+ * mc_write_command - writes a command to a Management Complex (MC) portal -+ * -+ * @portal: pointer to an MC portal -+ * @cmd: pointer to a filled command -+ */ -+static inline void mc_write_command(struct mc_command __iomem *portal, -+ struct mc_command *cmd) -+{ -+ int i; -+ uint32_t word; -+ -+ /* copy command parameters into the portal */ -+ for (i = 0; i < MC_CMD_NUM_OF_PARAMS; i++) -+ iowrite64(cmd->params[i], &portal->params[i]); -+ -+ /* submit the command by writing the header */ -+ word = (uint32_t)mc_dec(cmd->header, 32, 32); -+ iowrite32(word, (((uint32_t *)&portal->header) + 1)); -+ -+ word = (uint32_t)mc_dec(cmd->header, 0, 32); -+ iowrite32(word, (uint32_t *)&portal->header); -+} -+ -+/** -+ * mc_read_response - reads the response for the last MC command from a -+ * Management Complex (MC) portal -+ * -+ * @portal: pointer to an MC portal -+ * @resp: pointer to command response buffer -+ * -+ * Returns MC_CMD_STATUS_OK on Success; Error code otherwise. -+ */ -+static inline enum mc_cmd_status mc_read_response( -+ struct mc_command __iomem *portal, -+ struct mc_command *resp) -+{ -+ int i; -+ enum mc_cmd_status status; -+ -+ /* Copy command response header from MC portal: */ -+ resp->header = ioread64(&portal->header); -+ status = MC_CMD_HDR_READ_STATUS(resp->header); -+ if (status != MC_CMD_STATUS_OK) -+ return status; -+ -+ /* Copy command response data from MC portal: */ -+ for (i = 0; i < MC_CMD_NUM_OF_PARAMS; i++) -+ resp->params[i] = ioread64(&portal->params[i]); -+ -+ return status; -+} -+ -+#endif /* __FSL_MC_CMD_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_mc_sys.h b/drivers/net/dpaa2/mc/fsl_mc_sys.h -new file mode 100644 -index 0000000..b9f4244 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_mc_sys.h -@@ -0,0 +1,98 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. 
-+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef _FSL_MC_SYS_H -+#define _FSL_MC_SYS_H -+ -+#ifdef __linux_driver__ -+ -+#include -+#include -+#include -+ -+struct fsl_mc_io { -+ void *regs; -+}; -+ -+#ifndef ENOTSUP -+#define ENOTSUP 95 -+#endif -+ -+#define ioread64(_p) readq(_p) -+#define iowrite64(_v, _p) writeq(_v, _p) -+ -+#else /* __linux_driver__ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define cpu_to_le64(x) __cpu_to_le64(x) -+#ifndef dmb -+#define dmb() do {\ -+ __asm__ __volatile__ ("" : : : "memory");\ -+} while (0) -+ -+#endif -+#define __iormb() dmb() -+#define __iowmb() dmb() -+#define __arch_getq(a) (*(volatile unsigned long *)(a)) -+#define __arch_putq(v, a) (*(volatile unsigned long *)(a) = (v)) -+#define __arch_putq32(v, a) (*(volatile unsigned int *)(a) = (v)) -+#define readq(c) ({ uint64_t __v = __arch_getq(c); __iormb(); __v; }) -+#define writeq(v, c) ({ uint64_t __v = v; __iowmb(); __arch_putq(__v, c); __v; }) -+#define writeq32(v, c) ({ uint32_t __v = v; __iowmb(); __arch_putq32(__v, c); __v; }) -+#define ioread64(_p) readq(_p) -+#define iowrite64(_v, _p) writeq(_v, _p) -+#define iowrite32(_v, _p) writeq32(_v, _p) -+#define __iomem -+ -+struct fsl_mc_io { -+ void *regs; -+}; -+ -+#ifndef ENOTSUP -+#define ENOTSUP 95 -+#endif -+ -+/*GPP is supposed to use MC commands with low priority*/ -+#define CMD_PRI_LOW 0 /*!< Low Priority command indication */ -+ -+struct mc_command; -+ -+int mc_send_command(struct fsl_mc_io *mc_io, struct mc_command *cmd); -+ -+#endif /* __linux_driver__ */ -+ -+#endif /* _FSL_MC_SYS_H */ -diff --git a/drivers/net/dpaa2/mc/fsl_net.h b/drivers/net/dpaa2/mc/fsl_net.h -new file mode 100644 -index 0000000..43825b8 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/fsl_net.h -@@ -0,0 +1,480 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. 
-+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef __FSL_NET_H -+#define __FSL_NET_H -+ -+#define LAST_HDR_INDEX 0xFFFFFFFF -+ -+/*****************************************************************************/ -+/* Protocol fields */ -+/*****************************************************************************/ -+ -+/************************* Ethernet fields *********************************/ -+#define NH_FLD_ETH_DA (1) -+#define NH_FLD_ETH_SA (NH_FLD_ETH_DA << 1) -+#define NH_FLD_ETH_LENGTH (NH_FLD_ETH_DA << 2) -+#define NH_FLD_ETH_TYPE (NH_FLD_ETH_DA << 3) -+#define NH_FLD_ETH_FINAL_CKSUM (NH_FLD_ETH_DA << 4) -+#define NH_FLD_ETH_PADDING (NH_FLD_ETH_DA << 5) -+#define NH_FLD_ETH_ALL_FIELDS ((NH_FLD_ETH_DA << 6) - 1) -+ -+#define NH_FLD_ETH_ADDR_SIZE 6 -+ -+/*************************** VLAN fields ***********************************/ -+#define NH_FLD_VLAN_VPRI (1) -+#define NH_FLD_VLAN_CFI (NH_FLD_VLAN_VPRI << 1) -+#define NH_FLD_VLAN_VID (NH_FLD_VLAN_VPRI << 2) -+#define NH_FLD_VLAN_LENGTH (NH_FLD_VLAN_VPRI << 3) -+#define NH_FLD_VLAN_TYPE (NH_FLD_VLAN_VPRI << 4) -+#define NH_FLD_VLAN_ALL_FIELDS ((NH_FLD_VLAN_VPRI << 5) - 1) -+ -+#define NH_FLD_VLAN_TCI (NH_FLD_VLAN_VPRI | \ -+ NH_FLD_VLAN_CFI | \ -+ NH_FLD_VLAN_VID) -+ -+/************************ IP (generic) fields ******************************/ -+#define NH_FLD_IP_VER (1) -+#define NH_FLD_IP_DSCP (NH_FLD_IP_VER << 2) -+#define NH_FLD_IP_ECN (NH_FLD_IP_VER << 3) -+#define NH_FLD_IP_PROTO (NH_FLD_IP_VER << 4) -+#define NH_FLD_IP_SRC (NH_FLD_IP_VER << 5) -+#define NH_FLD_IP_DST (NH_FLD_IP_VER << 6) -+#define NH_FLD_IP_TOS_TC (NH_FLD_IP_VER << 7) -+#define NH_FLD_IP_ID (NH_FLD_IP_VER << 8) -+#define NH_FLD_IP_ALL_FIELDS ((NH_FLD_IP_VER << 9) - 1) -+ -+#define NH_FLD_IP_PROTO_SIZE 1 -+ -+/***************************** IPV4 fields *********************************/ -+#define NH_FLD_IPV4_VER (1) -+#define NH_FLD_IPV4_HDR_LEN (NH_FLD_IPV4_VER << 1) -+#define NH_FLD_IPV4_TOS (NH_FLD_IPV4_VER << 2) -+#define NH_FLD_IPV4_TOTAL_LEN (NH_FLD_IPV4_VER << 3) -+#define NH_FLD_IPV4_ID (NH_FLD_IPV4_VER << 4) -+#define NH_FLD_IPV4_FLAG_D (NH_FLD_IPV4_VER << 5) -+#define NH_FLD_IPV4_FLAG_M (NH_FLD_IPV4_VER << 6) -+#define NH_FLD_IPV4_OFFSET (NH_FLD_IPV4_VER << 7) -+#define NH_FLD_IPV4_TTL (NH_FLD_IPV4_VER << 8) -+#define 
NH_FLD_IPV4_PROTO (NH_FLD_IPV4_VER << 9) -+#define NH_FLD_IPV4_CKSUM (NH_FLD_IPV4_VER << 10) -+#define NH_FLD_IPV4_SRC_IP (NH_FLD_IPV4_VER << 11) -+#define NH_FLD_IPV4_DST_IP (NH_FLD_IPV4_VER << 12) -+#define NH_FLD_IPV4_OPTS (NH_FLD_IPV4_VER << 13) -+#define NH_FLD_IPV4_OPTS_COUNT (NH_FLD_IPV4_VER << 14) -+#define NH_FLD_IPV4_ALL_FIELDS ((NH_FLD_IPV4_VER << 15) - 1) -+ -+#define NH_FLD_IPV4_ADDR_SIZE 4 -+#define NH_FLD_IPV4_PROTO_SIZE 1 -+ -+/***************************** IPV6 fields *********************************/ -+#define NH_FLD_IPV6_VER (1) -+#define NH_FLD_IPV6_TC (NH_FLD_IPV6_VER << 1) -+#define NH_FLD_IPV6_SRC_IP (NH_FLD_IPV6_VER << 2) -+#define NH_FLD_IPV6_DST_IP (NH_FLD_IPV6_VER << 3) -+#define NH_FLD_IPV6_NEXT_HDR (NH_FLD_IPV6_VER << 4) -+#define NH_FLD_IPV6_FL (NH_FLD_IPV6_VER << 5) -+#define NH_FLD_IPV6_HOP_LIMIT (NH_FLD_IPV6_VER << 6) -+#define NH_FLD_IPV6_ID (NH_FLD_IPV6_VER << 7) -+#define NH_FLD_IPV6_ALL_FIELDS ((NH_FLD_IPV6_VER << 8) - 1) -+ -+#define NH_FLD_IPV6_ADDR_SIZE 16 -+#define NH_FLD_IPV6_NEXT_HDR_SIZE 1 -+ -+/***************************** ICMP fields *********************************/ -+#define NH_FLD_ICMP_TYPE (1) -+#define NH_FLD_ICMP_CODE (NH_FLD_ICMP_TYPE << 1) -+#define NH_FLD_ICMP_CKSUM (NH_FLD_ICMP_TYPE << 2) -+#define NH_FLD_ICMP_ID (NH_FLD_ICMP_TYPE << 3) -+#define NH_FLD_ICMP_SQ_NUM (NH_FLD_ICMP_TYPE << 4) -+#define NH_FLD_ICMP_ALL_FIELDS ((NH_FLD_ICMP_TYPE << 5) - 1) -+ -+#define NH_FLD_ICMP_CODE_SIZE 1 -+#define NH_FLD_ICMP_TYPE_SIZE 1 -+ -+/***************************** IGMP fields *********************************/ -+#define NH_FLD_IGMP_VERSION (1) -+#define NH_FLD_IGMP_TYPE (NH_FLD_IGMP_VERSION << 1) -+#define NH_FLD_IGMP_CKSUM (NH_FLD_IGMP_VERSION << 2) -+#define NH_FLD_IGMP_DATA (NH_FLD_IGMP_VERSION << 3) -+#define NH_FLD_IGMP_ALL_FIELDS ((NH_FLD_IGMP_VERSION << 4) - 1) -+ -+/***************************** TCP fields **********************************/ -+#define NH_FLD_TCP_PORT_SRC (1) -+#define NH_FLD_TCP_PORT_DST (NH_FLD_TCP_PORT_SRC << 1) -+#define NH_FLD_TCP_SEQ (NH_FLD_TCP_PORT_SRC << 2) -+#define NH_FLD_TCP_ACK (NH_FLD_TCP_PORT_SRC << 3) -+#define NH_FLD_TCP_OFFSET (NH_FLD_TCP_PORT_SRC << 4) -+#define NH_FLD_TCP_FLAGS (NH_FLD_TCP_PORT_SRC << 5) -+#define NH_FLD_TCP_WINDOW (NH_FLD_TCP_PORT_SRC << 6) -+#define NH_FLD_TCP_CKSUM (NH_FLD_TCP_PORT_SRC << 7) -+#define NH_FLD_TCP_URGPTR (NH_FLD_TCP_PORT_SRC << 8) -+#define NH_FLD_TCP_OPTS (NH_FLD_TCP_PORT_SRC << 9) -+#define NH_FLD_TCP_OPTS_COUNT (NH_FLD_TCP_PORT_SRC << 10) -+#define NH_FLD_TCP_ALL_FIELDS ((NH_FLD_TCP_PORT_SRC << 11) - 1) -+ -+#define NH_FLD_TCP_PORT_SIZE 2 -+ -+/***************************** UDP fields **********************************/ -+#define NH_FLD_UDP_PORT_SRC (1) -+#define NH_FLD_UDP_PORT_DST (NH_FLD_UDP_PORT_SRC << 1) -+#define NH_FLD_UDP_LEN (NH_FLD_UDP_PORT_SRC << 2) -+#define NH_FLD_UDP_CKSUM (NH_FLD_UDP_PORT_SRC << 3) -+#define NH_FLD_UDP_ALL_FIELDS ((NH_FLD_UDP_PORT_SRC << 4) - 1) -+ -+#define NH_FLD_UDP_PORT_SIZE 2 -+ -+/*************************** UDP-lite fields *******************************/ -+#define NH_FLD_UDP_LITE_PORT_SRC (1) -+#define NH_FLD_UDP_LITE_PORT_DST (NH_FLD_UDP_LITE_PORT_SRC << 1) -+#define NH_FLD_UDP_LITE_ALL_FIELDS \ -+ ((NH_FLD_UDP_LITE_PORT_SRC << 2) - 1) -+ -+#define NH_FLD_UDP_LITE_PORT_SIZE 2 -+ -+/*************************** UDP-encap-ESP fields **************************/ -+#define NH_FLD_UDP_ENC_ESP_PORT_SRC (1) -+#define NH_FLD_UDP_ENC_ESP_PORT_DST (NH_FLD_UDP_ENC_ESP_PORT_SRC << 1) -+#define NH_FLD_UDP_ENC_ESP_LEN 
(NH_FLD_UDP_ENC_ESP_PORT_SRC << 2) -+#define NH_FLD_UDP_ENC_ESP_CKSUM (NH_FLD_UDP_ENC_ESP_PORT_SRC << 3) -+#define NH_FLD_UDP_ENC_ESP_SPI (NH_FLD_UDP_ENC_ESP_PORT_SRC << 4) -+#define NH_FLD_UDP_ENC_ESP_SEQUENCE_NUM (NH_FLD_UDP_ENC_ESP_PORT_SRC << 5) -+#define NH_FLD_UDP_ENC_ESP_ALL_FIELDS \ -+ ((NH_FLD_UDP_ENC_ESP_PORT_SRC << 6) - 1) -+ -+#define NH_FLD_UDP_ENC_ESP_PORT_SIZE 2 -+#define NH_FLD_UDP_ENC_ESP_SPI_SIZE 4 -+ -+/***************************** SCTP fields *********************************/ -+#define NH_FLD_SCTP_PORT_SRC (1) -+#define NH_FLD_SCTP_PORT_DST (NH_FLD_SCTP_PORT_SRC << 1) -+#define NH_FLD_SCTP_VER_TAG (NH_FLD_SCTP_PORT_SRC << 2) -+#define NH_FLD_SCTP_CKSUM (NH_FLD_SCTP_PORT_SRC << 3) -+#define NH_FLD_SCTP_ALL_FIELDS ((NH_FLD_SCTP_PORT_SRC << 4) - 1) -+ -+#define NH_FLD_SCTP_PORT_SIZE 2 -+ -+/***************************** DCCP fields *********************************/ -+#define NH_FLD_DCCP_PORT_SRC (1) -+#define NH_FLD_DCCP_PORT_DST (NH_FLD_DCCP_PORT_SRC << 1) -+#define NH_FLD_DCCP_ALL_FIELDS ((NH_FLD_DCCP_PORT_SRC << 2) - 1) -+ -+#define NH_FLD_DCCP_PORT_SIZE 2 -+ -+/***************************** IPHC fields *********************************/ -+#define NH_FLD_IPHC_CID (1) -+#define NH_FLD_IPHC_CID_TYPE (NH_FLD_IPHC_CID << 1) -+#define NH_FLD_IPHC_HCINDEX (NH_FLD_IPHC_CID << 2) -+#define NH_FLD_IPHC_GEN (NH_FLD_IPHC_CID << 3) -+#define NH_FLD_IPHC_D_BIT (NH_FLD_IPHC_CID << 4) -+#define NH_FLD_IPHC_ALL_FIELDS ((NH_FLD_IPHC_CID << 5) - 1) -+ -+/***************************** SCTP fields *********************************/ -+#define NH_FLD_SCTP_CHUNK_DATA_TYPE (1) -+#define NH_FLD_SCTP_CHUNK_DATA_FLAGS (NH_FLD_SCTP_CHUNK_DATA_TYPE << 1) -+#define NH_FLD_SCTP_CHUNK_DATA_LENGTH (NH_FLD_SCTP_CHUNK_DATA_TYPE << 2) -+#define NH_FLD_SCTP_CHUNK_DATA_TSN (NH_FLD_SCTP_CHUNK_DATA_TYPE << 3) -+#define NH_FLD_SCTP_CHUNK_DATA_STREAM_ID (NH_FLD_SCTP_CHUNK_DATA_TYPE << 4) -+#define NH_FLD_SCTP_CHUNK_DATA_STREAM_SQN (NH_FLD_SCTP_CHUNK_DATA_TYPE << 5) -+#define NH_FLD_SCTP_CHUNK_DATA_PAYLOAD_PID (NH_FLD_SCTP_CHUNK_DATA_TYPE << 6) -+#define NH_FLD_SCTP_CHUNK_DATA_UNORDERED (NH_FLD_SCTP_CHUNK_DATA_TYPE << 7) -+#define NH_FLD_SCTP_CHUNK_DATA_BEGGINING (NH_FLD_SCTP_CHUNK_DATA_TYPE << 8) -+#define NH_FLD_SCTP_CHUNK_DATA_END (NH_FLD_SCTP_CHUNK_DATA_TYPE << 9) -+#define NH_FLD_SCTP_CHUNK_DATA_ALL_FIELDS \ -+ ((NH_FLD_SCTP_CHUNK_DATA_TYPE << 10) - 1) -+ -+/*************************** L2TPV2 fields *********************************/ -+#define NH_FLD_L2TPV2_TYPE_BIT (1) -+#define NH_FLD_L2TPV2_LENGTH_BIT (NH_FLD_L2TPV2_TYPE_BIT << 1) -+#define NH_FLD_L2TPV2_SEQUENCE_BIT (NH_FLD_L2TPV2_TYPE_BIT << 2) -+#define NH_FLD_L2TPV2_OFFSET_BIT (NH_FLD_L2TPV2_TYPE_BIT << 3) -+#define NH_FLD_L2TPV2_PRIORITY_BIT (NH_FLD_L2TPV2_TYPE_BIT << 4) -+#define NH_FLD_L2TPV2_VERSION (NH_FLD_L2TPV2_TYPE_BIT << 5) -+#define NH_FLD_L2TPV2_LEN (NH_FLD_L2TPV2_TYPE_BIT << 6) -+#define NH_FLD_L2TPV2_TUNNEL_ID (NH_FLD_L2TPV2_TYPE_BIT << 7) -+#define NH_FLD_L2TPV2_SESSION_ID (NH_FLD_L2TPV2_TYPE_BIT << 8) -+#define NH_FLD_L2TPV2_NS (NH_FLD_L2TPV2_TYPE_BIT << 9) -+#define NH_FLD_L2TPV2_NR (NH_FLD_L2TPV2_TYPE_BIT << 10) -+#define NH_FLD_L2TPV2_OFFSET_SIZE (NH_FLD_L2TPV2_TYPE_BIT << 11) -+#define NH_FLD_L2TPV2_FIRST_BYTE (NH_FLD_L2TPV2_TYPE_BIT << 12) -+#define NH_FLD_L2TPV2_ALL_FIELDS \ -+ ((NH_FLD_L2TPV2_TYPE_BIT << 13) - 1) -+ -+/*************************** L2TPV3 fields *********************************/ -+#define NH_FLD_L2TPV3_CTRL_TYPE_BIT (1) -+#define NH_FLD_L2TPV3_CTRL_LENGTH_BIT (NH_FLD_L2TPV3_CTRL_TYPE_BIT << 1) -+#define 
NH_FLD_L2TPV3_CTRL_SEQUENCE_BIT (NH_FLD_L2TPV3_CTRL_TYPE_BIT << 2) -+#define NH_FLD_L2TPV3_CTRL_VERSION (NH_FLD_L2TPV3_CTRL_TYPE_BIT << 3) -+#define NH_FLD_L2TPV3_CTRL_LENGTH (NH_FLD_L2TPV3_CTRL_TYPE_BIT << 4) -+#define NH_FLD_L2TPV3_CTRL_CONTROL (NH_FLD_L2TPV3_CTRL_TYPE_BIT << 5) -+#define NH_FLD_L2TPV3_CTRL_SENT (NH_FLD_L2TPV3_CTRL_TYPE_BIT << 6) -+#define NH_FLD_L2TPV3_CTRL_RECV (NH_FLD_L2TPV3_CTRL_TYPE_BIT << 7) -+#define NH_FLD_L2TPV3_CTRL_FIRST_BYTE (NH_FLD_L2TPV3_CTRL_TYPE_BIT << 8) -+#define NH_FLD_L2TPV3_CTRL_ALL_FIELDS \ -+ ((NH_FLD_L2TPV3_CTRL_TYPE_BIT << 9) - 1) -+ -+#define NH_FLD_L2TPV3_SESS_TYPE_BIT (1) -+#define NH_FLD_L2TPV3_SESS_VERSION (NH_FLD_L2TPV3_SESS_TYPE_BIT << 1) -+#define NH_FLD_L2TPV3_SESS_ID (NH_FLD_L2TPV3_SESS_TYPE_BIT << 2) -+#define NH_FLD_L2TPV3_SESS_COOKIE (NH_FLD_L2TPV3_SESS_TYPE_BIT << 3) -+#define NH_FLD_L2TPV3_SESS_ALL_FIELDS \ -+ ((NH_FLD_L2TPV3_SESS_TYPE_BIT << 4) - 1) -+ -+/**************************** PPP fields ***********************************/ -+#define NH_FLD_PPP_PID (1) -+#define NH_FLD_PPP_COMPRESSED (NH_FLD_PPP_PID << 1) -+#define NH_FLD_PPP_ALL_FIELDS ((NH_FLD_PPP_PID << 2) - 1) -+ -+/************************** PPPoE fields ***********************************/ -+#define NH_FLD_PPPOE_VER (1) -+#define NH_FLD_PPPOE_TYPE (NH_FLD_PPPOE_VER << 1) -+#define NH_FLD_PPPOE_CODE (NH_FLD_PPPOE_VER << 2) -+#define NH_FLD_PPPOE_SID (NH_FLD_PPPOE_VER << 3) -+#define NH_FLD_PPPOE_LEN (NH_FLD_PPPOE_VER << 4) -+#define NH_FLD_PPPOE_SESSION (NH_FLD_PPPOE_VER << 5) -+#define NH_FLD_PPPOE_PID (NH_FLD_PPPOE_VER << 6) -+#define NH_FLD_PPPOE_ALL_FIELDS ((NH_FLD_PPPOE_VER << 7) - 1) -+ -+/************************* PPP-Mux fields **********************************/ -+#define NH_FLD_PPPMUX_PID (1) -+#define NH_FLD_PPPMUX_CKSUM (NH_FLD_PPPMUX_PID << 1) -+#define NH_FLD_PPPMUX_COMPRESSED (NH_FLD_PPPMUX_PID << 2) -+#define NH_FLD_PPPMUX_ALL_FIELDS ((NH_FLD_PPPMUX_PID << 3) - 1) -+ -+/*********************** PPP-Mux sub-frame fields **************************/ -+#define NH_FLD_PPPMUX_SUBFRM_PFF (1) -+#define NH_FLD_PPPMUX_SUBFRM_LXT (NH_FLD_PPPMUX_SUBFRM_PFF << 1) -+#define NH_FLD_PPPMUX_SUBFRM_LEN (NH_FLD_PPPMUX_SUBFRM_PFF << 2) -+#define NH_FLD_PPPMUX_SUBFRM_PID (NH_FLD_PPPMUX_SUBFRM_PFF << 3) -+#define NH_FLD_PPPMUX_SUBFRM_USE_PID (NH_FLD_PPPMUX_SUBFRM_PFF << 4) -+#define NH_FLD_PPPMUX_SUBFRM_ALL_FIELDS \ -+ ((NH_FLD_PPPMUX_SUBFRM_PFF << 5) - 1) -+ -+/*************************** LLC fields ************************************/ -+#define NH_FLD_LLC_DSAP (1) -+#define NH_FLD_LLC_SSAP (NH_FLD_LLC_DSAP << 1) -+#define NH_FLD_LLC_CTRL (NH_FLD_LLC_DSAP << 2) -+#define NH_FLD_LLC_ALL_FIELDS ((NH_FLD_LLC_DSAP << 3) - 1) -+ -+/*************************** NLPID fields **********************************/ -+#define NH_FLD_NLPID_NLPID (1) -+#define NH_FLD_NLPID_ALL_FIELDS ((NH_FLD_NLPID_NLPID << 1) - 1) -+ -+/*************************** SNAP fields ***********************************/ -+#define NH_FLD_SNAP_OUI (1) -+#define NH_FLD_SNAP_PID (NH_FLD_SNAP_OUI << 1) -+#define NH_FLD_SNAP_ALL_FIELDS ((NH_FLD_SNAP_OUI << 2) - 1) -+ -+/*************************** LLC SNAP fields *******************************/ -+#define NH_FLD_LLC_SNAP_TYPE (1) -+#define NH_FLD_LLC_SNAP_ALL_FIELDS ((NH_FLD_LLC_SNAP_TYPE << 1) - 1) -+ -+#define NH_FLD_ARP_HTYPE (1) -+#define NH_FLD_ARP_PTYPE (NH_FLD_ARP_HTYPE << 1) -+#define NH_FLD_ARP_HLEN (NH_FLD_ARP_HTYPE << 2) -+#define NH_FLD_ARP_PLEN (NH_FLD_ARP_HTYPE << 3) -+#define NH_FLD_ARP_OPER (NH_FLD_ARP_HTYPE << 4) -+#define NH_FLD_ARP_SHA 
(NH_FLD_ARP_HTYPE << 5) -+#define NH_FLD_ARP_SPA (NH_FLD_ARP_HTYPE << 6) -+#define NH_FLD_ARP_THA (NH_FLD_ARP_HTYPE << 7) -+#define NH_FLD_ARP_TPA (NH_FLD_ARP_HTYPE << 8) -+#define NH_FLD_ARP_ALL_FIELDS ((NH_FLD_ARP_HTYPE << 9) - 1) -+ -+/*************************** RFC2684 fields ********************************/ -+#define NH_FLD_RFC2684_LLC (1) -+#define NH_FLD_RFC2684_NLPID (NH_FLD_RFC2684_LLC << 1) -+#define NH_FLD_RFC2684_OUI (NH_FLD_RFC2684_LLC << 2) -+#define NH_FLD_RFC2684_PID (NH_FLD_RFC2684_LLC << 3) -+#define NH_FLD_RFC2684_VPN_OUI (NH_FLD_RFC2684_LLC << 4) -+#define NH_FLD_RFC2684_VPN_IDX (NH_FLD_RFC2684_LLC << 5) -+#define NH_FLD_RFC2684_ALL_FIELDS ((NH_FLD_RFC2684_LLC << 6) - 1) -+ -+/*************************** User defined fields ***************************/ -+#define NH_FLD_USER_DEFINED_SRCPORT (1) -+#define NH_FLD_USER_DEFINED_PCDID (NH_FLD_USER_DEFINED_SRCPORT << 1) -+#define NH_FLD_USER_DEFINED_ALL_FIELDS \ -+ ((NH_FLD_USER_DEFINED_SRCPORT << 2) - 1) -+ -+/*************************** Payload fields ********************************/ -+#define NH_FLD_PAYLOAD_BUFFER (1) -+#define NH_FLD_PAYLOAD_SIZE (NH_FLD_PAYLOAD_BUFFER << 1) -+#define NH_FLD_MAX_FRM_SIZE (NH_FLD_PAYLOAD_BUFFER << 2) -+#define NH_FLD_MIN_FRM_SIZE (NH_FLD_PAYLOAD_BUFFER << 3) -+#define NH_FLD_PAYLOAD_TYPE (NH_FLD_PAYLOAD_BUFFER << 4) -+#define NH_FLD_FRAME_SIZE (NH_FLD_PAYLOAD_BUFFER << 5) -+#define NH_FLD_PAYLOAD_ALL_FIELDS ((NH_FLD_PAYLOAD_BUFFER << 6) - 1) -+ -+/*************************** GRE fields ************************************/ -+#define NH_FLD_GRE_TYPE (1) -+#define NH_FLD_GRE_ALL_FIELDS ((NH_FLD_GRE_TYPE << 1) - 1) -+ -+/*************************** MINENCAP fields *******************************/ -+#define NH_FLD_MINENCAP_SRC_IP (1) -+#define NH_FLD_MINENCAP_DST_IP (NH_FLD_MINENCAP_SRC_IP << 1) -+#define NH_FLD_MINENCAP_TYPE (NH_FLD_MINENCAP_SRC_IP << 2) -+#define NH_FLD_MINENCAP_ALL_FIELDS \ -+ ((NH_FLD_MINENCAP_SRC_IP << 3) - 1) -+ -+/*************************** IPSEC AH fields *******************************/ -+#define NH_FLD_IPSEC_AH_SPI (1) -+#define NH_FLD_IPSEC_AH_NH (NH_FLD_IPSEC_AH_SPI << 1) -+#define NH_FLD_IPSEC_AH_ALL_FIELDS ((NH_FLD_IPSEC_AH_SPI << 2) - 1) -+ -+/*************************** IPSEC ESP fields ******************************/ -+#define NH_FLD_IPSEC_ESP_SPI (1) -+#define NH_FLD_IPSEC_ESP_SEQUENCE_NUM (NH_FLD_IPSEC_ESP_SPI << 1) -+#define NH_FLD_IPSEC_ESP_ALL_FIELDS ((NH_FLD_IPSEC_ESP_SPI << 2) - 1) -+ -+#define NH_FLD_IPSEC_ESP_SPI_SIZE 4 -+ -+/*************************** MPLS fields ***********************************/ -+#define NH_FLD_MPLS_LABEL_STACK (1) -+#define NH_FLD_MPLS_LABEL_STACK_ALL_FIELDS \ -+ ((NH_FLD_MPLS_LABEL_STACK << 1) - 1) -+ -+/*************************** MACSEC fields *********************************/ -+#define NH_FLD_MACSEC_SECTAG (1) -+#define NH_FLD_MACSEC_ALL_FIELDS ((NH_FLD_MACSEC_SECTAG << 1) - 1) -+ -+/*************************** GTP fields ************************************/ -+#define NH_FLD_GTP_TEID (1) -+ -+/* Protocol options */ -+ -+/* Ethernet options */ -+#define NH_OPT_ETH_BROADCAST 1 -+#define NH_OPT_ETH_MULTICAST 2 -+#define NH_OPT_ETH_UNICAST 3 -+#define NH_OPT_ETH_BPDU 4 -+ -+#define NH_ETH_IS_MULTICAST_ADDR(addr) (addr[0] & 0x01) -+/* also applicable for broadcast */ -+ -+/* VLAN options */ -+#define NH_OPT_VLAN_CFI 1 -+ -+/* IPV4 options */ -+#define NH_OPT_IPV4_UNICAST 1 -+#define NH_OPT_IPV4_MULTICAST 2 -+#define NH_OPT_IPV4_BROADCAST 3 -+#define NH_OPT_IPV4_OPTION 4 -+#define NH_OPT_IPV4_FRAG 5 -+#define 
NH_OPT_IPV4_INITIAL_FRAG 6 -+ -+/* IPV6 options */ -+#define NH_OPT_IPV6_UNICAST 1 -+#define NH_OPT_IPV6_MULTICAST 2 -+#define NH_OPT_IPV6_OPTION 3 -+#define NH_OPT_IPV6_FRAG 4 -+#define NH_OPT_IPV6_INITIAL_FRAG 5 -+ -+/* General IP options (may be used for any version) */ -+#define NH_OPT_IP_FRAG 1 -+#define NH_OPT_IP_INITIAL_FRAG 2 -+#define NH_OPT_IP_OPTION 3 -+ -+/* Minenc. options */ -+#define NH_OPT_MINENCAP_SRC_ADDR_PRESENT 1 -+ -+/* GRE. options */ -+#define NH_OPT_GRE_ROUTING_PRESENT 1 -+ -+/* TCP options */ -+#define NH_OPT_TCP_OPTIONS 1 -+#define NH_OPT_TCP_CONTROL_HIGH_BITS 2 -+#define NH_OPT_TCP_CONTROL_LOW_BITS 3 -+ -+/* CAPWAP options */ -+#define NH_OPT_CAPWAP_DTLS 1 -+ -+enum net_prot { -+ NET_PROT_NONE = 0, -+ NET_PROT_PAYLOAD, -+ NET_PROT_ETH, -+ NET_PROT_VLAN, -+ NET_PROT_IPV4, -+ NET_PROT_IPV6, -+ NET_PROT_IP, -+ NET_PROT_TCP, -+ NET_PROT_UDP, -+ NET_PROT_UDP_LITE, -+ NET_PROT_IPHC, -+ NET_PROT_SCTP, -+ NET_PROT_SCTP_CHUNK_DATA, -+ NET_PROT_PPPOE, -+ NET_PROT_PPP, -+ NET_PROT_PPPMUX, -+ NET_PROT_PPPMUX_SUBFRM, -+ NET_PROT_L2TPV2, -+ NET_PROT_L2TPV3_CTRL, -+ NET_PROT_L2TPV3_SESS, -+ NET_PROT_LLC, -+ NET_PROT_LLC_SNAP, -+ NET_PROT_NLPID, -+ NET_PROT_SNAP, -+ NET_PROT_MPLS, -+ NET_PROT_IPSEC_AH, -+ NET_PROT_IPSEC_ESP, -+ NET_PROT_UDP_ENC_ESP, /* RFC 3948 */ -+ NET_PROT_MACSEC, -+ NET_PROT_GRE, -+ NET_PROT_MINENCAP, -+ NET_PROT_DCCP, -+ NET_PROT_ICMP, -+ NET_PROT_IGMP, -+ NET_PROT_ARP, -+ NET_PROT_CAPWAP_DATA, -+ NET_PROT_CAPWAP_CTRL, -+ NET_PROT_RFC2684, -+ NET_PROT_ICMPV6, -+ NET_PROT_FCOE, -+ NET_PROT_FIP, -+ NET_PROT_ISCSI, -+ NET_PROT_GTP, -+ NET_PROT_USER_DEFINED_L2, -+ NET_PROT_USER_DEFINED_L3, -+ NET_PROT_USER_DEFINED_L4, -+ NET_PROT_USER_DEFINED_L5, -+ NET_PROT_USER_DEFINED_SHIM1, -+ NET_PROT_USER_DEFINED_SHIM2, -+ -+ NET_PROT_DUMMY_LAST -+}; -+ -+/*! IEEE8021.Q */ -+#define NH_IEEE8021Q_ETYPE 0x8100 -+#define NH_IEEE8021Q_HDR(etype, pcp, dei, vlan_id) \ -+ ((((uint32_t)(etype & 0xFFFF)) << 16) | \ -+ (((uint32_t)(pcp & 0x07)) << 13) | \ -+ (((uint32_t)(dei & 0x01)) << 12) | \ -+ (((uint32_t)(vlan_id & 0xFFF)))) -+ -+#endif /* __FSL_NET_H */ -diff --git a/drivers/net/dpaa2/mc/mc_sys.c b/drivers/net/dpaa2/mc/mc_sys.c -new file mode 100644 -index 0000000..fcbed28 ---- /dev/null -+++ b/drivers/net/dpaa2/mc/mc_sys.c -@@ -0,0 +1,127 @@ -+/* Copyright 2013-2015 Freescale Semiconductor Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of the above-listed copyright holders nor the -+ * names of any contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * -+ * ALTERNATIVELY, this software may be distributed under the terms of the -+ * GNU General Public License ("GPL") as published by the Free Software -+ * Foundation, either version 2 of that License or (at your option) any -+ * later version. 
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
-+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-+ * POSSIBILITY OF SUCH DAMAGE.
-+ */
-+#include <fsl_mc_sys.h>
-+#include <fsl_mc_cmd.h>
-+
-+/* The ODP framework uses the MC portal in shared mode. The locking
-+   changes introduced below must be maintained when merging the
-+   FLIB.
-+*/
-+
-+/**
-+* The mc_spinlock_t type.
-+*/
-+typedef struct {
-+	volatile int locked; /**< lock status 0 = unlocked, 1 = locked */
-+} mc_spinlock_t;
-+
-+/**
-+* A static spinlock initializer.
-+*/
-+static mc_spinlock_t mc_portal_lock = { 0 };
-+
-+static inline void mc_pause(void) {}
-+
-+static inline void mc_spinlock_lock(mc_spinlock_t *sl)
-+{
-+	/* Test-and-set, then spin on plain reads until the lock looks free */
-+	while (__sync_lock_test_and_set(&sl->locked, 1))
-+		while (sl->locked)
-+			mc_pause();
-+}
-+
-+static inline void mc_spinlock_unlock(mc_spinlock_t *sl)
-+{
-+	__sync_lock_release(&sl->locked);
-+}
-+
-+static int mc_status_to_error(enum mc_cmd_status status)
-+{
-+	switch (status) {
-+	case MC_CMD_STATUS_OK:
-+		return 0;
-+	case MC_CMD_STATUS_AUTH_ERR:
-+		return -EACCES; /* Token error */
-+	case MC_CMD_STATUS_NO_PRIVILEGE:
-+		return -EPERM; /* Permission denied */
-+	case MC_CMD_STATUS_DMA_ERR:
-+		return -EIO; /* Input/Output error */
-+	case MC_CMD_STATUS_CONFIG_ERR:
-+		return -EINVAL; /* Device not configured */
-+	case MC_CMD_STATUS_TIMEOUT:
-+		return -ETIMEDOUT; /* Operation timed out */
-+	case MC_CMD_STATUS_NO_RESOURCE:
-+		return -ENAVAIL; /* Resource temporarily unavailable */
-+	case MC_CMD_STATUS_NO_MEMORY:
-+		return -ENOMEM; /* Cannot allocate memory */
-+	case MC_CMD_STATUS_BUSY:
-+		return -EBUSY; /* Device busy */
-+	case MC_CMD_STATUS_UNSUPPORTED_OP:
-+		return -ENOTSUP; /* Operation not supported by device */
-+	case MC_CMD_STATUS_INVALID_STATE:
-+		return -ENODEV; /* Invalid device state */
-+	default:
-+		break;
-+	}
-+
-+	/* Not expected to reach here */
-+	return -EINVAL;
-+}
-+
-+int mc_send_command(struct fsl_mc_io *mc_io, struct mc_command *cmd)
-+{
-+	enum mc_cmd_status status;
-+
-+	if (!mc_io || !mc_io->regs)
-+		return -EACCES;
-+
-+	/* --- Call lock function here in case portal is shared --- */
-+	mc_spinlock_lock(&mc_portal_lock);
-+
-+	mc_write_command(mc_io->regs, cmd);
-+
-+	/* Spin until status changes */
-+	do {
-+		status = MC_CMD_HDR_READ_STATUS(ioread64(mc_io->regs));
-+
-+		/* --- Call wait function here to prevent blocking ---
-+		 * Change the loop condition accordingly to exit on timeout.
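-+		 * A bounded variant could look like the sketch below; the
-+		 * iteration budget and the mc_pause() back-off are
-+		 * illustrative assumptions, not part of this driver:
-+		 *
-+		 *	int budget = 500;
-+		 *	do {
-+		 *		status = MC_CMD_HDR_READ_STATUS(
-+		 *				ioread64(mc_io->regs));
-+		 *		mc_pause();
-+		 *	} while (status == MC_CMD_STATUS_READY && --budget);
-+		 *	if (status == MC_CMD_STATUS_READY)
-+		 *		return -ETIMEDOUT;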
-+ */ -+ } while (status == MC_CMD_STATUS_READY); -+ -+ /* Read the response back into the command buffer */ -+ mc_read_response(mc_io->regs, cmd); -+ -+ /* --- Call unlock function here in case portal is shared --- */ -+ mc_spinlock_unlock(&mc_portal_lock); -+ -+ return mc_status_to_error(status); -+} -diff --git a/drivers/net/dpaa2/qbman/driver/qbman_debug.c b/drivers/net/dpaa2/qbman/driver/qbman_debug.c -new file mode 100644 -index 0000000..ef6c257 ---- /dev/null -+++ b/drivers/net/dpaa2/qbman/driver/qbman_debug.c -@@ -0,0 +1,929 @@ -+/* Copyright (C) 2015 Freescale Semiconductor, Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Freescale Semiconductor nor the -+ * names of its contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY -+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY -+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+ -+#include "qbman_portal.h" -+#include "qbman_debug.h" -+#include -+ -+/* QBMan portal management command code */ -+#define QBMAN_BP_QUERY 0x32 -+#define QBMAN_FQ_QUERY 0x44 -+#define QBMAN_FQ_QUERY_NP 0x45 -+#define QBMAN_WQ_QUERY 0x47 -+#define QBMAN_CGR_QUERY 0x51 -+#define QBMAN_WRED_QUERY 0x54 -+#define QBMAN_CGR_STAT_QUERY 0x55 -+#define QBMAN_CGR_STAT_QUERY_CLR 0x56 -+ -+enum qbman_attr_usage_e { -+ qbman_attr_usage_fq, -+ qbman_attr_usage_bpool, -+ qbman_attr_usage_cgr, -+ qbman_attr_usage_wqchan -+}; -+ -+struct int_qbman_attr { -+ uint32_t words[32]; -+ enum qbman_attr_usage_e usage; -+}; -+ -+#define attr_type_set(a, e) \ -+{ \ -+ struct qbman_attr *__attr = a; \ -+ enum qbman_attr_usage_e __usage = e; \ -+ ((struct int_qbman_attr *)__attr)->usage = __usage; \ -+} -+ -+#define ATTR32(d) (&(d)->dont_manipulate_directly[0]) -+#define ATTR32_1(d) (&(d)->dont_manipulate_directly[16]) -+ -+static struct qb_attr_code code_bp_bpid = QB_CODE(0, 16, 16); -+static struct qb_attr_code code_bp_bdi = QB_CODE(1, 16, 1); -+static struct qb_attr_code code_bp_va = QB_CODE(1, 17, 1); -+static struct qb_attr_code code_bp_wae = QB_CODE(1, 18, 1); -+static struct qb_attr_code code_bp_swdet = QB_CODE(4, 0, 16); -+static struct qb_attr_code code_bp_swdxt = QB_CODE(4, 16, 16); -+static struct qb_attr_code code_bp_hwdet = QB_CODE(5, 0, 16); -+static struct qb_attr_code code_bp_hwdxt = QB_CODE(5, 16, 16); -+static struct qb_attr_code code_bp_swset = QB_CODE(6, 0, 16); -+static struct qb_attr_code code_bp_swsxt = QB_CODE(6, 16, 16); -+static struct qb_attr_code code_bp_vbpid = QB_CODE(7, 0, 14); -+static struct qb_attr_code code_bp_icid = QB_CODE(7, 16, 15); -+static struct qb_attr_code code_bp_pl = QB_CODE(7, 31, 1); -+static struct qb_attr_code code_bp_bpscn_addr_lo = QB_CODE(8, 0, 32); -+static struct qb_attr_code code_bp_bpscn_addr_hi = QB_CODE(9, 0, 32); -+static struct qb_attr_code code_bp_bpscn_ctx_lo = QB_CODE(10, 0, 32); -+static struct qb_attr_code code_bp_bpscn_ctx_hi = QB_CODE(11, 0, 32); -+static struct qb_attr_code code_bp_hw_targ = QB_CODE(12, 0, 16); -+static struct qb_attr_code code_bp_state = QB_CODE(1, 24, 3); -+static struct qb_attr_code code_bp_fill = QB_CODE(2, 0, 32); -+static struct qb_attr_code code_bp_hdptr = QB_CODE(3, 0, 32); -+static struct qb_attr_code code_bp_sdcnt = QB_CODE(13, 0, 8); -+static struct qb_attr_code code_bp_hdcnt = QB_CODE(13, 8, 8); -+static struct qb_attr_code code_bp_sscnt = QB_CODE(13, 16, 8); -+ -+static void qbman_bp_attr_clear(struct qbman_attr *a) -+{ -+ memset(a, 0, sizeof(*a)); -+ attr_type_set(a, qbman_attr_usage_bpool); -+} -+ -+int qbman_bp_query(struct qbman_swp *s, uint32_t bpid, -+ struct qbman_attr *a) -+{ -+ uint32_t *p; -+ uint32_t rslt; -+ uint32_t *attr = ATTR32(a); -+ -+ qbman_bp_attr_clear(a); -+ -+ /* Start the management command */ -+ p = qbman_swp_mc_start(s); -+ if (!p) -+ return -EBUSY; -+ -+ /* Encode the caller-provided attributes */ -+ qb_attr_code_encode(&code_bp_bpid, p, bpid); -+ -+ /* Complete the management command */ -+ p = qbman_swp_mc_complete(s, p, p[0] | QBMAN_BP_QUERY); -+ -+ /* Decode the outcome */ -+ rslt = qb_attr_code_decode(&code_generic_rslt, p); -+ BUG_ON(qb_attr_code_decode(&code_generic_verb, p) != QBMAN_BP_QUERY); -+ -+ /* Determine success or failure */ -+ if (unlikely(rslt != QBMAN_MC_RSLT_OK)) { -+ pr_err("Query of BPID 0x%x failed, code=0x%02x\n", bpid, rslt); -+ return -EIO; -+ } -+ -+ /* For the query, word[0] of the result contains only the -+ * verb/rslt fields, so skip word[0]. 
-+ */ -+ word_copy(&attr[1], &p[1], 15); -+ return 0; -+} -+ -+void qbman_bp_attr_get_bdi(struct qbman_attr *a, int *bdi, int *va, int *wae) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *bdi = !!qb_attr_code_decode(&code_bp_bdi, p); -+ *va = !!qb_attr_code_decode(&code_bp_va, p); -+ *wae = !!qb_attr_code_decode(&code_bp_wae, p); -+} -+ -+static uint32_t qbman_bp_thresh_to_value(uint32_t val) -+{ -+ return (val & 0xff) << ((val & 0xf00) >> 8); -+} -+ -+void qbman_bp_attr_get_swdet(struct qbman_attr *a, uint32_t *swdet) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *swdet = qbman_bp_thresh_to_value(qb_attr_code_decode(&code_bp_swdet, -+ p)); -+} -+ -+void qbman_bp_attr_get_swdxt(struct qbman_attr *a, uint32_t *swdxt) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *swdxt = qbman_bp_thresh_to_value(qb_attr_code_decode(&code_bp_swdxt, -+ p)); -+} -+ -+void qbman_bp_attr_get_hwdet(struct qbman_attr *a, uint32_t *hwdet) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *hwdet = qbman_bp_thresh_to_value(qb_attr_code_decode(&code_bp_hwdet, -+ p)); -+} -+ -+void qbman_bp_attr_get_hwdxt(struct qbman_attr *a, uint32_t *hwdxt) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *hwdxt = qbman_bp_thresh_to_value(qb_attr_code_decode(&code_bp_hwdxt, -+ p)); -+} -+ -+void qbman_bp_attr_get_swset(struct qbman_attr *a, uint32_t *swset) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *swset = qbman_bp_thresh_to_value(qb_attr_code_decode(&code_bp_swset, -+ p)); -+} -+ -+void qbman_bp_attr_get_swsxt(struct qbman_attr *a, uint32_t *swsxt) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *swsxt = qbman_bp_thresh_to_value(qb_attr_code_decode(&code_bp_swsxt, -+ p)); -+} -+ -+void qbman_bp_attr_get_vbpid(struct qbman_attr *a, uint32_t *vbpid) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *vbpid = qb_attr_code_decode(&code_bp_vbpid, p); -+} -+ -+void qbman_bp_attr_get_icid(struct qbman_attr *a, uint32_t *icid, int *pl) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *icid = qb_attr_code_decode(&code_bp_icid, p); -+ *pl = !!qb_attr_code_decode(&code_bp_pl, p); -+} -+ -+void qbman_bp_attr_get_bpscn_addr(struct qbman_attr *a, uint64_t *bpscn_addr) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *bpscn_addr = ((uint64_t)qb_attr_code_decode(&code_bp_bpscn_addr_hi, -+ p) << 32) | -+ (uint64_t)qb_attr_code_decode(&code_bp_bpscn_addr_lo, -+ p); -+} -+ -+void qbman_bp_attr_get_bpscn_ctx(struct qbman_attr *a, uint64_t *bpscn_ctx) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *bpscn_ctx = ((uint64_t)qb_attr_code_decode(&code_bp_bpscn_ctx_hi, p) -+ << 32) | -+ (uint64_t)qb_attr_code_decode(&code_bp_bpscn_ctx_lo, -+ p); -+} -+ -+void qbman_bp_attr_get_hw_targ(struct qbman_attr *a, uint32_t *hw_targ) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ *hw_targ = qb_attr_code_decode(&code_bp_hw_targ, p); -+} -+ -+int qbman_bp_info_has_free_bufs(struct qbman_attr *a) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ return !(int)(qb_attr_code_decode(&code_bp_state, p) & 0x1); -+} -+ -+int qbman_bp_info_is_depleted(struct qbman_attr *a) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ return (int)(qb_attr_code_decode(&code_bp_state, p) & 0x2); -+} -+ -+int qbman_bp_info_is_surplus(struct qbman_attr *a) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ return (int)(qb_attr_code_decode(&code_bp_state, p) & 0x4); -+} -+ -+uint32_t qbman_bp_info_num_free_bufs(struct qbman_attr *a) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ return qb_attr_code_decode(&code_bp_fill, p); -+} -+ -+uint32_t qbman_bp_info_hdptr(struct qbman_attr *a) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ return qb_attr_code_decode(&code_bp_hdptr, p); -+} -+ -+uint32_t qbman_bp_info_sdcnt(struct qbman_attr *a) -+{ -+ uint32_t *p = 
ATTR32(a); -+ -+ return qb_attr_code_decode(&code_bp_sdcnt, p); -+} -+ -+uint32_t qbman_bp_info_hdcnt(struct qbman_attr *a) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ return qb_attr_code_decode(&code_bp_hdcnt, p); -+} -+ -+uint32_t qbman_bp_info_sscnt(struct qbman_attr *a) -+{ -+ uint32_t *p = ATTR32(a); -+ -+ return qb_attr_code_decode(&code_bp_sscnt, p); -+} -+ -+static struct qb_attr_code code_fq_fqid = QB_CODE(1, 0, 24); -+static struct qb_attr_code code_fq_cgrid = QB_CODE(2, 16, 16); -+static struct qb_attr_code code_fq_destwq = QB_CODE(3, 0, 15); -+static struct qb_attr_code code_fq_fqctrl = QB_CODE(3, 24, 8); -+static struct qb_attr_code code_fq_icscred = QB_CODE(4, 0, 15); -+static struct qb_attr_code code_fq_tdthresh = QB_CODE(4, 16, 13); -+static struct qb_attr_code code_fq_oa_len = QB_CODE(5, 0, 12); -+static struct qb_attr_code code_fq_oa_ics = QB_CODE(5, 14, 1); -+static struct qb_attr_code code_fq_oa_cgr = QB_CODE(5, 15, 1); -+static struct qb_attr_code code_fq_mctl_bdi = QB_CODE(5, 24, 1); -+static struct qb_attr_code code_fq_mctl_ff = QB_CODE(5, 25, 1); -+static struct qb_attr_code code_fq_mctl_va = QB_CODE(5, 26, 1); -+static struct qb_attr_code code_fq_mctl_ps = QB_CODE(5, 27, 1); -+static struct qb_attr_code code_fq_ctx_lower32 = QB_CODE(6, 0, 32); -+static struct qb_attr_code code_fq_ctx_upper32 = QB_CODE(7, 0, 32); -+static struct qb_attr_code code_fq_icid = QB_CODE(8, 0, 15); -+static struct qb_attr_code code_fq_pl = QB_CODE(8, 15, 1); -+static struct qb_attr_code code_fq_vfqid = QB_CODE(9, 0, 24); -+static struct qb_attr_code code_fq_erfqid = QB_CODE(10, 0, 24); -+ -+static void qbman_fq_attr_clear(struct qbman_attr *a) -+{ -+ memset(a, 0, sizeof(*a)); -+ attr_type_set(a, qbman_attr_usage_fq); -+} -+ -+/* FQ query function for programmable fields */ -+int qbman_fq_query(struct qbman_swp *s, uint32_t fqid, struct qbman_attr *desc) -+{ -+ uint32_t *p; -+ uint32_t rslt; -+ uint32_t *d = ATTR32(desc); -+ -+ qbman_fq_attr_clear(desc); -+ -+ p = qbman_swp_mc_start(s); -+ if (!p) -+ return -EBUSY; -+ qb_attr_code_encode(&code_fq_fqid, p, fqid); -+ p = qbman_swp_mc_complete(s, p, QBMAN_FQ_QUERY); -+ -+ /* Decode the outcome */ -+ rslt = qb_attr_code_decode(&code_generic_rslt, p); -+ BUG_ON(qb_attr_code_decode(&code_generic_verb, p) != QBMAN_FQ_QUERY); -+ -+ /* Determine success or failure */ -+ if (unlikely(rslt != QBMAN_MC_RSLT_OK)) { -+ pr_err("Query of FQID 0x%x failed, code=0x%02x\n", -+ fqid, rslt); -+ return -EIO; -+ } -+ /* For the configure, word[0] of the command contains only the WE-mask. -+ * For the query, word[0] of the result contains only the verb/rslt -+ * fields. Skip word[0] in the latter case. 
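-+	 * (Each qb_attr_code names a (word, offset, width) triple; for
-+	 * example, code_fq_fqid = QB_CODE(1, 0, 24) above selects the low
-+	 * 24 bits of word 1, the same slot the FQID was encoded into for
-+	 * the command.)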
*/
-+	word_copy(&d[1], &p[1], 15);
-+	return 0;
-+}
-+
-+void qbman_fq_attr_get_fqctrl(struct qbman_attr *d, uint32_t *fqctrl)
-+{
-+	uint32_t *p = ATTR32(d);
-+
-+	*fqctrl = qb_attr_code_decode(&code_fq_fqctrl, p);
-+}
-+
-+void qbman_fq_attr_get_cgrid(struct qbman_attr *d, uint32_t *cgrid)
-+{
-+	uint32_t *p = ATTR32(d);
-+
-+	*cgrid = qb_attr_code_decode(&code_fq_cgrid, p);
-+}
-+
-+void qbman_fq_attr_get_destwq(struct qbman_attr *d, uint32_t *destwq)
-+{
-+	uint32_t *p = ATTR32(d);
-+
-+	*destwq = qb_attr_code_decode(&code_fq_destwq, p);
-+}
-+
-+void qbman_fq_attr_get_icscred(struct qbman_attr *d, uint32_t *icscred)
-+{
-+	uint32_t *p = ATTR32(d);
-+
-+	*icscred = qb_attr_code_decode(&code_fq_icscred, p);
-+}
-+
-+static struct qb_attr_code code_tdthresh_exp = QB_CODE(0, 0, 5);
-+static struct qb_attr_code code_tdthresh_mant = QB_CODE(0, 5, 8);
-+/* Thresholds are stored as mantissa/exponent pairs; e.g. val = 0x105
-+ * decodes to mant = 8, exp = 5, i.e. a threshold of 8 << 5 = 256.
-+ */
-+static uint32_t qbman_thresh_to_value(uint32_t val)
-+{
-+	uint32_t m, e;
-+
-+	m = qb_attr_code_decode(&code_tdthresh_mant, &val);
-+	e = qb_attr_code_decode(&code_tdthresh_exp, &val);
-+	return m << e;
-+}
-+
-+void qbman_fq_attr_get_tdthresh(struct qbman_attr *d, uint32_t *tdthresh)
-+{
-+	uint32_t *p = ATTR32(d);
-+
-+	*tdthresh = qbman_thresh_to_value(qb_attr_code_decode(&code_fq_tdthresh,
-+					p));
-+}
-+
-+void qbman_fq_attr_get_oa(struct qbman_attr *d,
-+			  int *oa_ics, int *oa_cgr, int32_t *oa_len)
-+{
-+	uint32_t *p = ATTR32(d);
-+
-+	*oa_ics = !!qb_attr_code_decode(&code_fq_oa_ics, p);
-+	*oa_cgr = !!qb_attr_code_decode(&code_fq_oa_cgr, p);
-+	*oa_len = qb_attr_code_makesigned(&code_fq_oa_len,
-+			qb_attr_code_decode(&code_fq_oa_len, p));
-+}
-+
-+void qbman_fq_attr_get_mctl(struct qbman_attr *d,
-+			    int *bdi, int *ff, int *va, int *ps)
-+{
-+	uint32_t *p = ATTR32(d);
-+
-+	*bdi = !!qb_attr_code_decode(&code_fq_mctl_bdi, p);
-+	*ff = !!qb_attr_code_decode(&code_fq_mctl_ff, p);
-+	*va = !!qb_attr_code_decode(&code_fq_mctl_va, p);
-+	*ps = !!qb_attr_code_decode(&code_fq_mctl_ps, p);
-+}
-+
-+void qbman_fq_attr_get_ctx(struct qbman_attr *d, uint32_t *hi, uint32_t *lo)
-+{
-+	uint32_t *p = ATTR32(d);
-+
-+	*hi = qb_attr_code_decode(&code_fq_ctx_upper32, p);
-+	*lo = qb_attr_code_decode(&code_fq_ctx_lower32, p);
-+}
-+
-+void qbman_fq_attr_get_icid(struct qbman_attr *d, uint32_t *icid, int *pl)
-+{
-+	uint32_t *p = ATTR32(d);
-+
-+	*icid = qb_attr_code_decode(&code_fq_icid, p);
-+	*pl = !!qb_attr_code_decode(&code_fq_pl, p);
-+}
-+
-+void qbman_fq_attr_get_vfqid(struct qbman_attr *d, uint32_t *vfqid)
-+{
-+	uint32_t *p = ATTR32(d);
-+
-+	*vfqid = qb_attr_code_decode(&code_fq_vfqid, p);
-+}
-+
-+void qbman_fq_attr_get_erfqid(struct qbman_attr *d, uint32_t *erfqid)
-+{
-+	uint32_t *p = ATTR32(d);
-+
-+	*erfqid = qb_attr_code_decode(&code_fq_erfqid, p);
-+}
-+
-+/* Query FQ Non-Programmable Fields */
-+static struct qb_attr_code code_fq_np_state = QB_CODE(0, 16, 3);
-+static struct qb_attr_code code_fq_np_fe = QB_CODE(0, 19, 1);
-+static struct qb_attr_code code_fq_np_x = QB_CODE(0, 20, 1);
-+static struct qb_attr_code code_fq_np_r = QB_CODE(0, 21, 1);
-+static struct qb_attr_code code_fq_np_oe = QB_CODE(0, 22, 1);
-+static struct qb_attr_code code_fq_np_frm_cnt = QB_CODE(6, 0, 24);
-+static struct qb_attr_code code_fq_np_byte_cnt = QB_CODE(7, 0, 32);
-+
-+int qbman_fq_query_state(struct qbman_swp *s, uint32_t fqid,
-+			 struct qbman_attr *state)
-+{
-+	uint32_t *p;
-+	uint32_t rslt;
-+	uint32_t *d = ATTR32(state);
-+
-+	qbman_fq_attr_clear(state);
-+
-+	p = qbman_swp_mc_start(s);
-+	if (!p)
-+		return -EBUSY;
-+	qb_attr_code_encode(&code_fq_fqid,
p, fqid); -+ p = qbman_swp_mc_complete(s, p, QBMAN_FQ_QUERY_NP); -+ -+ /* Decode the outcome */ -+ rslt = qb_attr_code_decode(&code_generic_rslt, p); -+ BUG_ON(qb_attr_code_decode(&code_generic_verb, p) != QBMAN_FQ_QUERY_NP); -+ -+ /* Determine success or failure */ -+ if (unlikely(rslt != QBMAN_MC_RSLT_OK)) { -+ pr_err("Query NP fields of FQID 0x%x failed, code=0x%02x\n", -+ fqid, rslt); -+ return -EIO; -+ } -+ word_copy(&d[0], &p[0], 16); -+ return 0; -+} -+ -+uint32_t qbman_fq_state_schedstate(const struct qbman_attr *state) -+{ -+ const uint32_t *p = ATTR32(state); -+ -+ return qb_attr_code_decode(&code_fq_np_state, p); -+} -+ -+int qbman_fq_state_force_eligible(const struct qbman_attr *state) -+{ -+ const uint32_t *p = ATTR32(state); -+ -+ return !!qb_attr_code_decode(&code_fq_np_fe, p); -+} -+ -+int qbman_fq_state_xoff(const struct qbman_attr *state) -+{ -+ const uint32_t *p = ATTR32(state); -+ -+ return !!qb_attr_code_decode(&code_fq_np_x, p); -+} -+ -+int qbman_fq_state_retirement_pending(const struct qbman_attr *state) -+{ -+ const uint32_t *p = ATTR32(state); -+ -+ return !!qb_attr_code_decode(&code_fq_np_r, p); -+} -+ -+int qbman_fq_state_overflow_error(const struct qbman_attr *state) -+{ -+ const uint32_t *p = ATTR32(state); -+ -+ return !!qb_attr_code_decode(&code_fq_np_oe, p); -+} -+ -+uint32_t qbman_fq_state_frame_count(const struct qbman_attr *state) -+{ -+ const uint32_t *p = ATTR32(state); -+ -+ return qb_attr_code_decode(&code_fq_np_frm_cnt, p); -+} -+ -+uint32_t qbman_fq_state_byte_count(const struct qbman_attr *state) -+{ -+ const uint32_t *p = ATTR32(state); -+ -+ return qb_attr_code_decode(&code_fq_np_byte_cnt, p); -+} -+ -+/* Query CGR */ -+static struct qb_attr_code code_cgr_cgid = QB_CODE(0, 16, 16); -+static struct qb_attr_code code_cgr_cscn_wq_en_enter = QB_CODE(2, 0, 1); -+static struct qb_attr_code code_cgr_cscn_wq_en_exit = QB_CODE(2, 1, 1); -+static struct qb_attr_code code_cgr_cscn_wq_icd = QB_CODE(2, 2, 1); -+static struct qb_attr_code code_cgr_mode = QB_CODE(3, 16, 2); -+static struct qb_attr_code code_cgr_rej_cnt_mode = QB_CODE(3, 18, 1); -+static struct qb_attr_code code_cgr_cscn_bdi = QB_CODE(3, 19, 1); -+static struct qb_attr_code code_cgr_cscn_wr_en_enter = QB_CODE(3, 24, 1); -+static struct qb_attr_code code_cgr_cscn_wr_en_exit = QB_CODE(3, 25, 1); -+static struct qb_attr_code code_cgr_cg_wr_ae = QB_CODE(3, 26, 1); -+static struct qb_attr_code code_cgr_cscn_dcp_en = QB_CODE(3, 27, 1); -+static struct qb_attr_code code_cgr_cg_wr_va = QB_CODE(3, 28, 1); -+static struct qb_attr_code code_cgr_i_cnt_wr_en = QB_CODE(4, 0, 1); -+static struct qb_attr_code code_cgr_i_cnt_wr_bnd = QB_CODE(4, 1, 5); -+static struct qb_attr_code code_cgr_td_en = QB_CODE(4, 8, 1); -+static struct qb_attr_code code_cgr_cs_thres = QB_CODE(4, 16, 13); -+static struct qb_attr_code code_cgr_cs_thres_x = QB_CODE(5, 0, 13); -+static struct qb_attr_code code_cgr_td_thres = QB_CODE(5, 16, 13); -+static struct qb_attr_code code_cgr_cscn_tdcp = QB_CODE(6, 0, 16); -+static struct qb_attr_code code_cgr_cscn_wqid = QB_CODE(6, 16, 16); -+static struct qb_attr_code code_cgr_cscn_vcgid = QB_CODE(7, 0, 16); -+static struct qb_attr_code code_cgr_cg_icid = QB_CODE(7, 16, 15); -+static struct qb_attr_code code_cgr_cg_pl = QB_CODE(7, 31, 1); -+static struct qb_attr_code code_cgr_cg_wr_addr_lo = QB_CODE(8, 0, 32); -+static struct qb_attr_code code_cgr_cg_wr_addr_hi = QB_CODE(9, 0, 32); -+static struct qb_attr_code code_cgr_cscn_ctx_lo = QB_CODE(10, 0, 32); -+static struct qb_attr_code 
code_cgr_cscn_ctx_hi = QB_CODE(11, 0, 32); -+ -+static void qbman_cgr_attr_clear(struct qbman_attr *a) -+{ -+ memset(a, 0, sizeof(*a)); -+ attr_type_set(a, qbman_attr_usage_cgr); -+} -+ -+int qbman_cgr_query(struct qbman_swp *s, uint32_t cgid, struct qbman_attr *attr) -+{ -+ uint32_t *p; -+ uint32_t verb, rslt; -+ uint32_t *d[2]; -+ int i; -+ uint32_t query_verb; -+ -+ d[0] = ATTR32(attr); -+ d[1] = ATTR32_1(attr); -+ -+ qbman_cgr_attr_clear(attr); -+ -+ for (i = 0; i < 2; i++) { -+ p = qbman_swp_mc_start(s); -+ if (!p) -+ return -EBUSY; -+ query_verb = i ? QBMAN_WRED_QUERY : QBMAN_CGR_QUERY; -+ -+ qb_attr_code_encode(&code_cgr_cgid, p, cgid); -+ p = qbman_swp_mc_complete(s, p, p[0] | query_verb); -+ -+ /* Decode the outcome */ -+ verb = qb_attr_code_decode(&code_generic_verb, p); -+ rslt = qb_attr_code_decode(&code_generic_rslt, p); -+ BUG_ON(verb != query_verb); -+ -+ /* Determine success or failure */ -+ if (unlikely(rslt != QBMAN_MC_RSLT_OK)) { -+ pr_err("Query CGID 0x%x failed,", cgid); -+ pr_err(" verb=0x%02x, code=0x%02x\n", verb, rslt); -+ return -EIO; -+ } -+ /* For the configure, word[0] of the command contains only the -+ * verb/cgid. For the query, word[0] of the result contains -+ * only the verb/rslt fields. Skip word[0] in the latter case. -+ */ -+ word_copy(&d[i][1], &p[1], 15); -+ } -+ return 0; -+} -+ -+void qbman_cgr_attr_get_ctl1(struct qbman_attr *d, int *cscn_wq_en_enter, -+ int *cscn_wq_en_exit, int *cscn_wq_icd) -+ { -+ uint32_t *p = ATTR32(d); -+ *cscn_wq_en_enter = !!qb_attr_code_decode(&code_cgr_cscn_wq_en_enter, -+ p); -+ *cscn_wq_en_exit = !!qb_attr_code_decode(&code_cgr_cscn_wq_en_exit, p); -+ *cscn_wq_icd = !!qb_attr_code_decode(&code_cgr_cscn_wq_icd, p); -+} -+ -+void qbman_cgr_attr_get_mode(struct qbman_attr *d, uint32_t *mode, -+ int *rej_cnt_mode, int *cscn_bdi) -+{ -+ uint32_t *p = ATTR32(d); -+ *mode = qb_attr_code_decode(&code_cgr_mode, p); -+ *rej_cnt_mode = !!qb_attr_code_decode(&code_cgr_rej_cnt_mode, p); -+ *cscn_bdi = !!qb_attr_code_decode(&code_cgr_cscn_bdi, p); -+} -+ -+void qbman_cgr_attr_get_ctl2(struct qbman_attr *d, int *cscn_wr_en_enter, -+ int *cscn_wr_en_exit, int *cg_wr_ae, -+ int *cscn_dcp_en, int *cg_wr_va) -+{ -+ uint32_t *p = ATTR32(d); -+ *cscn_wr_en_enter = !!qb_attr_code_decode(&code_cgr_cscn_wr_en_enter, -+ p); -+ *cscn_wr_en_exit = !!qb_attr_code_decode(&code_cgr_cscn_wr_en_exit, p); -+ *cg_wr_ae = !!qb_attr_code_decode(&code_cgr_cg_wr_ae, p); -+ *cscn_dcp_en = !!qb_attr_code_decode(&code_cgr_cscn_dcp_en, p); -+ *cg_wr_va = !!qb_attr_code_decode(&code_cgr_cg_wr_va, p); -+} -+ -+void qbman_cgr_attr_get_iwc(struct qbman_attr *d, int *i_cnt_wr_en, -+ uint32_t *i_cnt_wr_bnd) -+{ -+ uint32_t *p = ATTR32(d); -+ *i_cnt_wr_en = !!qb_attr_code_decode(&code_cgr_i_cnt_wr_en, p); -+ *i_cnt_wr_bnd = qb_attr_code_decode(&code_cgr_i_cnt_wr_bnd, p); -+} -+ -+void qbman_cgr_attr_get_tdc(struct qbman_attr *d, int *td_en) -+{ -+ uint32_t *p = ATTR32(d); -+ *td_en = !!qb_attr_code_decode(&code_cgr_td_en, p); -+} -+ -+void qbman_cgr_attr_get_cs_thres(struct qbman_attr *d, uint32_t *cs_thres) -+{ -+ uint32_t *p = ATTR32(d); -+ *cs_thres = qbman_thresh_to_value(qb_attr_code_decode( -+ &code_cgr_cs_thres, p)); -+} -+ -+void qbman_cgr_attr_get_cs_thres_x(struct qbman_attr *d, -+ uint32_t *cs_thres_x) -+{ -+ uint32_t *p = ATTR32(d); -+ *cs_thres_x = qbman_thresh_to_value(qb_attr_code_decode( -+ &code_cgr_cs_thres_x, p)); -+} -+ -+void qbman_cgr_attr_get_td_thres(struct qbman_attr *d, uint32_t *td_thres) -+{ -+ uint32_t *p = ATTR32(d); -+ *td_thres = 
qbman_thresh_to_value(qb_attr_code_decode( -+ &code_cgr_td_thres, p)); -+} -+ -+void qbman_cgr_attr_get_cscn_tdcp(struct qbman_attr *d, uint32_t *cscn_tdcp) -+{ -+ uint32_t *p = ATTR32(d); -+ *cscn_tdcp = qb_attr_code_decode(&code_cgr_cscn_tdcp, p); -+} -+ -+void qbman_cgr_attr_get_cscn_wqid(struct qbman_attr *d, uint32_t *cscn_wqid) -+{ -+ uint32_t *p = ATTR32(d); -+ *cscn_wqid = qb_attr_code_decode(&code_cgr_cscn_wqid, p); -+} -+ -+void qbman_cgr_attr_get_cscn_vcgid(struct qbman_attr *d, -+ uint32_t *cscn_vcgid) -+{ -+ uint32_t *p = ATTR32(d); -+ *cscn_vcgid = qb_attr_code_decode(&code_cgr_cscn_vcgid, p); -+} -+ -+void qbman_cgr_attr_get_cg_icid(struct qbman_attr *d, uint32_t *icid, -+ int *pl) -+{ -+ uint32_t *p = ATTR32(d); -+ *icid = qb_attr_code_decode(&code_cgr_cg_icid, p); -+ *pl = !!qb_attr_code_decode(&code_cgr_cg_pl, p); -+} -+ -+void qbman_cgr_attr_get_cg_wr_addr(struct qbman_attr *d, -+ uint64_t *cg_wr_addr) -+{ -+ uint32_t *p = ATTR32(d); -+ *cg_wr_addr = ((uint64_t)qb_attr_code_decode(&code_cgr_cg_wr_addr_hi, -+ p) << 32) | -+ (uint64_t)qb_attr_code_decode(&code_cgr_cg_wr_addr_lo, -+ p); -+} -+ -+void qbman_cgr_attr_get_cscn_ctx(struct qbman_attr *d, uint64_t *cscn_ctx) -+{ -+ uint32_t *p = ATTR32(d); -+ *cscn_ctx = ((uint64_t)qb_attr_code_decode(&code_cgr_cscn_ctx_hi, p) -+ << 32) | -+ (uint64_t)qb_attr_code_decode(&code_cgr_cscn_ctx_lo, p); -+} -+ -+#define WRED_EDP_WORD(n) (18 + n / 4) -+#define WRED_EDP_OFFSET(n) (8 * (n % 4)) -+#define WRED_PARM_DP_WORD(n) (n + 20) -+#define WRED_WE_EDP(n) (16 + n * 2) -+#define WRED_WE_PARM_DP(n) (17 + n * 2) -+void qbman_cgr_attr_wred_get_edp(struct qbman_attr *d, uint32_t idx, -+ int *edp) -+{ -+ uint32_t *p = ATTR32(d); -+ struct qb_attr_code code_wred_edp = QB_CODE(WRED_EDP_WORD(idx), -+ WRED_EDP_OFFSET(idx), 8); -+ *edp = (int)qb_attr_code_decode(&code_wred_edp, p); -+} -+ -+void qbman_cgr_attr_wred_dp_decompose(uint32_t dp, uint64_t *minth, -+ uint64_t *maxth, uint8_t *maxp) -+{ -+ uint8_t ma, mn, step_i, step_s, pn; -+ -+ ma = (uint8_t)(dp >> 24); -+ mn = (uint8_t)(dp >> 19) & 0x1f; -+ step_i = (uint8_t)(dp >> 11); -+ step_s = (uint8_t)(dp >> 6) & 0x1f; -+ pn = (uint8_t)dp & 0x3f; -+ -+ *maxp = (uint8_t)(((pn << 2) * 100) / 256); -+ -+ if (mn == 0) -+ *maxth = ma; -+ else -+ *maxth = ((ma + 256) * (1 << (mn - 1))); -+ -+ if (step_s == 0) -+ *minth = *maxth - step_i; -+ else -+ *minth = *maxth - (256 + step_i) * (1 << (step_s - 1)); -+} -+ -+void qbman_cgr_attr_wred_get_parm_dp(struct qbman_attr *d, uint32_t idx, -+ uint32_t *dp) -+{ -+ uint32_t *p = ATTR32(d); -+ struct qb_attr_code code_wred_parm_dp = QB_CODE(WRED_PARM_DP_WORD(idx), -+ 0, 8); -+ *dp = qb_attr_code_decode(&code_wred_parm_dp, p); -+} -+ -+/* Query CGR/CCGR/CQ statistics */ -+static struct qb_attr_code code_cgr_stat_ct = QB_CODE(4, 0, 32); -+static struct qb_attr_code code_cgr_stat_frame_cnt_lo = QB_CODE(4, 0, 32); -+static struct qb_attr_code code_cgr_stat_frame_cnt_hi = QB_CODE(5, 0, 8); -+static struct qb_attr_code code_cgr_stat_byte_cnt_lo = QB_CODE(6, 0, 32); -+static struct qb_attr_code code_cgr_stat_byte_cnt_hi = QB_CODE(7, 0, 16); -+static int qbman_cgr_statistics_query(struct qbman_swp *s, uint32_t cgid, -+ int clear, uint32_t command_type, -+ uint64_t *frame_cnt, uint64_t *byte_cnt) -+{ -+ uint32_t *p; -+ uint32_t verb, rslt; -+ uint32_t query_verb; -+ uint32_t hi, lo; -+ -+ p = qbman_swp_mc_start(s); -+ if (!p) -+ return -EBUSY; -+ -+ qb_attr_code_encode(&code_cgr_cgid, p, cgid); -+ if (command_type < 2) -+ qb_attr_code_encode(&code_cgr_stat_ct, p, 
command_type);
-+	query_verb = clear ?
-+		QBMAN_CGR_STAT_QUERY_CLR : QBMAN_CGR_STAT_QUERY;
-+	p = qbman_swp_mc_complete(s, p, p[0] | query_verb);
-+
-+	/* Decode the outcome */
-+	verb = qb_attr_code_decode(&code_generic_verb, p);
-+	rslt = qb_attr_code_decode(&code_generic_rslt, p);
-+	BUG_ON(verb != query_verb);
-+
-+	/* Determine success or failure */
-+	if (unlikely(rslt != QBMAN_MC_RSLT_OK)) {
-+		pr_err("Query statistics of CGID 0x%x failed,", cgid);
-+		pr_err(" verb=0x%02x code=0x%02x\n", verb, rslt);
-+		return -EIO;
-+	}
-+
-+	/* Fill only the counters the caller provided */
-+	if (frame_cnt) {
-+		hi = qb_attr_code_decode(&code_cgr_stat_frame_cnt_hi, p);
-+		lo = qb_attr_code_decode(&code_cgr_stat_frame_cnt_lo, p);
-+		*frame_cnt = ((uint64_t)hi << 32) | (uint64_t)lo;
-+	}
-+	if (byte_cnt) {
-+		hi = qb_attr_code_decode(&code_cgr_stat_byte_cnt_hi, p);
-+		lo = qb_attr_code_decode(&code_cgr_stat_byte_cnt_lo, p);
-+		*byte_cnt = ((uint64_t)hi << 32) | (uint64_t)lo;
-+	}
-+
-+	return 0;
-+}
-+
-+int qbman_cgr_reject_statistics(struct qbman_swp *s, uint32_t cgid, int clear,
-+				uint64_t *frame_cnt, uint64_t *byte_cnt)
-+{
-+	return qbman_cgr_statistics_query(s, cgid, clear, 0xff,
-+					  frame_cnt, byte_cnt);
-+}
-+
-+int qbman_ccgr_reject_statistics(struct qbman_swp *s, uint32_t cgid, int clear,
-+				 uint64_t *frame_cnt, uint64_t *byte_cnt)
-+{
-+	return qbman_cgr_statistics_query(s, cgid, clear, 1,
-+					  frame_cnt, byte_cnt);
-+}
-+
-+int qbman_cq_dequeue_statistics(struct qbman_swp *s, uint32_t cgid, int clear,
-+				uint64_t *frame_cnt, uint64_t *byte_cnt)
-+{
-+	return qbman_cgr_statistics_query(s, cgid, clear, 0,
-+					  frame_cnt, byte_cnt);
-+}
-+
-+/* WQ Chan Query */
-+static struct qb_attr_code code_wqchan_chanid = QB_CODE(0, 16, 16);
-+static struct qb_attr_code code_wqchan_cdan_ctx_lo = QB_CODE(2, 0, 32);
-+static struct qb_attr_code code_wqchan_cdan_ctx_hi = QB_CODE(3, 0, 32);
-+static struct qb_attr_code code_wqchan_cdan_wqid = QB_CODE(1, 16, 16);
-+static struct qb_attr_code code_wqchan_ctrl = QB_CODE(1, 8, 8);
-+
-+static void qbman_wqchan_attr_clear(struct qbman_attr *a)
-+{
-+	memset(a, 0, sizeof(*a));
-+	attr_type_set(a, qbman_attr_usage_wqchan);
-+}
-+
-+int qbman_wqchan_query(struct qbman_swp *s, uint16_t chanid,
-+		       struct qbman_attr *a)
-+{
-+	uint32_t *p;
-+	uint32_t rslt;
-+	uint32_t *attr = ATTR32(a);
-+
-+	qbman_wqchan_attr_clear(a);
-+
-+	/* Start the management command */
-+	p = qbman_swp_mc_start(s);
-+	if (!p)
-+		return -EBUSY;
-+
-+	/* Encode the caller-provided attributes */
-+	qb_attr_code_encode(&code_wqchan_chanid, p, chanid);
-+
-+	/* Complete the management command */
-+	p = qbman_swp_mc_complete(s, p, p[0] | QBMAN_WQ_QUERY);
-+
-+	/* Decode the outcome */
-+	rslt = qb_attr_code_decode(&code_generic_rslt, p);
-+	BUG_ON(qb_attr_code_decode(&code_generic_verb, p) != QBMAN_WQ_QUERY);
-+
-+	/* Determine success or failure */
-+	if (unlikely(rslt != QBMAN_MC_RSLT_OK)) {
-+		pr_err("Query of WQCHAN 0x%x failed, code=0x%02x\n",
-+		       chanid, rslt);
-+		return -EIO;
-+	}
-+
-+	/* For the query, word[0] of the result contains only the
-+	 * verb/rslt fields, so skip word[0].
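-+	 * (The per-WQ queue lengths sit in words 8 onwards of the response;
-+	 * qbman_wqchan_attr_get_wqlen() below builds QB_CODE(wq + 8, 0, 24)
-+	 * on the fly to pull out the 24-bit length of one work queue.)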
-+ */ -+ word_copy(&attr[1], &p[1], 15); -+ return 0; -+} -+ -+void qbman_wqchan_attr_get_wqlen(struct qbman_attr *attr, int wq, uint32_t *len) -+{ -+ uint32_t *p = ATTR32(attr); -+ struct qb_attr_code code_wqchan_len = QB_CODE(wq + 8, 0, 24); -+ *len = qb_attr_code_decode(&code_wqchan_len, p); -+} -+ -+void qbman_wqchan_attr_get_cdan_ctx(struct qbman_attr *attr, uint64_t *cdan_ctx) -+{ -+ uint32_t lo, hi; -+ uint32_t *p = ATTR32(attr); -+ -+ lo = qb_attr_code_decode(&code_wqchan_cdan_ctx_lo, p); -+ hi = qb_attr_code_decode(&code_wqchan_cdan_ctx_hi, p); -+ *cdan_ctx = ((uint64_t)hi << 32) | (uint64_t)lo; -+} -+ -+void qbman_wqchan_attr_get_cdan_wqid(struct qbman_attr *attr, -+ uint16_t *cdan_wqid) -+{ -+ uint32_t *p = ATTR32(attr); -+ *cdan_wqid = (uint16_t)qb_attr_code_decode(&code_wqchan_cdan_wqid, p); -+} -+ -+void qbman_wqchan_attr_get_ctrl(struct qbman_attr *attr, uint8_t *ctrl) -+{ -+ uint32_t *p = ATTR32(attr); -+ *ctrl = (uint8_t)qb_attr_code_decode(&code_wqchan_ctrl, p); -+} -+ -+void qbman_wqchan_attr_get_chanid(struct qbman_attr *attr, uint16_t *chanid) -+{ -+ uint32_t *p = ATTR32(attr); -+ *chanid = (uint16_t)qb_attr_code_decode(&code_wqchan_chanid, p); -+} -diff --git a/drivers/net/dpaa2/qbman/driver/qbman_debug.h b/drivers/net/dpaa2/qbman/driver/qbman_debug.h -new file mode 100644 -index 0000000..4d586a6 ---- /dev/null -+++ b/drivers/net/dpaa2/qbman/driver/qbman_debug.h -@@ -0,0 +1,140 @@ -+/* Copyright (C) 2015 Freescale Semiconductor, Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Freescale Semiconductor nor the -+ * names of its contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY -+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY -+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+ -+struct qbman_attr { -+ uint32_t dont_manipulate_directly[40]; -+}; -+ -+/* Buffer pool query commands */ -+int qbman_bp_query(struct qbman_swp *s, uint32_t bpid, -+ struct qbman_attr *a); -+void qbman_bp_attr_get_bdi(struct qbman_attr *a, int *bdi, int *va, int *wae); -+void qbman_bp_attr_get_swdet(struct qbman_attr *a, uint32_t *swdet); -+void qbman_bp_attr_get_swdxt(struct qbman_attr *a, uint32_t *swdxt); -+void qbman_bp_attr_get_hwdet(struct qbman_attr *a, uint32_t *hwdet); -+void qbman_bp_attr_get_hwdxt(struct qbman_attr *a, uint32_t *hwdxt); -+void qbman_bp_attr_get_swset(struct qbman_attr *a, uint32_t *swset); -+void qbman_bp_attr_get_swsxt(struct qbman_attr *a, uint32_t *swsxt); -+void qbman_bp_attr_get_vbpid(struct qbman_attr *a, uint32_t *vbpid); -+void qbman_bp_attr_get_icid(struct qbman_attr *a, uint32_t *icid, int *pl); -+void qbman_bp_attr_get_bpscn_addr(struct qbman_attr *a, uint64_t *bpscn_addr); -+void qbman_bp_attr_get_bpscn_ctx(struct qbman_attr *a, uint64_t *bpscn_ctx); -+void qbman_bp_attr_get_hw_targ(struct qbman_attr *a, uint32_t *hw_targ); -+int qbman_bp_info_has_free_bufs(struct qbman_attr *a); -+int qbman_bp_info_is_depleted(struct qbman_attr *a); -+int qbman_bp_info_is_surplus(struct qbman_attr *a); -+uint32_t qbman_bp_info_num_free_bufs(struct qbman_attr *a); -+uint32_t qbman_bp_info_hdptr(struct qbman_attr *a); -+uint32_t qbman_bp_info_sdcnt(struct qbman_attr *a); -+uint32_t qbman_bp_info_hdcnt(struct qbman_attr *a); -+uint32_t qbman_bp_info_sscnt(struct qbman_attr *a); -+ -+/* FQ query function for programmable fields */ -+int qbman_fq_query(struct qbman_swp *s, uint32_t fqid, -+ struct qbman_attr *desc); -+void qbman_fq_attr_get_fqctrl(struct qbman_attr *d, uint32_t *fqctrl); -+void qbman_fq_attr_get_cgrid(struct qbman_attr *d, uint32_t *cgrid); -+void qbman_fq_attr_get_destwq(struct qbman_attr *d, uint32_t *destwq); -+void qbman_fq_attr_get_icscred(struct qbman_attr *d, uint32_t *icscred); -+void qbman_fq_attr_get_tdthresh(struct qbman_attr *d, uint32_t *tdthresh); -+void qbman_fq_attr_get_oa(struct qbman_attr *d, -+ int *oa_ics, int *oa_cgr, int32_t *oa_len); -+void qbman_fq_attr_get_mctl(struct qbman_attr *d, -+ int *bdi, int *ff, int *va, int *ps); -+void qbman_fq_attr_get_ctx(struct qbman_attr *d, uint32_t *hi, uint32_t *lo); -+void qbman_fq_attr_get_icid(struct qbman_attr *d, uint32_t *icid, int *pl); -+void qbman_fq_attr_get_vfqid(struct qbman_attr *d, uint32_t *vfqid); -+void qbman_fq_attr_get_erfqid(struct qbman_attr *d, uint32_t *erfqid); -+ -+/* FQ query command for non-programmable fields*/ -+enum qbman_fq_schedstate_e { -+ qbman_fq_schedstate_oos = 0, -+ qbman_fq_schedstate_retired, -+ qbman_fq_schedstate_tentatively_scheduled, -+ qbman_fq_schedstate_truly_scheduled, -+ qbman_fq_schedstate_parked, -+ qbman_fq_schedstate_held_active, -+}; -+ -+int qbman_fq_query_state(struct qbman_swp *s, uint32_t fqid, -+ struct qbman_attr *state); -+uint32_t qbman_fq_state_schedstate(const struct qbman_attr *state); -+int qbman_fq_state_force_eligible(const struct qbman_attr *state); -+int qbman_fq_state_xoff(const struct qbman_attr *state); -+int qbman_fq_state_retirement_pending(const struct qbman_attr *state); -+int qbman_fq_state_overflow_error(const struct qbman_attr *state); -+uint32_t qbman_fq_state_frame_count(const struct qbman_attr *state); -+uint32_t qbman_fq_state_byte_count(const struct qbman_attr *state); -+ -+/* CGR query */ -+int qbman_cgr_query(struct qbman_swp *s, uint32_t cgid, -+ struct qbman_attr *attr); -+void 
qbman_cgr_attr_get_ctl1(struct qbman_attr *d, int *cscn_wq_en_enter, -+ int *cscn_wq_en_exit, int *cscn_wq_icd); -+void qbman_cgr_attr_get_mode(struct qbman_attr *d, uint32_t *mode, -+ int *rej_cnt_mode, int *cscn_bdi); -+void qbman_cgr_attr_get_ctl2(struct qbman_attr *d, int *cscn_wr_en_enter, -+ int *cscn_wr_en_exit, int *cg_wr_ae, -+ int *cscn_dcp_en, int *cg_wr_va); -+void qbman_cgr_attr_get_iwc(struct qbman_attr *d, int *i_cnt_wr_en, -+ uint32_t *i_cnt_wr_bnd); -+void qbman_cgr_attr_get_tdc(struct qbman_attr *d, int *td_en); -+void qbman_cgr_attr_get_cs_thres(struct qbman_attr *d, uint32_t *cs_thres); -+void qbman_cgr_attr_get_cs_thres_x(struct qbman_attr *d, -+ uint32_t *cs_thres_x); -+void qbman_cgr_attr_get_td_thres(struct qbman_attr *d, uint32_t *td_thres); -+void qbman_cgr_attr_get_cscn_tdcp(struct qbman_attr *d, uint32_t *cscn_tdcp); -+void qbman_cgr_attr_get_cscn_wqid(struct qbman_attr *d, uint32_t *cscn_wqid); -+void qbman_cgr_attr_get_cscn_vcgid(struct qbman_attr *d, -+ uint32_t *cscn_vcgid); -+void qbman_cgr_attr_get_cg_icid(struct qbman_attr *d, uint32_t *icid, -+ int *pl); -+void qbman_cgr_attr_get_cg_wr_addr(struct qbman_attr *d, -+ uint64_t *cg_wr_addr); -+void qbman_cgr_attr_get_cscn_ctx(struct qbman_attr *d, uint64_t *cscn_ctx); -+void qbman_cgr_attr_wred_get_edp(struct qbman_attr *d, uint32_t idx, -+ int *edp); -+void qbman_cgr_attr_wred_dp_decompose(uint32_t dp, uint64_t *minth, -+ uint64_t *maxth, uint8_t *maxp); -+void qbman_cgr_attr_wred_get_parm_dp(struct qbman_attr *d, uint32_t idx, -+ uint32_t *dp); -+ -+/* CGR/CCGR/CQ statistics query */ -+int qbman_cgr_reject_statistics(struct qbman_swp *s, uint32_t cgid, int clear, -+ uint64_t *frame_cnt, uint64_t *byte_cnt); -+int qbman_ccgr_reject_statistics(struct qbman_swp *s, uint32_t cgid, int clear, -+ uint64_t *frame_cnt, uint64_t *byte_cnt); -+int qbman_cq_dequeue_statistics(struct qbman_swp *s, uint32_t cgid, int clear, -+ uint64_t *frame_cnt, uint64_t *byte_cnt); -+ -+/* Query Work Queue Channel */ -+int qbman_wqchan_query(struct qbman_swp *s, uint16_t chanid, -+ struct qbman_attr *attr); -+void qbman_wqchan_attr_get_wqlen(struct qbman_attr *attr, int wq, uint32_t *len); -+void qbman_wqchan_attr_get_cdan_ctx(struct qbman_attr *attr, uint64_t *cdan_ctx); -+void qbman_wqchan_attr_get_cdan_wqid(struct qbman_attr *attr, -+ uint16_t *cdan_wqid); -+void qbman_wqchan_attr_get_ctrl(struct qbman_attr *attr, uint8_t *ctrl); -+void qbman_wqchan_attr_get_chanid(struct qbman_attr *attr, uint16_t *chanid); -diff --git a/drivers/net/dpaa2/qbman/driver/qbman_portal.c b/drivers/net/dpaa2/qbman/driver/qbman_portal.c -new file mode 100644 -index 0000000..52e1f64 ---- /dev/null -+++ b/drivers/net/dpaa2/qbman/driver/qbman_portal.c -@@ -0,0 +1,1441 @@ -+/* Copyright (C) 2014 Freescale Semiconductor, Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Freescale Semiconductor nor the -+ * names of its contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. 
-+ * -+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY -+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY -+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#include "qbman_portal.h" -+ -+/* QBMan portal management command codes */ -+#define QBMAN_MC_ACQUIRE 0x30 -+#define QBMAN_WQCHAN_CONFIGURE 0x46 -+ -+/* CINH register offsets */ -+#define QBMAN_CINH_SWP_EQCR_PI 0x800 -+#define QBMAN_CINH_SWP_EQCR_CI 0x840 -+#define QBMAN_CINH_SWP_EQAR 0x8c0 -+#define QBMAN_CINH_SWP_DQPI 0xa00 -+#define QBMAN_CINH_SWP_DCAP 0xac0 -+#define QBMAN_CINH_SWP_SDQCR 0xb00 -+#define QBMAN_CINH_SWP_RAR 0xcc0 -+#define QBMAN_CINH_SWP_ISR 0xe00 -+#define QBMAN_CINH_SWP_IER 0xe40 -+#define QBMAN_CINH_SWP_ISDR 0xe80 -+#define QBMAN_CINH_SWP_IIR 0xec0 -+ -+/* CENA register offsets */ -+#define QBMAN_CENA_SWP_EQCR(n) (0x000 + ((uint32_t)(n) << 6)) -+#define QBMAN_CENA_SWP_DQRR(n) (0x200 + ((uint32_t)(n) << 6)) -+#define QBMAN_CENA_SWP_RCR(n) (0x400 + ((uint32_t)(n) << 6)) -+#define QBMAN_CENA_SWP_CR 0x600 -+#define QBMAN_CENA_SWP_RR(vb) (0x700 + ((uint32_t)(vb) >> 1)) -+#define QBMAN_CENA_SWP_VDQCR 0x780 -+#define QBMAN_CENA_SWP_EQCR_CI 0x840 -+ -+/* Reverse mapping of QBMAN_CENA_SWP_DQRR() */ -+#define QBMAN_IDX_FROM_DQRR(p) (((unsigned long)p & 0x1ff) >> 6) -+ -+/* QBMan FQ management command codes */ -+#define QBMAN_FQ_SCHEDULE 0x48 -+#define QBMAN_FQ_FORCE 0x49 -+#define QBMAN_FQ_XON 0x4d -+#define QBMAN_FQ_XOFF 0x4e -+ -+/*******************************/ -+/* Pre-defined attribute codes */ -+/*******************************/ -+ -+struct qb_attr_code code_generic_verb = QB_CODE(0, 0, 7); -+struct qb_attr_code code_generic_rslt = QB_CODE(0, 8, 8); -+ -+/*************************/ -+/* SDQCR attribute codes */ -+/*************************/ -+ -+/* we put these here because at least some of them are required by -+ * qbman_swp_init() */ -+struct qb_attr_code code_sdqcr_dct = QB_CODE(0, 24, 2); -+struct qb_attr_code code_sdqcr_fc = QB_CODE(0, 29, 1); -+struct qb_attr_code code_sdqcr_tok = QB_CODE(0, 16, 8); -+static struct qb_attr_code code_eq_dca_idx; -+#define CODE_SDQCR_DQSRC(n) QB_CODE(0, n, 1) -+enum qbman_sdqcr_dct { -+ qbman_sdqcr_dct_null = 0, -+ qbman_sdqcr_dct_prio_ics, -+ qbman_sdqcr_dct_active_ics, -+ qbman_sdqcr_dct_active -+}; -+ -+enum qbman_sdqcr_fc { -+ qbman_sdqcr_fc_one = 0, -+ qbman_sdqcr_fc_up_to_3 = 1 -+}; -+ -+struct qb_attr_code code_sdqcr_dqsrc = QB_CODE(0, 0, 16); -+ -+/*********************************/ -+/* Portal constructor/destructor */ -+/*********************************/ -+ -+/* Software portals should always be in the power-on state when we initialise, -+ * due to the CCSR-based portal reset functionality that MC has. -+ * -+ * Erk! 
Turns out that QMan versions prior to 4.1 do not correctly reset DQRR -+ * valid-bits, so we need to support a workaround where we don't trust -+ * valid-bits when detecting new entries until any stale ring entries have been -+ * overwritten at least once. The idea is that we read PI for the first few -+ * entries, then switch to valid-bit after that. The trick is to clear the -+ * bug-work-around boolean once the PI wraps around the ring for the first time. -+ * -+ * Note: this still carries a slight additional cost once the decrementer hits -+ * zero. -+ */ -+struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d) -+{ -+ int ret; -+ uint32_t eqcr_pi; -+ struct qbman_swp *p = kmalloc(sizeof(*p), GFP_KERNEL); -+ -+ if (!p) -+ return NULL; -+ p->desc = d; -+#ifdef QBMAN_CHECKING -+ p->mc.check = swp_mc_can_start; -+#endif -+ p->mc.valid_bit = QB_VALID_BIT; -+ p->sdq = 0; -+ qb_attr_code_encode(&code_sdqcr_dct, &p->sdq, qbman_sdqcr_dct_prio_ics); -+ qb_attr_code_encode(&code_sdqcr_fc, &p->sdq, qbman_sdqcr_fc_up_to_3); -+ qb_attr_code_encode(&code_sdqcr_tok, &p->sdq, 0xbb); -+ atomic_set(&p->vdq.busy, 1); -+ p->vdq.valid_bit = QB_VALID_BIT; -+ p->dqrr.next_idx = 0; -+ p->dqrr.valid_bit = QB_VALID_BIT; -+ qman_version = p->desc->qman_version; -+ if ((qman_version & 0xFFFF0000) < QMAN_REV_4100) { -+ p->dqrr.dqrr_size = 4; -+ p->dqrr.reset_bug = 1; -+ /* Set size of DQRR to 4, encoded in 2 bits */ -+ code_eq_dca_idx = (struct qb_attr_code)QB_CODE(0, 8, 2); -+ } else { -+ p->dqrr.dqrr_size = 8; -+ p->dqrr.reset_bug = 0; -+ /* Set size of DQRR to 8, encoded in 3 bits */ -+ code_eq_dca_idx = (struct qb_attr_code)QB_CODE(0, 8, 3); -+ } -+ -+ ret = qbman_swp_sys_init(&p->sys, d, p->dqrr.dqrr_size); -+ if (ret) { -+ kfree(p); -+ pr_err("qbman_swp_sys_init() failed %d\n", ret); -+ return NULL; -+ } -+ /* SDQCR needs to be initialized to 0 when no channels are -+ being dequeued from or else the QMan HW will indicate an -+ error. 
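A note on the valid-bit convention this workaround protects: every QBMan ring entry carries a polarity bit that the producer flips on each wrap, so a consumer needs no shared index to spot new entries. The following standalone toy (not from the patch; names are illustrative) models that handshake:

	#include <stdint.h>
	#include <stdio.h>

	#define RING 8
	#define VB   0x80u	/* same value as QB_VALID_BIT */

	struct entry { uint8_t verb; };

	int main(void)
	{
		struct entry ring[RING] = { {0} };
		uint8_t prod_vb = VB, cons_vb = VB;
		unsigned prod = 0, cons = 0, seen = 0;

		for (unsigned n = 0; n < 20; n++) {
			/* producer stamps each entry with the current polarity */
			ring[prod].verb = 0x60 | prod_vb;
			if (++prod == RING) { prod = 0; prod_vb ^= VB; }
			/* consumer treats a polarity match as "new entry present" */
			while ((ring[cons].verb & VB) == cons_vb) {
				seen++;
				if (++cons == RING) { cons = 0; cons_vb ^= VB; }
			}
		}
		printf("saw %u of 20 entries\n", seen);
		return 0;
	}

The pre-4.1 reset bug described above is precisely stale polarity bits surviving a portal reset, which is why the init code falls back to the cache-inhibited producer index until the ring has been overwritten once.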
The values that were calculated above will be -+ applied when dequeues from a specific channel are enabled */ -+ qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_SDQCR, 0); -+ eqcr_pi = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI); -+ p->eqcr.pi = eqcr_pi & 0xF; -+ p->eqcr.pi_vb = eqcr_pi & QB_VALID_BIT; -+ p->eqcr.ci = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_CI) & 0xF; -+ p->eqcr.available = QBMAN_EQCR_SIZE - qm_cyc_diff(QBMAN_EQCR_SIZE, -+ p->eqcr.ci, p->eqcr.pi); -+ -+ return p; -+} -+ -+void qbman_swp_finish(struct qbman_swp *p) -+{ -+#ifdef QBMAN_CHECKING -+ BUG_ON(p->mc.check != swp_mc_can_start); -+#endif -+ qbman_swp_sys_finish(&p->sys); -+ kfree(p); -+} -+ -+const struct qbman_swp_desc *qbman_swp_get_desc(struct qbman_swp *p) -+{ -+ return p->desc; -+} -+ -+/**************/ -+/* Interrupts */ -+/**************/ -+ -+uint32_t qbman_swp_interrupt_get_vanish(struct qbman_swp *p) -+{ -+ return qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_ISDR); -+} -+ -+void qbman_swp_interrupt_set_vanish(struct qbman_swp *p, uint32_t mask) -+{ -+ qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_ISDR, mask); -+} -+ -+uint32_t qbman_swp_interrupt_read_status(struct qbman_swp *p) -+{ -+ return qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_ISR); -+} -+ -+void qbman_swp_interrupt_clear_status(struct qbman_swp *p, uint32_t mask) -+{ -+ qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_ISR, mask); -+} -+ -+uint32_t qbman_swp_interrupt_get_trigger(struct qbman_swp *p) -+{ -+ return qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_IER); -+} -+ -+void qbman_swp_interrupt_set_trigger(struct qbman_swp *p, uint32_t mask) -+{ -+ qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_IER, mask); -+} -+ -+int qbman_swp_interrupt_get_inhibit(struct qbman_swp *p) -+{ -+ return qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_IIR); -+} -+ -+void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit) -+{ -+ qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_IIR, inhibit ? 0xffffffff : 0); -+} -+ -+/***********************/ -+/* Management commands */ -+/***********************/ -+ -+/* -+ * Internal code common to all types of management commands. -+ */ -+ -+void *qbman_swp_mc_start(struct qbman_swp *p) -+{ -+ void *ret; -+#ifdef QBMAN_CHECKING -+ BUG_ON(p->mc.check != swp_mc_can_start); -+#endif -+ ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR); -+#ifdef QBMAN_CHECKING -+ /* advance the state machine only if we actually got a CR buffer */ -+ if (ret) -+ p->mc.check = swp_mc_can_submit; -+#endif -+ return ret; -+} -+ -+void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint32_t cmd_verb) -+{ -+ uint32_t *v = cmd; -+#ifdef QBMAN_CHECKING -+ BUG_ON(p->mc.check != swp_mc_can_submit); -+#endif -+ /* TBD: "|=" is going to hurt performance. Need to move as many fields -+ * out of word zero, and for those that remain, the "OR" needs to occur -+ * at the caller side. This debug check helps to catch cases where the -+ * caller wants to OR but has forgotten to do so.
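For orientation, this is the call pattern the three management-command functions implement, condensed from the qbman_swp_acquire() path later in this file; treat it as a hedged sketch rather than new API:

	static uint32_t *mc_example(struct qbman_swp *p)
	{
		uint32_t *cmd = qbman_swp_mc_start(p);

		if (!cmd)
			return NULL;			/* CR slot not free yet */
		/* ... encode command fields into cmd[] here ... */
		qbman_swp_mc_submit(p, cmd, cmd[0] | QBMAN_MC_ACQUIRE);
		do
			cmd = qbman_swp_mc_result(p);	/* NULL until hardware responds */
		while (!cmd);
		return cmd;				/* response cacheline */
	}

Note that the caller ORs its verb into word 0 and mc_submit() is what merges in the valid bit.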
*/ -+ BUG_ON((*v & cmd_verb) != *v); -+ *v = cmd_verb | p->mc.valid_bit; -+ qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd); -+#ifdef QBMAN_CHECKING -+ p->mc.check = swp_mc_can_poll; -+#endif -+} -+ -+void *qbman_swp_mc_result(struct qbman_swp *p) -+{ -+ uint32_t *ret, verb; -+#ifdef QBMAN_CHECKING -+ BUG_ON(p->mc.check != swp_mc_can_poll); -+#endif -+ qbman_cena_invalidate_prefetch(&p->sys, -+ QBMAN_CENA_SWP_RR(p->mc.valid_bit)); -+ ret = qbman_cena_read(&p->sys, QBMAN_CENA_SWP_RR(p->mc.valid_bit)); -+ /* Remove the valid-bit - command completed iff the rest is non-zero */ -+ verb = ret[0] & ~QB_VALID_BIT; -+ if (!verb) -+ return NULL; -+#ifdef QBMAN_CHECKING -+ p->mc.check = swp_mc_can_start; -+#endif -+ p->mc.valid_bit ^= QB_VALID_BIT; -+ return ret; -+} -+ -+/***********/ -+/* Enqueue */ -+/***********/ -+ -+/* These should be const, eventually */ -+static struct qb_attr_code code_eq_cmd = QB_CODE(0, 0, 2); -+static struct qb_attr_code code_eq_eqdi = QB_CODE(0, 3, 1); -+static struct qb_attr_code code_eq_dca_en = QB_CODE(0, 15, 1); -+static struct qb_attr_code code_eq_dca_pk = QB_CODE(0, 14, 1); -+/* Can't set code_eq_dca_idx width. Need qman version. Read at runtime */ -+static struct qb_attr_code code_eq_orp_en = QB_CODE(0, 2, 1); -+static struct qb_attr_code code_eq_orp_is_nesn = QB_CODE(0, 31, 1); -+static struct qb_attr_code code_eq_orp_nlis = QB_CODE(0, 30, 1); -+static struct qb_attr_code code_eq_orp_seqnum = QB_CODE(0, 16, 14); -+static struct qb_attr_code code_eq_opr_id = QB_CODE(1, 0, 16); -+static struct qb_attr_code code_eq_tgt_id = QB_CODE(2, 0, 24); -+/* static struct qb_attr_code code_eq_tag = QB_CODE(3, 0, 32); */ -+static struct qb_attr_code code_eq_qd_en = QB_CODE(0, 4, 1); -+static struct qb_attr_code code_eq_qd_bin = QB_CODE(4, 0, 16); -+static struct qb_attr_code code_eq_qd_pri = QB_CODE(4, 16, 4); -+static struct qb_attr_code code_eq_rsp_stash = QB_CODE(5, 16, 1); -+static struct qb_attr_code code_eq_rsp_id = QB_CODE(5, 24, 8); -+static struct qb_attr_code code_eq_rsp_lo = QB_CODE(6, 0, 32); -+ -+enum qbman_eq_cmd_e { -+ /* No enqueue, primarily for plugging ORP gaps for dropped frames */ -+ qbman_eq_cmd_empty, -+ /* DMA an enqueue response once complete */ -+ qbman_eq_cmd_respond, -+ /* DMA an enqueue response only if the enqueue fails */ -+ qbman_eq_cmd_respond_reject -+}; -+ -+void qbman_eq_desc_clear(struct qbman_eq_desc *d) -+{ -+ memset(d, 0, sizeof(*d)); -+} -+ -+void qbman_eq_desc_set_no_orp(struct qbman_eq_desc *d, int respond_success) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_eq_orp_en, cl, 0); -+ qb_attr_code_encode(&code_eq_cmd, cl, -+ respond_success ? qbman_eq_cmd_respond : -+ qbman_eq_cmd_respond_reject); -+} -+ -+void qbman_eq_desc_set_orp(struct qbman_eq_desc *d, int respond_success, -+ uint32_t opr_id, uint32_t seqnum, int incomplete) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_eq_orp_en, cl, 1); -+ qb_attr_code_encode(&code_eq_cmd, cl, -+ respond_success ? 
qbman_eq_cmd_respond : -+ qbman_eq_cmd_respond_reject); -+ qb_attr_code_encode(&code_eq_opr_id, cl, opr_id); -+ qb_attr_code_encode(&code_eq_orp_seqnum, cl, seqnum); -+ qb_attr_code_encode(&code_eq_orp_nlis, cl, !!incomplete); -+} -+ -+void qbman_eq_desc_set_orp_hole(struct qbman_eq_desc *d, uint32_t opr_id, -+ uint32_t seqnum) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_eq_orp_en, cl, 1); -+ qb_attr_code_encode(&code_eq_cmd, cl, qbman_eq_cmd_empty); -+ qb_attr_code_encode(&code_eq_opr_id, cl, opr_id); -+ qb_attr_code_encode(&code_eq_orp_seqnum, cl, seqnum); -+ qb_attr_code_encode(&code_eq_orp_nlis, cl, 0); -+ qb_attr_code_encode(&code_eq_orp_is_nesn, cl, 0); -+} -+ -+void qbman_eq_desc_set_orp_nesn(struct qbman_eq_desc *d, uint32_t opr_id, -+ uint32_t seqnum) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_eq_orp_en, cl, 1); -+ qb_attr_code_encode(&code_eq_cmd, cl, qbman_eq_cmd_empty); -+ qb_attr_code_encode(&code_eq_opr_id, cl, opr_id); -+ qb_attr_code_encode(&code_eq_orp_seqnum, cl, seqnum); -+ qb_attr_code_encode(&code_eq_orp_nlis, cl, 0); -+ qb_attr_code_encode(&code_eq_orp_is_nesn, cl, 1); -+} -+ -+void qbman_eq_desc_set_response(struct qbman_eq_desc *d, -+ dma_addr_t storage_phys, -+ int stash) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode_64(&code_eq_rsp_lo, (uint64_t *)cl, storage_phys); -+ qb_attr_code_encode(&code_eq_rsp_stash, cl, !!stash); -+} -+ -+void qbman_eq_desc_set_token(struct qbman_eq_desc *d, uint8_t token) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_eq_rsp_id, cl, (uint32_t)token); -+} -+ -+void qbman_eq_desc_set_fq(struct qbman_eq_desc *d, uint32_t fqid) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_eq_qd_en, cl, 0); -+ qb_attr_code_encode(&code_eq_tgt_id, cl, fqid); -+} -+ -+void qbman_eq_desc_set_qd(struct qbman_eq_desc *d, uint32_t qdid, -+ uint32_t qd_bin, uint32_t qd_prio) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_eq_qd_en, cl, 1); -+ qb_attr_code_encode(&code_eq_tgt_id, cl, qdid); -+ qb_attr_code_encode(&code_eq_qd_bin, cl, qd_bin); -+ qb_attr_code_encode(&code_eq_qd_pri, cl, qd_prio); -+} -+ -+void qbman_eq_desc_set_eqdi(struct qbman_eq_desc *d, int enable) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_eq_eqdi, cl, !!enable); -+} -+ -+void qbman_eq_desc_set_dca(struct qbman_eq_desc *d, int enable, -+ uint32_t dqrr_idx, int park) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_eq_dca_en, cl, !!enable); -+ if (enable) { -+ qb_attr_code_encode(&code_eq_dca_pk, cl, !!park); -+ qb_attr_code_encode(&code_eq_dca_idx, cl, dqrr_idx); -+ } -+} -+ -+#define EQAR_IDX(eqar) ((eqar) & 0x7) -+#define EQAR_VB(eqar) ((eqar) & 0x80) -+#define EQAR_SUCCESS(eqar) ((eqar) & 0x100) -+static int qbman_swp_enqueue_array_mode(struct qbman_swp *s, -+ const struct qbman_eq_desc *d, -+ const struct qbman_fd *fd) -+{ -+ uint32_t *p; -+ const uint32_t *cl = qb_cl(d); -+ uint32_t eqar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_EQAR); -+ -+ pr_debug("EQAR=%08x\n", eqar); -+ if (!EQAR_SUCCESS(eqar)) -+ return -EBUSY; -+ p = qbman_cena_write_start_wo_shadow(&s->sys, -+ QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); -+ word_copy(&p[1], &cl[1], 7); -+ word_copy(&p[8], fd, sizeof(*fd) >> 2); -+ /* Set the verb byte, have to substitute in the valid-bit */ -+ lwsync(); -+ p[0] = cl[0] | EQAR_VB(eqar); -+ qbman_cena_write_complete_wo_shadow(&s->sys, -+ QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); -+ return 0; -+} -+ -+static int qbman_swp_enqueue_ring_mode(struct qbman_swp 
*s, -+ const struct qbman_eq_desc *d, -+ const struct qbman_fd *fd) -+{ -+ uint32_t *p; -+ const uint32_t *cl = qb_cl(d); -+ uint32_t eqcr_ci; -+ uint8_t diff; -+ -+ if (!s->eqcr.available) { -+ eqcr_ci = s->eqcr.ci; -+ s->eqcr.ci = qbman_cena_read_reg(&s->sys, -+ QBMAN_CENA_SWP_EQCR_CI) & 0xF; -+ diff = qm_cyc_diff(QBMAN_EQCR_SIZE, -+ eqcr_ci, s->eqcr.ci); -+ s->eqcr.available += diff; -+ if (!diff) -+ return -EBUSY; -+ } -+ -+ p = qbman_cena_write_start_wo_shadow(&s->sys, -+ QBMAN_CENA_SWP_EQCR(s->eqcr.pi & 7)); -+ word_copy(&p[1], &cl[1], 7); -+ word_copy(&p[8], fd, sizeof(*fd) >> 2); -+ lwsync(); -+ /* Set the verb byte, have to substitute in the valid-bit */ -+ p[0] = cl[0] | s->eqcr.pi_vb; -+ qbman_cena_write_complete_wo_shadow(&s->sys, -+ QBMAN_CENA_SWP_EQCR(s->eqcr.pi & 7)); -+ s->eqcr.pi++; -+ s->eqcr.pi &= 0xF; -+ s->eqcr.available--; -+ if (!(s->eqcr.pi & 7)) -+ s->eqcr.pi_vb ^= QB_VALID_BIT; -+ return 0; -+} -+ -+int qbman_swp_fill_ring(struct qbman_swp *s, -+ const struct qbman_eq_desc *d, -+ const struct qbman_fd *fd, -+ __attribute__((unused)) uint8_t burst_index) -+{ -+ uint32_t *p; -+ const uint32_t *cl = qb_cl(d); -+ uint32_t eqcr_ci; -+ uint8_t diff; -+ -+ if (!s->eqcr.available) { -+ eqcr_ci = s->eqcr.ci; -+ s->eqcr.ci = qbman_cena_read_reg(&s->sys, -+ QBMAN_CENA_SWP_EQCR_CI) & 0xF; -+ diff = qm_cyc_diff(QBMAN_EQCR_SIZE, -+ eqcr_ci, s->eqcr.ci); -+ s->eqcr.available += diff; -+ if (!diff) -+ return -EBUSY; -+ -+ } -+ p = qbman_cena_write_start_wo_shadow(&s->sys, -+ QBMAN_CENA_SWP_EQCR((s->eqcr.pi/* +burst_index */) & 7)); -+ /* word_copy(&p[1], &cl[1], 7); */ -+ memcpy(&p[1], &cl[1], 7*4); -+ /* word_copy(&p[8], fd, sizeof(*fd) >> 2); */ -+ memcpy(&p[8], fd, sizeof(struct qbman_fd)); -+ -+ /* lwsync(); */ -+ p[0] = cl[0] | s->eqcr.pi_vb; -+ -+ s->eqcr.pi++; -+ s->eqcr.pi &= 0xF; -+ s->eqcr.available--; -+ if (!(s->eqcr.pi & 7)) -+ s->eqcr.pi_vb ^= QB_VALID_BIT; -+ -+ return 0; -+} -+ -+int qbman_swp_flush_ring(struct qbman_swp *s) -+{ -+ void *ptr = s->sys.addr_cena; -+ -+ dcbf((uint64_t)ptr); -+ dcbf((uint64_t)ptr + 0x40); -+ dcbf((uint64_t)ptr + 0x80); -+ dcbf((uint64_t)ptr + 0xc0); -+ dcbf((uint64_t)ptr + 0x100); -+ dcbf((uint64_t)ptr + 0x140); -+ dcbf((uint64_t)ptr + 0x180); -+ dcbf((uint64_t)ptr + 0x1c0); -+ -+ return 0; -+} -+ -+void qbman_sync(void) -+{ -+ lwsync(); -+} -+ -+int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d, -+ const struct qbman_fd *fd) -+{ -+ if (s->sys.eqcr_mode == qman_eqcr_vb_array) -+ return qbman_swp_enqueue_array_mode(s, d, fd); -+ else /* Use ring mode by default */ -+ return qbman_swp_enqueue_ring_mode(s, d, fd); -+} -+ -+/*************************/ -+/* Static (push) dequeue */ -+/*************************/ -+ -+void qbman_swp_push_get(struct qbman_swp *s, uint8_t channel_idx, int *enabled) -+{ -+ struct qb_attr_code code = CODE_SDQCR_DQSRC(channel_idx); -+ -+ BUG_ON(channel_idx > 15); -+ *enabled = (int)qb_attr_code_decode(&code, &s->sdq); -+} -+ -+void qbman_swp_push_set(struct qbman_swp *s, uint8_t channel_idx, int enable) -+{ -+ uint16_t dqsrc; -+ struct qb_attr_code code = CODE_SDQCR_DQSRC(channel_idx); -+ -+ BUG_ON(channel_idx > 15); -+ qb_attr_code_encode(&code, &s->sdq, !!enable); -+ /* Read make the complete src map. 
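A minimal sketch of the enqueue path these helpers add up to: a fire-and-forget enqueue to a frame queue, with no order-restoration point and a response DMA'd only on rejection (swp, fqid and fd are assumed caller-supplied):

	static void enqueue_one(struct qbman_swp *s, uint32_t fqid,
				const struct qbman_fd *fd)
	{
		struct qbman_eq_desc ed;

		qbman_eq_desc_clear(&ed);
		qbman_eq_desc_set_no_orp(&ed, 0);	/* 0: respond only on rejection */
		qbman_eq_desc_set_fq(&ed, fqid);
		while (qbman_swp_enqueue(s, &ed, fd) == -EBUSY)
			;				/* EQCR full, spin and retry */
	}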
If no channels are enabled -+ the SDQCR must be 0 or else QMan will assert errors */ -+ dqsrc = (uint16_t)qb_attr_code_decode(&code_sdqcr_dqsrc, &s->sdq); -+ if (dqsrc != 0) -+ qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_SDQCR, s->sdq); -+ else -+ qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_SDQCR, 0); -+} -+ -+/***************************/ -+/* Volatile (pull) dequeue */ -+/***************************/ -+ -+/* These should be const, eventually */ -+static struct qb_attr_code code_pull_dct = QB_CODE(0, 0, 2); -+static struct qb_attr_code code_pull_dt = QB_CODE(0, 2, 2); -+static struct qb_attr_code code_pull_rls = QB_CODE(0, 4, 1); -+static struct qb_attr_code code_pull_stash = QB_CODE(0, 5, 1); -+static struct qb_attr_code code_pull_numframes = QB_CODE(0, 8, 4); -+static struct qb_attr_code code_pull_token = QB_CODE(0, 16, 8); -+static struct qb_attr_code code_pull_dqsource = QB_CODE(1, 0, 24); -+static struct qb_attr_code code_pull_rsp_lo = QB_CODE(2, 0, 32); -+ -+enum qb_pull_dt_e { -+ qb_pull_dt_channel, -+ qb_pull_dt_workqueue, -+ qb_pull_dt_framequeue -+}; -+ -+void qbman_pull_desc_clear(struct qbman_pull_desc *d) -+{ -+ memset(d, 0, sizeof(*d)); -+} -+ -+void qbman_pull_desc_set_storage(struct qbman_pull_desc *d, -+ struct qbman_result *storage, -+ dma_addr_t storage_phys, -+ int stash) -+{ -+ uint32_t *cl = qb_cl(d); -+ /* Squiggle the pointer 'storage' into the extra 2 words of the -+ * descriptor (which aren't copied to the hw command) */ -+ *(void **)&cl[4] = storage; -+ if (!storage) { -+ qb_attr_code_encode(&code_pull_rls, cl, 0); -+ return; -+ } -+ qb_attr_code_encode(&code_pull_rls, cl, 1); -+ qb_attr_code_encode(&code_pull_stash, cl, !!stash); -+ qb_attr_code_encode_64(&code_pull_rsp_lo, (uint64_t *)cl, storage_phys); -+} -+ -+void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d, uint8_t numframes) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ BUG_ON(!numframes || (numframes > 16)); -+ qb_attr_code_encode(&code_pull_numframes, cl, -+ (uint32_t)(numframes - 1)); -+} -+ -+void qbman_pull_desc_set_token(struct qbman_pull_desc *d, uint8_t token) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_pull_token, cl, token); -+} -+ -+void qbman_pull_desc_set_fq(struct qbman_pull_desc *d, uint32_t fqid) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_pull_dct, cl, 1); -+ qb_attr_code_encode(&code_pull_dt, cl, qb_pull_dt_framequeue); -+ qb_attr_code_encode(&code_pull_dqsource, cl, fqid); -+} -+ -+void qbman_pull_desc_set_wq(struct qbman_pull_desc *d, uint32_t wqid, -+ enum qbman_pull_type_e dct) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_pull_dct, cl, dct); -+ qb_attr_code_encode(&code_pull_dt, cl, qb_pull_dt_workqueue); -+ qb_attr_code_encode(&code_pull_dqsource, cl, wqid); -+} -+ -+void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, uint32_t chid, -+ enum qbman_pull_type_e dct) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_pull_dct, cl, dct); -+ qb_attr_code_encode(&code_pull_dt, cl, qb_pull_dt_channel); -+ qb_attr_code_encode(&code_pull_dqsource, cl, chid); -+} -+ -+int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d) -+{ -+ uint32_t *p; -+ uint32_t *cl = qb_cl(d); -+ -+ if (!atomic_dec_and_test(&s->vdq.busy)) { -+ atomic_inc(&s->vdq.busy); -+ return -EBUSY; -+ } -+ s->vdq.storage = *(void **)&cl[4]; -+ qb_attr_code_encode(&code_pull_token, cl, 1); -+ p = qbman_cena_write_start_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR); -+ word_copy(&p[1], &cl[1], 3); -+ /* Set the verb byte, have to substitute in the 
valid-bit */ -+ lwsync(); -+ p[0] = cl[0] | s->vdq.valid_bit; -+ s->vdq.valid_bit ^= QB_VALID_BIT; -+ qbman_cena_write_complete_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR); -+ return 0; -+} -+ -+/****************/ -+/* Polling DQRR */ -+/****************/ -+ -+static struct qb_attr_code code_dqrr_verb = QB_CODE(0, 0, 8); -+static struct qb_attr_code code_dqrr_response = QB_CODE(0, 0, 7); -+static struct qb_attr_code code_dqrr_stat = QB_CODE(0, 8, 8); -+static struct qb_attr_code code_dqrr_seqnum = QB_CODE(0, 16, 14); -+static struct qb_attr_code code_dqrr_odpid = QB_CODE(1, 0, 16); -+/* static struct qb_attr_code code_dqrr_tok = QB_CODE(1, 24, 8); */ -+static struct qb_attr_code code_dqrr_fqid = QB_CODE(2, 0, 24); -+static struct qb_attr_code code_dqrr_byte_count = QB_CODE(4, 0, 32); -+static struct qb_attr_code code_dqrr_frame_count = QB_CODE(5, 0, 24); -+static struct qb_attr_code code_dqrr_ctx_lo = QB_CODE(6, 0, 32); -+ -+#define QBMAN_RESULT_DQ 0x60 -+#define QBMAN_RESULT_FQRN 0x21 -+#define QBMAN_RESULT_FQRNI 0x22 -+#define QBMAN_RESULT_FQPN 0x24 -+#define QBMAN_RESULT_FQDAN 0x25 -+#define QBMAN_RESULT_CDAN 0x26 -+#define QBMAN_RESULT_CSCN_MEM 0x27 -+#define QBMAN_RESULT_CGCU 0x28 -+#define QBMAN_RESULT_BPSCN 0x29 -+#define QBMAN_RESULT_CSCN_WQ 0x2a -+ -+static struct qb_attr_code code_dqpi_pi = QB_CODE(0, 0, 4); -+ -+/* NULL return if there are no unconsumed DQRR entries. Returns a DQRR entry -+ * only once, so repeated calls can return a sequence of DQRR entries, without -+ * requiring they be consumed immediately or in any particular order. */ -+const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s) -+{ -+ uint32_t verb; -+ uint32_t response_verb; -+ uint32_t flags; -+ const struct qbman_result *dq; -+ const uint32_t *p; -+ -+ /* Before using valid-bit to detect if something is there, we have to -+ * handle the case of the DQRR reset bug... */ -+ if (unlikely(s->dqrr.reset_bug)) { -+ /* We pick up new entries by cache-inhibited producer index, -+ * which means that a non-coherent mapping would require us to -+ * invalidate and read *only* once that PI has indicated that -+ * there's an entry here. The first trip around the DQRR ring -+ * will be much less efficient than all subsequent trips around -+ * it... -+ */ -+ uint32_t dqpi = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_DQPI); -+ uint32_t pi = qb_attr_code_decode(&code_dqpi_pi, &dqpi); -+ /* there are new entries iff pi != next_idx */ -+ if (pi == s->dqrr.next_idx) -+ return NULL; -+ /* if next_idx is/was the last ring index, and 'pi' is -+ * different, we can disable the workaround as all the ring -+ * entries have now been DMA'd to so valid-bit checking is -+ * repaired. Note: this logic needs to be based on next_idx -+ * (which increments one at a time), rather than on pi (which -+ * can burst and wrap-around between our snapshots of it). -+ */ -+ BUG_ON((s->dqrr.dqrr_size - 1) < 0); -+ if (s->dqrr.next_idx == (s->dqrr.dqrr_size - 1u)) { -+ pr_debug("DEBUG: next_idx=%d, pi=%d, clear reset bug\n", -+ s->dqrr.next_idx, pi); -+ s->dqrr.reset_bug = 0; -+ } -+ qbman_cena_invalidate_prefetch(&s->sys, -+ QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); -+ } -+ dq = qbman_cena_read_wo_shadow(&s->sys, -+ QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); -+ p = qb_cl(dq); -+ verb = qb_attr_code_decode(&code_dqrr_verb, p); -+ /* If the valid-bit isn't of the expected polarity, nothing there. 
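The intended consumer loop around qbman_swp_dqrr_next()/qbman_swp_dqrr_consume() looks roughly like the sketch below; process_fd() is a hypothetical callback, not part of the driver:

	static void poll_portal_once(struct qbman_swp *s)
	{
		const struct qbman_result *dq;

		while ((dq = qbman_swp_dqrr_next(s)) != NULL) {
			if (qbman_result_is_DQ(dq))
				process_fd(qbman_result_DQ_fd(dq));	/* hypothetical */
			qbman_swp_dqrr_consume(s, dq);	/* give the slot back to QMan */
		}
	}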
Note, -+ * in the DQRR reset bug workaround, we shouldn't need to skip these -+ * check, because we've already determined that a new entry is available -+ * and we've invalidated the cacheline before reading it, so the -+ * valid-bit behaviour is repaired and should tell us what we already -+ * knew from reading PI. -+ */ -+ if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit) -+ return NULL; -+ -+ /* There's something there. Move "next_idx" attention to the next ring -+ * entry (and prefetch it) before returning what we found. */ -+ s->dqrr.next_idx++; -+ if (s->dqrr.next_idx == s->dqrr.dqrr_size) { -+ s->dqrr.next_idx = 0; -+ s->dqrr.valid_bit ^= QB_VALID_BIT; -+ } -+ /* If this is the final response to a volatile dequeue command -+ indicate that the vdq is no longer busy */ -+ flags = qbman_result_DQ_flags(dq); -+ response_verb = qb_attr_code_decode(&code_dqrr_response, &verb); -+ if ((response_verb == QBMAN_RESULT_DQ) && -+ (flags & QBMAN_DQ_STAT_VOLATILE) && -+ (flags & QBMAN_DQ_STAT_EXPIRED)) -+ atomic_inc(&s->vdq.busy); -+ -+ return dq; -+} -+ -+/* Consume DQRR entries previously returned from qbman_swp_dqrr_next(). */ -+void qbman_swp_dqrr_consume(struct qbman_swp *s, -+ const struct qbman_result *dq) -+{ -+ qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_DCAP, QBMAN_IDX_FROM_DQRR(dq)); -+} -+ -+/*********************************/ -+/* Polling user-provided storage */ -+/*********************************/ -+ -+int qbman_result_has_new_result(__attribute__((unused)) struct qbman_swp *s, -+ const struct qbman_result *dq) -+{ -+ /* To avoid converting the little-endian DQ entry to host-endian prior -+ * to us knowing whether there is a valid entry or not (and run the -+ * risk of corrupting the incoming hardware LE write), we detect in -+ * hardware endianness rather than host. This means we need a different -+ * "code" depending on whether we are BE or LE in software, which is -+ * where DQRR_TOK_OFFSET comes in... */ -+ static struct qb_attr_code code_dqrr_tok_detect = -+ QB_CODE(0, DQRR_TOK_OFFSET, 8); -+ /* The user trying to poll for a result treats "dq" as const. It is -+ * however the same address that was provided to us non-const in the -+ * first place, for directing hardware DMA to. So we can cast away the -+ * const because it is mutable from our perspective. */ -+ uint32_t *p = (uint32_t *)(unsigned long)qb_cl(dq); -+ uint32_t token; -+ -+ token = qb_attr_code_decode(&code_dqrr_tok_detect, &p[1]); -+ if (token != 1) -+ return 0; -+ qb_attr_code_encode(&code_dqrr_tok_detect, &p[1], 0); -+ -+ /* Only now do we convert from hardware to host endianness. Also, as we -+ * are returning success, the user has promised not to call us again, so -+ * there's no risk of us converting the endianness twice... */ -+ make_le32_n(p, 16); -+ return 1; -+} -+ -+int qbman_check_command_complete(struct qbman_swp *s, -+ const struct qbman_result *dq) -+{ -+ /* To avoid converting the little-endian DQ entry to host-endian prior -+ * to us knowing whether there is a valid entry or not (and run the -+ * risk of corrupting the incoming hardware LE write), we detect in -+ * hardware endianness rather than host. This means we need a different -+ * "code" depending on whether we are BE or LE in software, which is -+ * where DQRR_TOK_OFFSET comes in... */ -+ static struct qb_attr_code code_dqrr_tok_detect = -+ QB_CODE(0, DQRR_TOK_OFFSET, 8); -+ /* The user trying to poll for a result treats "dq" as const. 
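Putting the volatile-dequeue pieces together: a hedged sketch that issues a pull command into caller-provided DMA-able storage and spins on the token check above (storage and phys are assumed to come from the caller):

	static int pull_fq(struct qbman_swp *s, uint32_t fqid,
			   struct qbman_result *storage, dma_addr_t phys)
	{
		struct qbman_pull_desc pd;
		int err;

		qbman_pull_desc_clear(&pd);
		qbman_pull_desc_set_storage(&pd, storage, phys, 1);
		qbman_pull_desc_set_numframes(&pd, 8);
		qbman_pull_desc_set_fq(&pd, fqid);

		err = qbman_swp_pull(s, &pd);	/* -EBUSY if VDQCR is occupied */
		if (err)
			return err;
		while (!qbman_result_has_new_result(s, storage))
			;			/* wait for the DMA'd result */
		return 0;
	}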
It is -+ * however the same address that was provided to us non-const in the -+ * first place, for directing hardware DMA to. So we can cast away the -+ * const because it is mutable from our perspective. */ -+ uint32_t *p = (uint32_t *)(unsigned long)qb_cl(dq); -+ uint32_t token; -+ -+ token = qb_attr_code_decode(&code_dqrr_tok_detect, &p[1]); -+ if (token != 1) -+ return 0; -+ /* When the token is set, it indicates that the VDQ command has been -+ * fetched by QBMan, which is now working on it. It is then safe for -+ * software to issue another VDQ command, so increment the busy variable. */ -+ if (s->vdq.storage == dq) { -+ s->vdq.storage = NULL; -+ atomic_inc(&s->vdq.busy); -+ } -+ return 1; -+} -+ -+/********************************/ -+/* Categorising qbman results */ -+/********************************/ -+ -+static struct qb_attr_code code_result_in_mem = -+ QB_CODE(0, QBMAN_RESULT_VERB_OFFSET_IN_MEM, 7); -+ -+static inline int __qbman_result_is_x(const struct qbman_result *dq, -+ uint32_t x) -+{ -+ const uint32_t *p = qb_cl(dq); -+ uint32_t response_verb = qb_attr_code_decode(&code_dqrr_response, p); -+ -+ return (response_verb == x); -+} -+ -+static inline int __qbman_result_is_x_in_mem(const struct qbman_result *dq, -+ uint32_t x) -+{ -+ const uint32_t *p = qb_cl(dq); -+ uint32_t response_verb = qb_attr_code_decode(&code_result_in_mem, p); -+ -+ return (response_verb == x); -+} -+ -+int qbman_result_is_DQ(const struct qbman_result *dq) -+{ -+ return __qbman_result_is_x(dq, QBMAN_RESULT_DQ); -+} -+ -+int qbman_result_is_FQDAN(const struct qbman_result *dq) -+{ -+ return __qbman_result_is_x(dq, QBMAN_RESULT_FQDAN); -+} -+ -+int qbman_result_is_CDAN(const struct qbman_result *dq) -+{ -+ return __qbman_result_is_x(dq, QBMAN_RESULT_CDAN); -+} -+ -+int qbman_result_is_CSCN(const struct qbman_result *dq) -+{ -+ return __qbman_result_is_x_in_mem(dq, QBMAN_RESULT_CSCN_MEM) || -+ __qbman_result_is_x(dq, QBMAN_RESULT_CSCN_WQ); -+} -+ -+int qbman_result_is_BPSCN(const struct qbman_result *dq) -+{ -+ return __qbman_result_is_x_in_mem(dq, QBMAN_RESULT_BPSCN); -+} -+ -+int qbman_result_is_CGCU(const struct qbman_result *dq) -+{ -+ return __qbman_result_is_x_in_mem(dq, QBMAN_RESULT_CGCU); -+} -+ -+int qbman_result_is_FQRN(const struct qbman_result *dq) -+{ -+ return __qbman_result_is_x_in_mem(dq, QBMAN_RESULT_FQRN); -+} -+ -+int qbman_result_is_FQRNI(const struct qbman_result *dq) -+{ -+ return __qbman_result_is_x_in_mem(dq, QBMAN_RESULT_FQRNI); -+} -+ -+int qbman_result_is_FQPN(const struct qbman_result *dq) -+{ -+ return __qbman_result_is_x(dq, QBMAN_RESULT_FQPN); -+} -+ -+/*********************************/ -+/* Parsing frame dequeue results */ -+/*********************************/ -+ -+/* These APIs assume qbman_result_is_DQ() is TRUE */ -+ -+uint32_t qbman_result_DQ_flags(const struct qbman_result *dq) -+{ -+ const uint32_t *p = qb_cl(dq); -+ -+ return qb_attr_code_decode(&code_dqrr_stat, p); -+} -+ -+uint16_t qbman_result_DQ_seqnum(const struct qbman_result *dq) -+{ -+ const uint32_t *p = qb_cl(dq); -+ -+ return (uint16_t)qb_attr_code_decode(&code_dqrr_seqnum, p); -+} -+ -+uint16_t qbman_result_DQ_odpid(const struct qbman_result *dq) -+{ -+ const uint32_t *p = qb_cl(dq); -+ -+ return (uint16_t)qb_attr_code_decode(&code_dqrr_odpid, p); -+} -+ -+uint32_t qbman_result_DQ_fqid(const struct qbman_result *dq) -+{ -+ const uint32_t *p = qb_cl(dq); -+ -+ return qb_attr_code_decode(&code_dqrr_fqid, p); -+} -+ -+uint32_t qbman_result_DQ_byte_count(const struct qbman_result *dq) -+{ -+ const uint32_t *p = qb_cl(dq); -+ -+ 
return qb_attr_code_decode(&code_dqrr_byte_count, p); -+} -+ -+uint32_t qbman_result_DQ_frame_count(const struct qbman_result *dq) -+{ -+ const uint32_t *p = qb_cl(dq); -+ -+ return qb_attr_code_decode(&code_dqrr_frame_count, p); -+} -+ -+uint64_t qbman_result_DQ_fqd_ctx(const struct qbman_result *dq) -+{ -+ const uint64_t *p = (const uint64_t *)qb_cl(dq); -+ -+ return qb_attr_code_decode_64(&code_dqrr_ctx_lo, p); -+} -+ -+const struct qbman_fd *qbman_result_DQ_fd(const struct qbman_result *dq) -+{ -+ const uint32_t *p = qb_cl(dq); -+ -+ return (const struct qbman_fd *)&p[8]; -+} -+ -+/**************************************/ -+/* Parsing state-change notifications */ -+/**************************************/ -+ -+static struct qb_attr_code code_scn_state = QB_CODE(0, 16, 8); -+static struct qb_attr_code code_scn_rid = QB_CODE(1, 0, 24); -+static struct qb_attr_code code_scn_state_in_mem = -+ QB_CODE(0, SCN_STATE_OFFSET_IN_MEM, 8); -+static struct qb_attr_code code_scn_rid_in_mem = -+ QB_CODE(1, SCN_RID_OFFSET_IN_MEM, 24); -+static struct qb_attr_code code_scn_ctx_lo = QB_CODE(2, 0, 32); -+ -+uint8_t qbman_result_SCN_state(const struct qbman_result *scn) -+{ -+ const uint32_t *p = qb_cl(scn); -+ -+ return (uint8_t)qb_attr_code_decode(&code_scn_state, p); -+} -+ -+uint32_t qbman_result_SCN_rid(const struct qbman_result *scn) -+{ -+ const uint32_t *p = qb_cl(scn); -+ -+ return qb_attr_code_decode(&code_scn_rid, p); -+} -+ -+uint64_t qbman_result_SCN_ctx(const struct qbman_result *scn) -+{ -+ const uint64_t *p = (const uint64_t *)qb_cl(scn); -+ -+ return qb_attr_code_decode_64(&code_scn_ctx_lo, p); -+} -+ -+uint8_t qbman_result_SCN_state_in_mem(const struct qbman_result *scn) -+{ -+ const uint32_t *p = qb_cl(scn); -+ -+ return (uint8_t)qb_attr_code_decode(&code_scn_state_in_mem, p); -+} -+ -+uint32_t qbman_result_SCN_rid_in_mem(const struct qbman_result *scn) -+{ -+ const uint32_t *p = qb_cl(scn); -+ uint32_t result_rid; -+ -+ result_rid = qb_attr_code_decode(&code_scn_rid_in_mem, p); -+ return make_le24(result_rid); -+} -+ -+/*****************/ -+/* Parsing BPSCN */ -+/*****************/ -+uint16_t qbman_result_bpscn_bpid(const struct qbman_result *scn) -+{ -+ return (uint16_t)qbman_result_SCN_rid_in_mem(scn) & 0x3FFF; -+} -+ -+int qbman_result_bpscn_has_free_bufs(const struct qbman_result *scn) -+{ -+ return !(int)(qbman_result_SCN_state_in_mem(scn) & 0x1); -+} -+ -+int qbman_result_bpscn_is_depleted(const struct qbman_result *scn) -+{ -+ return (int)(qbman_result_SCN_state_in_mem(scn) & 0x2); -+} -+ -+int qbman_result_bpscn_is_surplus(const struct qbman_result *scn) -+{ -+ return (int)(qbman_result_SCN_state_in_mem(scn) & 0x4); -+} -+ -+uint64_t qbman_result_bpscn_ctx(const struct qbman_result *scn) -+{ -+ uint64_t ctx; -+ uint32_t ctx_hi, ctx_lo; -+ -+ ctx = qbman_result_SCN_ctx(scn); -+ ctx_hi = upper32(ctx); -+ ctx_lo = lower32(ctx); -+ return ((uint64_t)make_le32(ctx_hi) << 32 | -+ (uint64_t)make_le32(ctx_lo)); -+} -+ -+/*****************/ -+/* Parsing CGCU */ -+/*****************/ -+uint16_t qbman_result_cgcu_cgid(const struct qbman_result *scn) -+{ -+ return (uint16_t)qbman_result_SCN_rid_in_mem(scn) & 0xFFFF; -+} -+ -+uint64_t qbman_result_cgcu_icnt(const struct qbman_result *scn) -+{ -+ uint64_t ctx; -+ uint32_t ctx_hi, ctx_lo; -+ -+ ctx = qbman_result_SCN_ctx(scn); -+ ctx_hi = upper32(ctx); -+ ctx_lo = lower32(ctx); -+ return ((uint64_t)(make_le32(ctx_hi) & 0xFF) << 32) | -+ (uint64_t)make_le32(ctx_lo); -+} -+ -+/******************/ -+/* Buffer release */ -+/******************/ -+ 
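Before the implementation, a quick hedged sketch of how the release/acquire pair in this section is meant to be driven (bpid and the buffer addresses are assumed given by the caller):

	static int recycle_two(struct qbman_swp *s, uint32_t bpid,
			       uint64_t buf_a, uint64_t buf_b)
	{
		struct qbman_release_desc rd;
		uint64_t bufs[2] = { buf_a, buf_b };

		qbman_release_desc_clear(&rd);
		qbman_release_desc_set_bpid(&rd, bpid);
		while (qbman_swp_release(s, &rd, bufs, 2) == -EBUSY)
			;			/* RCR slot busy, retry */

		/* take them back: returns how many buffers were actually granted */
		return qbman_swp_acquire(s, bpid, bufs, 2);
	}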
-+/* These should be const, eventually */ -+/* static struct qb_attr_code code_release_num = QB_CODE(0, 0, 3); */ -+static struct qb_attr_code code_release_set_me = QB_CODE(0, 5, 1); -+static struct qb_attr_code code_release_rcdi = QB_CODE(0, 6, 1); -+static struct qb_attr_code code_release_bpid = QB_CODE(0, 16, 16); -+ -+void qbman_release_desc_clear(struct qbman_release_desc *d) -+{ -+ uint32_t *cl; -+ -+ memset(d, 0, sizeof(*d)); -+ cl = qb_cl(d); -+ qb_attr_code_encode(&code_release_set_me, cl, 1); -+} -+ -+void qbman_release_desc_set_bpid(struct qbman_release_desc *d, uint32_t bpid) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_release_bpid, cl, bpid); -+} -+ -+void qbman_release_desc_set_rcdi(struct qbman_release_desc *d, int enable) -+{ -+ uint32_t *cl = qb_cl(d); -+ -+ qb_attr_code_encode(&code_release_rcdi, cl, !!enable); -+} -+ -+#define RAR_IDX(rar) ((rar) & 0x7) -+#define RAR_VB(rar) ((rar) & 0x80) -+#define RAR_SUCCESS(rar) ((rar) & 0x100) -+ -+int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d, -+ const uint64_t *buffers, unsigned int num_buffers) -+{ -+ uint32_t *p; -+ const uint32_t *cl = qb_cl(d); -+ uint32_t rar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_RAR); -+ -+ pr_debug("RAR=%08x\n", rar); -+ if (!RAR_SUCCESS(rar)) -+ return -EBUSY; -+ BUG_ON(!num_buffers || (num_buffers > 7)); -+ /* Start the release command */ -+ p = qbman_cena_write_start_wo_shadow(&s->sys, -+ QBMAN_CENA_SWP_RCR(RAR_IDX(rar))); -+ /* Copy the caller's buffer pointers to the command */ -+ u64_to_le32_copy(&p[2], buffers, num_buffers); -+ /* Set the verb byte, have to substitute in the valid-bit and the number -+ * of buffers. */ -+ lwsync(); -+ p[0] = cl[0] | RAR_VB(rar) | num_buffers; -+ qbman_cena_write_complete_wo_shadow(&s->sys, -+ QBMAN_CENA_SWP_RCR(RAR_IDX(rar))); -+ return 0; -+} -+ -+/*******************/ -+/* Buffer acquires */ -+/*******************/ -+ -+/* These should be const, eventually */ -+static struct qb_attr_code code_acquire_bpid = QB_CODE(0, 16, 16); -+static struct qb_attr_code code_acquire_num = QB_CODE(1, 0, 3); -+static struct qb_attr_code code_acquire_r_num = QB_CODE(1, 0, 3); -+ -+int qbman_swp_acquire(struct qbman_swp *s, uint32_t bpid, uint64_t *buffers, -+ unsigned int num_buffers) -+{ -+ uint32_t *p; -+ uint32_t rslt, num; -+ -+ BUG_ON(!num_buffers || (num_buffers > 7)); -+ -+ /* Start the management command */ -+ p = qbman_swp_mc_start(s); -+ -+ if (!p) -+ return -EBUSY; -+ -+ /* Encode the caller-provided attributes */ -+ qb_attr_code_encode(&code_acquire_bpid, p, bpid); -+ qb_attr_code_encode(&code_acquire_num, p, num_buffers); -+ -+ /* Complete the management command */ -+ p = qbman_swp_mc_complete(s, p, p[0] | QBMAN_MC_ACQUIRE); -+ -+ /* Decode the outcome */ -+ rslt = qb_attr_code_decode(&code_generic_rslt, p); -+ num = qb_attr_code_decode(&code_acquire_r_num, p); -+ BUG_ON(qb_attr_code_decode(&code_generic_verb, p) != QBMAN_MC_ACQUIRE); -+ -+ /* Determine success or failure */ -+ if (unlikely(rslt != QBMAN_MC_RSLT_OK)) { -+ pr_err("Acquire buffers from BPID 0x%x failed, code=0x%02x\n", -+ bpid, rslt); -+ return -EIO; -+ } -+ BUG_ON(num > num_buffers); -+ /* Copy the acquired buffers to the caller's array */ -+ u64_from_le32_copy(buffers, &p[2], num); -+ return (int)num; -+} -+ -+/*****************/ -+/* FQ management */ -+/*****************/ -+ -+static struct qb_attr_code code_fqalt_fqid = QB_CODE(1, 0, 32); -+ -+static int qbman_swp_alt_fq_state(struct qbman_swp *s, uint32_t fqid, -+ uint8_t alt_fq_verb) -+{ -+ 
uint32_t *p; -+ uint32_t rslt; -+ -+ /* Start the management command */ -+ p = qbman_swp_mc_start(s); -+ if (!p) -+ return -EBUSY; -+ -+ qb_attr_code_encode(&code_fqalt_fqid, p, fqid); -+ /* Complete the management command */ -+ p = qbman_swp_mc_complete(s, p, p[0] | alt_fq_verb); -+ -+ /* Decode the outcome */ -+ rslt = qb_attr_code_decode(&code_generic_rslt, p); -+ BUG_ON(qb_attr_code_decode(&code_generic_verb, p) != alt_fq_verb); -+ -+ /* Determine success or failure */ -+ if (unlikely(rslt != QBMAN_MC_RSLT_OK)) { -+ pr_err("ALT FQID %d failed: verb = 0x%08x, code = 0x%02x\n", -+ fqid, alt_fq_verb, rslt); -+ return -EIO; -+ } -+ -+ return 0; -+} -+ -+int qbman_swp_fq_schedule(struct qbman_swp *s, uint32_t fqid) -+{ -+ return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_SCHEDULE); -+} -+ -+int qbman_swp_fq_force(struct qbman_swp *s, uint32_t fqid) -+{ -+ return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_FORCE); -+} -+ -+int qbman_swp_fq_xon(struct qbman_swp *s, uint32_t fqid) -+{ -+ return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_XON); -+} -+ -+int qbman_swp_fq_xoff(struct qbman_swp *s, uint32_t fqid) -+{ -+ return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_XOFF); -+} -+ -+/**********************/ -+/* Channel management */ -+/**********************/ -+ -+static struct qb_attr_code code_cdan_cid = QB_CODE(0, 16, 12); -+static struct qb_attr_code code_cdan_we = QB_CODE(1, 0, 8); -+static struct qb_attr_code code_cdan_en = QB_CODE(1, 8, 1); -+static struct qb_attr_code code_cdan_ctx_lo = QB_CODE(2, 0, 32); -+ -+/* Hide "ICD" for now as we don't use it, don't set it, and don't test it, so it -+ * would be irresponsible to expose it. */ -+#define CODE_CDAN_WE_EN 0x1 -+#define CODE_CDAN_WE_CTX 0x4 -+ -+static int qbman_swp_CDAN_set(struct qbman_swp *s, uint16_t channelid, -+ uint8_t we_mask, uint8_t cdan_en, -+ uint64_t ctx) -+{ -+ uint32_t *p; -+ uint32_t rslt; -+ -+ /* Start the management command */ -+ p = qbman_swp_mc_start(s); -+ if (!p) -+ return -EBUSY; -+ -+ /* Encode the caller-provided attributes */ -+ qb_attr_code_encode(&code_cdan_cid, p, channelid); -+ qb_attr_code_encode(&code_cdan_we, p, we_mask); -+ qb_attr_code_encode(&code_cdan_en, p, cdan_en); -+ qb_attr_code_encode_64(&code_cdan_ctx_lo, (uint64_t *)p, ctx); -+ /* Complete the management command */ -+ p = qbman_swp_mc_complete(s, p, p[0] | QBMAN_WQCHAN_CONFIGURE); -+ -+ /* Decode the outcome */ -+ rslt = qb_attr_code_decode(&code_generic_rslt, p); -+ BUG_ON(qb_attr_code_decode(&code_generic_verb, p) -+ != QBMAN_WQCHAN_CONFIGURE); -+ -+ /* Determine success or failure */ -+ if (unlikely(rslt != QBMAN_MC_RSLT_OK)) { -+ pr_err("CDAN cQID %d failed: code = 0x%02x\n", -+ channelid, rslt); -+ return -EIO; -+ } -+ -+ return 0; -+} -+ -+int qbman_swp_CDAN_set_context(struct qbman_swp *s, uint16_t channelid, -+ uint64_t ctx) -+{ -+ return qbman_swp_CDAN_set(s, channelid, -+ CODE_CDAN_WE_CTX, -+ 0, ctx); -+} -+ -+int qbman_swp_CDAN_enable(struct qbman_swp *s, uint16_t channelid) -+{ -+ return qbman_swp_CDAN_set(s, channelid, -+ CODE_CDAN_WE_EN, -+ 1, 0); -+} -+ -+int qbman_swp_CDAN_disable(struct qbman_swp *s, uint16_t channelid) -+{ -+ return qbman_swp_CDAN_set(s, channelid, -+ CODE_CDAN_WE_EN, -+ 0, 0); -+} -+ -+int qbman_swp_CDAN_set_context_enable(struct qbman_swp *s, uint16_t channelid, -+ uint64_t ctx) -+{ -+ return qbman_swp_CDAN_set(s, channelid, -+ CODE_CDAN_WE_EN | CODE_CDAN_WE_CTX, -+ 1, ctx); -+} -+ -+uint8_t qbman_get_dqrr_idx(struct qbman_result *dqrr) -+{ -+ return QBMAN_IDX_FROM_DQRR(dqrr); -+} -+ -+struct qbman_result 
*qbman_get_dqrr_from_idx(struct qbman_swp *s, uint8_t idx) -+{ -+ struct qbman_result *dq; -+ -+ dq = qbman_cena_read(&s->sys, QBMAN_CENA_SWP_DQRR(idx)); -+ return dq; -+} -+ -+int qbman_swp_send_multiple(struct qbman_swp *s, -+ const struct qbman_eq_desc *d, -+ const struct qbman_fd *fd, -+ int frames_to_send) -+{ -+ uint32_t *p; -+ const uint32_t *cl = qb_cl(d); -+ uint32_t eqcr_ci; -+ uint8_t diff; -+ int sent = 0; -+ int i; -+ int initial_pi = s->eqcr.pi; -+ uint64_t start_pointer; -+ -+ if (!s->eqcr.available) { -+ eqcr_ci = s->eqcr.ci; -+ s->eqcr.ci = qbman_cena_read_reg(&s->sys, -+ QBMAN_CENA_SWP_EQCR_CI) & 0xF; -+ diff = qm_cyc_diff(QBMAN_EQCR_SIZE, -+ eqcr_ci, s->eqcr.ci); -+ if (!diff) -+ goto done; -+ s->eqcr.available += diff; -+ } -+ -+ /* we are trying to send frames_to_send if we have enough space in the ring */ -+ while (s->eqcr.available && frames_to_send--) { -+ p = qbman_cena_write_start_wo_shadow_fast(&s->sys, -+ QBMAN_CENA_SWP_EQCR((initial_pi) & 7)); -+ /* Write command (except of first byte) and FD */ -+ memcpy(&p[1], &cl[1], 7*4); -+ memcpy(&p[8], &fd[sent], sizeof(struct qbman_fd)); -+ -+ initial_pi++; -+ initial_pi &= 0xF; -+ s->eqcr.available--; -+ sent++; -+ } -+ -+ done: -+ initial_pi = s->eqcr.pi; -+ lwsync(); -+ -+ /* in order for flushes to complete faster */ -+ /*For that we use a following trick: we record all lines in 32 bit word */ -+ -+ initial_pi = s->eqcr.pi; -+ for (i = 0; i < sent; i++) { -+ p = qbman_cena_write_start_wo_shadow_fast(&s->sys, -+ QBMAN_CENA_SWP_EQCR((initial_pi) & 7)); -+ -+ p[0] = cl[0] | s->eqcr.pi_vb; -+ initial_pi++; -+ initial_pi &= 0xF; -+ -+ if (!(initial_pi & 7)) -+ s->eqcr.pi_vb ^= QB_VALID_BIT; -+ } -+ -+ initial_pi = s->eqcr.pi; -+ -+ /* We need to flush all the lines but without load/store operations between them */ -+ /* We assign start_pointer before we start loop so that in loop we do not read it from memory */ -+ start_pointer = (uint64_t)s->sys.addr_cena; -+ for (i = 0; i < sent; i++) { -+ p = (uint32_t *)(start_pointer + QBMAN_CENA_SWP_EQCR(initial_pi & 7)); -+ dcbf((uint64_t)p); -+ initial_pi++; -+ initial_pi &= 0xF; -+ } -+ -+ /* Update producer index for the next call */ -+ s->eqcr.pi = initial_pi; -+ -+ return sent; -+} -diff --git a/drivers/net/dpaa2/qbman/driver/qbman_portal.h b/drivers/net/dpaa2/qbman/driver/qbman_portal.h -new file mode 100644 -index 0000000..17f1c53 ---- /dev/null -+++ b/drivers/net/dpaa2/qbman/driver/qbman_portal.h -@@ -0,0 +1,270 @@ -+/* Copyright (C) 2014 Freescale Semiconductor, Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Freescale Semiconductor nor the -+ * names of its contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY -+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+ * DISCLAIMED. 
IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY -+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#include "qbman_private.h" -+#include -+ -+uint32_t qman_version; -+/* All QBMan command and result structures use this "valid bit" encoding */ -+#define QB_VALID_BIT ((uint32_t)0x80) -+ -+/* Management command result codes */ -+#define QBMAN_MC_RSLT_OK 0xf0 -+ -+/* QBMan DQRR size is set at runtime in qbman_portal.c */ -+ -+#define QBMAN_EQCR_SIZE 8 -+ -+static inline u8 qm_cyc_diff(u8 ringsize, u8 first, u8 last) -+{ -+ /* 'first' is included, 'last' is excluded */ -+ if (first <= last) -+ return last - first; -+ return (2 * ringsize) + last - first; -+} -+ -+/* --------------------- */ -+/* portal data structure */ -+/* --------------------- */ -+ -+struct qbman_swp { -+ const struct qbman_swp_desc *desc; -+ /* The qbman_sys (ie. arch/OS-specific) support code can put anything it -+ * needs in here. */ -+ struct qbman_swp_sys sys; -+ /* Management commands */ -+ struct { -+#ifdef QBMAN_CHECKING -+ enum swp_mc_check { -+ swp_mc_can_start, /* call __qbman_swp_mc_start() */ -+ swp_mc_can_submit, /* call __qbman_swp_mc_submit() */ -+ swp_mc_can_poll, /* call __qbman_swp_mc_result() */ -+ } check; -+#endif -+ uint32_t valid_bit; /* 0x00 or 0x80 */ -+ } mc; -+ /* Push dequeues */ -+ uint32_t sdq; -+ /* Volatile dequeues */ -+ struct { -+ /* VDQCR supports a "1 deep pipeline", meaning that if you know -+ * the last-submitted command is already executing in the -+ * hardware (as evidenced by at least 1 valid dequeue result), -+ * you can write another dequeue command to the register, the -+ * hardware will start executing it as soon as the -+ * already-executing command terminates. (This minimises latency -+ * and stalls.) With that in mind, this "busy" variable refers -+ * to whether or not a command can be submitted, not whether or -+ * not a previously-submitted command is still executing. In -+ * other words, once proof is seen that the previously-submitted -+ * command is executing, "vdq" is no longer "busy". */ -+ atomic_t busy; -+ uint32_t valid_bit; /* 0x00 or 0x80 */ -+ /* We need to determine when vdq is no longer busy. This depends -+ * on whether the "busy" (last-submitted) dequeue command is -+ * targeting DQRR or main-memory, and detected is based on the -+ * presence of the dequeue command's "token" showing up in -+ * dequeue entries in DQRR or main-memory (respectively). */ -+ struct qbman_result *storage; /* NULL if DQRR */ -+ } vdq; -+ /* DQRR */ -+ struct { -+ uint32_t next_idx; -+ uint32_t valid_bit; -+ uint8_t dqrr_size; -+ int reset_bug; -+ } dqrr; -+ struct { -+ uint32_t pi; -+ uint32_t pi_vb; -+ uint32_t ci; -+ int available; -+ } eqcr; -+}; -+ -+/* -------------------------- */ -+/* portal management commands */ -+/* -------------------------- */ -+ -+/* Different management commands all use this common base layer of code to issue -+ * commands and poll for results. 
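The EQCR accounting in qbman_portal.c leans on qm_cyc_diff() above; the following standalone program checks the arithmetic with hypothetical index snapshots (the helper is a local copy, not the driver's symbol):

	#include <stdint.h>
	#include <stdio.h>

	/* local copy of qm_cyc_diff(): 'first' included, 'last' excluded,
	 * measured on a double-length cycle so full vs. empty is unambiguous */
	static uint8_t cyc_diff(uint8_t ringsize, uint8_t first, uint8_t last)
	{
		if (first <= last)
			return last - first;
		return (2 * ringsize) + last - first;
	}

	int main(void)
	{
		/* EQCR is 8 deep but PI/CI are tracked modulo 16 (the 0xF masks) */
		uint8_t ci = 0x3, pi = 0x9;	/* hypothetical snapshot */
		uint8_t in_flight = cyc_diff(8, ci, pi);
		printf("in_flight=%u available=%u\n", in_flight, 8 - in_flight);

		ci = 0xD; pi = 0x2;		/* wrapped case: 16 + 2 - 13 = 5 */
		printf("wrapped=%u\n", cyc_diff(8, ci, pi));
		return 0;
	}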
The first function returns a pointer to where -+ * the caller should fill in their MC command (though they should ignore the -+ * verb byte), the second function commits merges in the caller-supplied command -+ * verb (which should not include the valid-bit) and submits the command to -+ * hardware, and the third function checks for a completed response (returns -+ * non-NULL if only if the response is complete). */ -+void *qbman_swp_mc_start(struct qbman_swp *p); -+void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint32_t cmd_verb); -+void *qbman_swp_mc_result(struct qbman_swp *p); -+ -+/* Wraps up submit + poll-for-result */ -+static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd, -+ uint32_t cmd_verb) -+{ -+ int loopvar; -+ -+ qbman_swp_mc_submit(swp, cmd, cmd_verb); -+ DBG_POLL_START(loopvar); -+ do { -+ DBG_POLL_CHECK(loopvar); -+ cmd = qbman_swp_mc_result(swp); -+ } while (!cmd); -+ return cmd; -+} -+ -+/* ------------ */ -+/* qb_attr_code */ -+/* ------------ */ -+ -+/* This struct locates a sub-field within a QBMan portal (CENA) cacheline which -+ * is either serving as a configuration command or a query result. The -+ * representation is inherently little-endian, as the indexing of the words is -+ * itself little-endian in nature and DPAA2 QBMan is little endian for anything -+ * that crosses a word boundary too (64-bit fields are the obvious examples). -+ */ -+struct qb_attr_code { -+ unsigned int word; /* which uint32_t[] array member encodes the field */ -+ unsigned int lsoffset; /* encoding offset from ls-bit */ -+ unsigned int width; /* encoding width. (bool must be 1.) */ -+}; -+ -+/* Some pre-defined codes */ -+extern struct qb_attr_code code_generic_verb; -+extern struct qb_attr_code code_generic_rslt; -+ -+/* Macros to define codes */ -+#define QB_CODE(a, b, c) { a, b, c} -+#define QB_CODE_NULL \ -+ QB_CODE((unsigned int)-1, (unsigned int)-1, (unsigned int)-1) -+ -+/* Rotate a code "ms", meaning that it moves from less-significant bytes to -+ * more-significant, from less-significant words to more-significant, etc. The -+ * "ls" version does the inverse, from more-significant towards -+ * less-significant. -+ */ -+static inline void qb_attr_code_rotate_ms(struct qb_attr_code *code, -+ unsigned int bits) -+{ -+ code->lsoffset += bits; -+ while (code->lsoffset > 31) { -+ code->word++; -+ code->lsoffset -= 32; -+ } -+} -+ -+static inline void qb_attr_code_rotate_ls(struct qb_attr_code *code, -+ unsigned int bits) -+{ -+ /* Don't be fooled, this trick should work because the types are -+ * unsigned. So the case that interests the while loop (the rotate has -+ * gone too far and the word count needs to compensate for it), is -+ * manifested when lsoffset is negative. But that equates to a really -+ * large unsigned value, starting with lots of "F"s. As such, we can -+ * continue adding 32 back to it until it wraps back round above zero, -+ * to a value of 31 or less... -+ */ -+ code->lsoffset -= bits; -+ while (code->lsoffset > 31) { -+ code->word--; -+ code->lsoffset += 32; -+ } -+} -+ -+/* Implement a loop of code rotations until 'expr' evaluates to FALSE (0). 
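A standalone illustration of the (word, lsoffset, width) addressing that qb_attr_code implements. The shape matches QB_CODE(2, 0, 24), the FQID field used earlier, but the helpers here are local re-implementations of what the DECLARE_CODEC32 macros in qbman_private.h generate, not the driver's own:

	#include <stdint.h>
	#include <stdio.h>

	struct code { unsigned word, lsoffset, width; };

	static uint32_t decode(const struct code *c, const uint32_t *cl)
	{
		uint32_t mask = c->width == 32 ? 0xffffffff : (1u << c->width) - 1;
		return (cl[c->word] >> c->lsoffset) & mask;
	}

	static void encode(const struct code *c, uint32_t *cl, uint32_t val)
	{
		uint32_t mask = c->width == 32 ? 0xffffffff : (1u << c->width) - 1;
		cl[c->word] = (cl[c->word] & ~(mask << c->lsoffset))
			    | ((val & mask) << c->lsoffset);
	}

	int main(void)
	{
		uint32_t cacheline[16] = {0};
		struct code fqid = {2, 0, 24};	/* same shape as code_dqrr_fqid */

		encode(&fqid, cacheline, 0x123456);
		printf("fqid=0x%x\n", decode(&fqid, cacheline));	/* 0x123456 */
		return 0;
	}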
*/ -+#define qb_attr_code_for_ms(code, bits, expr) \ -+ for (; expr; qb_attr_code_rotate_ms(code, bits)) -+#define qb_attr_code_for_ls(code, bits, expr) \ -+ for (; expr; qb_attr_code_rotate_ls(code, bits)) -+ -+/* decode a field from a cacheline */ -+static inline uint32_t qb_attr_code_decode(const struct qb_attr_code *code, -+ const uint32_t *cacheline) -+{ -+ return d32_uint32_t(code->lsoffset, code->width, cacheline[code->word]); -+} -+ -+static inline uint64_t qb_attr_code_decode_64(const struct qb_attr_code *code, -+ const uint64_t *cacheline) -+{ -+ return cacheline[code->word / 2]; -+} -+ -+/* encode a field to a cacheline */ -+static inline void qb_attr_code_encode(const struct qb_attr_code *code, -+ uint32_t *cacheline, uint32_t val) -+{ -+ cacheline[code->word] = -+ r32_uint32_t(code->lsoffset, code->width, cacheline[code->word]) -+ | e32_uint32_t(code->lsoffset, code->width, val); -+} -+ -+static inline void qb_attr_code_encode_64(const struct qb_attr_code *code, -+ uint64_t *cacheline, uint64_t val) -+{ -+ cacheline[code->word / 2] = val; -+} -+ -+/* Small-width signed values (two's-complement) will decode into medium-width -+ * positives. (Eg. for an 8-bit signed field, which stores values from -128 to -+ * +127, a setting of -7 would appear to decode to the 32-bit unsigned value -+ * 249. Likewise -120 would decode as 136.) This function allows the caller to -+ * "re-sign" such fields to 32-bit signed. (Eg. -7, which was 249 with an 8-bit -+ * encoding, will become 0xfffffff9 if you cast the return value to uint32_t). -+ */ -+static inline int32_t qb_attr_code_makesigned(const struct qb_attr_code *code, -+ uint32_t val) -+{ -+ BUG_ON(val >= (1u << code->width)); -+ /* code->width should never exceed the width of val. If it does then a -+ * different function with larger val size must be used to translate -+ * from unsigned to signed */ -+ BUG_ON(code->width > sizeof(val) * CHAR_BIT); -+ /* If the high bit was set, it was encoding a negative */ -+ if (val >= 1u << (code->width - 1)) -+ return (int32_t)0 - (int32_t)(((uint32_t)1 << code->width) - -+ val); -+ /* Otherwise, it was encoding a positive */ -+ return (int32_t)val; -+} -+ -+/* ---------------------- */ -+/* Descriptors/cachelines */ -+/* ---------------------- */ -+ -+/* To avoid needless dynamic allocation, the driver API often gives the caller -+ * a "descriptor" type that the caller can instantiate however they like. -+ * Ultimately though, it is just a cacheline of binary storage (or something -+ * smaller when it is known that the descriptor doesn't need all 64 bytes) for -+ * holding pre-formatted pieces of hardware commands. The performance-critical -+ * code can then copy these descriptors directly into hardware command -+ * registers more efficiently than trying to construct/format commands -+ * on-the-fly. The API user sees the descriptor as an array of 32-bit words in -+ * order for the compiler to know its size, but the internal details are not -+ * exposed. The following macro is used within the driver for converting *any* -+ * descriptor pointer to a usable array pointer. The use of a macro (instead of -+ * an inline) is necessary to work with different descriptor types and to work -+ * correctly with const and non-const inputs (and similarly-qualified outputs). 
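The worked example in the re-signing comment above can be checked mechanically; a standalone local re-implementation (not the driver's symbol) reproduces both figures quoted there:

	#include <stdint.h>
	#include <stdio.h>

	static int32_t makesigned(unsigned width, uint32_t val)
	{
		/* if the field's high bit was set, it encoded a negative */
		if (val >= 1u << (width - 1))
			return (int32_t)0 - (int32_t)((1u << width) - val);
		return (int32_t)val;
	}

	int main(void)
	{
		/* 8-bit fields: 249 decodes to -7, 136 decodes to -120 */
		printf("%d %d\n", makesigned(8, 249), makesigned(8, 136));
		return 0;
	}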
-+ */ -+#define qb_cl(d) (&(d)->dont_manipulate_directly[0]) -diff --git a/drivers/net/dpaa2/qbman/driver/qbman_private.h b/drivers/net/dpaa2/qbman/driver/qbman_private.h -new file mode 100644 -index 0000000..624ede1 ---- /dev/null -+++ b/drivers/net/dpaa2/qbman/driver/qbman_private.h -@@ -0,0 +1,168 @@ -+/* Copyright (C) 2014 Freescale Semiconductor, Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Freescale Semiconductor nor the -+ * names of its contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY -+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY -+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+*/ -+ -+/* Perform extra checking */ -+#define QBMAN_CHECKING -+ -+/* To maximise the amount of logic that is common between the Linux driver and -+ * other targets (such as the embedded MC firmware), we pivot here between the -+ * inclusion of two platform-specific headers. -+ * -+ * The first, qbman_sys_decl.h, includes any and all required system headers as -+ * well as providing any definitions for the purposes of compatibility. The -+ * second, qbman_sys.h, is where platform-specific routines go. -+ * -+ * The point of the split is that the platform-independent code (including this -+ * header) may depend on platform-specific declarations, yet other -+ * platform-specific routines may depend on platform-independent definitions. -+ */ -+ -+#include "qbman_sys_decl.h" -+ -+/* When things go wrong, it is a convenient trick to insert a few FOO() -+ * statements in the code to trace progress. TODO: remove this once we are -+ * hacking the code less actively. -+ */ -+#define FOO() fsl_os_print("FOO: %s:%d\n", __FILE__, __LINE__) -+ -+/* Any time there is a register interface which we poll on, this provides a -+ * "break after x iterations" scheme for it. It's handy for debugging, eg. -+ * where you don't want millions of lines of log output from a polling loop -+ * that won't, because such things tend to drown out the earlier log output -+ * that might explain what caused the problem. (NB: put ";" after each macro!) -+ * TODO: we should probably remove this once we're done sanitising the -+ * simulator... 
-+ */ -+#define DBG_POLL_START(loopvar) (loopvar = 10) -+#define DBG_POLL_CHECK(loopvar) \ -+ do {if (!(loopvar--)) BUG_ON(NULL == "DBG_POLL_CHECK"); } while (0) -+ -+/* For CCSR or portal-CINH registers that contain fields at arbitrary offsets -+ * and widths, these macro-generated encode/decode/isolate/remove inlines can -+ * be used. -+ * -+ * Eg. to "d"ecode a 14-bit field out of a register (into a "uint16_t" type), -+ * where the field is located 3 bits "up" from the least-significant bit of the -+ * register (ie. the field location within the 32-bit register corresponds to a -+ * mask of 0x0001fff8), you would do; -+ * uint16_t field = d32_uint16_t(3, 14, reg_value); -+ * -+ * Or to "e"ncode a 1-bit boolean value (input type is "int", zero is FALSE, -+ * non-zero is TRUE, so must convert all non-zero inputs to 1, hence the "!!" -+ * operator) into a register at bit location 0x00080000 (19 bits "in" from the -+ * LS bit), do; -+ * reg_value |= e32_int(19, 1, !!field); -+ * -+ * If you wish to read-modify-write a register, such that you leave the 14-bit -+ * field as-is but have all other fields set to zero, then "i"solate the 14-bit -+ * value using; -+ * reg_value = i32_uint16_t(3, 14, reg_value); -+ * -+ * Alternatively, you could "r"emove the 1-bit boolean field (setting it to -+ * zero) but leaving all other fields as-is; -+ * reg_val = r32_int(19, 1, reg_value); -+ * -+ */ -+#define MAKE_MASK32(width) (width == 32 ? 0xffffffff : \ -+ (uint32_t)((1 << width) - 1)) -+#define DECLARE_CODEC32(t) \ -+static inline uint32_t e32_##t(uint32_t lsoffset, uint32_t width, t val) \ -+{ \ -+ BUG_ON(width > (sizeof(t) * 8)); \ -+ return ((uint32_t)val & MAKE_MASK32(width)) << lsoffset; \ -+} \ -+static inline t d32_##t(uint32_t lsoffset, uint32_t width, uint32_t val) \ -+{ \ -+ BUG_ON(width > (sizeof(t) * 8)); \ -+ return (t)((val >> lsoffset) & MAKE_MASK32(width)); \ -+} \ -+static inline uint32_t i32_##t(uint32_t lsoffset, uint32_t width, \ -+ uint32_t val) \ -+{ \ -+ BUG_ON(width > (sizeof(t) * 8)); \ -+ return e32_##t(lsoffset, width, d32_##t(lsoffset, width, val)); \ -+} \ -+static inline uint32_t r32_##t(uint32_t lsoffset, uint32_t width, \ -+ uint32_t val) \ -+{ \ -+ BUG_ON(width > (sizeof(t) * 8)); \ -+ return ~(MAKE_MASK32(width) << lsoffset) & val; \ -+} -+DECLARE_CODEC32(uint32_t) -+DECLARE_CODEC32(uint16_t) -+DECLARE_CODEC32(uint8_t) -+DECLARE_CODEC32(int) -+ -+ /*********************/ -+ /* Debugging assists */ -+ /*********************/ -+ -+static inline void __hexdump(unsigned long start, unsigned long end, -+ unsigned long p, size_t sz, const unsigned char *c) -+{ -+ while (start < end) { -+ unsigned int pos = 0; -+ char buf[64]; -+ int nl = 0; -+ -+ pos += sprintf(buf + pos, "%08lx: ", start); -+ do { -+ if ((start < p) || (start >= (p + sz))) -+ pos += sprintf(buf + pos, ".."); -+ else -+ pos += sprintf(buf + pos, "%02x", *(c++)); -+ if (!(++start & 15)) { -+ buf[pos++] = '\n'; -+ nl = 1; -+ } else { -+ nl = 0; -+ if (!(start & 1)) -+ buf[pos++] = ' '; -+ if (!(start & 3)) -+ buf[pos++] = ' '; -+ } -+ } while (start & 15); -+ if (!nl) -+ buf[pos++] = '\n'; -+ buf[pos] = '\0'; -+ pr_info("%s", buf); -+ } -+} -+ -+static inline void hexdump(const void *ptr, size_t sz) -+{ -+ unsigned long p = (unsigned long)ptr; -+ unsigned long start = p & ~(unsigned long)15; -+ unsigned long end = (p + sz + 15) & ~(unsigned long)15; -+ const unsigned char *c = ptr; -+ -+ __hexdump(start, end, p, sz, c); -+} -+ -+#define QMAN_REV_4000 0x04000000 -+#define QMAN_REV_4100 0x04010000 -+#define 
QMAN_REV_4101 0x04010001 -+ -+#include "qbman_sys.h" -diff --git a/drivers/net/dpaa2/qbman/driver/qbman_sys.h b/drivers/net/dpaa2/qbman/driver/qbman_sys.h -new file mode 100644 -index 0000000..a39fa71 ---- /dev/null -+++ b/drivers/net/dpaa2/qbman/driver/qbman_sys.h -@@ -0,0 +1,373 @@ -+/* Copyright (C) 2014 Freescale Semiconductor, Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Freescale Semiconductor nor the -+ * names of its contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY -+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY -+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+/* qbman_sys_decl.h and qbman_sys.h are the two platform-specific files in the -+ * driver. They are only included via qbman_private.h, which is itself a -+ * platform-independent file and is included by all the other driver source. -+ * -+ * qbman_sys_decl.h is included prior to all other declarations and logic, and -+ * it exists to provide compatibility with any linux interfaces our -+ * single-source driver code is dependent on (eg. kmalloc). Ie. this file -+ * provides linux compatibility. -+ * -+ * This qbman_sys.h header, on the other hand, is included *after* any common -+ * and platform-neutral declarations and logic in qbman_private.h, and exists to -+ * implement any platform-specific logic of the qbman driver itself. Ie. it is -+ * *not* to provide linux compatibility. -+ */ -+ -+/* Trace the 3 different classes of read/write access to QBMan. #undef as -+ * required. */ -+#undef QBMAN_CCSR_TRACE -+#undef QBMAN_CINH_TRACE -+#undef QBMAN_CENA_TRACE -+ -+static inline void word_copy(void *d, const void *s, unsigned int cnt) -+{ -+ uint32_t *dd = d; -+ const uint32_t *ss = s; -+ -+ while (cnt--) -+ *(dd++) = *(ss++); -+} -+ -+/* Currently, the CENA support code expects each 32-bit word to be written in -+ * host order, and these are converted to hardware (little-endian) order on -+ * command submission. However, 64-bit quantities must be written (and read) -+ * as two 32-bit words with the least-significant word first, irrespective of -+ * host endianness.
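 * Editor's illustration (not part of the original patch): copying a single
 * 64-bit value into two command words gives the same layout on either host;
 *
 *   uint64_t phys = 0x1122334455667788ULL;
 *   uint32_t w[2];
 *
 *   u64_to_le32_copy(w, &phys, 1);
 *
 * w[0] is 0x55667788 and w[1] is 0x11223344 on big- and little-endian hosts
 * alike, i.e. the least-significant 32-bit word always lands first.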
*/ -+static inline void u64_to_le32_copy(void *d, const uint64_t *s, -+ unsigned int cnt) -+{ -+ uint32_t *dd = d; -+ const uint32_t *ss = (const uint32_t *)s; -+ -+ while (cnt--) { -+ /* TBD: the toolchain was choking on the use of 64-bit types up -+ * until recently so this works entirely with 32-bit variables. -+ * When 64-bit types become usable again, investigate better -+ * ways of doing this. */ -+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ *(dd++) = ss[1]; -+ *(dd++) = ss[0]; -+ ss += 2; -+#else -+ *(dd++) = *(ss++); -+ *(dd++) = *(ss++); -+#endif -+ } -+} -+ -+static inline void u64_from_le32_copy(uint64_t *d, const void *s, -+ unsigned int cnt) -+{ -+ const uint32_t *ss = s; -+ uint32_t *dd = (uint32_t *)d; -+ -+ while (cnt--) { -+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+ dd[1] = *(ss++); -+ dd[0] = *(ss++); -+ dd += 2; -+#else -+ *(dd++) = *(ss++); -+ *(dd++) = *(ss++); -+#endif -+ } -+} -+ -+/* Convert a host-native 32bit value into little endian */ -+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+static inline uint32_t make_le32(uint32_t val) -+{ -+ return ((val & 0xff) << 24) | ((val & 0xff00) << 8) | -+ ((val & 0xff0000) >> 8) | ((val & 0xff000000) >> 24); -+} -+ -+static inline uint32_t make_le24(uint32_t val) -+{ -+ return (((val & 0xff) << 16) | (val & 0xff00) | -+ ((val & 0xff0000) >> 16)); -+} -+#else -+#define make_le32(val) (val) -+#define make_le24(val) (val) -+#endif -+static inline void make_le32_n(uint32_t *val, unsigned int num) -+{ -+ while (num--) { -+ *val = make_le32(*val); -+ val++; -+ } -+} -+ -+ /******************/ -+ /* Portal access */ -+ /******************/ -+struct qbman_swp_sys { -+ /* On GPP, the sys support for qbman_swp is here. The CENA region is -+ * not an mmap() of the real portal registers, but an allocated -+ * place-holder, because the actual writes/reads to/from the portal are -+ * marshalled from these allocated areas using QBMan's "MC access -+ * registers". CINH accesses are atomic so there's no need for a -+ * place-holder. */ -+ uint8_t *cena; -+ uint8_t __iomem *addr_cena; -+ uint8_t __iomem *addr_cinh; -+ uint32_t idx; -+ enum qbman_eqcr_mode eqcr_mode; -+}; -+ -+/* P_OFFSET is (ACCESS_CMD,0,12) - offset within the portal -+ * C is (ACCESS_CMD,12,1) - is inhibited?
(0==CENA, 1==CINH) -+ * SWP_IDX is (ACCESS_CMD,16,10) - Software portal index -+ * P is (ACCESS_CMD,28,1) - (0==special portal, 1==any portal) -+ * T is (ACCESS_CMD,29,1) - Command type (0==READ, 1==WRITE) -+ * E is (ACCESS_CMD,31,1) - Command execute (1 to issue, poll for 0==complete) -+ */ -+ -+static inline void qbman_cinh_write(struct qbman_swp_sys *s, uint32_t offset, -+ uint32_t val) -+{ -+ __raw_writel(val, s->addr_cinh + offset); -+#ifdef QBMAN_CINH_TRACE -+ pr_info("qbman_cinh_write(%p:%d:0x%03x) 0x%08x\n", -+ s->addr_cinh, s->idx, offset, val); -+#endif -+} -+ -+static inline uint32_t qbman_cinh_read(struct qbman_swp_sys *s, uint32_t offset) -+{ -+ uint32_t reg = __raw_readl(s->addr_cinh + offset); -+#ifdef QBMAN_CINH_TRACE -+ pr_info("qbman_cinh_read(%p:%d:0x%03x) 0x%08x\n", -+ s->addr_cinh, s->idx, offset, reg); -+#endif -+ return reg; -+} -+ -+static inline void *qbman_cena_write_start(struct qbman_swp_sys *s, -+ uint32_t offset) -+{ -+ void *shadow = s->cena + offset; -+ -+#ifdef QBMAN_CENA_TRACE -+ pr_info("qbman_cena_write_start(%p:%d:0x%03x) %p\n", -+ s->addr_cena, s->idx, offset, shadow); -+#endif -+ BUG_ON(offset & 63); -+ dcbz(shadow); -+ return shadow; -+} -+ -+static inline void *qbman_cena_write_start_wo_shadow(struct qbman_swp_sys *s, -+ uint32_t offset) -+{ -+#ifdef QBMAN_CENA_TRACE -+ pr_info("qbman_cena_write_start(%p:%d:0x%03x)\n", -+ s->addr_cena, s->idx, offset); -+#endif -+ BUG_ON(offset & 63); -+ return (s->addr_cena + offset); -+} -+ -+static inline void qbman_cena_write_complete(struct qbman_swp_sys *s, -+ uint32_t offset, void *cmd) -+{ -+ const uint32_t *shadow = cmd; -+ int loop; -+#ifdef QBMAN_CENA_TRACE -+ pr_info("qbman_cena_write_complete(%p:%d:0x%03x) %p\n", -+ s->addr_cena, s->idx, offset, shadow); -+ hexdump(cmd, 64); -+#endif -+ for (loop = 15; loop >= 1; loop--) -+ __raw_writel(shadow[loop], s->addr_cena + -+ offset + loop * 4); -+ lwsync(); -+ __raw_writel(shadow[0], s->addr_cena + offset); -+ dcbf(s->addr_cena + offset); -+} -+ -+static inline void qbman_cena_write_complete_wo_shadow(struct qbman_swp_sys *s, -+ uint32_t offset) -+{ -+#ifdef QBMAN_CENA_TRACE -+ pr_info("qbman_cena_write_complete(%p:%d:0x%03x)\n", -+ s->addr_cena, s->idx, offset); -+ /* no shadow in this variant; dump the command straight from CENA */ -+ hexdump(s->addr_cena + offset, 64); -+#endif -+ dcbf(s->addr_cena + offset); -+} -+ -+static inline uint32_t qbman_cena_read_reg(struct qbman_swp_sys *s, -+ uint32_t offset) -+{ -+ return __raw_readl(s->addr_cena + offset); -+} -+ -+static inline void *qbman_cena_read(struct qbman_swp_sys *s, uint32_t offset) -+{ -+ uint32_t *shadow = (uint32_t *)(s->cena + offset); -+ unsigned int loop; -+#ifdef QBMAN_CENA_TRACE -+ pr_info("qbman_cena_read(%p:%d:0x%03x) %p\n", -+ s->addr_cena, s->idx, offset, shadow); -+#endif -+ -+ for (loop = 0; loop < 16; loop++) -+ shadow[loop] = __raw_readl(s->addr_cena + offset -+ + loop * 4); -+#ifdef QBMAN_CENA_TRACE -+ hexdump(shadow, 64); -+#endif -+ return shadow; -+} -+ -+static inline void *qbman_cena_read_wo_shadow(struct qbman_swp_sys *s, -+ uint32_t offset) -+{ -+#ifdef QBMAN_CENA_TRACE -+ /* no shadow in this variant; trace and dump the CENA region itself */ -+ pr_info("qbman_cena_read(%p:%d:0x%03x)\n", -+ s->addr_cena, s->idx, offset); -+ hexdump(s->addr_cena + offset, 64); -+#endif -+ return s->addr_cena + offset; -+} -+ -+static inline void qbman_cena_invalidate(struct qbman_swp_sys *s, -+ uint32_t offset) -+{ -+ dccivac(s->addr_cena + offset); -+} -+ -+static inline void qbman_cena_invalidate_prefetch(struct qbman_swp_sys *s, -+ uint32_t offset) -+{ -+ dccivac(s->addr_cena + offset); -+ prefetch_for_load(s->addr_cena
+ offset); -+} -+ -+static inline void qbman_cena_prefetch(struct qbman_swp_sys *s, -+ uint32_t offset) -+{ -+ prefetch_for_load(s->addr_cena + offset); -+} -+ -+ /******************/ -+ /* Portal support */ -+ /******************/ -+ -+/* The SWP_CFG portal register is special, in that it is used by the -+ * platform-specific code rather than the platform-independent code in -+ * qbman_portal.c. So use of it is declared locally here. */ -+#define QBMAN_CINH_SWP_CFG 0xd00 -+ -+/* For MC portal use, we always configure with -+ * DQRR_MF is (SWP_CFG,20,3) - DQRR max fill (<- 0x4) -+ * EST is (SWP_CFG,16,3) - EQCR_CI stashing threshold (<- 0x2) -+ * RPM is (SWP_CFG,12,2) - RCR production notification mode (<- 0x3) -+ * DCM is (SWP_CFG,10,2) - DQRR consumption notification mode (<- 0x2) -+ * EPM is (SWP_CFG,8,2) - EQCR production notification mode (<- 0x2) -+ * SD is (SWP_CFG,5,1) - memory stashing drop enable (<- TRUE) -+ * SP is (SWP_CFG,4,1) - memory stashing priority (<- TRUE) -+ * SE is (SWP_CFG,3,1) - memory stashing enable (<- TRUE) -+ * DP is (SWP_CFG,2,1) - dequeue stashing priority (<- TRUE) -+ * DE is (SWP_CFG,1,1) - dequeue stashing enable (<- TRUE) -+ * EP is (SWP_CFG,0,1) - EQCR_CI stashing priority (<- TRUE) -+ */ -+static inline uint32_t qbman_set_swp_cfg(uint8_t max_fill, uint8_t wn, -+ uint8_t est, uint8_t rpm, uint8_t dcm, -+ uint8_t epm, int sd, int sp, int se, -+ int dp, int de, int ep) -+{ -+ uint32_t reg; -+ -+ reg = e32_uint8_t(20, (uint32_t)(3 + (max_fill >> 3)), max_fill) | -+ e32_uint8_t(16, 3, est) | -+ e32_uint8_t(12, 2, rpm) | e32_uint8_t(10, 2, dcm) | -+ e32_uint8_t(8, 2, epm) | e32_int(5, 1, sd) | -+ e32_int(4, 1, sp) | e32_int(3, 1, se) | e32_int(2, 1, dp) | -+ e32_int(1, 1, de) | e32_int(0, 1, ep) | e32_uint8_t(14, 1, wn); -+ return reg; -+} -+ -+static inline int qbman_swp_sys_init(struct qbman_swp_sys *s, -+ const struct qbman_swp_desc *d, -+ uint8_t dqrr_size) -+{ -+ uint32_t reg; -+ -+ s->addr_cena = d->cena_bar; -+ s->addr_cinh = d->cinh_bar; -+ s->idx = (uint32_t)d->idx; -+ s->cena = (void *)get_zeroed_page(GFP_KERNEL); -+ if (!s->cena) { -+ pr_err("Could not allocate page for cena shadow\n"); -+ return -1; -+ } -+ s->eqcr_mode = d->eqcr_mode; -+ BUG_ON(d->idx < 0); -+#ifdef QBMAN_CHECKING -+ /* We should never be asked to initialise for a portal that isn't in -+ * the power-on state. (Ie. don't forget to reset portals when they are -+ * decommissioned!) 
-+ */ -+ reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG); -+ BUG_ON(reg); -+#endif -+ if (s->eqcr_mode == qman_eqcr_vb_array) -+ reg = qbman_set_swp_cfg(dqrr_size, 0, 0, 3, 2, 3, 1, 1, 1, 1, -+ 1, 1); -+ else -+ reg = qbman_set_swp_cfg(dqrr_size, 0, 2, 3, 2, 2, 1, 1, 1, 1, -+ 1, 1); -+ qbman_cinh_write(s, QBMAN_CINH_SWP_CFG, reg); -+ reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG); -+ if (!reg) { -+ pr_err("The portal %d is not enabled!\n", s->idx); -+ kfree(s->cena); -+ return -1; -+ } -+ return 0; -+} -+ -+static inline void qbman_swp_sys_finish(struct qbman_swp_sys *s) -+{ -+ free_page((unsigned long)s->cena); -+} -+ -+static inline void *qbman_cena_write_start_wo_shadow_fast(struct qbman_swp_sys *s, -+ uint32_t offset) -+{ -+#ifdef QBMAN_CENA_TRACE -+ pr_info("qbman_cena_write_start(%p:%d:0x%03x)\n", -+ s->addr_cena, s->idx, offset); -+#endif -+ BUG_ON(offset & 63); -+ return (s->addr_cena + offset); -+} -diff --git a/drivers/net/dpaa2/qbman/driver/qbman_sys_decl.h b/drivers/net/dpaa2/qbman/driver/qbman_sys_decl.h -new file mode 100644 -index 0000000..bbf3627 ---- /dev/null -+++ b/drivers/net/dpaa2/qbman/driver/qbman_sys_decl.h -@@ -0,0 +1,69 @@ -+/* Copyright (C) 2014 Freescale Semiconductor, Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Freescale Semiconductor nor the -+ * names of its contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY -+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY -+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+#include -+#include -+ -+/* Sanity check */ -+#if (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) && \ -+ (__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__) -+#error "Unknown endianness!" -+#endif -+ -+/* The platform-independent code shouldn't need endianness, except for -+ * weird/fast-path cases like qbman_result_has_token(), which needs to -+ * perform a passive and endianness-specific test on a read-only data structure -+ * very quickly. It's an exception, and this symbol is used for that case. 
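 * Editor's sketch (not part of the original patch; the helper name is
 * hypothetical): the offsets defined below keep such a token test down to a
 * single shift-and-mask on either endianness;
 *
 *   static inline int demo_has_token(const uint32_t *dq, uint8_t token)
 *   {
 *           return ((dq[0] >> DQRR_TOK_OFFSET) & 0xff) == token;
 *   }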
*/ -+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+#define DQRR_TOK_OFFSET 0 -+#define QBMAN_RESULT_VERB_OFFSET_IN_MEM 24 -+#define SCN_STATE_OFFSET_IN_MEM 8 -+#define SCN_RID_OFFSET_IN_MEM 8 -+#else -+#define DQRR_TOK_OFFSET 24 -+#define QBMAN_RESULT_VERB_OFFSET_IN_MEM 0 -+#define SCN_STATE_OFFSET_IN_MEM 16 -+#define SCN_RID_OFFSET_IN_MEM 0 -+#endif -+ -+/* Similarly-named functions */ -+#define upper32(a) upper_32_bits(a) -+#define lower32(a) lower_32_bits(a) -+ -+ /****************/ -+ /* arch assists */ -+ /****************/ -+#define dcbz(p) { asm volatile("dc zva, %0" : : "r" (p) : "memory"); } -+#define lwsync() { asm volatile("dmb st" : : : "memory"); } -+#define dcbf(p) { asm volatile("dc cvac, %0" : : "r"(p) : "memory"); } -+#define dccivac(p) { asm volatile("dc civac, %0" : : "r"(p) : "memory"); } -+static inline void prefetch_for_load(void *p) -+{ -+ asm volatile("prfm pldl1keep, [%0, #64]" : : "r" (p)); -+} -+ -+static inline void prefetch_for_store(void *p) -+{ -+ asm volatile("prfm pstl1keep, [%0, #64]" : : "r" (p)); -+} -diff --git a/drivers/net/dpaa2/qbman/include/compat.h b/drivers/net/dpaa2/qbman/include/compat.h -new file mode 100644 -index 0000000..456f938 ---- /dev/null -+++ b/drivers/net/dpaa2/qbman/include/compat.h -@@ -0,0 +1,637 @@ -+/* Copyright (c) 2008-2011 Freescale Semiconductor, Inc. -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Freescale Semiconductor nor the -+ * names of its contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY -+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY -+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#ifndef HEADER_COMPAT_H -+#define HEADER_COMPAT_H -+ -+#include -+ -+#ifndef _GNU_SOURCE -+#define _GNU_SOURCE -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* The following definitions are primarily to allow the single-source driver -+ * interfaces to be included by arbitrary program code. Ie. for interfaces that -+ * are also available in kernel-space, these definitions provide compatibility -+ * with certain attributes and types used in those interfaces. 
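 * Editor's illustration (not part of the original patch; the struct and
 * variable names are hypothetical): the Linux-style intrusive list macros
 * provided further down are used in the usual fashion;
 *
 *   struct demo_item {
 *           int value;
 *           struct list_head node;
 *   };
 *   LIST_HEAD(demo_list);
 *
 *   struct demo_item item = { .value = 42 };
 *   struct demo_item *i;
 *
 *   list_add_tail(&item.node, &demo_list);
 *   list_for_each_entry(i, &demo_list, node)
 *           printf("%d\n", i->value);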
*/ -+ -+/* Required compiler attributes */ -+#define __maybe_unused __attribute__((unused)) -+#define __always_unused __attribute__((unused)) -+#define __packed __attribute__((__packed__)) -+#define __user -+#define likely(x) __builtin_expect(!!(x), 1) -+#define unlikely(x) __builtin_expect(!!(x), 0) -+#define ____cacheline_aligned __attribute__((aligned(L1_CACHE_BYTES))) -+#undef container_of -+#define container_of(ptr, type, member) ({ \ -+ typeof(((type *)0)->member)(*__mptr) = (ptr); \ -+ (type *)((char *)__mptr - offsetof(type, member)); }) -+#define __stringify_1(x) #x -+#define __stringify(x) __stringify_1(x) -+#define panic(x) \ -+do { \ -+ printf("panic: %s", x); \ -+ abort(); \ -+} while (0) -+ -+#ifdef ARRAY_SIZE -+#undef ARRAY_SIZE -+#endif -+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) -+ -+/* Required types */ -+typedef uint8_t u8; -+typedef uint16_t u16; -+typedef uint32_t u32; -+typedef uint64_t u64; -+typedef uint64_t dma_addr_t; -+typedef cpu_set_t cpumask_t; -+#define spinlock_t pthread_mutex_t -+typedef u32 compat_uptr_t; -+static inline void __user *compat_ptr(compat_uptr_t uptr) -+{ -+ return (void __user *)(unsigned long)uptr; -+} -+ -+static inline compat_uptr_t ptr_to_compat(void __user *uptr) -+{ -+ return (u32)(unsigned long)uptr; -+} -+ -+/* I/O operations */ -+static inline u32 in_be32(volatile void *__p) -+{ -+ volatile u32 *p = __p; -+ return *p; -+} -+ -+static inline void out_be32(volatile void *__p, u32 val) -+{ -+ volatile u32 *p = __p; -+ *p = val; -+} -+ -+/* Debugging */ -+#define prflush(fmt, args...) \ -+ do { \ -+ printf(fmt, ##args); \ -+ fflush(stdout); \ -+ } while (0) -+#define pr_crit(fmt, args...) prflush("CRIT:" fmt, ##args) -+#define pr_err(fmt, args...) prflush("ERR:" fmt, ##args) -+#define pr_warn(fmt, args...) prflush("WARN:" fmt, ##args) -+#define pr_info(fmt, args...) prflush(fmt, ##args) -+ -+#define BUG() abort() -+#ifdef CONFIG_BUGON -+#ifdef pr_debug -+#undef pr_debug -+#endif -+#define pr_debug(fmt, args...) printf(fmt, ##args) -+#define BUG_ON(c) \ -+do { \ -+ if (c) { \ -+ pr_crit("BUG: %s:%d\n", __FILE__, __LINE__); \ -+ abort(); \ -+ } \ -+} while (0) -+#define might_sleep_if(c) BUG_ON(c) -+#define msleep(x) \ -+do { \ -+ pr_crit("BUG: illegal call %s:%d\n", __FILE__, __LINE__); \ -+ exit(EXIT_FAILURE); \ -+} while (0) -+#else -+#ifdef pr_debug -+#undef pr_debug -+#endif -+#define pr_debug(fmt, args...) 
do { ; } while (0) -+#define BUG_ON(c) do { ; } while (0) -+#define might_sleep_if(c) do { ; } while (0) -+#define msleep(x) do { ; } while (0) -+#endif -+#define WARN_ON(c, str) \ -+do { \ -+ static int warned_##__LINE__; \ -+ if ((c) && !warned_##__LINE__) { \ -+ pr_warn("%s\n", str); \ -+ pr_warn("(%s:%d)\n", __FILE__, __LINE__); \ -+ warned_##__LINE__ = 1; \ -+ } \ -+} while (0) -+ -+#define ALIGN(x, a) (((x) + ((typeof(x))(a) - 1)) & ~((typeof(x))(a) - 1)) -+ -+/****************/ -+/* Linked-lists */ -+/****************/ -+ -+struct list_head { -+ struct list_head *prev; -+ struct list_head *next; -+}; -+ -+#define LIST_HEAD(n) \ -+struct list_head n = { \ -+ .prev = &n, \ -+ .next = &n \ -+} -+ -+#define INIT_LIST_HEAD(p) \ -+do { \ -+ struct list_head *__p298 = (p); \ -+ __p298->prev = __p298->next = __p298; \ -+} while (0) -+#define list_entry(node, type, member) \ -+ (type *)((void *)node - offsetof(type, member)) -+#define list_empty(p) \ -+({ \ -+ const struct list_head *__p298 = (p); \ -+ ((__p298->next == __p298) && (__p298->prev == __p298)); \ -+}) -+#define list_add(p, l) \ -+do { \ -+ struct list_head *__p298 = (p); \ -+ struct list_head *__l298 = (l); \ -+ __p298->next = __l298->next; \ -+ __p298->prev = __l298; \ -+ __l298->next->prev = __p298; \ -+ __l298->next = __p298; \ -+} while (0) -+#define list_add_tail(p, l) \ -+do { \ -+ struct list_head *__p298 = (p); \ -+ struct list_head *__l298 = (l); \ -+ __p298->prev = __l298->prev; \ -+ __p298->next = __l298; \ -+ __l298->prev->next = __p298; \ -+ __l298->prev = __p298; \ -+} while (0) -+#define list_for_each(i, l) \ -+ for (i = (l)->next; i != (l); i = i->next) -+#define list_for_each_safe(i, j, l) \ -+ for (i = (l)->next, j = i->next; i != (l); \ -+ i = j, j = i->next) -+#define list_for_each_entry(i, l, name) \ -+ for (i = list_entry((l)->next, typeof(*i), name); &i->name != (l); \ -+ i = list_entry(i->name.next, typeof(*i), name)) -+#define list_for_each_entry_safe(i, j, l, name) \ -+ for (i = list_entry((l)->next, typeof(*i), name), \ -+ j = list_entry(i->name.next, typeof(*j), name); \ -+ &i->name != (l); \ -+ i = j, j = list_entry(j->name.next, typeof(*j), name)) -+#define list_del(i) \ -+do { \ -+ (i)->next->prev = (i)->prev; \ -+ (i)->prev->next = (i)->next; \ -+} while (0) -+ -+/* Other miscellaneous interfaces our APIs depend on; */ -+ -+#define lower_32_bits(x) ((u32)(x)) -+#define upper_32_bits(x) ((u32)(((x) >> 16) >> 16)) -+ -+/* Compiler/type stuff */ -+typedef unsigned int gfp_t; -+typedef uint32_t phandle; -+ -+#define noinline __attribute__((noinline)) -+#define __iomem -+#define EINTR 4 -+#define ENODEV 19 -+#define MODULE_AUTHOR(s) -+#define MODULE_LICENSE(s) -+#define MODULE_DESCRIPTION(s) -+#define MODULE_PARM_DESC(x, y) -+#define EXPORT_SYMBOL(x) -+#define module_init(fn) int m_##fn(void) { return fn(); } -+#define module_exit(fn) void m_##fn(void) { fn(); } -+#define module_param(x, y, z) -+#define module_param_string(w, x, y, z) -+#define GFP_KERNEL 0 -+#define __KERNEL__ -+#define __init -+#define __raw_readb(p) *(const volatile unsigned char *)(p) -+#define __raw_readl(p) *(const volatile unsigned int *)(p) -+#define __raw_writel(v, p) \ -+do { \ -+ *(volatile unsigned int *)(p) = (v); \ -+} while (0) -+ -+/* printk() stuff */ -+#define printk(fmt, args...) do_not_use_printk -+#define nada(fmt, args...) 
do { ; } while (0) -+ -+/* Interrupt stuff */ -+typedef uint32_t irqreturn_t; -+#define IRQ_HANDLED 0 -+ -+/* memcpy() stuff - when you know alignments in advance */ -+#ifdef CONFIG_TRY_BETTER_MEMCPY -+static inline void copy_words(void *dest, const void *src, size_t sz) -+{ -+ u32 *__dest = dest; -+ const u32 *__src = src; -+ size_t __sz = sz >> 2; -+ -+ BUG_ON((unsigned long)dest & 0x3); -+ BUG_ON((unsigned long)src & 0x3); -+ BUG_ON(sz & 0x3); -+ while (__sz--) -+ *(__dest++) = *(__src++); -+} -+ -+static inline void copy_shorts(void *dest, const void *src, size_t sz) -+{ -+ u16 *__dest = dest; -+ const u16 *__src = src; -+ size_t __sz = sz >> 1; -+ -+ BUG_ON((unsigned long)dest & 0x1); -+ BUG_ON((unsigned long)src & 0x1); -+ BUG_ON(sz & 0x1); -+ while (__sz--) -+ *(__dest++) = *(__src++); -+} -+ -+static inline void copy_bytes(void *dest, const void *src, size_t sz) -+{ -+ u8 *__dest = dest; -+ const u8 *__src = src; -+ -+ while (sz--) -+ *(__dest++) = *(__src++); -+} -+#else -+#define copy_words memcpy -+#define copy_shorts memcpy -+#define copy_bytes memcpy -+#endif -+ -+/* Spinlock stuff */ -+#define spinlock_t pthread_mutex_t -+#define __SPIN_LOCK_UNLOCKED(x) PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP -+#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) -+#define spin_lock_init(x) \ -+ do { \ -+ __maybe_unused int __foo; \ -+ pthread_mutexattr_t __foo_attr; \ -+ __foo = pthread_mutexattr_init(&__foo_attr); \ -+ BUG_ON(__foo); \ -+ __foo = pthread_mutexattr_settype(&__foo_attr, \ -+ PTHREAD_MUTEX_ADAPTIVE_NP); \ -+ BUG_ON(__foo); \ -+ __foo = pthread_mutex_init(x, &__foo_attr); \ -+ BUG_ON(__foo); \ -+ } while (0) -+#define spin_lock(x) \ -+ do { \ -+ __maybe_unused int __foo = pthread_mutex_lock(x); \ -+ BUG_ON(__foo); \ -+ } while (0) -+#define spin_unlock(x) \ -+ do { \ -+ __maybe_unused int __foo = pthread_mutex_unlock(x); \ -+ BUG_ON(__foo); \ -+ } while (0) -+#define spin_lock_irq(x) do { \ -+ local_irq_disable(); \ -+ spin_lock(x); \ -+ } while (0) -+#define spin_unlock_irq(x) do { \ -+ spin_unlock(x); \ -+ local_irq_enable(); \ -+ } while (0) -+#define spin_lock_irqsave(x, f) do { spin_lock_irq(x); } while (0) -+#define spin_unlock_irqrestore(x, f) do { spin_unlock_irq(x); } while (0) -+ -+#define raw_spinlock_t spinlock_t -+#define raw_spin_lock_init(x) spin_lock_init(x) -+#define raw_spin_lock_irqsave(x, f) spin_lock(x) -+#define raw_spin_unlock_irqrestore(x, f) spin_unlock(x) -+ -+/* Completion stuff */ -+#define DECLARE_COMPLETION(n) int n = 0; -+#define complete(n) \ -+do { \ -+ *n = 1; \ -+} while (0) -+#define wait_for_completion(n) \ -+do { \ -+ while (!*n) { \ -+ bman_poll(); \ -+ qman_poll(); \ -+ } \ -+ *n = 0; \ -+} while (0) -+ -+/* Platform device stuff */ -+struct platform_device { void *dev; }; -+static inline struct -+platform_device *platform_device_alloc(const char *name __always_unused, -+ int id __always_unused) -+{ -+ struct platform_device *ret = malloc(sizeof(*ret)); -+ -+ if (ret) -+ ret->dev = NULL; -+ return ret; -+} -+ -+#define platform_device_add(pdev) 0 -+#define platform_device_del(pdev) do { ; } while (0) -+static inline void platform_device_put(struct platform_device *pdev) -+{ -+ free(pdev); -+} -+ -+struct resource { -+ int unused; -+}; -+ -+/* Allocator stuff */ -+#define kmalloc(sz, t) malloc(sz) -+#define vmalloc(sz) malloc(sz) -+#define kfree(p) do { if (p) free(p); } while (0) -+static inline void *kzalloc(size_t sz, gfp_t __foo __always_unused) -+{ -+ void *ptr = malloc(sz); -+ -+ if (ptr) -+ memset(ptr, 0, sz); -+ return 
ptr; -+} -+ -+static inline unsigned long get_zeroed_page(gfp_t __foo __always_unused) -+{ -+ void *p; -+ -+ if (posix_memalign(&p, 4096, 4096)) -+ return 0; -+ memset(p, 0, 4096); -+ return (unsigned long)p; -+} -+ -+static inline void free_page(unsigned long p) -+{ -+ free((void *)p); -+} -+ -+struct kmem_cache { -+ size_t sz; -+ size_t align; -+}; -+ -+#define SLAB_HWCACHE_ALIGN 0 -+static inline struct kmem_cache *kmem_cache_create(const char *n __always_unused, -+ size_t sz, size_t align, unsigned long flags __always_unused, -+ void (*c)(void *) __always_unused) -+{ -+ struct kmem_cache *ret = malloc(sizeof(*ret)); -+ -+ if (ret) { -+ ret->sz = sz; -+ ret->align = align; -+ } -+ return ret; -+} -+ -+static inline void kmem_cache_destroy(struct kmem_cache *c) -+{ -+ free(c); -+} -+ -+static inline void *kmem_cache_alloc(struct kmem_cache *c, gfp_t f __always_unused) -+{ -+ void *p; -+ -+ if (posix_memalign(&p, c->align, c->sz)) -+ return NULL; -+ return p; -+} -+ -+static inline void kmem_cache_free(struct kmem_cache *c __always_unused, void *p) -+{ -+ free(p); -+} -+ -+static inline void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t f) -+{ -+ void *ret = kmem_cache_alloc(c, f); -+ -+ if (ret) -+ memset(ret, 0, c->sz); -+ return ret; -+} -+ -+/* Bitfield stuff. */ -+#define BITS_PER_ULONG (sizeof(unsigned long) << 3) -+#define SHIFT_PER_ULONG (((1 << 5) == BITS_PER_ULONG) ? 5 : 6) -+#define BITS_MASK(idx) ((unsigned long)1 << ((idx) & (BITS_PER_ULONG - 1))) -+#define BITS_IDX(idx) ((idx) >> SHIFT_PER_ULONG) -+static inline unsigned long test_bits(unsigned long mask, -+ volatile unsigned long *p) -+{ -+ return *p & mask; -+} -+ -+static inline int test_bit(int idx, volatile unsigned long *bits) -+{ -+ return test_bits(BITS_MASK(idx), bits + BITS_IDX(idx)); -+} -+ -+static inline void set_bits(unsigned long mask, volatile unsigned long *p) -+{ -+ *p |= mask; -+} -+ -+static inline void set_bit(int idx, volatile unsigned long *bits) -+{ -+ set_bits(BITS_MASK(idx), bits + BITS_IDX(idx)); -+} -+ -+static inline void clear_bits(unsigned long mask, volatile unsigned long *p) -+{ -+ *p &= ~mask; -+} -+ -+static inline void clear_bit(int idx, volatile unsigned long *bits) -+{ -+ clear_bits(BITS_MASK(idx), bits + BITS_IDX(idx)); -+} -+ -+static inline unsigned long test_and_set_bits(unsigned long mask, -+ volatile unsigned long *p) -+{ -+ unsigned long ret = test_bits(mask, p); -+ -+ set_bits(mask, p); -+ return ret; -+} -+ -+static inline int test_and_set_bit(int idx, volatile unsigned long *bits) -+{ -+ int ret = test_bit(idx, bits); -+ -+ set_bit(idx, bits); -+ return ret; -+} -+ -+static inline int test_and_clear_bit(int idx, volatile unsigned long *bits) -+{ -+ int ret = test_bit(idx, bits); -+ -+ clear_bit(idx, bits); -+ return ret; -+} -+ -+static inline int find_next_zero_bit(unsigned long *bits, int limit, int idx) -+{ -+ while ((++idx < limit) && test_bit(idx, bits)) -+ ; -+ return idx; -+} -+ -+static inline int find_first_zero_bit(unsigned long *bits, int limit) -+{ -+ int idx = 0; -+ -+ while (test_bit(idx, bits) && (++idx < limit)) -+ ; -+ return idx; -+} -+ -+static inline u64 div64_u64(u64 n, u64 d) -+{ -+ return n / d; -+} -+ -+#define dmb(opt) { asm volatile("dmb " #opt : : : "memory"); } -+#define smp_mb() dmb(ish) -+ -+/* Atomic stuff */ -+typedef struct { -+ int counter; -+} atomic_t; -+ -+#define atomic_read(v) (*(volatile int *)&(v)->counter) -+#define atomic_set(v, i) (((v)->counter) = (i)) -+static inline void atomic_add(int i, atomic_t *v) -+{ -+ unsigned long tmp; -+ 
int result; -+ -+ asm volatile("// atomic_add\n" -+ "1: ldxr %w0, %2\n" -+ " add %w0, %w0, %w3\n" -+ " stxr %w1, %w0, %2\n" -+ " cbnz %w1, 1b" -+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) -+ : "Ir" (i)); -+} -+ -+static inline int atomic_add_return(int i, atomic_t *v) -+{ -+ unsigned long tmp; -+ int result; -+ -+ asm volatile("// atomic_add_return\n" -+ "1: ldxr %w0, %2\n" -+ " add %w0, %w0, %w3\n" -+ " stlxr %w1, %w0, %2\n" -+ " cbnz %w1, 1b" -+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) -+ : "Ir" (i) -+ : "memory"); -+ -+ smp_mb(); -+ return result; -+} -+ -+static inline void atomic_sub(int i, atomic_t *v) -+{ -+ unsigned long tmp; -+ int result; -+ -+ asm volatile("// atomic_sub\n" -+ "1: ldxr %w0, %2\n" -+ " sub %w0, %w0, %w3\n" -+ " stxr %w1, %w0, %2\n" -+ " cbnz %w1, 1b" -+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) -+ : "Ir" (i)); -+} -+ -+static inline int atomic_sub_return(int i, atomic_t *v) -+{ -+ unsigned long tmp; -+ int result; -+ -+ asm volatile("// atomic_sub_return\n" -+ "1: ldxr %w0, %2\n" -+ " sub %w0, %w0, %w3\n" -+ " stlxr %w1, %w0, %2\n" -+ " cbnz %w1, 1b" -+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) -+ : "Ir" (i) -+ : "memory"); -+ -+ smp_mb(); -+ return result; -+} -+ -+#define atomic_inc(v) atomic_add(1, v) -+#define atomic_dec(v) atomic_sub(1, v) -+ -+#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) -+#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) -+#define atomic_inc_return(v) (atomic_add_return(1, v)) -+#define atomic_dec_return(v) (atomic_sub_return(1, v)) -+#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) -+ -+#endif /* HEADER_COMPAT_H */ -diff --git a/drivers/net/dpaa2/qbman/include/drivers/fsl_qbman_base.h b/drivers/net/dpaa2/qbman/include/drivers/fsl_qbman_base.h -new file mode 100644 -index 0000000..4cb784c ---- /dev/null -+++ b/drivers/net/dpaa2/qbman/include/drivers/fsl_qbman_base.h -@@ -0,0 +1,151 @@ -+/* Copyright (C) 2014 Freescale Semiconductor, Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Freescale Semiconductor nor the -+ * names of its contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY -+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY -+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+#ifndef _FSL_QBMAN_BASE_H -+#define _FSL_QBMAN_BASE_H -+ -+/** -+ * DOC: QBMan basic structures -+ * -+ * The QBMan block descriptor, software portal descriptor and Frame descriptor -+ * are defined here. -+ * -+ */ -+ -+/** -+ * struct qbman_block_desc - qbman block descriptor structure -+ * @ccsr_reg_bar: CCSR register map. -+ * @irq_rerr: Recoverable error interrupt line. -+ * @irq_nrerr: Non-recoverable error interrupt line -+ * -+ * Descriptor for a QBMan instance on the SoC. On partitions/targets that do not -+ * control this QBMan instance, these values may simply be place-holders. The -+ * idea is simply that we be able to distinguish between them, eg. so that SWP -+ * descriptors can identify which QBMan instance they belong to. -+ */ -+struct qbman_block_desc { -+ void *ccsr_reg_bar; -+ int irq_rerr; -+ int irq_nrerr; -+}; -+ -+enum qbman_eqcr_mode { -+ qman_eqcr_vb_ring = 2, /* Valid bit, with eqcr in ring mode */ -+ qman_eqcr_vb_array, /* Valid bit, with eqcr in array mode */ -+}; -+ -+/** -+ * struct qbman_swp_desc - qbman software portal descriptor structure -+ * @block: The QBMan instance. -+ * @cena_bar: Cache-enabled portal register map. -+ * @cinh_bar: Cache-inhibited portal register map. -+ * @irq: -1 if unused (or unassigned) -+ * @idx: SWPs within a QBMan are indexed. -1 if opaque to the user. -+ * @qman_version: the qman version. -+ * @eqcr_mode: Select the eqcr mode, currently only valid bit ring mode and -+ * valid bit array mode are supported. -+ * -+ * Descriptor for a QBMan software portal, expressed in terms that make sense to -+ * the user context. Ie. on MC, this information is likely to be true-physical, -+ * and instantiated statically at compile-time. On GPP, this information is -+ * likely to be obtained via "discovery" over a partition's "MC bus" -+ * (ie. in response to a MC portal command), and would take into account any -+ * virtualisation of the GPP user's address space and/or interrupt numbering. -+ */ -+struct qbman_swp_desc { -+ const struct qbman_block_desc *block; -+ uint8_t *cena_bar; -+ uint8_t *cinh_bar; -+ int irq; -+ int idx; -+ uint32_t qman_version; -+ enum qbman_eqcr_mode eqcr_mode; -+}; -+ -+/* Driver object for managing a QBMan portal */ -+struct qbman_swp; -+ -+/** -+ * struct qbman_fd - basic structure for qbman frame descriptor -+ * @words: for easier/faster copying the whole FD structure. -+ * @addr_lo: the lower 32 bits of the address in FD. -+ * @addr_hi: the upper 32 bits of the address in FD. -+ * @len: the length field in FD. -+ * @bpid_offset: represent the bpid and offset fields in FD. offset in -+ * the MS 16 bits, BPID in the LS 16 bits. -+ * @frc: frame context -+ * @ctrl: the 32bit control bits including dd, sc,... va, err. -+ * @flc_lo: the lower 32bit of flow context. -+ * @flc_hi: the upper 32bits of flow context. -+ * -+ * Place-holder for FDs, we represent it via the simplest form that we need for -+ * now. Different overlays may be needed to support different options, etc. (It -+ * is impractical to define One True Struct, because the resulting encoding -+ * routines (lots of read-modify-writes) would be worst-case performance whether -+ * or not circumstances required them.) -+ * -+ * Note, as with all data-structures exchanged between software and hardware (be -+ * they located in the portal register map or DMA'd to and from main-memory), -+ * the driver ensures that the caller of the driver API sees the data-structures -+ * in host-endianness. "struct qbman_fd" is no exception.
The 32-bit words -+ * contained within this structure are represented in host-endianness, even if -+ * hardware always treats them as little-endian. As such, if any of these fields -+ * are interpreted in a binary (rather than numerical) fashion by hardware -+ * blocks (eg. accelerators), then the user should be careful. We illustrate -+ * with an example; -+ * -+ * Suppose the desired behaviour of an accelerator is controlled by the "frc" -+ * field of the FDs that are sent to it. Suppose also that the behaviour desired -+ * by the user corresponds to an "frc" value which is expressed as the literal -+ * sequence of bytes 0xfe, 0xed, 0xab, and 0xba. So "frc" should be the 32-bit -+ * value in which 0xfe is the first byte and 0xba is the last byte, and as -+ * hardware is little-endian, this amounts to a 32-bit "value" of 0xbaabedfe. If -+ * the software is little-endian also, this can simply be achieved by setting -+ * frc=0xbaabedfe. On the other hand, if software is big-endian, it should set -+ * frc=0xfeedabba! The best way of avoiding trouble with this sort of thing is -+ * to treat the 32-bit words as numerical values, in which the offset of a field -+ * from the beginning of the first byte (as required or generated by hardware) -+ * is numerically encoded by a left-shift (ie. by raising the field to a -+ * corresponding power of 2). Ie. in the current example, software could set -+ * "frc" in the following way, and it would work correctly on both little-endian -+ * and big-endian operation; -+ * fd.frc = (0xfe << 0) | (0xed << 8) | (0xab << 16) | (0xba << 24); -+ */ -+struct qbman_fd { -+ union { -+ uint32_t words[8]; -+ struct qbman_fd_simple { -+ uint32_t addr_lo; -+ uint32_t addr_hi; -+ uint32_t len; -+ uint32_t bpid_offset; -+ uint32_t frc; -+ uint32_t ctrl; -+ uint32_t flc_lo; -+ uint32_t flc_hi; -+ } simple; -+ }; -+}; -+ -+#endif /* !_FSL_QBMAN_BASE_H */ -diff --git a/drivers/net/dpaa2/qbman/include/drivers/fsl_qbman_portal.h b/drivers/net/dpaa2/qbman/include/drivers/fsl_qbman_portal.h -new file mode 100644 -index 0000000..9e8e5f2 ---- /dev/null -+++ b/drivers/net/dpaa2/qbman/include/drivers/fsl_qbman_portal.h -@@ -0,0 +1,1087 @@ -+/* Copyright (C) 2014 Freescale Semiconductor, Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Freescale Semiconductor nor the -+ * names of its contributors may be used to endorse or promote products -+ * derived from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY -+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+ * DISCLAIMED.
IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY -+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef _FSL_QBMAN_PORTAL_H -+#define _FSL_QBMAN_PORTAL_H -+ -+#include -+ -+/** -+ * DOC: QBMan portal APIs to implement the following functions: -+ * - Initialize and destroy Software portal object. -+ * - Read and write Software portal interrupt registers. -+ * - Enqueue, including setting the enqueue descriptor, and issuing enqueue -+ * command etc. -+ * - Dequeue, including setting the dequeue descriptor, issuing dequeue command, -+ * parsing the dequeue response in DQRR and memory, parsing the state change -+ * notifications etc. -+ * - Release, including setting the release descriptor, and issuing the buffer -+ * release command. -+ * - Acquire, acquire the buffer from the given buffer pool. -+ * - FQ management. -+ * - Channel management, enable/disable CDAN with or without context. -+ */ -+ -+/** -+ * qbman_swp_init() - Create a functional object representing the given -+ * QBMan portal descriptor. -+ * @d: the given qbman swp descriptor -+ * -+ * Return qbman_swp portal object for success, NULL if the object cannot -+ * be created. -+ */ -+struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d); -+ -+/** -+ * qbman_swp_finish() - Destroy a functional object representing -+ * the given QBMan portal descriptor. -+ * @p: the qbman_swp object to be destroyed. -+ * -+ */ -+void qbman_swp_finish(struct qbman_swp *p); -+ -+/** -+ * qbman_swp_get_desc() - Get the descriptor of the given portal object. -+ * @p: the given portal object. -+ * -+ * Return the descriptor for this portal. -+ */ -+const struct qbman_swp_desc *qbman_swp_get_desc(struct qbman_swp *); -+ -+ /**************/ -+ /* Interrupts */ -+ /**************/ -+ -+/* EQCR ring interrupt */ -+#define QBMAN_SWP_INTERRUPT_EQRI ((uint32_t)0x00000001) -+/* Enqueue command dispatched interrupt */ -+#define QBMAN_SWP_INTERRUPT_EQDI ((uint32_t)0x00000002) -+/* DQRR non-empty interrupt */ -+#define QBMAN_SWP_INTERRUPT_DQRI ((uint32_t)0x00000004) -+/* RCR ring interrupt */ -+#define QBMAN_SWP_INTERRUPT_RCRI ((uint32_t)0x00000008) -+/* Release command dispatched interrupt */ -+#define QBMAN_SWP_INTERRUPT_RCDI ((uint32_t)0x00000010) -+/* Volatile dequeue command interrupt */ -+#define QBMAN_SWP_INTERRUPT_VDCI ((uint32_t)0x00000020) -+ -+/** -+ * qbman_swp_interrupt_get_vanish() - Get the data in software portal -+ * interrupt status disable register. -+ * @p: the given software portal object. -+ * -+ * Return the settings in SWP_ISDR register. -+ */ -+uint32_t qbman_swp_interrupt_get_vanish(struct qbman_swp *p); -+ -+/** -+ * qbman_swp_interrupt_set_vanish() - Set the data in software portal -+ * interrupt status disable register. -+ * @p: the given software portal object. -+ * @mask: The value to set in SWP_ISDR register. -+ */ -+void qbman_swp_interrupt_set_vanish(struct qbman_swp *p, uint32_t mask); -+ -+/** -+ * qbman_swp_interrupt_read_status() - Get the data in software portal -+ * interrupt status register. -+ * @p: the given software portal object.
-+ * -+ * Return the settings in SWP_ISR register. -+ */ -+uint32_t qbman_swp_interrupt_read_status(struct qbman_swp *p); -+ -+/** -+ * qbman_swp_interrupt_clear_status() - Set the data in software portal -+ * interrupt status register. -+ * @p: the given software portal object. -+ * @mask: The value to set in SWP_ISR register. -+ */ -+void qbman_swp_interrupt_clear_status(struct qbman_swp *p, uint32_t mask); -+ -+/** -+ * qbman_swp_interrupt_get_trigger() - Get the data in software portal -+ * interrupt enable register. -+ * @p: the given software portal object. -+ * -+ * Return the settings in SWP_IER register. -+ */ -+uint32_t qbman_swp_interrupt_get_trigger(struct qbman_swp *p); -+ -+/** -+ * qbman_swp_interrupt_set_trigger() - Set the data in software portal -+ * interrupt enable register. -+ * @p: the given software portal object. -+ * @mask: The value to set in SWP_IER register. -+ */ -+void qbman_swp_interrupt_set_trigger(struct qbman_swp *p, uint32_t mask); -+ -+/** -+ * qbman_swp_interrupt_get_inhibit() - Get the data in software portal -+ * interrupt inhibit register. -+ * @p: the given software portal object. -+ * -+ * Return the settings in SWP_IIR register. -+ */ -+int qbman_swp_interrupt_get_inhibit(struct qbman_swp *p); -+ -+/** -+ * qbman_swp_interrupt_set_inhibit() - Set the data in software portal -+ * interrupt inhibit register. -+ * @p: the given software portal object. -+ * @inhibit: The value to set in SWP_IIR register. -+ */ -+void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit); -+ -+ /************/ -+ /* Dequeues */ -+ /************/ -+ -+/** -+ * struct qbman_result - structure for qbman dequeue response and/or -+ * notification. -+ * @dont_manipulate_directly: the 16 32bit data to represent the whole -+ * possible qbman dequeue result. -+ */ -+struct qbman_result { -+ uint32_t dont_manipulate_directly[16]; -+}; -+ -+/* TODO: -+ * A DQRI interrupt can be generated when there are dequeue results on the -+ * portal's DQRR (this mechanism does not deal with "pull" dequeues to -+ * user-supplied 'storage' addresses). There are two parameters to this -+ * interrupt source, one is a threshold and the other is a timeout. The -+ * interrupt will fire if either the fill-level of the ring exceeds 'thresh', or -+ * if the ring has been non-empty for longer than 'timeout' nanoseconds. -+ * For timeout, an approximation to the desired nanosecond-granularity value is -+ * made, so there are get and set APIs to allow the user to see what actual -+ * timeout is set (compared to the timeout that was requested). */ -+int qbman_swp_dequeue_thresh(struct qbman_swp *s, unsigned int thresh); -+int qbman_swp_dequeue_set_timeout(struct qbman_swp *s, unsigned int timeout); -+int qbman_swp_dequeue_get_timeout(struct qbman_swp *s, unsigned int *timeout); -+ -+/* ------------------- */ -+/* Push-mode dequeuing */ -+/* ------------------- */ -+ -+/* The user of a portal can enable and disable push-mode dequeuing of up to 16 -+ * channels independently. It does not specify this toggling by channel IDs, but -+ * rather by specifying the index (from 0 to 15) that has been mapped to the -+ * desired channel. -+ */ -+ -+/** -+ * qbman_swp_push_get() - Get the push dequeue setup. -+ * @s: the software portal object. -+ * @channel_idx: the channel index to query. -+ * @enabled: returned boolean to show whether the push dequeue is enabled for -+ * the given channel.
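 * Editor's illustration (not part of the original patch; channel index 3 is
 * an arbitrary assumption): a caller typically reads the current setting and
 * only enables push dequeue when it is off;
 *
 *   int enabled;
 *
 *   qbman_swp_push_get(s, 3, &enabled);
 *   if (!enabled)
 *           qbman_swp_push_set(s, 3, 1);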
-+ */ -+void qbman_swp_push_get(struct qbman_swp *s, uint8_t channel_idx, int *enabled); -+ -+/** -+ * qbman_swp_push_set() - Enable or disable push dequeue. -+ * @s: the software portal object. -+ * @channel_idx: the channel index. -+ * @enable: enable or disable push dequeue. -+ * -+ * The user of a portal can enable and disable push-mode dequeuing of up to 16 -+ * channels independently. It does not specify this toggling by channel IDs, but -+ * rather by specifying the index (from 0 to 15) that has been mapped to the -+ * desired channel. -+ */ -+void qbman_swp_push_set(struct qbman_swp *s, uint8_t channel_idx, int enable); -+ -+/* ------------------- */ -+/* Pull-mode dequeuing */ -+/* ------------------- */ -+ -+/** -+ * struct qbman_pull_desc - the structure for pull dequeue descriptor -+ * @dont_manipulate_directly: the 6 32bit data to represent the whole -+ * possible settings for pull dequeue descriptor. -+ */ -+struct qbman_pull_desc { -+ uint32_t dont_manipulate_directly[6]; -+}; -+ -+enum qbman_pull_type_e { -+ /* dequeue with priority precedence, respect intra-class scheduling */ -+ qbman_pull_type_prio = 1, -+ /* dequeue with active FQ precedence, respect ICS */ -+ qbman_pull_type_active, -+ /* dequeue with active FQ precedence, no ICS */ -+ qbman_pull_type_active_noics -+}; -+ -+/** -+ * qbman_pull_desc_clear() - Clear the contents of a descriptor to -+ * default/starting state. -+ * @d: the pull dequeue descriptor to be cleared. -+ */ -+void qbman_pull_desc_clear(struct qbman_pull_desc *d); -+ -+/** -+ * qbman_pull_desc_set_storage() - Set the pull dequeue storage -+ * @d: the pull dequeue descriptor to be set. -+ * @storage: the pointer of the memory to store the dequeue result. -+ * @storage_phys: the physical address of the storage memory. -+ * @stash: to indicate whether write allocate is enabled. -+ * -+ * If not called, or if called with 'storage' as NULL, then pull dequeues -+ * will produce results to DQRR. If 'storage' is non-NULL, then results are -+ * produced to the given memory location (using the physical/DMA address which -+ * the caller provides in 'storage_phys'), and 'stash' controls whether or not -+ * those writes to main-memory express a cache-warming attribute. -+ */ -+void qbman_pull_desc_set_storage(struct qbman_pull_desc *d, -+ struct qbman_result *storage, -+ dma_addr_t storage_phys, -+ int stash); -+/** -+ * qbman_pull_desc_set_numframes() - Set the number of frames to be dequeued. -+ * @d: the pull dequeue descriptor to be set. -+ * @numframes: number of frames to be set, must be between 1 and 16, inclusive. -+ */ -+void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d, -+ uint8_t numframes); -+/** -+ * qbman_pull_desc_set_token() - Set dequeue token for pull command -+ * @d: the dequeue descriptor -+ * @token: the token to be set -+ * -+ * token is the value that shows up in the dequeue response that can be used to -+ * detect when the results have been published. The easiest technique is to zero -+ * result "storage" before issuing a dequeue, and use any non-zero 'token' value. -+ */ -+void qbman_pull_desc_set_token(struct qbman_pull_desc *d, uint8_t token); -+ -+/* Exactly one of the following descriptor "actions" should be set. (Calling any -+ * one of these will replace the effect of any prior call to one of these.)
-+ * - pull dequeue from the given frame queue (FQ)
-+ * - pull dequeue from any FQ in the given work queue (WQ)
-+ * - pull dequeue from any FQ in any WQ in the given channel
-+ */
-+/**
-+ * qbman_pull_desc_set_fq() - Set fqid from which the dequeue command dequeues.
-+ * @fqid: the frame queue index of the given FQ.
-+ */
-+void qbman_pull_desc_set_fq(struct qbman_pull_desc *d, uint32_t fqid);
-+
-+/**
-+ * qbman_pull_desc_set_wq() - Set wqid from which the dequeue command dequeues.
-+ * @wqid: composed of channel id and wqid within the channel.
-+ * @dct: the dequeue command type.
-+ */
-+void qbman_pull_desc_set_wq(struct qbman_pull_desc *d, uint32_t wqid,
-+			    enum qbman_pull_type_e dct);
-+
-+/**
-+ * qbman_pull_desc_set_channel() - Set channelid from which the dequeue command
-+ * dequeues.
-+ * @chid: the channel id to be dequeued.
-+ * @dct: the dequeue command type.
-+ */
-+void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, uint32_t chid,
-+				 enum qbman_pull_type_e dct);
-+
-+/**
-+ * qbman_swp_pull() - Issue the pull dequeue command
-+ * @s: the software portal object.
-+ * @d: the software portal descriptor which has been configured with
-+ * the set of qbman_pull_desc_set_*() calls.
-+ *
-+ * Return 0 for success, and -EBUSY if the software portal is not ready
-+ * to do pull dequeue.
-+ */
-+int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d);
-+
-+/* -------------------------------- */
-+/* Polling DQRR for dequeue results */
-+/* -------------------------------- */
-+
-+/**
-+ * qbman_swp_dqrr_next() - Get a valid DQRR entry.
-+ * @s: the software portal object.
-+ *
-+ * Return NULL if there are no unconsumed DQRR entries. Return a DQRR entry
-+ * only once, so repeated calls can return a sequence of DQRR entries, without
-+ * requiring they be consumed immediately or in any particular order.
-+ */
-+const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *);
-+
-+/**
-+ * qbman_swp_dqrr_consume() - Consume DQRR entries previously returned from
-+ * qbman_swp_dqrr_next().
-+ * @s: the software portal object.
-+ * @dq: the DQRR entry to be consumed.
-+ */
-+void qbman_swp_dqrr_consume(struct qbman_swp *s, const struct qbman_result *dq);
-+
-+/**
-+ * qbman_get_dqrr_idx() - Get dqrr index from the given dqrr
-+ * @dqrr: the given dqrr object.
-+ *
-+ * Return dqrr index.
-+ */
-+uint8_t qbman_get_dqrr_idx(struct qbman_result *dqrr);
-+
-+/**
-+ * qbman_get_dqrr_from_idx() - Use index to get the dqrr entry from the
-+ * given portal
-+ * @s: the given portal.
-+ * @idx: the dqrr index.
-+ *
-+ * Return dqrr entry object.
-+ */
-+struct qbman_result *qbman_get_dqrr_from_idx(struct qbman_swp *s, uint8_t idx);
-+
-+/* ------------------------------------------------- */
-+/* Polling user-provided storage for dequeue results */
-+/* ------------------------------------------------- */
-+
-+/**
-+ * qbman_result_has_new_result() - Check and get the dequeue response from the
-+ * dq storage memory set in pull dequeue command
-+ * @s: the software portal object.
-+ * @dq: the dequeue result read from the memory.
-+ *
-+ * Only used for user-provided storage of dequeue results, not DQRR. For
-+ * efficiency purposes, the driver will perform any required endianness
-+ * conversion to ensure that the user's dequeue result storage is in host-endian
-+ * format (whether or not that is the same as the little-endian format that
-+ * hardware DMA'd to the user's storage).
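-+ *
-+ * A pull-dequeue-to-storage sketch (editorial illustration only; 'fqid' is
-+ * an assumed frame queue id, 'storage'/'storage_phys' an assumed DMA-able
-+ * result buffer, and 's' an initialised portal):
-+ *
-+ *	struct qbman_pull_desc pd;
-+ *
-+ *	memset(storage, 0, sizeof(struct qbman_result));
-+ *	qbman_pull_desc_clear(&pd);
-+ *	qbman_pull_desc_set_storage(&pd, storage, storage_phys, 0);
-+ *	qbman_pull_desc_set_numframes(&pd, 1);
-+ *	qbman_pull_desc_set_fq(&pd, fqid);
-+ *	while (qbman_swp_pull(s, &pd) == -EBUSY)
-+ *		;
-+ *	while (!qbman_result_has_new_result(s, storage))
-+ *		;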
-+ * As such, once the user has called
-+ * qbman_result_has_new_result() and been returned a valid dequeue result,
-+ * they should not call it again on the same memory location (except of course
-+ * if another dequeue command has been executed to produce a new result to that
-+ * location).
-+ *
-+ * Return 1 for getting a valid dequeue result, or 0 for not getting a valid
-+ * dequeue result.
-+ */
-+int qbman_result_has_new_result(struct qbman_swp *s,
-+				const struct qbman_result *dq);
-+
-+/* -------------------------------------------------------- */
-+/* Parsing dequeue entries (DQRR and user-provided storage) */
-+/* -------------------------------------------------------- */
-+
-+/**
-+ * qbman_result_is_DQ() - Check whether the dequeue result is a dequeue
-+ * response.
-+ * @dq: the dequeue result to be checked.
-+ *
-+ * DQRR entries may contain non-dequeue results, i.e. notifications
-+ */
-+int qbman_result_is_DQ(const struct qbman_result *);
-+
-+/**
-+ * qbman_result_is_SCN() - Check whether the dequeue result is a notification.
-+ * @dq: the dequeue result to be checked.
-+ *
-+ * All the non-dequeue results (FQDAN/CDAN/CSCN/...) are "state change
-+ * notifications" of one type or another. Some APIs apply to all of them, of the
-+ * form qbman_result_SCN_***().
-+ */
-+static inline int qbman_result_is_SCN(const struct qbman_result *dq)
-+{
-+	return !qbman_result_is_DQ(dq);
-+}
-+
-+/* Recognise different notification types, only required if the user allows for
-+ * these to occur, and cares about them when they do.
-+ */
-+
-+/**
-+ * qbman_result_is_FQDAN() - Check for FQ Data Availability
-+ * @dq: the qbman_result object.
-+ *
-+ * Return 1 if this is FQDAN.
-+ */
-+int qbman_result_is_FQDAN(const struct qbman_result *dq);
-+
-+/**
-+ * qbman_result_is_CDAN() - Check for Channel Data Availability
-+ * @dq: the qbman_result object to check.
-+ *
-+ * Return 1 if this is CDAN.
-+ */
-+int qbman_result_is_CDAN(const struct qbman_result *dq);
-+
-+/**
-+ * qbman_result_is_CSCN() - Check for Congestion State Change
-+ * @dq: the qbman_result object to check.
-+ *
-+ * Return 1 if this is CSCN.
-+ */
-+int qbman_result_is_CSCN(const struct qbman_result *dq);
-+
-+/**
-+ * qbman_result_is_BPSCN() - Check for Buffer Pool State Change.
-+ * @dq: the qbman_result object to check.
-+ *
-+ * Return 1 if this is BPSCN.
-+ */
-+int qbman_result_is_BPSCN(const struct qbman_result *dq);
-+
-+/**
-+ * qbman_result_is_CGCU() - Check for Congestion Group Count Update.
-+ * @dq: the qbman_result object to check.
-+ *
-+ * Return 1 if this is CGCU.
-+ */
-+int qbman_result_is_CGCU(const struct qbman_result *dq);
-+
-+/* Frame queue state change notifications; (FQDAN in theory counts too as it
-+ * leaves a FQ parked, but it is primarily a data availability notification)
-+ */
-+
-+/**
-+ * qbman_result_is_FQRN() - Check for FQ Retirement Notification.
-+ * @dq: the qbman_result object to check.
-+ *
-+ * Return 1 if this is FQRN.
-+ */
-+int qbman_result_is_FQRN(const struct qbman_result *);
-+
-+/**
-+ * qbman_result_is_FQRNI() - Check for FQ Retirement Immediate
-+ * @dq: the qbman_result object to check.
-+ *
-+ * Return 1 if this is FQRNI.
-+ */
-+int qbman_result_is_FQRNI(const struct qbman_result *);
-+
-+/**
-+ * qbman_result_is_FQPN() - Check for FQ Park Notification
-+ * @dq: the qbman_result object to check.
-+ *
-+ * Return 1 if this is FQPN.
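-+ *
-+ * A DQRR polling sketch (editorial illustration only) tying the above checks
-+ * together, assuming an initialised portal 's'; process_frame() and
-+ * schedule_pull() are hypothetical application hooks:
-+ *
-+ *	const struct qbman_result *dq;
-+ *
-+ *	while ((dq = qbman_swp_dqrr_next(s)) != NULL) {
-+ *		if (qbman_result_is_DQ(dq))
-+ *			process_frame(qbman_result_DQ_fd(dq));
-+ *		else if (qbman_result_is_FQDAN(dq))
-+ *			schedule_pull(qbman_result_FQDAN_fqid(dq));
-+ *		qbman_swp_dqrr_consume(s, dq);
-+ *	}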
-+ */
-+int qbman_result_is_FQPN(const struct qbman_result *dq);
-+
-+/* Parsing frame dequeue results (qbman_result_is_DQ() must be TRUE)
-+ */
-+/* FQ empty */
-+#define QBMAN_DQ_STAT_FQEMPTY 0x80
-+/* FQ held active */
-+#define QBMAN_DQ_STAT_HELDACTIVE 0x40
-+/* FQ force eligible */
-+#define QBMAN_DQ_STAT_FORCEELIGIBLE 0x20
-+/* Valid frame */
-+#define QBMAN_DQ_STAT_VALIDFRAME 0x10
-+/* FQ ODP enable */
-+#define QBMAN_DQ_STAT_ODPVALID 0x04
-+/* Volatile dequeue */
-+#define QBMAN_DQ_STAT_VOLATILE 0x02
-+/* volatile dequeue command is expired */
-+#define QBMAN_DQ_STAT_EXPIRED 0x01
-+
-+/**
-+ * qbman_result_DQ_flags() - Get the STAT field of dequeue response
-+ * @dq: the dequeue result.
-+ *
-+ * Return the state field.
-+ */
-+uint32_t qbman_result_DQ_flags(const struct qbman_result *dq);
-+
-+/**
-+ * qbman_result_DQ_is_pull() - Check whether the dq response is from a pull
-+ * command.
-+ * @dq: the dequeue result.
-+ *
-+ * Return 1 for volatile (pull) dequeue, 0 for static dequeue.
-+ */
-+static inline int qbman_result_DQ_is_pull(const struct qbman_result *dq)
-+{
-+	return (int)(qbman_result_DQ_flags(dq) & QBMAN_DQ_STAT_VOLATILE);
-+}
-+
-+/**
-+ * qbman_result_DQ_is_pull_complete() - Check whether the pull command is
-+ * completed.
-+ * @dq: the dequeue result.
-+ *
-+ * Return boolean.
-+ */
-+static inline int qbman_result_DQ_is_pull_complete(
-+					const struct qbman_result *dq)
-+{
-+	return (int)(qbman_result_DQ_flags(dq) & QBMAN_DQ_STAT_EXPIRED);
-+}
-+
-+/**
-+ * qbman_result_DQ_seqnum() - Get the seqnum field in dequeue response.
-+ * seqnum is valid only if VALIDFRAME flag is TRUE.
-+ * @dq: the dequeue result.
-+ *
-+ * Return seqnum.
-+ */
-+uint16_t qbman_result_DQ_seqnum(const struct qbman_result *dq);
-+
-+/**
-+ * qbman_result_DQ_odpid() - Get the odpid field in dequeue response.
-+ * odpid is valid only if ODPVALID flag is TRUE.
-+ * @dq: the dequeue result.
-+ *
-+ * Return odpid.
-+ */
-+uint16_t qbman_result_DQ_odpid(const struct qbman_result *dq);
-+
-+/**
-+ * qbman_result_DQ_fqid() - Get the fqid in dequeue response
-+ * @dq: the dequeue result.
-+ *
-+ * Return fqid.
-+ */
-+uint32_t qbman_result_DQ_fqid(const struct qbman_result *dq);
-+
-+/**
-+ * qbman_result_DQ_byte_count() - Get the byte count in dequeue response
-+ * @dq: the dequeue result.
-+ *
-+ * Return the byte count remaining in the FQ.
-+ */
-+uint32_t qbman_result_DQ_byte_count(const struct qbman_result *dq);
-+
-+/**
-+ * qbman_result_DQ_frame_count() - Get the frame count in dequeue response
-+ * @dq: the dequeue result.
-+ *
-+ * Return the frame count remaining in the FQ.
-+ */
-+uint32_t qbman_result_DQ_frame_count(const struct qbman_result *dq);
-+
-+/**
-+ * qbman_result_DQ_fqd_ctx() - Get the frame queue context in dequeue response
-+ * @dq: the dequeue result.
-+ *
-+ * Return the frame queue context.
-+ */
-+uint64_t qbman_result_DQ_fqd_ctx(const struct qbman_result *dq);
-+
-+/**
-+ * qbman_result_DQ_fd() - Get the frame descriptor in dequeue response
-+ * @dq: the dequeue result.
-+ *
-+ * Return the frame descriptor.
-+ */
-+const struct qbman_fd *qbman_result_DQ_fd(const struct qbman_result *dq);
-+
-+/* State-change notifications (FQDAN/CDAN/CSCN/...). */
-+
-+/**
-+ * qbman_result_SCN_state() - Get the state field in State-change notification
-+ * @scn: the state change notification.
-+ *
-+ * Return the state in the notification.
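-+ *
-+ * For example (editorial sketch only), a state-change handler would typically
-+ * read all three common SCN fields before branching on the notification type:
-+ *
-+ *	uint8_t state = qbman_result_SCN_state(scn);
-+ *	uint32_t rid = qbman_result_SCN_rid(scn);
-+ *	uint64_t ctx = qbman_result_SCN_ctx(scn);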
-+ */
-+uint8_t qbman_result_SCN_state(const struct qbman_result *scn);
-+
-+/**
-+ * qbman_result_SCN_rid() - Get the resource id from the notification
-+ * @scn: the state change notification.
-+ *
-+ * Return the resource id.
-+ */
-+uint32_t qbman_result_SCN_rid(const struct qbman_result *scn);
-+
-+/**
-+ * qbman_result_SCN_ctx() - Get the context from the notification
-+ * @scn: the state change notification.
-+ *
-+ * Return the context.
-+ */
-+uint64_t qbman_result_SCN_ctx(const struct qbman_result *scn);
-+
-+/**
-+ * qbman_result_SCN_state_in_mem() - Get the state in notification written
-+ * in memory
-+ * @scn: the state change notification.
-+ *
-+ * Return the state.
-+ */
-+uint8_t qbman_result_SCN_state_in_mem(const struct qbman_result *scn);
-+
-+/**
-+ * qbman_result_SCN_rid_in_mem() - Get the resource id in notification written
-+ * in memory.
-+ * @scn: the state change notification.
-+ *
-+ * Return the resource id.
-+ */
-+uint32_t qbman_result_SCN_rid_in_mem(const struct qbman_result *scn);
-+
-+/* Type-specific "resource IDs". Mainly for illustration purposes, though it
-+ * also gives the appropriate type widths.
-+ */
-+/* Get the FQID from the FQDAN */
-+#define qbman_result_FQDAN_fqid(dq) qbman_result_SCN_rid(dq)
-+/* Get the FQID from the FQRN */
-+#define qbman_result_FQRN_fqid(dq) qbman_result_SCN_rid(dq)
-+/* Get the FQID from the FQRNI */
-+#define qbman_result_FQRNI_fqid(dq) qbman_result_SCN_rid(dq)
-+/* Get the FQID from the FQPN */
-+#define qbman_result_FQPN_fqid(dq) qbman_result_SCN_rid(dq)
-+/* Get the channel ID from the CDAN */
-+#define qbman_result_CDAN_cid(dq) ((uint16_t)qbman_result_SCN_rid(dq))
-+/* Get the CGID from the CSCN */
-+#define qbman_result_CSCN_cgid(dq) ((uint16_t)qbman_result_SCN_rid(dq))
-+
-+/**
-+ * qbman_result_bpscn_bpid() - Get the bpid from BPSCN
-+ * @scn: the state change notification.
-+ *
-+ * Return the buffer pool id.
-+ */
-+uint16_t qbman_result_bpscn_bpid(const struct qbman_result *scn);
-+
-+/**
-+ * qbman_result_bpscn_has_free_bufs() - Check whether there are free
-+ * buffers in the pool from BPSCN.
-+ * @scn: the state change notification.
-+ *
-+ * Return non-zero if the pool still has free buffers.
-+ */
-+int qbman_result_bpscn_has_free_bufs(const struct qbman_result *scn);
-+
-+/**
-+ * qbman_result_bpscn_is_depleted() - Check BPSCN to see whether the
-+ * buffer pool is depleted.
-+ * @scn: the state change notification.
-+ *
-+ * Return the status of buffer pool depletion.
-+ */
-+int qbman_result_bpscn_is_depleted(const struct qbman_result *scn);
-+
-+/**
-+ * qbman_result_bpscn_is_surplus() - Check BPSCN to see whether the buffer
-+ * pool is surplus or not.
-+ * @scn: the state change notification.
-+ *
-+ * Return the status of buffer pool surplus.
-+ */
-+int qbman_result_bpscn_is_surplus(const struct qbman_result *scn);
-+
-+/**
-+ * qbman_result_bpscn_ctx() - Get the BPSCN CTX from BPSCN message
-+ * @scn: the state change notification.
-+ *
-+ * Return the BPSCN context.
-+ */
-+uint64_t qbman_result_bpscn_ctx(const struct qbman_result *scn);
-+
-+/* Parsing CGCU */
-+/**
-+ * qbman_result_cgcu_cgid() - Check CGCU resource id, i.e. cgid
-+ * @scn: the state change notification.
-+ *
-+ * Return the CGCU resource id.
-+ */
-+uint16_t qbman_result_cgcu_cgid(const struct qbman_result *scn);
-+
-+/**
-+ * qbman_result_cgcu_icnt() - Get the I_CNT from CGCU
-+ * @scn: the state change notification.
-+ *
-+ * Return instantaneous count in the CGCU notification.
-+ */
-+uint64_t qbman_result_cgcu_icnt(const struct qbman_result *scn);
-+
-+ /************/
-+ /* Enqueues */
-+ /************/
-+
-+/**
-+ * struct qbman_eq_desc - structure of enqueue descriptor
-+ * @dont_manipulate_directly: the 8 32bit data to represent the whole
-+ * possible qbman enqueue setting in enqueue descriptor.
-+ */
-+struct qbman_eq_desc {
-+	uint32_t dont_manipulate_directly[8];
-+};
-+
-+/**
-+ * struct qbman_eq_response - structure of enqueue response
-+ * @dont_manipulate_directly: the 16 32bit data to represent the whole
-+ * enqueue response.
-+ */
-+struct qbman_eq_response {
-+	uint32_t dont_manipulate_directly[16];
-+};
-+
-+/**
-+ * qbman_eq_desc_clear() - Clear the contents of a descriptor to
-+ * default/starting state.
-+ * @d: the given enqueue descriptor.
-+ */
-+void qbman_eq_desc_clear(struct qbman_eq_desc *d);
-+
-+/* Exactly one of the following descriptor "actions" should be set. (Calling
-+ * any one of these will replace the effect of any prior call to one of these.)
-+ * - enqueue without order-restoration
-+ * - enqueue with order-restoration
-+ * - fill a hole in the order-restoration sequence, without any enqueue
-+ * - advance NESN (Next Expected Sequence Number), without any enqueue
-+ * 'respond_success' indicates whether an enqueue response should be DMA'd
-+ * after success (otherwise a response is DMA'd only after failure).
-+ * 'incomplete' indicates that other fragments of the same 'seqnum' are yet to
-+ * be enqueued.
-+ */
-+
-+/**
-+ * qbman_eq_desc_set_no_orp() - Set enqueue descriptor without orp
-+ * @d: the enqueue descriptor.
-+ * @respond_success: 1 = enqueue with response always; 0 = enqueue with
-+ * rejections returned on a FQ.
-+ */
-+void qbman_eq_desc_set_no_orp(struct qbman_eq_desc *d, int respond_success);
-+/**
-+ * qbman_eq_desc_set_orp() - Set order-restoration in the enqueue descriptor
-+ * @d: the enqueue descriptor.
-+ * @respond_success: 1 = enqueue with response always; 0 = enqueue with
-+ * rejections returned on a FQ.
-+ * @opr_id: the order point record id.
-+ * @seqnum: the order restoration sequence number.
-+ * @incomplete: indicates that other fragments with the same sequence number
-+ * are yet to be enqueued.
-+ */
-+void qbman_eq_desc_set_orp(struct qbman_eq_desc *d, int respond_success,
-+			   uint32_t opr_id, uint32_t seqnum, int incomplete);
-+
-+/**
-+ * qbman_eq_desc_set_orp_hole() - fill a hole in the order-restoration sequence
-+ * without any enqueue
-+ * @d: the enqueue descriptor.
-+ * @opr_id: the order point record id.
-+ * @seqnum: the order restoration sequence number.
-+ */
-+void qbman_eq_desc_set_orp_hole(struct qbman_eq_desc *d, uint32_t opr_id,
-+				uint32_t seqnum);
-+
-+/**
-+ * qbman_eq_desc_set_orp_nesn() - advance NESN (Next Expected Sequence Number)
-+ * without any enqueue
-+ * @d: the enqueue descriptor.
-+ * @opr_id: the order point record id.
-+ * @seqnum: the order restoration sequence number.
-+ */
-+void qbman_eq_desc_set_orp_nesn(struct qbman_eq_desc *d, uint32_t opr_id,
-+				uint32_t seqnum);
-+/**
-+ * qbman_eq_desc_set_response() - Set the enqueue response info.
-+ * @d: the enqueue descriptor
-+ * @storage_phys: the physical address of the enqueue response in memory.
-+ * @stash: indicates whether write allocation is enabled.
-+ *
-+ * In the case where an enqueue response is DMA'd, this determines where that
-+ * response should go.
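-+ *
-+ * For example (editorial sketch only), a caller wanting a DMA'd response
-+ * would pair this with a token; 's', 'fqid', 'fd' and 'resp_phys' (the DMA
-+ * address of a struct qbman_eq_response) are assumed to exist:
-+ *
-+ *	struct qbman_eq_desc ed;
-+ *
-+ *	qbman_eq_desc_clear(&ed);
-+ *	qbman_eq_desc_set_no_orp(&ed, 1);
-+ *	qbman_eq_desc_set_fq(&ed, fqid);
-+ *	qbman_eq_desc_set_response(&ed, resp_phys, 0);
-+ *	qbman_eq_desc_set_token(&ed, 1);
-+ *	while (qbman_swp_enqueue(s, &ed, fd) == -EBUSY)
-+ *		;
-+ *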
-+ * (The physical/DMA address is given for hardware's
-+ * benefit, but software should interpret it as a "struct qbman_eq_response"
-+ * data structure.) 'stash' controls whether or not the write to main-memory
-+ * expresses a cache-warming attribute.
-+ */
-+void qbman_eq_desc_set_response(struct qbman_eq_desc *d,
-+				dma_addr_t storage_phys,
-+				int stash);
-+
-+/**
-+ * qbman_eq_desc_set_token() - Set token for the enqueue command
-+ * @d: the enqueue descriptor
-+ * @token: the token to be set.
-+ *
-+ * token is the value that shows up in an enqueue response that can be used to
-+ * detect when the results have been published. The easiest technique is to zero
-+ * result "storage" before issuing an enqueue, and use any non-zero 'token'
-+ * value.
-+ */
-+void qbman_eq_desc_set_token(struct qbman_eq_desc *d, uint8_t token);
-+
-+/**
-+ * Exactly one of the following descriptor "targets" should be set. (Calling any
-+ * one of these will replace the effect of any prior call to one of these.)
-+ * - enqueue to a frame queue
-+ * - enqueue to a queuing destination
-+ * Note that none of these will have any effect if the "action" type has been
-+ * set to "orp_hole" or "orp_nesn".
-+ */
-+/**
-+ * qbman_eq_desc_set_fq() - Set Frame Queue id for the enqueue command
-+ * @d: the enqueue descriptor
-+ * @fqid: the id of the frame queue to be enqueued.
-+ */
-+void qbman_eq_desc_set_fq(struct qbman_eq_desc *d, uint32_t fqid);
-+
-+/**
-+ * qbman_eq_desc_set_qd() - Set Queuing Destination for the enqueue command.
-+ * @d: the enqueue descriptor
-+ * @qdid: the id of the queuing destination to be enqueued.
-+ * @qd_bin: the queuing destination bin
-+ * @qd_prio: the queuing destination priority.
-+ */
-+void qbman_eq_desc_set_qd(struct qbman_eq_desc *d, uint32_t qdid,
-+			  uint32_t qd_bin, uint32_t qd_prio);
-+
-+/**
-+ * qbman_eq_desc_set_eqdi() - enable/disable EQDI interrupt
-+ * @d: the enqueue descriptor
-+ * @enable: boolean to enable/disable EQDI
-+ *
-+ * Determines whether or not the portal's EQDI interrupt source should be
-+ * asserted after the enqueue command is completed.
-+ */
-+void qbman_eq_desc_set_eqdi(struct qbman_eq_desc *d, int enable);
-+
-+/**
-+ * qbman_eq_desc_set_dca() - Set DCA mode in the enqueue command.
-+ * @d: the enqueue descriptor.
-+ * @enable: enable/disable DCA mode.
-+ * @dqrr_idx: DCAP_CI, the DCAP consumer index.
-+ * @park: determines whether to park the FQ or not.
-+ *
-+ * Determines whether or not a portal DQRR entry should be consumed once the
-+ * enqueue command is completed. (And if so, and the DQRR entry corresponds to a
-+ * held-active (order-preserving) FQ, whether the FQ should be parked instead of
-+ * being rescheduled.)
-+ */
-+void qbman_eq_desc_set_dca(struct qbman_eq_desc *d, int enable,
-+			   uint32_t dqrr_idx, int park);
-+
-+/**
-+ * qbman_swp_enqueue() - Issue an enqueue command.
-+ * @s: the software portal used for enqueue.
-+ * @d: the enqueue descriptor.
-+ * @fd: the frame descriptor to be enqueued.
-+ *
-+ * Please note that 'fd' should only be NULL if the "action" of the
-+ * descriptor is "orp_hole" or "orp_nesn".
-+ *
-+ * Return 0 for a successful enqueue, -EBUSY if the EQCR is not ready.
-+ */
-+int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
-+		      const struct qbman_fd *fd);
-+
-+/* TODO:
-+ * qbman_swp_enqueue_thresh() - Set threshold for EQRI interrupt.
-+ * @s: the software portal.
-+ * @thresh: the threshold to trigger the EQRI interrupt.
-+ *
-+ * An EQRI interrupt can be generated when the fill-level of EQCR falls below
-+ * the 'thresh' value set here. Setting thresh==0 (the default) disables.
-+ */
-+int qbman_swp_enqueue_thresh(struct qbman_swp *s, unsigned int thresh);
-+
-+ /*******************/
-+ /* Buffer releases */
-+ /*******************/
-+/**
-+ * struct qbman_release_desc - The structure for buffer release descriptor
-+ * @dont_manipulate_directly: the 32bit data to represent the whole
-+ * possible settings of qbman release descriptor.
-+ */
-+struct qbman_release_desc {
-+	uint32_t dont_manipulate_directly[1];
-+};
-+
-+/**
-+ * qbman_release_desc_clear() - Clear the contents of a descriptor to
-+ * default/starting state.
-+ * @d: the qbman release descriptor.
-+ */
-+void qbman_release_desc_clear(struct qbman_release_desc *d);
-+
-+/**
-+ * qbman_release_desc_set_bpid() - Set the ID of the buffer pool to release to
-+ * @d: the qbman release descriptor.
-+ * @bpid: the buffer pool id.
-+ */
-+void qbman_release_desc_set_bpid(struct qbman_release_desc *d, uint32_t bpid);
-+
-+/**
-+ * qbman_release_desc_set_rcdi() - Determines whether or not the portal's RCDI
-+ * interrupt source should be asserted after the release command is completed.
-+ * @d: the qbman release descriptor.
-+ * @enable: enable/disable the RCDI interrupt.
-+ */
-+void qbman_release_desc_set_rcdi(struct qbman_release_desc *d, int enable);
-+
-+/**
-+ * qbman_swp_release() - Issue a buffer release command.
-+ * @s: the software portal object.
-+ * @d: the release descriptor.
-+ * @buffers: a pointer to the buffer address(es) to be released.
-+ * @num_buffers: number of buffers to be released, must be less than 8.
-+ *
-+ * Return 0 for success, -EBUSY if the release command ring is not ready.
-+ */
-+int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d,
-+		      const uint64_t *buffers, unsigned int num_buffers);
-+
-+/* TODO:
-+ * qbman_swp_release_thresh() - Set threshold for RCRI interrupt
-+ * @s: the software portal.
-+ * @thresh: the threshold.
-+ * An RCRI interrupt can be generated when the fill-level of RCR falls below
-+ * the 'thresh' value set here. Setting thresh==0 (the default) disables.
-+ */
-+int qbman_swp_release_thresh(struct qbman_swp *s, unsigned int thresh);
-+
-+ /*******************/
-+ /* Buffer acquires */
-+ /*******************/
-+/**
-+ * qbman_swp_acquire() - Issue a buffer acquire command.
-+ * @s: the software portal object.
-+ * @bpid: the buffer pool index.
-+ * @buffers: a pointer to the acquired buffer address(es).
-+ * @num_buffers: number of buffers to be acquired, must be less than 8.
-+ *
-+ * Return the number of buffers acquired on success, or a negative error code
-+ * if the acquire command fails.
-+ */
-+int qbman_swp_acquire(struct qbman_swp *s, uint32_t bpid, uint64_t *buffers,
-+		      unsigned int num_buffers);
-+
-+ /*****************/
-+ /* FQ management */
-+ /*****************/
-+/**
-+ * qbman_swp_fq_schedule() - Move the fq to the scheduled state.
-+ * @s: the software portal object.
-+ * @fqid: the index of frame queue to be scheduled.
-+ *
-+ * There are a couple of different ways that a FQ can end up in the parked
-+ * state; this schedules it.
-+ *
-+ * Return 0 for success, or negative error code for failure.
-+ */
-+int qbman_swp_fq_schedule(struct qbman_swp *s, uint32_t fqid);
-+
-+/**
-+ * qbman_swp_fq_force() - Force the FQ to fully scheduled state.
-+ * @s: the software portal object.
-+ * @fqid: the index of frame queue to be forced.
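-+ *
-+ * (An aside on the buffer-pool commands above; an editorial sketch only,
-+ * assuming an initialised portal 's', a pool id 'bpid', and 'bufs' holding
-+ * up to 7 buffer addresses:
-+ *
-+ *	struct qbman_release_desc rd;
-+ *	int ret;
-+ *
-+ *	qbman_release_desc_clear(&rd);
-+ *	qbman_release_desc_set_bpid(&rd, bpid);
-+ *	while (qbman_swp_release(s, &rd, bufs, 7) == -EBUSY)
-+ *		;
-+ *	ret = qbman_swp_acquire(s, bpid, bufs, 7);
-+ *
-+ * where a positive 'ret' is the number of buffers actually acquired.)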
-+ *
-+ * Force eligible will force a tentatively-scheduled FQ to be fully-scheduled
-+ * and thus be available for selection by any channel-dequeuing behaviour (push
-+ * or pull). If the FQ is subsequently "dequeued" from the channel and is still
-+ * empty at the time this happens, the resulting dq_entry will have no FD.
-+ * (qbman_result_DQ_fd() will return NULL.)
-+ *
-+ * Return 0 for success, or negative error code for failure.
-+ */
-+int qbman_swp_fq_force(struct qbman_swp *s, uint32_t fqid);
-+
-+/**
-+ * These functions change the FQ flow-control state between XON/XOFF. (The
-+ * default is XON.) This setting doesn't affect enqueues to the FQ, just
-+ * dequeues. XOFF FQs will remain in the tentatively-scheduled state, even when
-+ * non-empty, meaning they won't be selected for scheduled dequeuing. If a FQ is
-+ * changed to XOFF after it had already become truly-scheduled to a channel, and
-+ * a pull dequeue of that channel occurs that selects that FQ for dequeuing,
-+ * then the resulting dq_entry will have no FD. (qbman_result_DQ_fd() will
-+ * return NULL.)
-+ */
-+/**
-+ * qbman_swp_fq_xon() - XON the frame queue.
-+ * @s: the software portal object.
-+ * @fqid: the index of frame queue.
-+ *
-+ * Return 0 for success, or negative error code for failure.
-+ */
-+int qbman_swp_fq_xon(struct qbman_swp *s, uint32_t fqid);
-+/**
-+ * qbman_swp_fq_xoff() - XOFF the frame queue.
-+ * @s: the software portal object.
-+ * @fqid: the index of frame queue.
-+ *
-+ * Return 0 for success, or negative error code for failure.
-+ */
-+int qbman_swp_fq_xoff(struct qbman_swp *s, uint32_t fqid);
-+
-+ /**********************/
-+ /* Channel management */
-+ /**********************/
-+
-+/**
-+ * If the user has been allocated a channel object that is going to generate
-+ * CDANs to another channel, then these functions will be necessary.
-+ * CDAN-enabled channels only generate a single CDAN notification, after which
-+ * they need to be re-enabled before they'll generate another. (The idea is
-+ * that pull dequeuing will occur in reaction to the CDAN, followed by a
-+ * re-enable step.) Each function generates a distinct command to hardware, so a
-+ * combination function is provided if the user wishes to modify the "context"
-+ * (which shows up in each CDAN message) each time they re-enable, as a single
-+ * command to hardware.
-+ */
-+
-+/**
-+ * qbman_swp_CDAN_set_context() - Set CDAN context
-+ * @s: the software portal object.
-+ * @channelid: the channel index.
-+ * @ctx: the context to be set in CDAN.
-+ *
-+ * Return 0 for success, or negative error code for failure.
-+ */
-+int qbman_swp_CDAN_set_context(struct qbman_swp *s, uint16_t channelid,
-+			       uint64_t ctx);
-+
-+/**
-+ * qbman_swp_CDAN_enable() - Enable CDAN for the channel.
-+ * @s: the software portal object.
-+ * @channelid: the index of the channel to generate CDAN.
-+ *
-+ * Return 0 for success, or negative error code for failure.
-+ */
-+int qbman_swp_CDAN_enable(struct qbman_swp *s, uint16_t channelid);
-+
-+/**
-+ * qbman_swp_CDAN_disable() - disable CDAN for the channel.
-+ * @s: the software portal object.
-+ * @channelid: the index of the channel to generate CDAN.
-+ *
-+ * Return 0 for success, or negative error code for failure.
-+ */
-+int qbman_swp_CDAN_disable(struct qbman_swp *s, uint16_t channelid);
-+
-+/**
-+ * qbman_swp_CDAN_set_context_enable() - Set CDAN context and enable CDAN
-+ * @s: the software portal object.
-+ * @channelid: the index of the channel to generate CDAN.
-+ * @ctx: the context set in CDAN. -+ * -+ * Return 0 for success, or negative error code for failure. -+ */ -+int qbman_swp_CDAN_set_context_enable(struct qbman_swp *s, uint16_t channelid, -+ uint64_t ctx); -+int qbman_swp_fill_ring(struct qbman_swp *s, -+ const struct qbman_eq_desc *d, -+ const struct qbman_fd *fd, -+ uint8_t burst_index); -+int qbman_swp_flush_ring(struct qbman_swp *s); -+void qbman_sync(void); -+int qbman_swp_send_multiple(struct qbman_swp *s, -+ const struct qbman_eq_desc *d, -+ const struct qbman_fd *fd, -+ int frames_to_send); -+ -+int qbman_check_command_complete(struct qbman_swp *s, -+ const struct qbman_result *dq); -+#endif /* !_FSL_QBMAN_PORTAL_H */ -diff --git a/drivers/net/dpaa2/rte_eth_dpaa2_pvt.h b/drivers/net/dpaa2/rte_eth_dpaa2_pvt.h -new file mode 100644 -index 0000000..bd5d4d5 ---- /dev/null -+++ b/drivers/net/dpaa2/rte_eth_dpaa2_pvt.h -@@ -0,0 +1,330 @@ -+/*- -+ * BSD LICENSE -+ * -+ * Copyright (c) 2014 Freescale Semiconductor, Inc. All rights reserved. -+ * All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in -+ * the documentation and/or other materials provided with the -+ * distribution. -+ * * Neither the name of Freescale Semiconductor nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ */
-+
-+#ifndef _RTE_ETH_DPAA2_PVT_H_
-+#define _RTE_ETH_DPAA2_PVT_H_
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+typedef uint64_t dma_addr_t;
-+
-+#define FALSE 0
-+#define TRUE 1
-+#ifndef false
-+#define false FALSE
-+#endif
-+#ifndef true
-+#define true TRUE
-+#endif
-+#define lower_32_bits(x) ((uint32_t)(x))
-+#define upper_32_bits(x) ((uint32_t)(((x) >> 16) >> 16))
-+
-+#ifndef ETH_ADDR_LEN
-+#define ETH_ADDR_LEN 6
-+#endif
-+#ifndef ETH_VLAN_HLEN
-+#define ETH_VLAN_HLEN 4 /**< VLAN header length */
-+#endif
-+
-+#define NUM_MAX_RECV_FRAMES 16
-+
-+#define MC_PORTAL_INDEX 0
-+#define NUM_DPIO_REGIONS 2
-+#define NUM_DQS_PER_QUEUE 2
-+#define MC_PORTALS_BASE_PADDR 0x00080C000000ULL
-+#define MC_PORTAL_STRIDE 0x10000
-+#define MC_PORTAL_SIZE 64
-+#define MC_PORTAL_ID_TO_PADDR(portal_id) \
-+(MC_PORTALS_BASE_PADDR + (portal_id) * MC_PORTAL_STRIDE)
-+
-+struct dpaa2_dpio_dev {
-+	TAILQ_ENTRY(dpaa2_dpio_dev) next; /**< Pointer to Next device instance */
-+	uint16_t index; /**< Index of an instance in the list */
-+	rte_atomic16_t ref_count; /**< How many thread contexts are sharing this. */
-+	struct fsl_mc_io *dpio; /**< Handle to DPIO portal object */
-+	uint16_t token;
-+	struct qbman_swp *sw_portal; /**< SW portal object */
-+	const struct qbman_result *dqrr[4]; /**< DQRR Entry for this SW portal */
-+	pthread_mutex_t lock; /**< Required when Portal is shared */
-+	void *mc_portal; /**< MC Portal for configuring this device */
-+	uintptr_t qbman_portal_ce_paddr; /**< Physical address of Cache Enabled Area */
-+	uintptr_t ce_size; /**< Size of the CE region */
-+	uintptr_t qbman_portal_ci_paddr; /**< Physical address of Cache Inhibit Area */
-+	uintptr_t ci_size; /**< Size of the CI region */
-+	void *intr_handle;
-+	int32_t vfio_fd; /**< File descriptor received via VFIO */
-+	int32_t hw_id; /**< A unique ID of this DPIO device instance */
-+};
-+
-+struct queue_storage_info_t {
-+	struct qbman_result *dq_storage[NUM_DQS_PER_QUEUE];
-+	struct qbman_result *active_dqs;
-+	int toggle;
-+};
-+
-+struct thread_io_info_t {
-+	struct dpaa2_dpio_dev *dpio_dev;
-+	struct dpaa2_dpio_dev *sec_dpio_dev;
-+	struct qbman_result *global_active_dqs;
-+};
-+
-+/*! Global per thread DPIO portal */
-+extern __thread struct thread_io_info_t thread_io_info;
-+/*!
Global MCP list */ -+extern void *(*mcp_ptr_list); -+ -+/* Refer to Table 7-3 in SEC BG */ -+struct qbman_fle { -+ uint32_t addr_lo; -+ uint32_t addr_hi; -+ uint32_t length; -+ /* FMT must be 00, MSB is final bit */ -+ uint32_t fin_bpid_offset; -+ uint32_t frc; -+ uint32_t reserved[3]; /* Not used currently */ -+}; -+ -+/* Maximum release/acquire from QBMAN */ -+#define DPAA2_MBUF_MAX_ACQ_REL 7 -+ -+#define MAX_BPID 256 -+ -+/*Macros to define operations on FD*/ -+#define DPAA2_SET_FD_ADDR(fd, addr) \ -+ fd->simple.addr_lo = lower_32_bits((uint64_t)addr); \ -+ fd->simple.addr_hi = upper_32_bits((uint64_t)addr); -+#define DPAA2_SET_FD_LEN(fd, length) fd->simple.len = length -+#define DPAA2_SET_FD_BPID(fd, bpid) fd->simple.bpid_offset |= bpid; -+#define DPAA2_SET_FD_IVP(fd) ((fd->simple.bpid_offset |= 0x00004000)) -+#define DPAA2_SET_FD_OFFSET(fd, offset) (fd->simple.bpid_offset |= (uint32_t)(offset) << 16); -+#define DPAA2_SET_FD_INTERNAL_JD(fd, len) fd->simple.frc = (0x80000000 | (len)); -+#define DPAA2_SET_FD_FRC(fd, frc) fd->simple.frc = frc; -+#define DPAA2_RESET_FD_CTRL(fd) fd->simple.ctrl = 0; -+ -+#define DPAA2_SET_FD_ASAL(fd, asal) (fd->simple.ctrl |= (asal << 16)) -+#define DPAA2_SET_FD_FLC(fd, addr) \ -+ fd->simple.flc_lo = lower_32_bits((uint64_t)addr); \ -+ fd->simple.flc_hi = upper_32_bits((uint64_t)addr); -+#define DPAA2_SET_FLE_INTERNAL_JD(fle, len) fle->frc = (0x80000000 | (len)); -+#define DPAA2_GET_FLE_ADDR(fle) \ -+ (uint64_t)((((uint64_t)(fle->addr_hi)) << 32) + fle->addr_lo) -+#define DPAA2_SET_FLE_ADDR(fle, addr) \ -+ fle->addr_lo = lower_32_bits((uint64_t)addr); \ -+ fle->addr_hi = upper_32_bits((uint64_t)addr); -+#define DPAA2_SET_FLE_OFFSET(fle, offset) (fle)->fin_bpid_offset |= (uint32_t)(offset) << 16; -+#define DPAA2_SET_FLE_BPID(fle, bpid) (fle)->fin_bpid_offset |= (uint64_t)bpid; -+#define DPAA2_GET_FLE_BPID(fle, bpid) (fle->fin_bpid_offset & 0x000000ff) -+#define DPAA2_SET_FLE_FIN(fle) fle->fin_bpid_offset |= (uint64_t)1 << 31; -+#define DPAA2_SET_FLE_IVP(fle) (((fle)->fin_bpid_offset |= 0x00004000)) -+#define DPAA2_SET_FD_COMPOUND_FMT(fd) \ -+ fd->simple.bpid_offset |= (uint32_t)1 << 28; -+#define DPAA2_GET_FD_ADDR(fd) \ -+ (uint64_t)((((uint64_t)(fd->simple.addr_hi)) << 32) + fd->simple.addr_lo) -+#define DPAA2_GET_FD_LEN(fd) (fd->simple.len) -+#define DPAA2_GET_FD_BPID(fd) ((fd->simple.bpid_offset & 0x00003FFF)) -+#define DPAA2_GET_FD_IVP(fd) ((fd->simple.bpid_offset & 0x00004000) >> 14) -+#define DPAA2_GET_FD_OFFSET(fd) ((fd->simple.bpid_offset & 0x0FFF0000) >> 16) -+#define DPAA2_GET_FD_FRC(fd) (fd->simple.frc) -+#define DPAA2_GET_FD_FLC(fd) \ -+ (uint64_t)((((uint64_t)(fd->simple.flc_hi)) << 32) + fd->simple.flc_lo) -+ -+#define DPAA2_SET_FLE_SG_EXT(fle) fle->fin_bpid_offset |= (uint64_t)1 << 29; -+#define DPAA2_IS_SET_FLE_SG_EXT(fle) \ -+ (fle->fin_bpid_offset & ((uint64_t)1 << 29)) ? 
1 : 0
-+
-+#define DPAA2_INLINE_MBUF_FROM_BUF(buf, meta_data_size) \
-+	((struct rte_mbuf *)((uint64_t)buf - meta_data_size))
-+#define DPAA2_BUF_FROM_INLINE_MBUF(mbuf, meta_data_size) \
-+	((uint8_t *)((uint64_t)mbuf + meta_data_size))
-+
-+#define DPAA2_ASAL_VAL (DPAA2_MBUF_HW_ANNOTATION / 64)
-+
-+/* Macros to define QBMAN enqueue options */
-+#define DPAA2_ETH_EQ_DISABLE 0 /*!< Don't enqueue the frame */
-+#define DPAA2_ETH_EQ_RESP_ON_SUCC 1 /*!< Enqueue the frame with
-+					response after success */
-+#define DPAA2_ETH_EQ_RESP_ON_FAIL 2 /*!< Enqueue the frame with
-+					response after failure */
-+#define DPAA2_ETH_EQ_NO_RESP 3 /*!< Enqueue the frame without
-+					response */
-+/* Only enqueue error responses will be
-+ * pushed on FQID_ERR of the enqueue FQ */
-+#define DPAA2_EQ_RESP_ERR_FQ 0
-+/* All enqueue responses will be pushed on the address
-+ * set with qbman_eq_desc_set_response */
-+#define DPAA2_EQ_RESP_ALWAYS 1
-+
-+#define DPAA2_MAX_BUF_POOLS 8
-+
-+struct dpbp_node {
-+	struct dpbp_node *next;
-+	struct fsl_mc_io dpbp;
-+	uint16_t token;
-+	int dpbp_id;
-+};
-+
-+struct buf_pool_cfg {
-+	void *addr; /*!< The address from where DPAA2 will carve out the
-+		     * buffers. 'addr' should be 'NULL' if the user wants
-+		     * to create buffers from the memory which the user
-+		     * asked DPAA2 to reserve during 'nadk init' */
-+	phys_addr_t phys_addr; /*!< corresponding physical address
-+				* of the memory provided in addr */
-+	uint32_t num; /*!< number of buffers */
-+	uint32_t size; /*!< size of each buffer. 'size' should include
-+			* any headroom to be reserved and alignment */
-+	uint16_t align; /*!< Buffer alignment (in bytes) */
-+	uint16_t bpid; /*!< The buffer pool id. This will be filled
-+			* in by DPAA2 for each buffer pool */
-+};
-+
-+struct buf_pool {
-+	uint32_t size;
-+	uint32_t num_bufs;
-+	uint16_t bpid;
-+	uint8_t *h_bpool_mem;
-+	struct rte_mempool *mp;
-+	struct dpbp_node *dpbp_node;
-+};
-+
-+/*!
-+ * Buffer pool list configuration structure. The user needs to give DPAA2 a
-+ * valid number of 'num_buf_pools'.
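-+ *
-+ * For example (editorial sketch; the values are illustrative only), one pool
-+ * of 1024 buffers of 2048 bytes, 64-byte aligned, carved from DPAA2-reserved
-+ * memory ('addr' left NULL), would be described as:
-+ *
-+ *	struct buf_pool_cfg cfg = {
-+ *		.addr = NULL,
-+ *		.num = 1024,
-+ *		.size = 2048,
-+ *		.align = 64,
-+ *	};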
-+ */
-+struct dpaa2_bp_list_cfg {
-+	struct buf_pool_cfg buf_pool; /* Configuration
-+				       * of each buffer pool */
-+};
-+
-+struct dpaa2_bp_list {
-+	struct dpaa2_bp_list *next;
-+	struct rte_mempool *mp;
-+	struct buf_pool buf_pool;
-+};
-+
-+struct bp_info {
-+	uint32_t meta_data_size;
-+	uint32_t bpid;
-+	struct dpaa2_bp_list *bp_list;
-+};
-+
-+#define mempool_to_bpinfo(mp) ((struct bp_info *)mp->pool_data)
-+#define mempool_to_bpid(mp) ((mempool_to_bpinfo(mp))->bpid)
-+
-+extern struct dpaa2_bp_list *h_bp_list;
-+
-+/* todo - this is costly, need to write a fast conversion routine */
-+static inline void *dpaa2_mem_ptov(phys_addr_t paddr)
-+{
-+	const struct rte_memseg *memseg = rte_eal_get_physmem_layout();
-+	int i;
-+
-+	for (i = 0; i < RTE_MAX_MEMSEG && memseg[i].addr_64 != 0; i++) {
-+		if (paddr >= memseg[i].phys_addr &&
-+		    (char *)paddr < (char *)memseg[i].phys_addr + memseg[i].len)
-+			return (void *)(memseg[i].addr_64 + (paddr - memseg[i].phys_addr));
-+	}
-+	return NULL;
-+}
-+
-+static inline phys_addr_t dpaa2_mem_vtop(uint64_t vaddr)
-+{
-+	const struct rte_memseg *memseg = rte_eal_get_physmem_layout();
-+	int i;
-+
-+	for (i = 0; i < RTE_MAX_MEMSEG && memseg[i].addr_64 != 0; i++) {
-+		if (vaddr >= memseg[i].addr_64 &&
-+		    vaddr < memseg[i].addr_64 + memseg[i].len)
-+			return memseg[i].phys_addr + (vaddr - memseg[i].addr_64);
-+	}
-+	return (phys_addr_t)(NULL);
-+}
-+
-+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-+/*
-+ * When we are using physical addresses as IO virtual addresses,
-+ * we call the conversion routines dpaa2_mem_vtop & dpaa2_mem_ptov wherever
-+ * required. These routines are called with the help of the macros below.
-+ */
-+
-+#define DPAA2_MBUF_VADDR_TO_IOVA(mbuf) (mbuf->buf_physaddr)
-+#define DPAA2_OP_VADDR_TO_IOVA(op) (op->phys_addr)
-+
-+/**
-+ * macro to convert a virtual address to an IOVA
-+ */
-+#define DPAA2_VADDR_TO_IOVA(_vaddr) dpaa2_mem_vtop((uint64_t)(_vaddr))
-+
-+/**
-+ * macro to convert an IOVA to a virtual address
-+ */
-+#define DPAA2_IOVA_TO_VADDR(_iova) dpaa2_mem_ptov((phys_addr_t)(_iova))
-+
-+/**
-+ * macro to modify memory containing a virtual address into the
-+ * corresponding IOVA
-+ */
-+#define DPAA2_MODIFY_VADDR_TO_IOVA(_mem, _type) \
-+	{_mem = (_type)(dpaa2_mem_vtop((uint64_t)(_mem))); }
-+
-+/**
-+ * macro to modify memory containing an IOVA into the corresponding
-+ * virtual address
-+ */
-+#define DPAA2_MODIFY_IOVA_TO_VADDR(_mem, _type) \
-+	{_mem = (_type)(dpaa2_mem_ptov((phys_addr_t)(_mem))); }
-+
-+#else
-+#define DPAA2_MBUF_VADDR_TO_IOVA(mbuf) (mbuf->buf_addr)
-+#define DPAA2_OP_VADDR_TO_IOVA(op) (op)
-+
-+#define DPAA2_VADDR_TO_IOVA(_vaddr) (_vaddr)
-+#define DPAA2_IOVA_TO_VADDR(_iova) (_iova)
-+#define DPAA2_MODIFY_VADDR_TO_IOVA(_mem, _type)
-+#define DPAA2_MODIFY_IOVA_TO_VADDR(_mem, _type)
-+#endif
-+
-+/* Function definitions for Mempool operations */
-+int hw_mbuf_init(struct rte_mempool *mp, void *_m);
-+int hw_mbuf_free_bulk(struct rte_mempool *pool, void * const *obj_table,
-+		      unsigned n);
-+int hw_mbuf_alloc_bulk(struct rte_mempool *pool, void **obj_table,
-+		       unsigned count);
-+int hw_mbuf_create_pool(struct rte_mempool *mp);
-+unsigned hw_mbuf_get_count(const struct rte_mempool *mp);
-+
-+#endif
-diff --git a/drivers/net/dpaa2/rte_eth_dpbp.c b/drivers/net/dpaa2/rte_eth_dpbp.c
-new file mode 100644
-index 0000000..a4d29c9
---- /dev/null
-+++ b/drivers/net/dpaa2/rte_eth_dpbp.c
-@@ -0,0 +1,377 @@
-+/*-
-+ * BSD LICENSE
-+ *
-+ * Copyright (c) 2014 Freescale Semiconductor, Inc. All rights reserved.
-+ * All rights reserved.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ *
-+ * * Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * * Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in
-+ * the documentation and/or other materials provided with the
-+ * distribution.
-+ * * Neither the name of Freescale Semiconductor nor the names of its
-+ * contributors may be used to endorse or promote products derived
-+ * from this software without specific prior written permission.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+ */
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#include "rte_pci.h"
-+#include "rte_memzone.h"
-+
-+#include "rte_eth_dpaa2_pvt.h"
-+#include "fsl_qbman_portal.h"
-+#include
-+
-+#include
-+#include "dpaa2_logs.h"
-+
-+static struct dpbp_node *g_dpbp_list;
-+static struct dpbp_node *avail_dpbp;
-+
-+struct bp_info bpid_info[MAX_BPID];
-+
-+struct dpaa2_bp_list *h_bp_list;
-+
-+int
-+dpaa2_create_dpbp_device(
-+		int dpbp_id)
-+{
-+	struct dpbp_node *dpbp_node;
-+	int ret;
-+
-+	/* Allocate DPAA2 dpbp handle */
-+	dpbp_node = (struct dpbp_node *)malloc(sizeof(struct dpbp_node));
-+	if (!dpbp_node) {
-+		PMD_DRV_LOG(ERR, "Memory allocation failed for DPBP Device\n");
-+		return -1;
-+	}
-+
-+	/* Open the dpbp object */
-+	dpbp_node->dpbp.regs = mcp_ptr_list[MC_PORTAL_INDEX];
-+	ret = dpbp_open(&dpbp_node->dpbp, CMD_PRI_LOW, dpbp_id, &dpbp_node->token);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Resource allocation failure with err code: %d",
-+			ret);
-+		free(dpbp_node);
-+		return -1;
-+	}
-+
-+	/* Clean the device first */
-+	ret = dpbp_reset(&dpbp_node->dpbp, CMD_PRI_LOW, dpbp_node->token);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Failure cleaning dpbp device with "
-+			"error code %d\n", ret);
-+		return -1;
-+	}
-+
-+	dpbp_node->dpbp_id = dpbp_id;
-+	/* Add the dpbp handle into the global list */
-+	dpbp_node->next = g_dpbp_list;
-+	g_dpbp_list = dpbp_node;
-+	avail_dpbp = g_dpbp_list;
-+
-+	PMD_DRV_LOG(INFO, "Buffer resource initialized");
-+
-+	return 0;
-+}
-+
-+int hw_mbuf_create_pool(struct rte_mempool *mp)
-+{
-+	struct dpaa2_bp_list *bp_list;
-+	struct dpbp_attr dpbp_attr;
-+	uint32_t bpid;
-+	int ret;
-+
-+	if (!avail_dpbp) {
-+		PMD_DRV_LOG(ERR, "DPAA2 resources not available\n");
-+		return -1;
-+	}
-+
-+	ret = dpbp_enable(&avail_dpbp->dpbp, CMD_PRI_LOW, avail_dpbp->token);
-+	if (ret != 0) {
-+		PMD_DRV_LOG(ERR, "Resource enable failure with "
-+			"err code: %d\n", ret);
-+		return -1;
-+	}
-+
-+	ret = dpbp_get_attributes(&avail_dpbp->dpbp, CMD_PRI_LOW,
-+				  avail_dpbp->token, &dpbp_attr);
-+	if (ret != 0) {
-+		PMD_DRV_LOG(ERR, "Resource read failure with "
-+			"err code: %d\n", ret);
-+		ret = dpbp_disable(&avail_dpbp->dpbp, CMD_PRI_LOW,
-+				   avail_dpbp->token);
-+		return -1;
-+	}
-+
-+	/* Allocate the bp_list which will be added into global_bp_list */
-+	bp_list = (struct dpaa2_bp_list *)malloc(sizeof(struct dpaa2_bp_list));
-+	if (!bp_list) {
-+		PMD_DRV_LOG(ERR, "No heap memory available\n");
-+		return -1;
-+	}
-+
-+	/* Set parameters of buffer pool list */
-+	bp_list->buf_pool.num_bufs = mp->size;
-+	bp_list->buf_pool.size = mp->elt_size
-+			- sizeof(struct rte_mbuf) - rte_pktmbuf_priv_size(mp);
-+	bp_list->buf_pool.bpid = dpbp_attr.bpid;
-+	bp_list->buf_pool.h_bpool_mem = NULL;
-+	bp_list->buf_pool.mp = mp;
-+	bp_list->buf_pool.dpbp_node = avail_dpbp;
-+	bp_list->next = h_bp_list;
-+
-+	bpid = dpbp_attr.bpid;
-+
-+	/* Advance to the next available DPBP */
-+	avail_dpbp = avail_dpbp->next;
-+
-+	bpid_info[bpid].meta_data_size = sizeof(struct rte_mbuf)
-+				+ rte_pktmbuf_priv_size(mp);
-+	bpid_info[bpid].bp_list = bp_list;
-+	bpid_info[bpid].bpid = bpid;
-+
-+	mp->pool_data = (void *)&bpid_info[bpid];
-+
-+	PMD_DRV_LOG(INFO, "BP List created for bpid = %d\n", dpbp_attr.bpid);
-+
-+	h_bp_list = bp_list;
-+	/* TODO: Replace with mp->pool_data->flags after creating appropriate
-+	 * pool_data structure
-+	 */
-+	mp->flags |= MEMPOOL_F_HW_PKT_POOL;
-+	return 0;
-+}
-+
-+void hw_mbuf_free_pool(struct rte_mempool *mp __rte_unused)
-+{
-+	/* TODO:
-+	 * 1. Release bp_list memory allocation
-+	 * 2. opposite of dpbp_enable()
-+	 *
-+	 */
-+	struct dpaa2_bp_list *bp;
-+
-+	/* Iterate over h_bp_list linked list and release each element */
-+	while (h_bp_list) {
-+		bp = h_bp_list;
-+		h_bp_list = bp->next;
-+
-+		/* TODO: Should be changed to rte_free */
-+		free(bp);
-+	}
-+
-+	PMD_DRV_LOG(DEBUG, "(%s) called\n", __func__);
-+	return;
-+}
-+
-+static inline void dpaa2_mbuf_release(uint64_t buf, uint32_t bpid)
-+{
-+	struct qbman_release_desc releasedesc;
-+	struct qbman_swp *swp;
-+	int ret;
-+
-+	if (!thread_io_info.dpio_dev) {
-+		ret = dpaa2_affine_qbman_swp();
-+		if (ret != 0) {
-+			PMD_DRV_LOG(ERR, "Failed to allocate IO portal");
-+			return;
-+		}
-+	}
-+	swp = thread_io_info.dpio_dev->sw_portal;
-+
-+	/* Create a release descriptor required for releasing
-+	 * buffers into BMAN */
-+	qbman_release_desc_clear(&releasedesc);
-+	qbman_release_desc_set_bpid(&releasedesc, bpid);
-+
-+	do {
-+		/* Release buffer into the BMAN */
-+		ret = qbman_swp_release(swp, &releasedesc, &buf, 1);
-+	} while (ret == -EBUSY);
-+	PMD_TX_FREE_LOG(DEBUG, "Released %p address to BMAN\n", buf);
-+}
-+
-+int hw_mbuf_alloc_bulk(struct rte_mempool *pool,
-+		       void **obj_table, unsigned count)
-+{
-+#ifdef RTE_LIBRTE_DPAA2_DEBUG_DRIVER
-+	static int alloc;
-+#endif
-+	struct qbman_swp *swp;
-+	uint32_t mbuf_size;
-+	uint16_t bpid;
-+	uint64_t bufs[RTE_MEMPOOL_CACHE_MAX_SIZE + 1];
-+	int ret;
-+	unsigned i, n = 0;
-+	struct bp_info *bp_info;
-+
-+	PMD_DRV_LOG_RAW(INFO, "%s\n", __func__);
-+	bp_info = mempool_to_bpinfo(pool);
-+
-+	if (!(bp_info->bp_list)) {
-+		printf("\nDPAA2 buffer pool not configured\n");
-+		return -2;
-+	}
-+
-+	bpid = bp_info->bpid;
-+
-+	if (!thread_io_info.dpio_dev) {
-+		ret = dpaa2_affine_qbman_swp();
-+		if (ret != 0) {
-+			PMD_DRV_LOG(ERR, "Failed to allocate IO portal");
-+			return -1;
-+		}
-+	}
-+	swp = thread_io_info.dpio_dev->sw_portal;
-+
-+	/* if the number of buffers requested is less than 7 */
-+	if (count < DPAA2_MBUF_MAX_ACQ_REL) {
-+		ret = qbman_swp_acquire(swp, bpid, &bufs[n], count);
-+		if (ret <= 0) {
-+			PMD_DRV_LOG(ERR, "Failed to allocate buffers %d", ret);
-+			return -1;
-+		}
-+		n = ret;
-+		goto set_buf;
-+	}
-+
-+	while (n < count) {
-+		ret = 0;
-+		/* Acquire is all-or-nothing, so we drain in 7s,
-+		 * then the remainder.
-+		 */
-+		if ((count - n) > DPAA2_MBUF_MAX_ACQ_REL) {
-+			ret = qbman_swp_acquire(swp, bpid, &bufs[n],
-+						DPAA2_MBUF_MAX_ACQ_REL);
-+			if (ret == DPAA2_MBUF_MAX_ACQ_REL) {
-+				n += ret;
-+			}
-+		} else {
-+			ret = qbman_swp_acquire(swp, bpid, &bufs[n], count - n);
-+			if (ret > 0) {
-+				PMD_DRV_LOG(DEBUG, "Drained buffer: %x",
-+					bufs[n]);
-+				n += ret;
-+			}
-+		}
-+		/* If fewer than the requested number of buffers are available
-+		 * in the pool, qbman_swp_acquire returns 0
-+		 */
-+		if (ret <= 0) {
-+			PMD_DRV_LOG(WARNING, "Buffer acquire failed with "
-+				"err code: %d", ret);
-+			break;
-+		}
-+	}
-+
-+	/* This function either returns expected buffers or error */
-+	if (count != n) {
-+		i = 0;
-+		/* Releasing all buffers allocated */
-+		while (i < n) {
-+			dpaa2_mbuf_release(bufs[i], bpid);
-+			i++;
-+		}
-+		return -1;
-+	}
-+
-+	if (ret < 0 || n == 0) {
-+		PMD_DRV_LOG_RAW(ERR, "Failed to allocate buffers %d", ret);
-+		return -1;
-+	}
-+set_buf:
-+
-+	mbuf_size = sizeof(struct rte_mbuf) + rte_pktmbuf_priv_size(pool);
-+
-+	for (i = 0; i < n; i++) {
-+		DPAA2_MODIFY_IOVA_TO_VADDR(bufs[i], uint64_t);
-+		obj_table[i] = (struct rte_mbuf *)(bufs[i] - mbuf_size);
-+		PMD_DRV_LOG(DEBUG, "Acquired %p address %p from BMAN\n",
-+			(void *)bufs[i], (void *)obj_table[i]);
-+	}
-+
-+#ifdef RTE_LIBRTE_DPAA2_DEBUG_DRIVER
-+	alloc += n;
-+	PMD_DRV_LOG_RAW(INFO, "Total = %d, req = %d, done = %d",
-+		alloc, count, n);
-+#endif
-+	return 0;
-+}
-+
-+int hw_mbuf_free_bulk(struct rte_mempool *pool, void * const *obj_table,
-+		      unsigned n)
-+{
-+	unsigned i;
-+	struct bp_info *bp_info;
-+
-+	PMD_DRV_LOG_RAW(INFO, "%s\n", __func__);
-+
-+	bp_info = mempool_to_bpinfo(pool);
-+	if (!(bp_info->bp_list)) {
-+		PMD_DRV_LOG(INFO, "DPAA2 buffer pool not configured\n");
-+		return -1;
-+	}
-+	/* TODO - optimize it */
-+	for (i = 0; i < n; i++) {
-+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-+		dpaa2_mbuf_release(
-+			(uint64_t)rte_mempool_virt2phy(pool, obj_table[i])
-+			+ bp_info->meta_data_size, bp_info->bpid);
-+#else
-+		dpaa2_mbuf_release((uint64_t)obj_table[i]
-+			+ bp_info->meta_data_size, bp_info->bpid);
-+#endif
-+
-+	}
-+
-+	return 0;
-+}
-+
-+unsigned hw_mbuf_get_count(const struct rte_mempool *mp __rte_unused)
-+{
-+	/* TODO: incomplete */
-+	return 0;
-+}
-+
-+struct rte_mempool_ops dpaa2_mpool_ops = {
-+	.name = "dpaa2",
-+	.alloc = hw_mbuf_create_pool,
-+	.free = hw_mbuf_free_pool,
-+	.enqueue = hw_mbuf_free_bulk,
-+	.dequeue = hw_mbuf_alloc_bulk,
-+	.get_count = hw_mbuf_get_count,
-+};
-+
-+MEMPOOL_REGISTER_OPS(dpaa2_mpool_ops);
-diff --git a/drivers/net/dpaa2/rte_eth_dpio.c b/drivers/net/dpaa2/rte_eth_dpio.c
-new file mode 100644
-index 0000000..2d06923
---- /dev/null
-+++ b/drivers/net/dpaa2/rte_eth_dpio.c
-@@ -0,0 +1,336 @@
-+/*-
-+ * BSD LICENSE
-+ *
-+ * Copyright (c) 2014 Freescale Semiconductor, Inc. All rights reserved.
-+ * All rights reserved.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ *
-+ * * Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ * * Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in
-+ * the documentation and/or other materials provided with the
-+ * distribution.
-+ * * Neither the name of Freescale Semiconductor nor the names of its
-+ * contributors may be used to endorse or promote products derived
-+ * from this software without specific prior written permission.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+ */
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#include "rte_pci.h"
-+#include "rte_memzone.h"
-+#include
-+
-+#include "rte_eth_dpaa2_pvt.h"
-+#include "fsl_qbman_portal.h"
-+#include
-+
-+#include
-+#include "dpaa2_logs.h"
-+
-+#define NUM_HOST_CPUS RTE_MAX_LCORE
-+
-+__thread struct thread_io_info_t thread_io_info;
-+
-+TAILQ_HEAD(dpio_device_list, dpaa2_dpio_dev);
-+static struct dpio_device_list *dpio_dev_list; /*!< DPIO device list */
-+static uint32_t io_space_count;
-+
-+/* Stashing Macros */
-+#define DPAA2_CORE_CLUSTER_BASE 0x04
-+#define DPAA2_CORE_CLUSTER_FIRST (DPAA2_CORE_CLUSTER_BASE + 0)
-+#define DPAA2_CORE_CLUSTER_SECOND (DPAA2_CORE_CLUSTER_BASE + 1)
-+#define DPAA2_CORE_CLUSTER_THIRD (DPAA2_CORE_CLUSTER_BASE + 2)
-+#define DPAA2_CORE_CLUSTER_FOURTH (DPAA2_CORE_CLUSTER_BASE + 3)
-+
-+#define DPAA2_CORE_CLUSTER_GET(sdest, cpu_id) \
-+do { \
-+	if (cpu_id == 0 || cpu_id == 1) \
-+		sdest = DPAA2_CORE_CLUSTER_FIRST; \
-+	else if (cpu_id == 2 || cpu_id == 3) \
-+		sdest = DPAA2_CORE_CLUSTER_SECOND; \
-+	else if (cpu_id == 4 || cpu_id == 5) \
-+		sdest = DPAA2_CORE_CLUSTER_THIRD; \
-+	else \
-+		sdest = DPAA2_CORE_CLUSTER_FOURTH; \
-+} while (0)
-+
-+static int
-+configure_dpio_qbman_swp(struct dpaa2_dpio_dev *dpio_dev)
-+{
-+	struct qbman_swp_desc p_des;
-+	struct dpio_attr attr;
-+
-+	dpio_dev->dpio = malloc(sizeof(struct fsl_mc_io));
-+	if (!dpio_dev->dpio) {
-+		PMD_DRV_LOG(ERR, "Memory allocation failure\n");
-+		return -1;
-+	}
-+
-+	PMD_DRV_LOG(INFO, "\t Allocated DPIO[%p]", dpio_dev->dpio);
-+	dpio_dev->dpio->regs = dpio_dev->mc_portal;
-+	if (dpio_open(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->hw_id,
-+		      &dpio_dev->token)) {
-+		PMD_DRV_LOG(ERR, "Failed to allocate IO space\n");
-+		free(dpio_dev->dpio);
-+		return -1;
-+	}
-+
-+	if (dpio_reset(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token)) {
-+		PMD_DRV_LOG(ERR, "Failed to reset dpio\n");
-+		dpio_close(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-+		free(dpio_dev->dpio);
-+		return -1;
-+	}
-+
-+	if (dpio_enable(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token)) {
-+		PMD_DRV_LOG(ERR, "Failed to enable dpio\n");
-+		dpio_close(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-+		free(dpio_dev->dpio);
-+		return -1;
-+	}
-+
-+	if (dpio_get_attributes(dpio_dev->dpio, CMD_PRI_LOW,
-+				dpio_dev->token, &attr)) {
-+		PMD_DRV_LOG(ERR, "DPIO Get attribute failed\n");
-+		dpio_disable(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-+		dpio_close(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-+		free(dpio_dev->dpio);
-+		return -1;
-+	}
-+
-+	PMD_DRV_LOG(INFO, "Qbman Portal ID %d", attr.qbman_portal_id);
-+	PMD_DRV_LOG(INFO, "Portal CE addr 0x%lX", attr.qbman_portal_ce_offset);
-+	PMD_DRV_LOG(INFO, "Portal CI addr 0x%lX", attr.qbman_portal_ci_offset);
-+
-+	/* Configure & setup SW portal */
-+	p_des.block = NULL;
-+	p_des.idx = attr.qbman_portal_id;
-+	p_des.cena_bar = (void *)(dpio_dev->qbman_portal_ce_paddr);
-+	p_des.cinh_bar = (void *)(dpio_dev->qbman_portal_ci_paddr);
-+	p_des.irq = -1;
-+	p_des.qman_version = attr.qbman_version;
-+
-+	PMD_DRV_LOG(INFO, "Portal CE addr 0x%p", p_des.cena_bar);
-+	PMD_DRV_LOG(INFO, "Portal CI addr 0x%p", p_des.cinh_bar);
-+
-+	dpio_dev->sw_portal = qbman_swp_init(&p_des);
-+	if (dpio_dev->sw_portal == NULL) {
-+		PMD_DRV_LOG(ERR, " QBMan SW Portal Init failed\n");
-+		dpio_close(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-+		free(dpio_dev->dpio);
-+		return -1;
-+	}
-+
-+	PMD_DRV_LOG(INFO, "QBMan SW Portal 0x%p\n", dpio_dev->sw_portal);
-+
-+	return 0;
-+}
-+
-+int dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev)
-+{
-+	int sdest;
-+	int cpu_id, ret;
-+
-+	/* Set the Stashing Destination */
-+	cpu_id = rte_lcore_id();
-+	if (cpu_id < 0) {
-+		cpu_id = rte_get_master_lcore();
-+		if (cpu_id < 0) {
-+			PMD_DRV_LOG(ERR, "\tGetting CPU Index failed\n");
-+			return -1;
-+		}
-+	}
-+
-+	/*
-+	 * When running DPDK in a virtual machine, the stashing destination
-+	 * is set in the hardware with respect to the virtual CPU IDs. As a
-+	 * workaround, the environment variable HOST_START_CPU gives the
-+	 * offset of the host core at which the virtual machine's threads
-+	 * start.
-+	 */
-+	if (getenv("HOST_START_CPU")) {
-+		cpu_id +=
-+		atoi(getenv("HOST_START_CPU"));
-+		cpu_id = cpu_id % NUM_HOST_CPUS;
-+	}
-+
-+	/* Set the STASH destination depending on the current CPU ID.
-+	   Valid values of SDEST are 4, 5, 6, 7. Where,
-+	   CPU 0-1 will have SDEST 4,
-+	   CPU 2-3 will have SDEST 5, and so on.
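-+	   For example (editor's sketch), a thread on CPU 5 maps to SDEST 6:
-+	   DPAA2_CORE_CLUSTER_GET(sdest, 5) selects DPAA2_CORE_CLUSTER_THIRD,
-+	   i.e. 0x04 + 2 == 6.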
-+
-+int dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev)
-+{
-+	int sdest;
-+	int cpu_id, ret;
-+
-+	/* Set the Stashing Destination */
-+	cpu_id = rte_lcore_id();
-+	if (cpu_id < 0) {
-+		cpu_id = rte_get_master_lcore();
-+		if (cpu_id < 0) {
-+			PMD_DRV_LOG(ERR, "\tGetting CPU Index failed\n");
-+			return -1;
-+		}
-+	}
-+
-+	/*
-+	 * When DPDK runs inside a Virtual Machine, the stashing
-+	 * destination is set in hardware with respect to the virtual
-+	 * CPU IDs. As a workaround, the environment variable
-+	 * HOST_START_CPU gives the offset of the host core on which
-+	 * the Virtual Machine's threads start.
-+	 */
-+	if (getenv("HOST_START_CPU")) {
-+		cpu_id += atoi(getenv("HOST_START_CPU"));
-+		cpu_id = cpu_id % NUM_HOST_CPUS;
-+	}
-+
-+	/* Set the stash destination based on the current CPU ID.
-+	 * Valid SDEST values are 4..7: CPUs 0-1 use SDEST 4,
-+	 * CPUs 2-3 use SDEST 5, and so on.
-+	 */
-+	DPAA2_CORE_CLUSTER_GET(sdest, cpu_id);
-+	PMD_DRV_LOG(INFO, "Portal= %d  CPU= %u SDEST= %d\n",
-+		    dpio_dev->index, cpu_id, sdest);
-+
-+	ret = dpio_set_stashing_destination(dpio_dev->dpio, CMD_PRI_LOW,
-+					    dpio_dev->token, sdest);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "%d ERROR in SDEST\n", ret);
-+		return -1;
-+	}
-+
-+	return 0;
-+}
-+
-+static inline struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(void)
-+{
-+	struct dpaa2_dpio_dev *dpio_dev = NULL;
-+	int ret;
-+
-+	/* Get DPIO dev handle from list using index */
-+	TAILQ_FOREACH(dpio_dev, dpio_dev_list, next) {
-+		if (dpio_dev && rte_atomic16_test_and_set(&dpio_dev->ref_count))
-+			break;
-+	}
-+	if (!dpio_dev)
-+		return NULL;
-+
-+	ret = dpaa2_configure_stashing(dpio_dev);
-+	if (ret)
-+		RTE_LOG(ERR, EAL, "dpaa2_configure_stashing failed");
-+
-+	return dpio_dev;
-+}
-+
-+int
-+dpaa2_affine_qbman_swp(void)
-+{
-+	if (thread_io_info.dpio_dev)
-+		return 0;
-+
-+	/* Populate the thread_io_info structure */
-+	thread_io_info.dpio_dev = dpaa2_get_qbman_swp();
-+	if (thread_io_info.dpio_dev)
-+		return 0;
-+	else
-+		return -1;
-+}
-+
-+int
-+dpaa2_affine_qbman_swp_sec(void)
-+{
-+	if (thread_io_info.sec_dpio_dev)
-+		return 0;
-+
-+	/* Populate the thread_io_info structure */
-+	thread_io_info.sec_dpio_dev = dpaa2_get_qbman_swp();
-+	if (thread_io_info.sec_dpio_dev)
-+		return 0;
-+	else
-+		return -1;
-+}
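The affinity helpers above follow a cache-on-first-use pattern: each thread keeps its portal in __thread storage and only searches the global list once. A self-contained sketch of the same pattern with a generic resource type (all names here are illustrative, not from the patch; the claim step is unsynchronized, which is fine for this single-threaded demo but is what rte_atomic16_test_and_set guards in the driver):

    #include <stdio.h>

    struct portal { int id; };

    static struct portal portals[2] = { {4}, {5} };
    static int next_free;

    /* stand-in for dpaa2_get_qbman_swp(): claim the next unclaimed portal */
    static struct portal *claim_portal(void)
    {
            return next_free < 2 ? &portals[next_free++] : NULL;
    }

    static __thread struct portal *my_portal;  /* like thread_io_info.dpio_dev */

    static int affine_portal(void)
    {
            if (my_portal)
                    return 0;                  /* already affined: no-op */
            my_portal = claim_portal();
            return my_portal ? 0 : -1;
    }

    int main(void)
    {
            affine_portal();
            affine_portal();                   /* second call does nothing */
            printf("this thread uses portal %d\n", my_portal->id);
            return 0;
    }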
-+
-+int
-+dpaa2_create_dpio_device(struct fsl_vfio_device *vdev,
-+			 struct vfio_device_info *obj_info,
-+			 int object_id)
-+{
-+	struct dpaa2_dpio_dev *dpio_dev;
-+	struct vfio_region_info reg_info = { .argsz = sizeof(reg_info)};
-+
-+	if (obj_info->num_regions < NUM_DPIO_REGIONS) {
-+		PMD_DRV_LOG(ERR, "ERROR, Insufficient number "
-+			    "of DPIO regions.\n");
-+		return -1;
-+	}
-+
-+	if (!dpio_dev_list) {
-+		dpio_dev_list = malloc(sizeof(struct dpio_device_list));
-+		if (NULL == dpio_dev_list) {
-+			PMD_DRV_LOG(ERR, "Memory allocation failed for DPIO list\n");
-+			return -1;
-+		}
-+
-+		/* Initialize the DPIO List */
-+		TAILQ_INIT(dpio_dev_list);
-+	}
-+
-+	dpio_dev = malloc(sizeof(struct dpaa2_dpio_dev));
-+	if (!dpio_dev) {
-+		PMD_DRV_LOG(ERR, "Memory allocation failed for DPIO Device\n");
-+		return -1;
-+	}
-+
-+	PMD_DRV_LOG(INFO, "\t Allocated DPIO [%p]", dpio_dev);
-+	dpio_dev->dpio = NULL;
-+	dpio_dev->hw_id = object_id;
-+	dpio_dev->vfio_fd = vdev->fd;
-+	rte_atomic16_init(&dpio_dev->ref_count);
-+	/* Using single portal for all devices */
-+	dpio_dev->mc_portal = mcp_ptr_list[MC_PORTAL_INDEX];
-+
-+	reg_info.index = 0;
-+	if (ioctl(dpio_dev->vfio_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
-+		printf("vfio: error getting region info\n");
-+		return -1;
-+	}
-+
-+	PMD_DRV_LOG(INFO, "\t Region Offset = %llx", reg_info.offset);
-+	PMD_DRV_LOG(INFO, "\t Region Size = %llx", reg_info.size);
-+	dpio_dev->ce_size = reg_info.size;
-+	dpio_dev->qbman_portal_ce_paddr = (uint64_t)mmap(NULL, reg_info.size,
-+				PROT_WRITE | PROT_READ, MAP_SHARED,
-+				dpio_dev->vfio_fd, reg_info.offset);
-+
-+	/* Create mapping for the QBMan cache-enabled area. This is a fix for
-+	 * an SMMU fault on DQRR stashing transactions.
-+	 */
-+	if (vfio_dmamap_mem_region(dpio_dev->qbman_portal_ce_paddr,
-+				   reg_info.offset, reg_info.size)) {
-+		PMD_DRV_LOG(ERR, "DMAMAP for Portal CE area failed.\n");
-+		return -1;
-+	}
-+
-+	reg_info.index = 1;
-+	if (ioctl(dpio_dev->vfio_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
-+		printf("vfio: error getting region info\n");
-+		return -1;
-+	}
-+
-+	PMD_DRV_LOG(INFO, "\t Region Offset = %llx", reg_info.offset);
-+	PMD_DRV_LOG(INFO, "\t Region Size = %llx", reg_info.size);
-+	dpio_dev->ci_size = reg_info.size;
-+	dpio_dev->qbman_portal_ci_paddr = (uint64_t)mmap(NULL, reg_info.size,
-+				PROT_WRITE | PROT_READ, MAP_SHARED,
-+				dpio_dev->vfio_fd, reg_info.offset);
-+
-+	if (configure_dpio_qbman_swp(dpio_dev)) {
-+		PMD_DRV_LOG(ERR,
-+			    "Failed to configure the qbman portal for dpio %d\n",
-+			    dpio_dev->hw_id);
-+		return -1;
-+	}
-+
-+	io_space_count++;
-+	dpio_dev->index = io_space_count;
-+	TAILQ_INSERT_HEAD(dpio_dev_list, dpio_dev, next);
-+
-+	return 0;
-+}
-diff --git a/drivers/net/dpaa2/rte_eth_dpni.c b/drivers/net/dpaa2/rte_eth_dpni.c
-new file mode 100644
-index 0000000..c1587dc
---- /dev/null
-+++ b/drivers/net/dpaa2/rte_eth_dpni.c
-@@ -0,0 +1,2269 @@
-+/*-
-+ *   BSD LICENSE
-+ *
-+ *   Copyright (c) 2014 Freescale Semiconductor, Inc. All rights reserved.
-+ *
-+ *   Redistribution and use in source and binary forms, with or without
-+ *   modification, are permitted provided that the following conditions
-+ *   are met:
-+ *
-+ *   * Redistributions of source code must retain the above copyright
-+ *     notice, this list of conditions and the following disclaimer.
-+ *   * Redistributions in binary form must reproduce the above copyright
-+ *     notice, this list of conditions and the following disclaimer in
-+ *     the documentation and/or other materials provided with the
-+ *     distribution.
-+ *   * Neither the name of Freescale Semiconductor, Inc nor the names of its
-+ *     contributors may be used to endorse or promote products derived
-+ *     from this software without specific prior written permission.
-+ *
-+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+ */
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#include
-+/* MC header files */
-+#include
-+#include
-+#include "rte_eth_dpaa2_pvt.h"
-+#include "rte_eth_dpni_annot.h"
-+#include "dpaa2_logs.h"
-+
-+#include
-+#include
-+
-+#define DPAA2_STASHING
-+
-+/* tx fd send batching */
-+#define QBMAN_MULTI_TX
-+/* #define DPAA2_CGR_SUPPORT */
-+
-+#define DPAA2_MIN_RX_BUF_SIZE 512
-+#define DPAA2_MAX_RX_PKT_LEN  10240 /*WRIOP support*/
-+
-+#define RTE_ETH_DPAA2_SNAPSHOT_LEN 65535
-+#define RTE_ETH_DPAA2_SNAPLEN 4096
-+#define RTE_ETH_DPAA2_PROMISC 1
-+#define RTE_ETH_DPAA2_TIMEOUT -1
-+#define ETH_DPAA2_RX_IFACE_ARG "rx_iface"
-+#define ETH_DPAA2_TX_IFACE_ARG "tx_iface"
-+#define ETH_DPAA2_IFACE_ARG    "iface"
-+
-+static const char *drivername = "DPNI PMD";
-+
-+#define MAX_TCS			DPNI_MAX_TC
-+#define MAX_RX_QUEUES		64
-+#define MAX_TX_QUEUES		64
-+
-+/* Maximum number of slots available in the TX ring */
-+#define MAX_SLOTS		8
-+
-+/* Threshold for a queue to *enter* congestion state.
-+ * It is set to 128 frames of size 64 bytes.
-+ */
-+#define CONG_ENTER_THRESHOLD	(128 * 64)
-+
-+/* Threshold for a queue to *exit* congestion state.
-+ * It is set to 98 frames of size 64 bytes.
-+ */
-+#define CONG_EXIT_THRESHOLD	(98 * 64)
-+
-+/*! Maximum number of flow distributions per traffic class */
-+#define MAX_DIST_PER_TC		16
-+
-+/* Size of the input SMMU mapped memory required by MC */
-+#define DIST_PARAM_IOVA_SIZE	256
-+
-+struct dpaa2_queue {
-+	void *dev;
-+	int32_t eventfd;	/*!< Event Fd of this queue */
-+	uint32_t fqid;		/*!< Unique ID of this queue */
-+	uint8_t tc_index;	/*!< traffic class identifier */
-+	uint16_t flow_id;	/*!< To be used by DPAA2 framework */
-+	uint64_t rx_pkts;
-+	uint64_t tx_pkts;
-+	uint64_t err_pkts;
-+	union {
-+		struct queue_storage_info_t *q_storage;
-+		struct qbman_result *cscn;
-+	};
-+};
-+
-+struct dpaa2_dev_priv {
-+	void *hw;
-+	int32_t hw_id;
-+	int32_t qdid;
-+	uint16_t token;
-+	uint8_t nb_tx_queues;
-+	uint8_t nb_rx_queues;
-+	void *rx_vq[MAX_RX_QUEUES];
-+	void *tx_vq[MAX_TX_QUEUES];
-+
-+	struct dpaa2_bp_list *bp_list; /**data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+	uint64_t value;
-+
-+	dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, DPNI_CNT_ING_FRAME, &value);
-+	printf("Rx packets: %ld\n", value);
-+	dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, DPNI_CNT_ING_BYTE, &value);
-+	printf("Rx bytes: %ld\n", value);
-+	dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, DPNI_CNT_ING_MCAST_FRAME, &value);
-+	printf("Rx Multicast: %ld\n", value);
-+	dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, DPNI_CNT_ING_FRAME_DROP, &value);
-+	printf("Rx dropped: %ld\n", value);
-+	dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, DPNI_CNT_ING_FRAME_DISCARD, &value);
-+	printf("Rx discarded: %ld\n", value);
-+	dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, DPNI_CNT_EGR_FRAME, &value);
-+	printf("Tx packets: %ld\n", value);
-+	dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, DPNI_CNT_EGR_BYTE, &value);
-+	printf("Tx bytes: %ld\n", value);
-+	dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, DPNI_CNT_EGR_FRAME_DISCARD, &value);
-+	printf("Tx dropped: %ld\n", value);
-+}
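The two helpers that follow copy the 8-byte rte_eth_link struct through a 64-bit compare-and-swap so readers never observe a torn value. A standalone sketch of the same trick, using a GCC builtin in place of rte_atomic64_cmpset (the struct layout here is an illustrative 8-byte stand-in, and the pointer cast carries the same aliasing caveats as the driver code):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct link { uint32_t speed; uint16_t duplex; uint16_t status; }; /* 8 bytes */

    /* Copy *src into *dst atomically; returns -1 if *dst changed underneath. */
    static int atomic_copy_link(struct link *dst, const struct link *src)
    {
            uint64_t oldv, newv;
            memcpy(&oldv, dst, sizeof(oldv));
            memcpy(&newv, src, sizeof(newv));
            return __sync_bool_compare_and_swap((uint64_t *)dst, oldv, newv) ? 0 : -1;
    }

    int main(void)
    {
            struct link cur = {0}, up = { .speed = 10000, .status = 1 };
            printf("copied: %d, status now %u\n",
                   atomic_copy_link(&cur, &up), cur.status);
            return 0;
    }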
-+
-+/**
-+ * Atomically reads the link status information from the global
-+ * structure rte_eth_dev.
-+ *
-+ * @param dev
-+ *   - Pointer to the structure rte_eth_dev to read from.
-+ *   - Pointer to the buffer to be saved with the link status.
-+ *
-+ * @return
-+ *   - On success, zero.
-+ *   - On failure, negative value.
-+ */
-+static inline int
-+rte_dpni_dev_atomic_read_link_status(struct rte_eth_dev *dev,
-+				     struct rte_eth_link *link)
-+{
-+	struct rte_eth_link *dst = link;
-+	struct rte_eth_link *src = &dev->data->dev_link;
-+
-+	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-+				*(uint64_t *)src) == 0)
-+		return -1;
-+
-+	return 0;
-+}
-+
-+/**
-+ * Atomically writes the link status information into the global
-+ * structure rte_eth_dev.
-+ *
-+ * @param dev
-+ *   - Pointer to the structure rte_eth_dev to write to.
-+ *   - Pointer to the buffer holding the link status to be written.
-+ *
-+ * @return
-+ *   - On success, zero.
-+ *   - On failure, negative value.
-+ */
-+static inline int
-+rte_dpni_dev_atomic_write_link_status(struct rte_eth_dev *dev,
-+				      struct rte_eth_link *link)
-+{
-+	struct rte_eth_link *dst = &dev->data->dev_link;
-+	struct rte_eth_link *src = link;
-+
-+	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
-+				*(uint64_t *)src) == 0)
-+		return -1;
-+
-+	return 0;
-+}
-+
-+static inline void
-+dpaa2_eth_parse_packet(struct rte_mbuf *mbuf, uint64_t hw_annot_addr)
-+{
-+	uint32_t pkt_type = 0;
-+	struct pkt_annotation *annotation =
-+			(struct pkt_annotation *)hw_annot_addr;
-+
-+	PMD_DRV_LOG(DEBUG, "\n 1 annotation = 0x%lx   ", annotation->word4);
-+
-+	if (BIT_ISSET_AT_POS(annotation->word3, L2_ETH_MAC_PRESENT))
-+		pkt_type |= RTE_PTYPE_L2_ETHER;
-+
-+	if (BIT_ISSET_AT_POS(annotation->word4, L3_IPV4_1_PRESENT))
-+		pkt_type |= RTE_PTYPE_L3_IPV4;
-+
-+	if (BIT_ISSET_AT_POS(annotation->word4, L3_IPV6_1_PRESENT))
-+		pkt_type |= RTE_PTYPE_L3_IPV6;
-+
-+	if (BIT_ISSET_AT_POS(annotation->word4, L3_IP_1_OPT_PRESENT))
-+		pkt_type |= RTE_PTYPE_L3_IPV4_EXT;
-+
-+	if (BIT_ISSET_AT_POS(annotation->word4, L3_PROTO_UDP_PRESENT))
-+		pkt_type |= RTE_PTYPE_L4_UDP;
-+
-+	if (BIT_ISSET_AT_POS(annotation->word4, L3_PROTO_TCP_PRESENT))
-+		pkt_type |= RTE_PTYPE_L4_TCP;
-+
-+	if (BIT_ISSET_AT_POS(annotation->word4, L3_PROTO_SCTP_PRESENT))
-+		pkt_type |= RTE_PTYPE_L4_SCTP;
-+
-+	if (BIT_ISSET_AT_POS(annotation->word4, L3_PROTO_ICMP_PRESENT))
-+		pkt_type |= RTE_PTYPE_L4_ICMP;
-+
-+	if (BIT_ISSET_AT_POS(annotation->word4, L3_IP_UNKNOWN_PROTOCOL))
-+		pkt_type |= RTE_PTYPE_UNKNOWN;
-+
-+	mbuf->packet_type = pkt_type;
-+}
-+
-+static inline
-+struct rte_mbuf *eth_fd_to_mbuf(const struct qbman_fd *fd)
-+{
-+	struct rte_mbuf *mbuf = DPAA2_INLINE_MBUF_FROM_BUF(
-+		DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd)),
-+		bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size);
-+
-+	/* need to repopulate some of the fields,
-+	 * as they may have changed during the last transmission
-+	 */
-+	mbuf->data_off = DPAA2_GET_FD_OFFSET(fd);
-+	mbuf->data_len = DPAA2_GET_FD_LEN(fd);
-+	mbuf->pkt_len = mbuf->data_len;
-+	mbuf->next = NULL;
-+	rte_mbuf_refcnt_set(mbuf, 1);
-+
-+	PMD_DRV_LOG(DEBUG, "to mbuf - mbuf =%p, mbuf->buf_addr =%p, off = %d,"
-+		"fd_off=%d fd =%lx, meta = %d  bpid =%d, len=%d\n",
-+		mbuf, mbuf->buf_addr, mbuf->data_off,
-+		DPAA2_GET_FD_OFFSET(fd), DPAA2_GET_FD_ADDR(fd),
-+		bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size,
-+		DPAA2_GET_FD_BPID(fd), DPAA2_GET_FD_LEN(fd));
-+
-+	/* Parse the packet */
-+	/* parse results are after the private - sw annotation area */
-+	dpaa2_eth_parse_packet(mbuf,
-+		(uint64_t)DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd))
-+		 + DPAA2_FD_PTA_SIZE);
-+
-+	mbuf->nb_segs = 1;
-+	mbuf->ol_flags = 0;
-+
-+	return mbuf;
-+}
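dpaa2_eth_parse_packet above is a straight bit-test cascade over the hardware annotation word. A standalone miniature of the same mapping, with made-up bit positions and packet-type values (the real L3_IPV4_1_PRESENT etc. come from rte_eth_dpni_annot.h, and the real RTE_PTYPE_* from rte_mbuf):

    #include <stdint.h>
    #include <stdio.h>

    /* illustrative positions only; the driver reads these from the annotation */
    #define ANN_ETH   (1ULL << 0)
    #define ANN_IPV4  (1ULL << 1)
    #define ANN_UDP   (1ULL << 2)

    #define PT_L2_ETHER 0x1
    #define PT_L3_IPV4  0x10
    #define PT_L4_UDP   0x200

    static uint32_t word_to_ptype(uint64_t word)
    {
            uint32_t pt = 0;
            if (word & ANN_ETH)  pt |= PT_L2_ETHER;
            if (word & ANN_IPV4) pt |= PT_L3_IPV4;
            if (word & ANN_UDP)  pt |= PT_L4_UDP;
            return pt;
    }

    int main(void)
    {
            printf("ptype = 0x%x\n", word_to_ptype(ANN_ETH | ANN_IPV4 | ANN_UDP));
            return 0;   /* prints ptype = 0x211 */
    }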
-+
-+static void __attribute__ ((noinline)) eth_mbuf_to_fd(struct rte_mbuf *mbuf,
-+					struct qbman_fd *fd, uint16_t bpid)
-+{
-+	/* Resetting the buffer pool id and offset field */
-+	fd->simple.bpid_offset = 0;
-+
-+	DPAA2_SET_FD_ADDR(fd, DPAA2_MBUF_VADDR_TO_IOVA(mbuf));
-+	DPAA2_SET_FD_LEN(fd, mbuf->data_len);
-+	DPAA2_SET_FD_BPID(fd, bpid);
-+	DPAA2_SET_FD_OFFSET(fd, mbuf->data_off);
-+	DPAA2_SET_FD_ASAL(fd, DPAA2_ASAL_VAL);
-+
-+	PMD_DRV_LOG(DEBUG, "mbuf =%p, mbuf->buf_addr =%p, off = %d,"
-+		"fd_off=%d fd =%lx, meta = %d  bpid =%d, len=%d\n",
-+		mbuf, mbuf->buf_addr, mbuf->data_off,
-+		DPAA2_GET_FD_OFFSET(fd), DPAA2_GET_FD_ADDR(fd),
-+		bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size,
-+		DPAA2_GET_FD_BPID(fd), DPAA2_GET_FD_LEN(fd));
-+
-+	return;
-+}
-+
-+static int eth_copy_mbuf_to_fd(struct rte_mbuf *mbuf,
-+			       struct qbman_fd *fd, uint16_t bpid)
-+{
-+	struct rte_mbuf *m;
-+	void *mb = NULL;
-+
-+	if (hw_mbuf_alloc_bulk(bpid_info[bpid].bp_list->buf_pool.mp, &mb, 1)) {
-+		PMD_DRV_LOG(WARNING, "Unable to allocate DPAA2 buffer");
-+		rte_pktmbuf_free(mbuf);
-+		return -1;
-+	}
-+	m = (struct rte_mbuf *)mb;
-+	memcpy((char *)m->buf_addr + mbuf->data_off,
-+	       (void *)((char *)mbuf->buf_addr + mbuf->data_off),
-+	       mbuf->pkt_len);
-+
-+	/* Resetting the buffer pool id and offset field */
-+	fd->simple.bpid_offset = 0;
-+
-+	DPAA2_SET_FD_ADDR(fd, DPAA2_MBUF_VADDR_TO_IOVA(m));
-+	DPAA2_SET_FD_LEN(fd, mbuf->data_len);
-+	DPAA2_SET_FD_BPID(fd, bpid);
-+	DPAA2_SET_FD_OFFSET(fd, mbuf->data_off);
-+	DPAA2_SET_FD_ASAL(fd, DPAA2_ASAL_VAL);
-+
-+	PMD_DRV_LOG(DEBUG, "\nmbuf %p BMAN buf addr %p",
-+		    (void *)mbuf, mbuf->buf_addr);
-+
-+	PMD_DRV_LOG(DEBUG, "\nfdaddr =%lx bpid =%d meta =%d off =%d, len =%d\n",
-+		    DPAA2_GET_FD_ADDR(fd),
-+		    DPAA2_GET_FD_BPID(fd),
-+		    bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size,
-+		    DPAA2_GET_FD_OFFSET(fd),
-+		    DPAA2_GET_FD_LEN(fd));
-+	/* free the original packet */
-+	rte_pktmbuf_free(mbuf);
-+
-+	return 0;
-+}
-+
-+static uint16_t
-+eth_dpaa2_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
-+{
-+	/* This function receives frames for a given device and VQ */
-+	struct dpaa2_queue *dpaa2_q = (struct dpaa2_queue *)queue;
-+	struct qbman_result *dq_storage;
-+	uint32_t fqid = dpaa2_q->fqid;
-+	int ret, num_rx = 0;
-+	uint8_t is_last = 0, status;
-+	struct qbman_swp *swp;
-+	const struct qbman_fd *fd;
-+	struct qbman_pull_desc pulldesc;
-+	struct rte_eth_dev *dev = dpaa2_q->dev;
-+
-+	if (!thread_io_info.dpio_dev) {
-+		ret = dpaa2_affine_qbman_swp();
-+		if (ret) {
-+			PMD_DRV_LOG(ERR, "Failure in affining portal\n");
-+			return 0;
-+		}
-+	}
-+	swp = thread_io_info.dpio_dev->sw_portal;
-+	dq_storage = dpaa2_q->q_storage->dq_storage[0];
-+
-+	qbman_pull_desc_clear(&pulldesc);
-+	qbman_pull_desc_set_numframes(&pulldesc, nb_pkts);
-+	qbman_pull_desc_set_fq(&pulldesc, fqid);
-+	/* todo optimization - we can have dq_storage_phys available */
-+	qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-+			(dma_addr_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
-+
-+	/* Issue a volatile dequeue command. */
-+	while (1) {
-+		if (qbman_swp_pull(swp, &pulldesc)) {
-+			PMD_DRV_LOG(ERR, "VDQ command is not issued."
-+				    "QBMAN is busy\n");
-+			/* Portal was busy, try again */
-+			continue;
-+		}
-+		break;
-+	}
-+
-+	/* Receive packets until the Last Dequeue entry is found with
-+	 * respect to the PULL command issued above.
-+	 */
-+	while (!is_last) {
-+		/* Check if the previously issued command has completed.
-+		 * The SWP also appears to be shared between the Ethernet
-+		 * driver and the SEC driver.
-+		 */
-+		while (!qbman_check_command_complete(swp, dq_storage))
-+			;
-+		/* Loop until the dq_storage is updated with
-+		 * a new token by QBMAN
-+		 */
-+		while (!qbman_result_has_new_result(swp, dq_storage))
-+			;
-+		/* Check whether the last pull command has expired,
-+		 * setting the condition for loop termination
-+		 */
-+		if (qbman_result_DQ_is_pull_complete(dq_storage)) {
-+			is_last = 1;
-+			/* Check for a valid frame. */
-+			status = (uint8_t)qbman_result_DQ_flags(dq_storage);
-+			if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0)) {
-+				PMD_DRV_LOG(DEBUG, "No frame is delivered\n");
-+				continue;
-+			}
-+		}
-+
-+		fd = qbman_result_DQ_fd(dq_storage);
-+		bufs[num_rx] = eth_fd_to_mbuf(fd);
-+		bufs[num_rx]->port = dev->data->port_id;
-+
-+		num_rx++;
-+		dq_storage++;
-+	} /* End of Packet Rx loop */
-+
-+	dpaa2_q->rx_pkts += num_rx;
-+
-+	PMD_DRV_LOG(INFO, "Ethernet Received %d Packets\n", num_rx);
-+	/* Return the total number of packets received to the DPAA2 app */
-+	return num_rx;
-+}
-+
-+static uint16_t
-+eth_dpaa2_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
-+{
-+	/* This function receives frames for a given device and VQ */
-+	struct dpaa2_queue *dpaa2_q = (struct dpaa2_queue *)queue;
-+	struct qbman_result *dq_storage;
-+	uint32_t fqid = dpaa2_q->fqid;
-+	int ret, i, num_rx = 0;
-+	uint8_t is_last = 0, status;
-+	struct qbman_swp *swp;
-+	const struct qbman_fd *fd[16];
-+	struct qbman_pull_desc pulldesc;
-+	struct queue_storage_info_t *q_storage = dpaa2_q->q_storage;
-+	struct rte_eth_dev *dev = dpaa2_q->dev;
-+
-+	if (!thread_io_info.dpio_dev) {
-+		ret = dpaa2_affine_qbman_swp();
-+		if (ret) {
-+			PMD_DRV_LOG(ERR, "Failure in affining portal\n");
-+			return 0;
-+		}
-+	}
-+	swp = thread_io_info.dpio_dev->sw_portal;
-+
-+	if (!q_storage->active_dqs) {
-+		q_storage->toggle = 0;
-+		dq_storage = q_storage->dq_storage[q_storage->toggle];
-+		qbman_pull_desc_clear(&pulldesc);
-+		qbman_pull_desc_set_numframes(&pulldesc, nb_pkts);
-+		qbman_pull_desc_set_fq(&pulldesc, fqid);
-+		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-+			(dma_addr_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
-+		if (thread_io_info.global_active_dqs) {
-+			while (!qbman_check_command_complete(swp, thread_io_info.global_active_dqs))
-+				;
-+		}
-+		while (1) {
-+			if (qbman_swp_pull(swp, &pulldesc)) {
-+				PMD_DRV_LOG(WARNING, "VDQ command is not issued."
-+					    "QBMAN is busy\n");
-+				/* Portal was busy, try again */
-+				continue;
-+			}
-+			break;
-+		}
-+		q_storage->active_dqs = dq_storage;
-+		thread_io_info.global_active_dqs = dq_storage;
-+	}
-+	if (thread_io_info.global_active_dqs)
-+		while (!qbman_check_command_complete(swp, thread_io_info.global_active_dqs))
-+			;
-+	dq_storage = q_storage->active_dqs;
-+	while (!is_last) {
-+		/* Loop until the dq_storage is updated with
-+		 * a new token by QBMAN
-+		 */
-+		struct rte_mbuf *mbuf;
-+
-+		while (!qbman_result_has_new_result(swp, dq_storage))
-+			;
-+		rte_prefetch0((void *)((uint64_t)(dq_storage + 1)));
-+		/* Check whether the last pull command has expired,
-+		 * setting the condition for loop termination
-+		 */
-+		if (qbman_result_DQ_is_pull_complete(dq_storage)) {
-+			is_last = 1;
-+			/* Check for a valid frame. */
-+			status = (uint8_t)qbman_result_DQ_flags(dq_storage);
-+			if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0)) {
-+				PMD_DRV_LOG2(DEBUG, "No frame is delivered\n");
-+				continue;
-+			}
-+		}
-+		fd[num_rx] = qbman_result_DQ_fd(dq_storage);
-+		mbuf = (struct rte_mbuf *)DPAA2_IOVA_TO_VADDR(
-+			DPAA2_GET_FD_ADDR(fd[num_rx])
-+			- bpid_info[DPAA2_GET_FD_BPID(fd[num_rx])].meta_data_size);
-+		/* Prefetch mbuf */
-+		rte_prefetch0(mbuf);
-+		/* Prefetch annotation address from where we get parse results */
-+		rte_prefetch0((void *)((uint64_t)DPAA2_GET_FD_ADDR(fd[num_rx]) + DPAA2_FD_PTA_SIZE + 16));
-+		/* Prefetch data buffer */
-+		/* rte_prefetch0((void *)((uint64_t)DPAA2_GET_FD_ADDR(fd[num_rx]) + DPAA2_GET_FD_OFFSET(fd[num_rx]))); */
-+		dq_storage++;
-+		num_rx++;
-+
-+	} /* End of Packet Rx loop */
-+
-+	for (i = 0; i < num_rx; i++) {
-+		bufs[i] = eth_fd_to_mbuf(fd[i]);
-+		bufs[i]->port = dev->data->port_id;
-+	}
-+
-+	q_storage->toggle ^= 1;
-+	dq_storage = q_storage->dq_storage[q_storage->toggle];
-+	qbman_pull_desc_clear(&pulldesc);
-+	qbman_pull_desc_set_numframes(&pulldesc, nb_pkts);
-+	qbman_pull_desc_set_fq(&pulldesc, fqid);
-+	qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-+			(dma_addr_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
-+	/* Issue a volatile dequeue command. */
-+	while (1) {
-+		if (qbman_swp_pull(swp, &pulldesc)) {
-+			PMD_DRV_LOG(WARNING, "VDQ command is not issued."
-+				    "QBMAN is busy\n");
-+			continue;
-+		}
-+		break;
-+	}
-+	q_storage->active_dqs = dq_storage;
-+	thread_io_info.global_active_dqs = dq_storage;
-+
-+	dpaa2_q->rx_pkts += num_rx;
-+
-+	PMD_DRV_LOG2(INFO, "Ethernet Received %d Packets\n", num_rx);
-+	/* Return the total number of packets received to the DPAA2 app */
-+	return num_rx;
-+}
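eth_dpaa2_prefetch_rx above pipelines dequeues by ping-ponging between two result buffers: while software drains the active one, the next pull command already targets the other. The toggling itself is just this, shown as a self-contained sketch (the array stands in for q_storage->dq_storage[2]):

    #include <stdio.h>

    int main(void)
    {
            int storage[2] = { 100, 200 };   /* two dequeue-result buffers */
            int toggle = 0;

            for (int burst = 0; burst < 4; burst++) {
                    int active = storage[toggle]; /* results drained from here */
                    toggle ^= 1;                  /* next pull fills the other slot */
                    printf("burst %d drains buffer %d, next pull -> slot %d\n",
                           burst, active, toggle);
            }
            return 0;
    }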
-+
-+/*
-+ * Callback to handle sending packets through a real NIC.
-+ */
-+static uint16_t
-+eth_dpaa2_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
-+{
-+	/* Function to transmit the frames to the given device and VQ */
-+	uint32_t loop;
-+	int32_t ret;
-+#ifdef QBMAN_MULTI_TX
-+	struct qbman_fd fd_arr[8];
-+	uint32_t frames_to_send;
-+#else
-+	struct qbman_fd fd;
-+#endif
-+	struct rte_mempool *mp;
-+	struct qbman_eq_desc eqdesc;
-+	struct dpaa2_queue *dpaa2_q = (struct dpaa2_queue *)queue;
-+	struct qbman_swp *swp;
-+	uint16_t num_tx = 0;
-+	uint16_t bpid;
-+	struct rte_eth_dev *dev = dpaa2_q->dev;
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+
-+	if (!thread_io_info.dpio_dev) {
-+		ret = dpaa2_affine_qbman_swp();
-+		if (ret) {
-+			PMD_DRV_LOG(ERR, "Failure in affining portal\n");
-+			return 0;
-+		}
-+	}
-+	swp = thread_io_info.dpio_dev->sw_portal;
-+
-+	PMD_DRV_LOG(DEBUG, "===> dev =%p, fqid =%d", dev, dpaa2_q->fqid);
-+
-+	/* Prepare enqueue descriptor */
-+	qbman_eq_desc_clear(&eqdesc);
-+	qbman_eq_desc_set_no_orp(&eqdesc, DPAA2_EQ_RESP_ERR_FQ);
-+	qbman_eq_desc_set_response(&eqdesc, 0, 0);
-+	qbman_eq_desc_set_qd(&eqdesc, priv->qdid,
-+			     dpaa2_q->flow_id, dpaa2_q->tc_index);
-+
-+	/* Clear the unused FD fields before sending */
-+#ifdef QBMAN_MULTI_TX
-+	while (nb_pkts) {
-+#ifdef DPAA2_CGR_SUPPORT
-+		/* Check if the queue is congested */
-+		if (qbman_result_is_CSCN(dpaa2_q->cscn))
-+			goto skip_tx;
-+#endif
-+		frames_to_send = (nb_pkts >> 3) ? MAX_SLOTS : nb_pkts;
-+
-+		for (loop = 0; loop < frames_to_send; loop++) {
-+			fd_arr[loop].simple.frc = 0;
-+			DPAA2_RESET_FD_CTRL((&fd_arr[loop]));
-+			DPAA2_SET_FD_FLC((&fd_arr[loop]), NULL);
-+			mp = (*bufs)->pool;
-+			/* Not a hw_pkt pool allocated frame */
-+			if (mp && !(mp->flags & MEMPOOL_F_HW_PKT_POOL)) {
-+				printf("\n non hw offload buffer ");
-+				/* alloc should be from the default buffer pool
-+				 * attached to this interface
-+				 */
-+				if (priv->bp_list) {
-+					bpid = priv->bp_list->buf_pool.bpid;
-+				} else {
-+					printf("\n no buffer pool attached");
-+					num_tx = 0;
-+					goto skip_tx;
-+				}
-+				if (eth_copy_mbuf_to_fd(*bufs, &fd_arr[loop], bpid)) {
-+					bufs++;
-+					continue;
-+				}
-+			} else {
-+				RTE_ASSERT(mp);
-+				bpid = mempool_to_bpid(mp);
-+				eth_mbuf_to_fd(*bufs, &fd_arr[loop], bpid);
-+			}
-+			bufs++;
-+		}
-+		loop = 0;
-+		while (loop < frames_to_send) {
-+			loop += qbman_swp_send_multiple(swp, &eqdesc,
-+					&fd_arr[loop], frames_to_send - loop);
-+		}
-+
-+		num_tx += frames_to_send;
-+		dpaa2_q->tx_pkts += frames_to_send;
-+		nb_pkts -= frames_to_send;
-+	}
-+#else
-+#ifdef DPAA2_CGR_SUPPORT
-+	/* Check if the queue is congested */
-+	if (qbman_result_is_CSCN(dpaa2_q->cscn))
-+		goto skip_tx;
-+#endif
-+
-+	fd.simple.frc = 0;
-+	DPAA2_RESET_FD_CTRL((&fd));
-+	DPAA2_SET_FD_FLC((&fd), NULL);
-+	loop = 0;
-+
-+	while (loop < nb_pkts) {
-+		/* Prepare each packet which is to be sent */
-+		mp = bufs[loop]->pool;
-+		/* Not a hw_pkt pool allocated frame */
-+		if (mp && !(mp->flags & MEMPOOL_F_HW_PKT_POOL)) {
-+			/* alloc should be from the default buffer pool
-+			 * attached to this interface
-+			 */
-+			if (priv->bp_list) {
-+				bpid = priv->bp_list->buf_pool.bpid;
-+			} else {
-+				/* Buffer not from offloaded area as well as
-+				 * lacks buffer pool identifier. Cannot
-+				 * continue.
-+				 */
-+				PMD_DRV_LOG(ERR, "No Buffer pool "
-+					    "attached.\n");
-+				num_tx = 0;
-+				goto skip_tx;
-+			}
-+
-+			if (eth_copy_mbuf_to_fd(bufs[loop], &fd, bpid)) {
-+				loop++;
-+				continue;
-+			}
-+		} else {
-+			RTE_ASSERT(mp);
-+			bpid = mempool_to_bpid(mp);
-+			eth_mbuf_to_fd(bufs[loop], &fd, bpid);
-+		}
-+		/* Enqueue a single packet to the QBMAN */
-+		do {
-+			ret = qbman_swp_enqueue(swp, &eqdesc, &fd);
-+			if (ret != 0) {
-+				PMD_DRV_LOG(DEBUG, "Error in transmitting the frame\n");
-+			}
-+		} while (ret != 0);
-+
-+		/* Free the buffer shell */
-+		/* rte_pktmbuf_free(bufs[loop]); */
-+		num_tx++; loop++;
-+	}
-+	dpaa2_q->tx_pkts += num_tx;
-+	dpaa2_q->err_pkts += nb_pkts - num_tx;
-+#endif
-+skip_tx:
-+	return num_tx;
-+}
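In the QBMAN_MULTI_TX path above, `(nb_pkts >> 3) ? MAX_SLOTS : nb_pkts` caps each enqueue-ring pass at eight frames: any burst of eight or more is cut into full 8-slot batches plus a remainder. A quick standalone check of that arithmetic:

    #include <stdio.h>

    #define MAX_SLOTS 8   /* ring slots per enqueue pass, as in the driver */

    int main(void)
    {
            unsigned int bursts[] = { 3, 8, 21 };
            for (int i = 0; i < 3; i++) {
                    unsigned int nb_pkts = bursts[i];
                    printf("burst %u:", bursts[i]);
                    while (nb_pkts) {
                            unsigned int send = (nb_pkts >> 3) ? MAX_SLOTS : nb_pkts;
                            printf(" %u", send);
                            nb_pkts -= send;
                    }
                    printf("\n");   /* e.g. 21 -> 8 8 5 */
            }
            return 0;
    }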
-+
-+static int
-+dpaa2_vlan_stripping_set(struct rte_eth_dev *dev, int on)
-+{
-+	int ret;
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+
-+	PMD_INIT_FUNC_TRACE();
-+
-+	if (dpni == NULL) {
-+		PMD_DRV_LOG(ERR, "dpni is NULL");
-+		return -1;
-+	}
-+
-+	ret = dpni_set_vlan_removal(dpni, CMD_PRI_LOW, priv->token, on);
-+	if (ret < 0)
-+		PMD_DRV_LOG(ERR, "Unable to dpni_set_vlan_removal hwid =%d",
-+			    priv->hw_id);
-+	return ret;
-+}
-+
-+static int
-+dpaa2_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
-+{
-+	int ret;
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+
-+	if (dpni == NULL) {
-+		PMD_DRV_LOG(ERR, "dpni is NULL");
-+		return -1;
-+	}
-+
-+	if (on)
-+		ret = dpni_add_vlan_id(dpni, CMD_PRI_LOW, priv->token, vlan_id);
-+	else
-+		ret = dpni_remove_vlan_id(dpni, CMD_PRI_LOW, priv->token, vlan_id);
-+
-+	if (ret < 0)
-+		PMD_DRV_LOG(ERR, "ret = %d Unable to add/rem vlan %d hwid =%d",
-+			    ret, vlan_id, priv->hw_id);
-+
-+	/* todo: this should be on a global basis */
-+/*	ret = dpni_set_vlan_filters(dpni, CMD_PRI_LOW, priv->token, on);
-+	if (ret < 0)
-+		PMD_DRV_LOG(ERR, "Unable to set vlan filter");
-+*/
-+	return ret;
-+}
-+
-+static void
-+dpaa2_vlan_offload_set(struct rte_eth_dev *dev, int mask)
-+{
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+	int ret;
-+
-+	if (mask & ETH_VLAN_FILTER_MASK) {
-+		if (dev->data->dev_conf.rxmode.hw_vlan_filter)
-+			ret = dpni_set_vlan_filters(dpni, CMD_PRI_LOW, priv->token, TRUE);
-+		else
-+			ret = dpni_set_vlan_filters(dpni, CMD_PRI_LOW, priv->token, FALSE);
-+		if (ret < 0)
-+			PMD_DRV_LOG(ERR, "ret = %d Unable to set vlan filter", ret);
-+	}
-+
-+	if (mask & ETH_VLAN_STRIP_MASK) {
-+		/* Enable or disable VLAN stripping */
-+		if (dev->data->dev_conf.rxmode.hw_vlan_strip)
-+			dpaa2_vlan_stripping_set(dev, TRUE);
-+		else
-+			dpaa2_vlan_stripping_set(dev, FALSE);
-+	}
-+
-+	if (mask & ETH_VLAN_EXTEND_MASK) {
-+		PMD_INIT_FUNC_TRACE();
-+/*		if (dev->data->dev_conf.rxmode.hw_vlan_extend)
-+			i40e_vsi_config_double_vlan(vsi, TRUE);
-+		else
-+			i40e_vsi_config_double_vlan(vsi, FALSE);
-+*/	}
-+}
-+
-+static void
-+dpaa2_eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
-+{
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+
-+	dev_info->driver_name = drivername;
-+	dev_info->if_index = priv->hw_id;
-+	dev_info->max_mac_addrs = priv->max_unicast_filters;
-+	dev_info->max_rx_pktlen = DPAA2_MAX_RX_PKT_LEN;
-+	dev_info->max_rx_queues = (uint16_t)priv->nb_rx_queues;
-+	dev_info->max_tx_queues = (uint16_t)priv->nb_tx_queues;
-+	dev_info->min_rx_bufsize = DPAA2_MIN_RX_BUF_SIZE;
-+	dev_info->pci_dev = dev->pci_dev;
-+/*	dev_info->rx_offload_capa =
-+		DEV_RX_OFFLOAD_IPV4_CKSUM |
-+		DEV_RX_OFFLOAD_UDP_CKSUM |
-+		DEV_RX_OFFLOAD_TCP_CKSUM;
-+	dev_info->tx_offload_capa =
-+		DEV_TX_OFFLOAD_IPV4_CKSUM |
-+		DEV_TX_OFFLOAD_UDP_CKSUM |
-+		DEV_TX_OFFLOAD_TCP_CKSUM |
-+		DEV_TX_OFFLOAD_SCTP_CKSUM;
-+*/
-+}
-+
-+static int
-+dpaa2_alloc_rx_tx_queues(struct rte_eth_dev *dev)
-+{
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	uint8_t tc_idx;
-+	uint16_t dist_idx;
-+	uint32_t vq_id;
-+	struct dpaa2_queue *mc_q, *mcq;
-+	uint32_t tot_queues;
-+	int i;
-+	struct dpaa2_queue *dpaa2_q;
-+
-+	tot_queues = priv->nb_rx_queues + priv->nb_tx_queues;
-+	mc_q = rte_malloc(NULL, sizeof(struct dpaa2_queue) * tot_queues,
-+			  RTE_CACHE_LINE_SIZE);
-+	if (!mc_q) {
-+		PMD_DRV_LOG(ERR, "malloc failed for rx/tx queues\n");
-+		return -1;
-+	}
-+
-+	for (i = 0; i < priv->nb_rx_queues; i++) {
-+		mc_q->dev = dev;
-+		priv->rx_vq[i] = mc_q++;
-+		dpaa2_q = (struct dpaa2_queue *)priv->rx_vq[i];
-+		dpaa2_q->q_storage = rte_malloc("dq_storage",
-+					sizeof(struct queue_storage_info_t),
-+					RTE_CACHE_LINE_SIZE);
-+		if (!dpaa2_q->q_storage)
-+			goto fail;
-+
-+		memset(dpaa2_q->q_storage, 0, sizeof(struct queue_storage_info_t));
-+	}
-+
-+	for (i = 0; i < priv->nb_tx_queues; i++) {
-+		mc_q->dev = dev;
-+		mc_q->flow_id = DPNI_NEW_FLOW_ID;
-+		priv->tx_vq[i] = mc_q++;
-+	}
-+
-+	vq_id = 0;
-+	for (tc_idx = 0; tc_idx < priv->num_tc; tc_idx++) {
-+		for (dist_idx = 0; dist_idx < priv->num_dist_per_tc[tc_idx]; dist_idx++) {
-+			mcq = (struct dpaa2_queue *)priv->rx_vq[vq_id];
-+			mcq->tc_index = tc_idx;
-+			mcq->flow_id = dist_idx;
-+			vq_id++;
-+		}
-+	}
-+
-+	return 0;
-+fail:
-+	i -= 1;
-+	while (i >= 0) {
-+		dpaa2_q = (struct dpaa2_queue *)priv->rx_vq[i];
-+		rte_free(dpaa2_q->q_storage);
-+		i--;	/* fix: decrement, or this cleanup loop never ends */
-+	}
-+	return -1;
-+}
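The next function walks the rte_eth RSS hash-field bitmask one bit at a time and emits key-extract entries per protocol layer. Its scan loop, isolated as a standalone sketch (the flag bit positions here are illustrative stand-ins for the ETH_RSS_* values):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t req_dist_set = (1ULL << 2) | (1ULL << 17); /* pretend ETH_RSS_* bits */
            uint32_t loop = 0;

            while (req_dist_set) {
                    if (req_dist_set % 2 != 0)  /* low bit set: configure this field */
                            printf("configure extract for bit %u (flag 0x%llx)\n",
                                   loop, 1ULL << loop);
                    req_dist_set >>= 1;
                    loop++;
            }
            return 0;
    }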
-+
-+static void dpaa2_distset_to_dpkg_profile_cfg(
-+		uint32_t req_dist_set,
-+		struct dpkg_profile_cfg *kg_cfg)
-+{
-+	uint32_t loop = 0, i = 0, dist_field = 0;
-+	int l2_configured = 0, l3_configured = 0;
-+	int l4_configured = 0, sctp_configured = 0;
-+
-+	memset(kg_cfg, 0, sizeof(struct dpkg_profile_cfg));
-+	while (req_dist_set) {
-+		if (req_dist_set % 2 != 0) {
-+			dist_field = 1U << loop;
-+			switch (dist_field) {
-+			case ETH_RSS_L2_PAYLOAD:
-+
-+				if (l2_configured)
-+					break;
-+				l2_configured = 1;
-+
-+				kg_cfg->extracts[i].extract.from_hdr.prot =
-+					NET_PROT_ETH;
-+				kg_cfg->extracts[i].extract.from_hdr.field =
-+					NH_FLD_ETH_TYPE;
-+				kg_cfg->extracts[i].type = DPKG_EXTRACT_FROM_HDR;
-+				kg_cfg->extracts[i].extract.from_hdr.type =
-+					DPKG_FULL_FIELD;
-+				i++;
-+				break;
-+
-+			case ETH_RSS_IPV4:
-+			case ETH_RSS_FRAG_IPV4:
-+			case ETH_RSS_NONFRAG_IPV4_OTHER:
-+			case ETH_RSS_IPV6:
-+			case ETH_RSS_FRAG_IPV6:
-+			case ETH_RSS_NONFRAG_IPV6_OTHER:
-+			case ETH_RSS_IPV6_EX:
-+
-+				if (l3_configured)
-+					break;
-+				l3_configured = 1;
-+
-+				kg_cfg->extracts[i].extract.from_hdr.prot =
-+					NET_PROT_IP;
-+				kg_cfg->extracts[i].extract.from_hdr.field =
-+					NH_FLD_IP_SRC;
-+				kg_cfg->extracts[i].type = DPKG_EXTRACT_FROM_HDR;
-+				kg_cfg->extracts[i].extract.from_hdr.type =
-+					DPKG_FULL_FIELD;
-+				i++;
-+
-+				kg_cfg->extracts[i].extract.from_hdr.prot =
-+					NET_PROT_IP;
-+				kg_cfg->extracts[i].extract.from_hdr.field =
-+					NH_FLD_IP_DST;
-+				kg_cfg->extracts[i].type = DPKG_EXTRACT_FROM_HDR;
-+				kg_cfg->extracts[i].extract.from_hdr.type =
-+					DPKG_FULL_FIELD;
-+				i++;
-+
-+				kg_cfg->extracts[i].extract.from_hdr.prot =
-+					NET_PROT_IP;
-+				kg_cfg->extracts[i].extract.from_hdr.field =
-+					NH_FLD_IP_PROTO;
-+				kg_cfg->extracts[i].type = DPKG_EXTRACT_FROM_HDR;
-+				kg_cfg->extracts[i].extract.from_hdr.type =
-+					DPKG_FULL_FIELD;
-+				i++;
-+				break;
-+
-+			case ETH_RSS_NONFRAG_IPV4_TCP:
-+			case ETH_RSS_NONFRAG_IPV6_TCP:
-+			case ETH_RSS_NONFRAG_IPV4_UDP:
-+			case ETH_RSS_NONFRAG_IPV6_UDP:
-+
-+				if (l4_configured)
-+					break;
-+				l4_configured = 1;
-+
-+				kg_cfg->extracts[i].extract.from_hdr.prot =
-+					NET_PROT_TCP;
-+				kg_cfg->extracts[i].extract.from_hdr.field =
-+					NH_FLD_TCP_PORT_SRC;
-+				kg_cfg->extracts[i].type = DPKG_EXTRACT_FROM_HDR;
-+				kg_cfg->extracts[i].extract.from_hdr.type =
-+					DPKG_FULL_FIELD;
-+				i++;
-+
-+				kg_cfg->extracts[i].extract.from_hdr.prot =
-+					NET_PROT_TCP;
-+				kg_cfg->extracts[i].extract.from_hdr.field =
-+					NH_FLD_TCP_PORT_DST;
-+				kg_cfg->extracts[i].type = DPKG_EXTRACT_FROM_HDR;
-+				kg_cfg->extracts[i].extract.from_hdr.type =
-+					DPKG_FULL_FIELD;
-+				i++;
-+				break;
-+
-+			case ETH_RSS_NONFRAG_IPV4_SCTP:
-+			case ETH_RSS_NONFRAG_IPV6_SCTP:
-+
-+				if (sctp_configured)
-+					break;
-+				sctp_configured = 1;
-+
-+				kg_cfg->extracts[i].extract.from_hdr.prot =
-+					NET_PROT_SCTP;
-+				kg_cfg->extracts[i].extract.from_hdr.field =
-+					NH_FLD_SCTP_PORT_SRC;
-+				kg_cfg->extracts[i].type = DPKG_EXTRACT_FROM_HDR;
-+				kg_cfg->extracts[i].extract.from_hdr.type =
-+					DPKG_FULL_FIELD;
-+				i++;
-+
-+				kg_cfg->extracts[i].extract.from_hdr.prot =
-+					NET_PROT_SCTP;
-+				kg_cfg->extracts[i].extract.from_hdr.field =
-+					NH_FLD_SCTP_PORT_DST;
-+				kg_cfg->extracts[i].type = DPKG_EXTRACT_FROM_HDR;
-+				kg_cfg->extracts[i].extract.from_hdr.type =
-+					DPKG_FULL_FIELD;
-+				i++;
-+				break;
-+
-+			default:
-+				PMD_DRV_LOG(WARNING, "Bad flow distribution option %x\n", dist_field);
-+			}
-+		}
-+		req_dist_set = req_dist_set >> 1;
-+		loop++;
-+	}
-+	kg_cfg->num_extracts = i;
-+}
-+
-+static int dpaa2_setup_flow_distribution(struct rte_eth_dev *eth_dev,
-+					 uint32_t req_dist_set)
-+{
-+	struct dpaa2_dev_priv *priv = eth_dev->data->dev_private;
-+	struct fsl_mc_io *dpni = priv->hw;
-+	struct dpni_rx_tc_dist_cfg tc_cfg;
-+	struct dpkg_profile_cfg kg_cfg;
-+	void *p_params;
-+	int ret, tc_index = 0;
-+
-+	p_params = rte_malloc(
-+		NULL, DIST_PARAM_IOVA_SIZE, RTE_CACHE_LINE_SIZE);
-+	if (!p_params) {
-+		PMD_DRV_LOG(ERR, "Memory unavailable\n");
-+		return -ENOMEM;
-+	}
-+	memset(p_params, 0, DIST_PARAM_IOVA_SIZE);
-+	memset(&tc_cfg, 0, sizeof(struct dpni_rx_tc_dist_cfg));
-+
-+	dpaa2_distset_to_dpkg_profile_cfg(req_dist_set, &kg_cfg);
-+	tc_cfg.key_cfg_iova = (uint64_t)(DPAA2_VADDR_TO_IOVA(p_params));
-+	tc_cfg.dist_size = eth_dev->data->nb_rx_queues;
-+	tc_cfg.dist_mode = DPNI_DIST_MODE_HASH;
-+
-+	ret = dpni_prepare_key_cfg(&kg_cfg, p_params);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Unable to prepare extract parameters\n");
-+		rte_free(p_params);
-+		return ret;
-+	}
-+
-+	ret = dpni_set_rx_tc_dist(dpni, CMD_PRI_LOW, priv->token, tc_index,
-+				  &tc_cfg);
-+	rte_free(p_params);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Setting distribution for Rx failed with "
-+			    "err code: %d\n", ret);
-+		return ret;
-+	}
-+
-+	return 0;
-+}
-+
-+static int
-+dpaa2_remove_flow_distribution(struct rte_eth_dev *eth_dev, uint8_t tc_index)
-+{
-+	struct dpaa2_dev_priv *priv = eth_dev->data->dev_private;
-+	struct fsl_mc_io *dpni = priv->hw;
-+	struct dpni_rx_tc_dist_cfg tc_cfg;
-+	struct dpkg_profile_cfg kg_cfg;
-+	void *p_params;
-+	int ret;
-+
-+	p_params = rte_malloc(
-+		NULL, DIST_PARAM_IOVA_SIZE, RTE_CACHE_LINE_SIZE);
-+	if (!p_params) {
-+		PMD_DRV_LOG(ERR, "Memory unavailable\n");
-+		return -ENOMEM;
-+	}
-+	memset(p_params, 0, DIST_PARAM_IOVA_SIZE);
-+	memset(&tc_cfg, 0, sizeof(struct dpni_rx_tc_dist_cfg));
-+
-+	tc_cfg.key_cfg_iova = (uint64_t)(DPAA2_VADDR_TO_IOVA(p_params));
-+	tc_cfg.dist_size = 0;
-+	tc_cfg.dist_mode = DPNI_DIST_MODE_NONE;
-+
-+	ret = dpni_prepare_key_cfg(&kg_cfg, p_params);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Unable to prepare extract parameters\n");
-+		rte_free(p_params);
-+		return ret;
-+	}
-+
-+	ret = dpni_set_rx_tc_dist(dpni, CMD_PRI_LOW, priv->token, tc_index,
-+				  &tc_cfg);
-+	rte_free(p_params);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Setting distribution for Rx failed with "
-+			    "err code: %d\n", ret);
-+		return ret;
-+	}
-+	return ret;
-+}
-+
-+static int
-+dpaa2_alloc_dq_storage(struct queue_storage_info_t *q_storage)
-+{
-+	int i = 0;
-+
-+	for (i = 0; i < NUM_DQS_PER_QUEUE; i++) {
-+		q_storage->dq_storage[i] = rte_malloc(NULL,
-+			NUM_MAX_RECV_FRAMES * sizeof(struct qbman_result),
-+			RTE_CACHE_LINE_SIZE);
-+		if (!q_storage->dq_storage[i])
-+			goto fail;
-+		/* setting toggle for initial condition */
-+		q_storage->toggle = -1;
-+	}
-+	return 0;
-+fail:
-+	i -= 1;
-+	while (i >= 0) {
-+		rte_free(q_storage->dq_storage[i]);
-+		i--;	/* fix: decrement, or this cleanup loop never ends */
-+	}
-+	return -1;
-+}
-+
-+static int
-+dpaa2_eth_dev_configure(struct rte_eth_dev *dev)
-+{
-+	struct rte_eth_dev_data *data = dev->data;
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct rte_eth_conf *eth_conf = &data->dev_conf;
-+	struct dpaa2_queue *dpaa2_q;
-+	int i, ret;
-+
-+	for (i = 0; i < data->nb_rx_queues; i++) {
-+		data->rx_queues[i] = priv->rx_vq[i];
-+		dpaa2_q = (struct dpaa2_queue *)data->rx_queues[i];
-+		if (dpaa2_alloc_dq_storage(dpaa2_q->q_storage))
-+			return -1;
-+	}
-+
-+	for (i = 0; i < data->nb_tx_queues; i++) {
-+		data->tx_queues[i] = priv->tx_vq[i];
-+		dpaa2_q = (struct dpaa2_queue *)data->tx_queues[i];
-+		dpaa2_q->cscn = rte_malloc(NULL, sizeof(struct qbman_result), 16);
-+		if (!dpaa2_q->cscn)
-+			goto fail_tx_queue;
-+	}
-+
-+	/* Check for correct configuration */
-+	if (eth_conf->rxmode.mq_mode != ETH_MQ_RX_RSS &&
-+	    data->nb_rx_queues > 1) {
-+		PMD_DRV_LOG(ERR, "Distribution is not enabled, "
-+			    "but Rx queues more than 1\n");
-+		return -1;
-+	}
-+
-+	if (eth_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) {
-+		/* Return in case number of Rx queues is 1 */
-+		if (data->nb_rx_queues == 1)
-+			return 0;
-+		ret = dpaa2_setup_flow_distribution(dev,
-+				eth_conf->rx_adv_conf.rss_conf.rss_hf);
-+		if (ret) {
-+			PMD_DRV_LOG(ERR, "dpaa2_setup_flow_distribution failed\n");
-+			return ret;
-+		}
-+	}
-+
-+	return 0;
-+fail_tx_queue:
-+	i -= 1;
-+	while (i >= 0) {
-+		dpaa2_q = (struct dpaa2_queue *)data->tx_queues[i];
-+		rte_free(dpaa2_q->cscn);
-+		i--;	/* fix: decrement, or this cleanup loop never ends */
-+	}
-+	return -1;
-+}
-+
-+static int dpaa2_attach_bp_list(struct dpaa2_dev_priv *priv,
-+				void *blist)
-+{
-+	/* Function to attach a DPNI with a buffer pool list. Buffer pool list
-+	 * handle is passed in blist.
-+	 */
-+	int32_t retcode;
-+	struct fsl_mc_io *dpni = priv->hw;
-+	struct dpni_pools_cfg bpool_cfg;
-+	struct dpaa2_bp_list *bp_list = (struct dpaa2_bp_list *)blist;
-+	struct dpni_buffer_layout layout;
-+	int tot_size;
-+
-+	/* Rx buffer layout: check alignment for buffer layouts first */
-+	tot_size = DPAA2_HW_BUF_RESERVE + RTE_PKTMBUF_HEADROOM;
-+	tot_size = RTE_ALIGN_CEIL(tot_size,
-+				  DPAA2_PACKET_LAYOUT_ALIGN);
-+
-+	memset(&layout, 0, sizeof(struct dpni_buffer_layout));
-+	layout.options = DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM;
-+
-+	layout.data_head_room = tot_size - DPAA2_FD_PTA_SIZE - DPAA2_MBUF_HW_ANNOTATION;
-+	retcode = dpni_set_rx_buffer_layout(dpni, CMD_PRI_LOW, priv->token,
-+					    &layout);
-+	if (retcode) {
-+		PMD_DRV_LOG(ERR, "Err(%d) in setting rx buffer layout\n", retcode);
-+		return retcode;
-+	}
-+
-+	/* Attach buffer pool to the network interface as described by the user */
-+	bpool_cfg.num_dpbp = 1;
-+	bpool_cfg.pools[0].dpbp_id = bp_list->buf_pool.dpbp_node->dpbp_id;
-+	bpool_cfg.pools[0].backup_pool = 0;
-+	bpool_cfg.pools[0].buffer_size =
-+		RTE_ALIGN_CEIL(bp_list->buf_pool.size,
-+			       256 /*DPAA2_PACKET_LAYOUT_ALIGN*/);
-+
-+	retcode = dpni_set_pools(dpni, CMD_PRI_LOW, priv->token, &bpool_cfg);
-+	if (retcode != 0) {
-+		PMD_DRV_LOG(ERR, "Error in attaching the buffer pool list "
-+			    "bpid = %d Error code = %d\n",
-+			    bpool_cfg.pools[0].dpbp_id, retcode);
-+		return retcode;
-+	}
-+
-+	priv->bp_list = bp_list;
-+	return 0;
-+}
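dpaa2_attach_bp_list above derives the Rx data headroom by aligning the reserved space up and then carving out the frame-annotation areas. The same arithmetic stand-alone, with illustrative values for the DPAA2_* constants (the real ones live in rte_eth_dpaa2_pvt.h and may differ):

    #include <stdio.h>

    #define ALIGN_CEIL(v, a)  (((v) + (a) - 1) & ~((a) - 1))

    /* illustrative values for this sketch only */
    #define HW_BUF_RESERVE    0
    #define PKTMBUF_HEADROOM  128
    #define LAYOUT_ALIGN      64
    #define FD_PTA_SIZE       64
    #define HW_ANNOTATION     64

    int main(void)
    {
            int tot_size = ALIGN_CEIL(HW_BUF_RESERVE + PKTMBUF_HEADROOM, LAYOUT_ALIGN);
            int head_room = tot_size - FD_PTA_SIZE - HW_ANNOTATION;
            printf("tot_size=%d data_head_room=%d\n", tot_size, head_room);
            return 0;
    }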
-+
-+/* Function to setup RX flow information. It contains traffic class ID,
-+ * flow ID, destination configuration etc.
-+ */
-+static int
-+dpaa2_rx_queue_setup(struct rte_eth_dev *dev,
-+		     uint16_t rx_queue_id,
-+		     uint16_t nb_rx_desc __rte_unused,
-+		     unsigned int socket_id __rte_unused,
-+		     const struct rte_eth_rxconf *rx_conf __rte_unused,
-+		     struct rte_mempool *mb_pool)
-+{
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+	struct dpaa2_queue *dpaa2_q;
-+	struct dpni_queue_cfg cfg;
-+	uint8_t tc_id, flow_id;
-+	uint32_t bpid;
-+	int ret;
-+
-+	PMD_DRV_LOG(INFO, "dev =%p, queue =%d, pool = %p, conf =%p",
-+		    dev, rx_queue_id, mb_pool, rx_conf);
-+
-+	if (!priv->bp_list || priv->bp_list->mp != mb_pool) {
-+		RTE_VERIFY(mb_pool->pool_data);
-+		bpid = mempool_to_bpid(mb_pool);
-+		ret = dpaa2_attach_bp_list(priv,
-+					   bpid_info[bpid].bp_list);
-+		if (ret)
-+			return ret;
-+	}
-+	dpaa2_q = (struct dpaa2_queue *)dev->data->rx_queues[rx_queue_id];
-+
-+	/* Get the tc id and flow id from the given VQ id */
-+	tc_id = rx_queue_id / MAX_DIST_PER_TC;
-+	flow_id = rx_queue_id % MAX_DIST_PER_TC;
-+	memset(&cfg, 0, sizeof(struct dpni_queue_cfg));
-+
-+	cfg.options = cfg.options | DPNI_QUEUE_OPT_USER_CTX;
-+
-+#ifdef DPAA2_STASHING
-+	cfg.options = cfg.options | DPNI_QUEUE_OPT_FLC;
-+#endif
-+
-+	cfg.user_ctx = (uint64_t)(dpaa2_q);
-+#ifdef DPAA2_STASHING
-+	cfg.flc_cfg.flc_type = DPNI_FLC_STASH;
-+	cfg.flc_cfg.frame_data_size = DPNI_STASH_SIZE_64B;
-+	/* Enabling Annotation stashing */
-+	cfg.options |= DPNI_FLC_STASH_FRAME_ANNOTATION;
-+	cfg.flc_cfg.options = DPNI_FLC_STASH_FRAME_ANNOTATION;
-+#endif
-+	ret = dpni_set_rx_flow(dpni, CMD_PRI_LOW, priv->token,
-+			       tc_id, flow_id, &cfg);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Error in setting the rx flow: = %d\n", ret);
-+		return -1;
-+	}
-+	return 0;
-+}
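Queue indices are flattened as tc_id * MAX_DIST_PER_TC + flow_id, so dpaa2_rx_queue_setup recovers both with a divide and a modulo. Checking the round trip stand-alone:

    #include <stdio.h>

    #define MAX_DIST_PER_TC 16   /* flow distributions per traffic class */

    int main(void)
    {
            for (int q = 0; q < 34; q += 17) {
                    int tc_id = q / MAX_DIST_PER_TC;
                    int flow_id = q % MAX_DIST_PER_TC;
                    printf("rx queue %d -> tc %d, flow %d\n", q, tc_id, flow_id);
            }
            return 0;
    }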
-+
-+static int
-+dpaa2_tx_queue_setup(struct rte_eth_dev *dev,
-+		     uint16_t tx_queue_id,
-+		     uint16_t nb_tx_desc __rte_unused,
-+		     unsigned int socket_id __rte_unused,
-+		     const struct rte_eth_txconf *tx_conf __rte_unused)
-+{
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct dpaa2_queue *dpaa2_q = (struct dpaa2_queue *)
-+		dev->data->tx_queues[tx_queue_id];
-+	struct fsl_mc_io *dpni = priv->hw;
-+	struct dpni_tx_flow_cfg cfg;
-+	struct dpni_tx_conf_cfg tx_conf_cfg;
-+#ifdef DPAA2_CGR_SUPPORT
-+	struct dpni_congestion_notification_cfg cong_notif_cfg;
-+#endif
-+	uint32_t tc_idx;
-+	int ret;
-+
-+	PMD_INIT_FUNC_TRACE();
-+
-+	/* Return if queue already configured */
-+	if (dpaa2_q->flow_id != DPNI_NEW_FLOW_ID)
-+		return 0;
-+
-+	memset(&cfg, 0, sizeof(struct dpni_tx_flow_cfg));
-+	cfg.l3_chksum_gen = 1;
-+	cfg.options |= DPNI_TX_FLOW_OPT_L3_CHKSUM_GEN;
-+	cfg.l4_chksum_gen = 1;
-+	cfg.options |= DPNI_TX_FLOW_OPT_L4_CHKSUM_GEN;
-+	memset(&tx_conf_cfg, 0, sizeof(struct dpni_tx_conf_cfg));
-+	tx_conf_cfg.errors_only = TRUE;
-+
-+	/*
-+	if (action & DPAA2BUF_TX_CONF_REQUIRED) {
-+		cfg.options = DPNI_TX_FLOW_OPT_TX_CONF_ERROR;
-+		cfg.use_common_tx_conf_queue =
-+			((action & DPAA2BUF_TX_CONF_ERR_ON_COMMON_Q) ?
-+			TRUE : FALSE);
-+		tx_conf_cfg.errors_only = FALSE;
-+	}*/
-+
-+	if (priv->num_tc == 1)
-+		tc_idx = 0;
-+	else
-+		tc_idx = tx_queue_id;
-+
-+	ret = dpni_set_tx_flow(dpni, CMD_PRI_LOW, priv->token,
-+			       &(dpaa2_q->flow_id), &cfg);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Error in setting the tx flow: "
-+			    "ErrorCode = %x\n", ret);
-+		return -1;
-+	}
-+	/* Set tx-conf and error configuration */
-+	ret = dpni_set_tx_conf(dpni, CMD_PRI_LOW, priv->token,
-+			       dpaa2_q->flow_id, &tx_conf_cfg);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Error in setting tx conf settings: "
-+			    "ErrorCode = %x", ret);
-+		return -1;
-+	}
-+
-+	if (tx_queue_id == 0) {
-+		/* Set tx-conf and error configuration */
-+		ret = dpni_set_tx_conf(dpni, CMD_PRI_LOW, priv->token,
-+				       DPNI_COMMON_TX_CONF, &tx_conf_cfg);
-+		if (ret) {
-+			PMD_DRV_LOG(ERR, "Error in setting tx conf settings: "
-+				    "ErrorCode = %x", ret);
-+			return -1;
-+		}
-+	}
-+	dpaa2_q->tc_index = tc_idx;
-+
-+#ifdef DPAA2_CGR_SUPPORT
-+	cong_notif_cfg.units = DPNI_CONGESTION_UNIT_BYTES;
-+	/* Notify about congestion when the queue size reaches 128 frames
-+	 * of 64 bytes each.
-+	 */
-+	cong_notif_cfg.threshold_entry = CONG_ENTER_THRESHOLD;
-+	/* Notify that the queue is no longer congested when the number of
-+	 * frames in the queue falls below this threshold.
-+	 * TODO: Check if this value is the optimum value for better performance.
-+	 */
-+	cong_notif_cfg.threshold_exit = CONG_EXIT_THRESHOLD;
-+	cong_notif_cfg.message_ctx = 0;
-+	cong_notif_cfg.message_iova = (uint64_t)dpaa2_q->cscn;
-+	cong_notif_cfg.dest_cfg.dest_type = DPNI_DEST_NONE;
-+	cong_notif_cfg.options = DPNI_CONG_OPT_WRITE_MEM_ON_ENTER |
-+		DPNI_CONG_OPT_WRITE_MEM_ON_EXIT | DPNI_CONG_OPT_COHERENT_WRITE;
-+
-+	ret = dpni_set_tx_tc_congestion_notification(dpni, CMD_PRI_LOW,
-+						     priv->token,
-+						     tc_idx, &cong_notif_cfg);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Error in setting tx congestion notification "
-+			    "settings: ErrorCode = %x", ret);
-+		return -1;
-+	}
-+#endif
-+	return 0;
-+}
-+
-+void
-+dpaa2_rx_queue_release(void *q)
-+{
-+	printf("\n(%s) called for queue=%p\n", __func__, q);
-+	return;
-+}
-+
-+void
-+dpaa2_tx_queue_release(void *q)
-+{
-+	printf("\n(%s) called for queue=%p\n", __func__, q);
-+	return;
-+}
-+
-+static const uint32_t *
-+dpaa2_supported_ptypes_get(struct rte_eth_dev *dev)
-+{
-+	static const uint32_t ptypes[] = {
-+		/* todo: add more types */
-+		RTE_PTYPE_L2_ETHER,
-+		RTE_PTYPE_L3_IPV4,
-+		RTE_PTYPE_L3_IPV4_EXT,
-+		RTE_PTYPE_L3_IPV6,
-+		RTE_PTYPE_L3_IPV6_EXT,
-+		RTE_PTYPE_L4_TCP,
-+		RTE_PTYPE_L4_UDP,
-+		RTE_PTYPE_L4_SCTP,
-+		RTE_PTYPE_L4_ICMP,
-+		RTE_PTYPE_UNKNOWN
-+	};
-+
-+	if (dev->rx_pkt_burst == eth_dpaa2_prefetch_rx ||
-+	    dev->rx_pkt_burst == eth_dpaa2_rx)
-+		return ptypes;
-+	return NULL;
-+}
-+
-+static int
-+dpaa2_dev_start(struct rte_eth_dev *dev)
-+{
-+	struct rte_eth_dev_data *data = dev->data;
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+	struct dpni_queue_attr cfg;
-+	uint16_t qdid;
-+	struct dpaa2_queue *dpaa2_q;
-+	int ret, i, mask = 0;
-+
-+	PMD_INIT_FUNC_TRACE();
-+
-+	dev->data->dev_link.link_status = 1;
-+
-+	ret = dpni_enable(dpni, CMD_PRI_LOW, priv->token);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Failure %d in enabling dpni %d device\n",
-+			    ret, priv->hw_id);
-+		return ret;
-+	}
-+
-+	ret = dpni_get_qdid(dpni, CMD_PRI_LOW, priv->token, &qdid);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Failed to get qdid, error code = %d\n", ret);
-+		return ret;
-+	}
-+	priv->qdid = qdid;
-+
-+	for (i = 0; i < data->nb_rx_queues; i++) {
-+		dpaa2_q = (struct dpaa2_queue *)data->rx_queues[i];
-+		ret = dpni_get_rx_flow(dpni, CMD_PRI_LOW, priv->token,
-+				       dpaa2_q->tc_index, dpaa2_q->flow_id, &cfg);
-+		if (ret) {
-+			PMD_DRV_LOG(ERR, "Failed to get flow "
-+				    "information, error code = %d\n", ret);
-+			return ret;
-+		}
-+		dpaa2_q->fqid = cfg.fqid;
-+	}
-+	/*
-+	 * VLAN Offload Settings
-+	 */
-+	if (priv->options & DPNI_OPT_VLAN_FILTER)
-+		mask = ETH_VLAN_FILTER_MASK;
-+
-+	if (priv->options & DPNI_OPT_VLAN_MANIPULATION)
-+		mask |= ETH_VLAN_STRIP_MASK;	/* fix: OR, don't overwrite the filter bit */
-+
-+	if (mask)
-+		dpaa2_vlan_offload_set(dev, mask);
-+
-+	return 0;
-+}
-+
-+/*********************************************************************
-+ *
-+ *  This routine disables all traffic on the adapter by issuing a
-+ *  global reset on the MAC.
-+ *
-+ **********************************************************************/
-+static void
-+dpaa2_dev_stop(struct rte_eth_dev *dev)
-+{
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+	int ret;
-+	struct rte_eth_link link;
-+
-+	dev->data->dev_link.link_status = 0;
-+
-+	ret = dpni_disable(dpni, CMD_PRI_LOW, priv->token);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Failure in disabling dpni %d device\n", priv->hw_id);
-+		return;
-+	}
-+
-+	/* clear the recorded link status */
-+	memset(&link, 0, sizeof(link));
-+	rte_dpni_dev_atomic_write_link_status(dev, &link);
-+}
-+
-+static void
-+dpaa2_dev_close(struct rte_eth_dev *dev)
-+{
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+	int ret;
-+	struct rte_eth_link link;
-+
-+	/* Function is the reverse of dpaa2_dev_init.
-+	 * It does the following:
-+	 * 1. Detach a DPNI from attached resources i.e. buffer pools, dpbp_id.
-+	 * 2. Close the DPNI device
-+	 * 3. Free the allocated resources.
-+	 */
-+
-+	/* Clean the device first */
-+	ret = dpni_reset(dpni, CMD_PRI_LOW, priv->token);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Failure cleaning dpni device with "
-+			    "error code %d\n", ret);
-+		return;
-+	}
-+
-+	/* Close the device at the underlying layer */
-+	ret = dpni_close(dpni, CMD_PRI_LOW, priv->token);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "Failure closing dpni device with "
-+			    "error code %d\n", ret);
-+		return;
-+	}
-+
-+	/* Free the allocated memory for ethernet private data and dpni */
-+	priv->hw = NULL;
-+	free(dpni);
-+
-+	memset(&link, 0, sizeof(link));
-+	rte_dpni_dev_atomic_write_link_status(dev, &link);
-+}
-+
-+static void
-+dpaa2_dev_promiscuous_enable(
-+		struct rte_eth_dev *dev)
-+{
-+	int ret;
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+
-+	if (dpni == NULL) {
-+		PMD_DRV_LOG(ERR, "dpni is NULL");
-+		return;
-+	}
-+
-+	ret = dpni_set_unicast_promisc(dpni, CMD_PRI_LOW, priv->token, TRUE);
-+	if (ret < 0)
-+		PMD_DRV_LOG(ERR, "Unable to enable promiscuous mode");
-+	return;
-+}
-+
-+static void
-+dpaa2_dev_promiscuous_disable(
-+		struct rte_eth_dev *dev)
-+{
-+	int ret;
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+
-+	if (dpni == NULL) {
-+		PMD_DRV_LOG(ERR, "dpni is NULL");
-+		return;
-+	}
-+
-+	ret = dpni_set_unicast_promisc(dpni, CMD_PRI_LOW, priv->token, FALSE);
-+	if (ret < 0)
-+		PMD_DRV_LOG(ERR, "Unable to disable promiscuous mode");
-+	return;
-+}
-+
-+static void
-+dpaa2_dev_allmulticast_enable(
-+		struct rte_eth_dev *dev)
-+{
-+	int ret;
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+
-+	if (dpni == NULL) {
-+		PMD_DRV_LOG(ERR, "dpni is NULL");
-+		return;
-+	}
-+
-+	ret = dpni_set_multicast_promisc(dpni, CMD_PRI_LOW, priv->token, true);
-+	if (ret < 0)
-+		PMD_DRV_LOG(ERR, "Unable to enable multicast promiscuous mode");
-+	return;
-+}
-+
-+static void
-+dpaa2_dev_allmulticast_disable(struct rte_eth_dev *dev)
-+{
-+	int ret;
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+
-+	if (dpni == NULL) {
-+		PMD_DRV_LOG(ERR, "dpni is NULL");
-+		return;
-+	}
-+
-+	ret = dpni_set_multicast_promisc(dpni, CMD_PRI_LOW, priv->token, false);
-+	if (ret < 0)
-+		PMD_DRV_LOG(ERR, "Unable to disable multicast promiscuous mode");
-+	return;
-+}
-+
-+static int dpaa2_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
-+{
-+	int ret;
-+	struct dpaa2_dev_priv *priv = dev->data->dev_private;
-+	struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-+	uint32_t frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
-+
-+	if (dpni == NULL) {
-+		PMD_DRV_LOG(ERR, "dpni is NULL");
-+		return -EINVAL;
-+	}
-+
-+	/* check that mtu is within the allowed range */
-+	if ((mtu < ETHER_MIN_MTU) || (frame_size > DPAA2_MAX_RX_PKT_LEN))
-+		return -EINVAL;
-+
-+	/* Set the Max Rx frame length as 'mtu' +
-+	 * Maximum Ethernet header length
-+	 */
-+	ret = dpni_set_max_frame_length(dpni, CMD_PRI_LOW, priv->token,
-+					mtu + ETH_VLAN_HLEN);
-+	if (ret) {
-+		PMD_DRV_LOG(ERR, "setting the max frame length failed");
-+		return -1;
-+	}
-+	if (priv->options & DPNI_OPT_IPF) {
-+		ret = dpni_set_mtu(dpni, CMD_PRI_LOW, priv->token, mtu);
-+		if (ret) {
-+			PMD_DRV_LOG(ERR, "Setting the MTU failed");
-+			return -1;
-+		}
-+	}
-+
-+	PMD_DRV_LOG(INFO, "MTU is configured %d for the device\n", mtu);
-+	return 0;
-+}
-+
-+static int
-+dpaa2_flow_ctrl_set(struct rte_eth_dev *dev __rte_unused,
-+ struct rte_eth_fc_conf *fc_conf __rte_unused) -+{ -+ return 0; -+} -+static void -+dpaa2_dev_add_mac_addr(struct rte_eth_dev *dev, -+ struct ether_addr *addr, -+ __rte_unused uint32_t index, -+ __rte_unused uint32_t pool) -+{ -+ int ret; -+ struct dpaa2_dev_priv *priv = dev->data->dev_private; -+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw; -+ -+ if (dpni == NULL) { -+ PMD_DRV_LOG(ERR, "dpni is NULL"); -+ return; -+ } -+ -+ ret = dpni_add_mac_addr(dpni, CMD_PRI_LOW, -+ priv->token, addr->addr_bytes); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Adding the MAC ADDR failed"); -+ } -+ -+ return; -+} -+ -+static void -+dpaa2_dev_remove_mac_addr(struct rte_eth_dev *dev, -+ uint32_t index) -+{ -+ int ret; -+ struct dpaa2_dev_priv *priv = dev->data->dev_private; -+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw; -+ struct rte_eth_dev_data *data = dev->data; -+ struct ether_addr *macaddr; -+ -+ macaddr = &data->mac_addrs[index]; -+ -+ if (dpni == NULL) { -+ PMD_DRV_LOG(ERR, "dpni is NULL"); -+ return; -+ } -+ -+ ret = dpni_remove_mac_addr(dpni, CMD_PRI_LOW, -+ priv->token, macaddr->addr_bytes); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Removing the MAC ADDR failed"); -+ } -+ -+ return; -+} -+ -+static void -+dpaa2_dev_set_mac_addr(struct rte_eth_dev *dev, -+ struct ether_addr *addr) -+{ -+ int ret; -+ struct dpaa2_dev_priv *priv = dev->data->dev_private; -+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw; -+ -+ if (dpni == NULL) { -+ PMD_DRV_LOG(ERR, "dpni is NULL"); -+ return; -+ } -+ -+ ret = dpni_set_primary_mac_addr(dpni, CMD_PRI_LOW, -+ priv->token, addr->addr_bytes); -+ -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Setting the MAC ADDR failed"); -+ } -+ -+ return; -+} -+ -+int dpaa2_dev_get_mac_addr(struct rte_eth_dev *dev, -+ struct ether_addr *addr) -+{ -+ int ret; -+ struct dpaa2_dev_priv *priv = dev->data->dev_private; -+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw; -+ -+ if (dpni == NULL) { -+ PMD_DRV_LOG(ERR, "dpni is NULL"); -+ return -EINVAL; -+ } -+ -+ ret = dpni_get_primary_mac_addr(dpni, CMD_PRI_LOW, -+ priv->token, addr->addr_bytes); -+ -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Getting the MAC ADDR failed"); -+ } -+ -+ return ret; -+} -+ -+/*int dpni_clear_mac_filters(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int unicast, -+ int multicast) -+ -+int dpni_set_vlan_insertion(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+ -+dpni_set_errors_behavior -+ -+int dpni_get_l3_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+ -+int dpni_set_l3_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+ -+int dpni_get_l4_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int *en) -+ -+int dpni_set_l4_chksum_validation(struct fsl_mc_io *mc_io, -+ uint32_t cmd_flags, -+ uint16_t token, -+ int en) -+ -+*/ -+ -+static int dpaa2_timestamp_enable(struct rte_eth_dev *dev) -+{ -+ struct dpaa2_dev_priv *priv = dev->data->dev_private; -+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw; -+ -+ struct dpni_buffer_layout layout; -+ int ret; -+ -+ layout.options = DPNI_BUF_LAYOUT_OPT_TIMESTAMP; -+ layout.pass_timestamp = TRUE; -+ -+ ret = dpni_set_rx_buffer_layout(dpni, CMD_PRI_LOW, priv->token, &layout); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Enabling timestamp for Rx failed with" -+ "err code: %d", ret); -+ return ret; -+ } -+ -+ ret = dpni_set_tx_buffer_layout(dpni, CMD_PRI_LOW, priv->token, &layout); -+ if (ret) { 
-+ PMD_DRV_LOG(ERR, "Enabling timestamp failed for Tx with" -+ "err code: %d", ret); -+ return ret; -+ } -+ -+ ret = dpni_set_tx_conf_buffer_layout(dpni, CMD_PRI_LOW, -+ priv->token, &layout); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Enabling timestamp failed for Tx-conf with" -+ "err code: %d", ret); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int dpaa2_timestamp_disable(struct rte_eth_dev *dev) -+{ -+ struct dpaa2_dev_priv *priv = dev->data->dev_private; -+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw; -+ struct dpni_buffer_layout layout; -+ int ret; -+ -+ layout.options = DPNI_BUF_LAYOUT_OPT_TIMESTAMP; -+ layout.pass_timestamp = FALSE; -+ -+ ret = dpni_set_rx_buffer_layout(dpni, CMD_PRI_LOW, priv->token, &layout); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Disabling timestamp failed for Rx with" -+ "err code: %d", ret); -+ return ret; -+ } -+ -+ ret = dpni_set_tx_buffer_layout(dpni, CMD_PRI_LOW, priv->token, &layout); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Disabling timestamp failed for Tx with" -+ "err code: %d", ret); -+ return ret; -+ } -+ -+ ret = dpni_set_tx_conf_buffer_layout(dpni, CMD_PRI_LOW, -+ priv->token, &layout); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Disabling timestamp failed for Tx-conf with" -+ "err code: %d", ret); -+ return ret; -+ } -+ -+ return ret; -+} -+ -+/* return 0 means link status changed, -1 means not changed */ -+static int -+dpaa2_dev_get_link_info(struct rte_eth_dev *dev, -+ int wait_to_complete __rte_unused) -+{ -+ int ret; -+ struct dpaa2_dev_priv *priv = dev->data->dev_private; -+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw; -+ struct rte_eth_link link, old; -+ struct dpni_link_state state = {0}; -+ -+ if (dpni == NULL) { -+ PMD_DRV_LOG(ERR, "dpni is NULL"); -+ return 0; -+ } -+ memset(&old, 0, sizeof(old)); -+ rte_dpni_dev_atomic_read_link_status(dev, &old); -+ -+ ret = dpni_get_link_state(dpni, CMD_PRI_LOW, priv->token, &state); -+ if (ret < 0) { -+ PMD_DRV_LOG(ERR, "dpni_get_link_state"); -+ return 0; -+ } -+ -+ if (state.up == 0) { -+ rte_dpni_dev_atomic_write_link_status(dev, &link); -+ if (state.up == old.link_status) -+ return -1; -+ return 0; -+ } -+ link.link_status = state.up; -+ link.link_speed = state.rate; -+ -+ if (state.options & DPNI_LINK_OPT_HALF_DUPLEX) -+ link.link_duplex = ETH_LINK_HALF_DUPLEX; -+ else -+ link.link_duplex = ETH_LINK_FULL_DUPLEX; -+ -+ rte_dpni_dev_atomic_write_link_status(dev, &link); -+ -+ if (link.link_status == old.link_status) -+ return -1; -+ -+ return 0; -+} -+ -+static -+void dpaa2_dev_stats_get(struct rte_eth_dev *dev, -+ struct rte_eth_stats *stats) -+{ -+ struct dpaa2_dev_priv *priv = dev->data->dev_private; -+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw; -+ -+ int32_t retcode; -+ uint64_t value; -+ -+ if (dpni == NULL) { -+ PMD_DRV_LOG(ERR, "dpni is NULL"); -+ return; -+ } -+ -+ if (!stats) { -+ PMD_DRV_LOG(ERR, "stats is NULL"); -+ return; -+ } -+ -+ retcode = dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_FRAME, &value); -+ if (retcode) -+ goto error; -+ stats->ipackets = value; -+ retcode = dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_BYTE, &value); -+ if (retcode) -+ goto error; -+ stats->ibytes = value; -+ retcode = dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_FRAME_DROP, &value); -+ if (retcode) -+ goto error; -+ stats->ierrors = value; -+ retcode = dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_FRAME_DISCARD, &value); -+ if (retcode) -+ goto error; -+ stats->ierrors = stats->ierrors + value; -+ retcode = 
dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_EGR_FRAME, &value); -+ if (retcode) -+ goto error; -+ stats->opackets = value; -+ dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_EGR_BYTE, &value); -+ if (retcode) -+ goto error; -+ stats->obytes = value; -+ retcode = dpni_get_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_EGR_FRAME_DISCARD, &value); -+ if (retcode) -+ goto error; -+ stats->oerrors = value; -+ -+ return; -+ -+error: -+ PMD_DRV_LOG(ERR, "Operation not completed:Error Code = %d\n", retcode); -+ return; -+}; -+ -+static -+void dpaa2_dev_stats_reset(struct rte_eth_dev *dev) -+{ -+ struct dpaa2_dev_priv *priv = dev->data->dev_private; -+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw; -+ -+ int32_t retcode; -+ -+ if (dpni == NULL) { -+ PMD_DRV_LOG(ERR, "dpni is NULL"); -+ return; -+ } -+ -+ retcode = dpni_set_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_FRAME, 0); -+ if (retcode) -+ goto error; -+ retcode = dpni_set_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_BYTE, 0); -+ if (retcode) -+ goto error; -+ retcode = dpni_set_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_BCAST_FRAME, 0); -+ if (retcode) -+ goto error; -+ retcode = dpni_set_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_BCAST_BYTES, 0); -+ if (retcode) -+ goto error; -+ retcode = dpni_set_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_MCAST_FRAME, 0); -+ if (retcode) -+ goto error; -+ retcode = dpni_set_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_MCAST_BYTE, 0); -+ if (retcode) -+ goto error; -+ retcode = dpni_set_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_FRAME_DROP, 0); -+ if (retcode) -+ goto error; -+ retcode = dpni_set_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_ING_FRAME_DISCARD, 0); -+ if (retcode) -+ goto error; -+ retcode = dpni_set_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_EGR_FRAME, 0); -+ if (retcode) -+ goto error; -+ retcode = dpni_set_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_EGR_BYTE, 0); -+ if (retcode) -+ goto error; -+ retcode = dpni_set_counter(dpni, CMD_PRI_LOW, priv->token, -+ DPNI_CNT_EGR_FRAME_DISCARD, 0); -+ if (retcode) -+ goto error; -+ -+ return; -+ -+error: -+ PMD_DRV_LOG(ERR, "Operation not completed:Error Code = %d\n", retcode); -+ return; -+}; -+ -+static struct eth_dev_ops ops = { -+ .dev_configure = dpaa2_eth_dev_configure, -+ .dev_start = dpaa2_dev_start, -+ .dev_stop = dpaa2_dev_stop, -+ .dev_close = dpaa2_dev_close, -+ .promiscuous_enable = dpaa2_dev_promiscuous_enable, -+ .promiscuous_disable = dpaa2_dev_promiscuous_disable, -+ .allmulticast_enable = dpaa2_dev_allmulticast_enable, -+ .allmulticast_disable = dpaa2_dev_allmulticast_disable, -+ .dev_set_link_up = NULL, -+ .dev_set_link_down = NULL, -+ .link_update = dpaa2_dev_get_link_info, -+ .stats_get = dpaa2_dev_stats_get, -+ .stats_reset = dpaa2_dev_stats_reset, -+ .dev_infos_get = dpaa2_eth_dev_info, -+ .dev_supported_ptypes_get = dpaa2_supported_ptypes_get, -+ .mtu_set = dpaa2_dev_mtu_set, -+ .vlan_filter_set = dpaa2_vlan_filter_set, -+ .vlan_tpid_set = NULL, -+ .vlan_offload_set = dpaa2_vlan_offload_set, -+ .vlan_strip_queue_set = NULL, -+ .vlan_pvid_set = NULL, -+ .rx_queue_setup = dpaa2_rx_queue_setup, -+ .rx_queue_release = dpaa2_rx_queue_release, -+ .tx_queue_setup = dpaa2_tx_queue_setup, -+ .tx_queue_release = dpaa2_tx_queue_release, -+ .dev_led_on = NULL, -+ .dev_led_off = NULL, -+ .set_queue_rate_limit = NULL, -+ .flow_ctrl_get = NULL, -+ .flow_ctrl_set = dpaa2_flow_ctrl_set, -+ 
.priority_flow_ctrl_set = NULL, -+ .mac_addr_add = dpaa2_dev_add_mac_addr, -+ .mac_addr_remove = dpaa2_dev_remove_mac_addr, -+ .rxq_info_get = NULL, -+ .txq_info_get = NULL, -+ .timesync_enable = dpaa2_timestamp_enable, -+ .timesync_disable = dpaa2_timestamp_disable, -+ .mac_addr_set = dpaa2_dev_set_mac_addr, -+}; -+ -+static int -+dpaa2_dev_init(struct rte_eth_dev *eth_dev) -+{ -+ struct rte_eth_dev_data *data = eth_dev->data; -+ struct fsl_mc_io *dpni_dev; -+ struct dpni_attr attr; -+ struct dpaa2_dev_priv *priv = eth_dev->data->dev_private; -+ struct dpni_buffer_layout layout; -+ int i, ret, hw_id = eth_dev->pci_dev->addr.devid; -+ struct dpni_extended_cfg *ext_cfg = NULL; -+ int tot_size; -+ -+ PMD_INIT_FUNC_TRACE(); -+ -+ dpni_dev = (struct fsl_mc_io *)malloc(sizeof(struct fsl_mc_io)); -+ if (!dpni_dev) { -+ PMD_DRV_LOG(ERR, "malloc failed for dpni device\n"); -+ return -1; -+ } -+ -+ dpni_dev->regs = mcp_ptr_list[0]; -+ ret = dpni_open(dpni_dev, CMD_PRI_LOW, hw_id, &priv->token); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Failure in opening dpni@%d device with" -+ "error code %d\n", hw_id, ret); -+ return -1; -+ } -+ -+ /* Clean the device first */ -+ ret = dpni_reset(dpni_dev, CMD_PRI_LOW, priv->token); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Failure cleaning dpni@%d device with" -+ "error code %d\n", hw_id, ret); -+ return -1; -+ } -+ -+ ext_cfg = (struct dpni_extended_cfg *)rte_malloc(NULL, 256, -+ RTE_CACHE_LINE_SIZE); -+ if (!ext_cfg) { -+ PMD_DRV_LOG(ERR, "No data memory\n"); -+ return -1; -+ } -+ attr.ext_cfg_iova = (uint64_t)(DPAA2_VADDR_TO_IOVA(ext_cfg)); -+ -+ ret = dpni_get_attributes(dpni_dev, CMD_PRI_LOW, priv->token, &attr); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Failure in getting dpni@%d attribute, " -+ "error code %d\n", hw_id, ret); -+ return -1; -+ } -+ -+ priv->num_tc = attr.max_tcs; -+ for (i = 0; i < attr.max_tcs; i++) { -+ priv->num_dist_per_tc[i] = ext_cfg->tc_cfg[i].max_dist; -+ priv->nb_rx_queues += priv->num_dist_per_tc[i]; -+ break; -+ } -+ if (attr.max_tcs == 1) -+ priv->nb_tx_queues = attr.max_senders; -+ else -+ priv->nb_tx_queues = attr.max_tcs; -+ PMD_DRV_LOG(INFO, "num_tc %d", priv->num_tc); -+ PMD_DRV_LOG(INFO, "nb_rx_queues %d", priv->nb_rx_queues); -+ -+ eth_dev->data->nb_rx_queues = priv->nb_rx_queues; -+ eth_dev->data->nb_tx_queues = priv->nb_tx_queues; -+ -+ priv->hw = dpni_dev; -+ priv->hw_id = hw_id; -+ priv->options = attr.options; -+ -+ priv->max_unicast_filters = attr.max_unicast_filters; -+ priv->max_multicast_filters = attr.max_multicast_filters; -+ -+ if (attr.options & DPNI_OPT_VLAN_FILTER) -+ priv->max_vlan_filters = attr.max_vlan_filters; -+ else -+ priv->max_vlan_filters = 0; -+ -+ ret = dpaa2_alloc_rx_tx_queues(eth_dev); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "dpaa2_alloc_rx_tx_queuesFailed\n"); -+ return -1; -+ } -+ -+ data->mac_addrs = (struct ether_addr *)malloc(sizeof(struct ether_addr)); -+ -+ /* Allocate memory for storing MAC addresses */ -+ eth_dev->data->mac_addrs = rte_zmalloc("dpni", -+ ETHER_ADDR_LEN * attr.max_unicast_filters, 0); -+ if (eth_dev->data->mac_addrs == NULL) { -+ PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to " -+ "store MAC addresses", -+ ETHER_ADDR_LEN * attr.max_unicast_filters); -+ return -ENOMEM; -+ } -+ -+ ret = dpni_get_primary_mac_addr(dpni_dev, CMD_PRI_LOW, -+ priv->token, -+ (uint8_t *)(data->mac_addrs[0].addr_bytes)); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "DPNI get mac address failed:" -+ " Error Code = %d\n", ret); -+ return -1; -+ } -+ -+ PMD_DRV_LOG(INFO, "Adding Broadcast Address..."); -+ 
memset(data->mac_addrs[1].addr_bytes, 0xff, ETH_ADDR_LEN); -+ ret = dpni_add_mac_addr(dpni_dev, CMD_PRI_LOW, -+ priv->token, -+ (uint8_t *)(data->mac_addrs[1].addr_bytes)); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "DPNI set broadcast mac address failed:" -+ " Error Code = %0x\n", ret); -+ return -1; -+ } -+ -+ /* ... rx buffer layout ... */ -+ tot_size = DPAA2_HW_BUF_RESERVE + RTE_PKTMBUF_HEADROOM; -+ tot_size = RTE_ALIGN_CEIL(tot_size, -+ DPAA2_PACKET_LAYOUT_ALIGN); -+ -+ memset(&layout, 0, sizeof(struct dpni_buffer_layout)); -+ layout.options = DPNI_BUF_LAYOUT_OPT_FRAME_STATUS | -+ DPNI_BUF_LAYOUT_OPT_TIMESTAMP | -+ DPNI_BUF_LAYOUT_OPT_PARSER_RESULT | -+ DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM | -+ DPNI_BUF_LAYOUT_OPT_PRIVATE_DATA_SIZE; -+ -+ layout.pass_frame_status = 1; -+ layout.data_head_room = tot_size -+ - DPAA2_FD_PTA_SIZE - DPAA2_MBUF_HW_ANNOTATION; -+ layout.private_data_size = DPAA2_FD_PTA_SIZE; -+ layout.pass_timestamp = 1; -+ layout.pass_parser_result = 1; -+ PMD_DRV_LOG(INFO, "Tot_size = %d, head room = %d, private = %d", -+ tot_size, layout.data_head_room, layout.private_data_size); -+ ret = dpni_set_rx_buffer_layout(dpni_dev, CMD_PRI_LOW, priv->token, -+ &layout); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Err(%d) in setting rx buffer layout\n", ret); -+ return -1; -+ } -+ -+ /* ... tx buffer layout ... */ -+ memset(&layout, 0, sizeof(struct dpni_buffer_layout)); -+ layout.options = DPNI_BUF_LAYOUT_OPT_FRAME_STATUS | -+ DPNI_BUF_LAYOUT_OPT_TIMESTAMP; -+ layout.pass_frame_status = 1; -+ layout.pass_timestamp = 1; -+ ret = dpni_set_tx_buffer_layout(dpni_dev, CMD_PRI_LOW, priv->token, &layout); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Error (%d) in setting tx buffer layout\n", ret); -+ return -1; -+ } -+ -+ /* ... tx-conf and error buffer layout ... */ -+ memset(&layout, 0, sizeof(struct dpni_buffer_layout)); -+ layout.options = DPNI_BUF_LAYOUT_OPT_FRAME_STATUS | -+ DPNI_BUF_LAYOUT_OPT_TIMESTAMP; -+ layout.pass_frame_status = 1; -+ layout.pass_timestamp = 1; -+ ret = dpni_set_tx_conf_buffer_layout(dpni_dev, CMD_PRI_LOW, priv->token, &layout); -+ if (ret) { -+ PMD_DRV_LOG(ERR, "Error (%d) in setting tx-conf buffer layout\n", ret); -+ return -1; -+ } -+ -+ /* TODO - Set the MTU if required */ -+ -+ eth_dev->dev_ops = &ops; -+ eth_dev->rx_pkt_burst = eth_dpaa2_prefetch_rx;/*eth_dpaa2_rx;*/ -+ eth_dev->tx_pkt_burst = eth_dpaa2_tx; -+ -+ rte_free(ext_cfg); -+ -+ return 0; -+} -+ -+static struct eth_driver rte_dpaa2_dpni = { -+ { -+ .name = "rte_dpaa2_dpni", -+ .id_table = pci_id_dpaa2_map, -+ }, -+ .eth_dev_init = dpaa2_dev_init, -+ .dev_private_size = sizeof(struct dpaa2_dev_priv), -+}; -+ -+static int -+rte_pmd_dpaa2_devinit( -+ const char *name __rte_unused, -+ const char *params __rte_unused) -+{ -+ PMD_DRV_LOG(INFO, "Initializing dpaa2_pmd for %s\n", name); -+ rte_eth_driver_register(&rte_dpaa2_dpni); -+ -+ return 0; -+} -+ -+static struct rte_driver pmd_dpaa2_drv = { -+ .name = "dpaa2_pmd", -+ .type = PMD_PDEV, -+ .init = rte_pmd_dpaa2_devinit, -+}; -+ -+PMD_REGISTER_DRIVER(pmd_dpaa2_drv, dpaa2); -diff --git a/drivers/net/dpaa2/rte_eth_dpni_annot.h b/drivers/net/dpaa2/rte_eth_dpni_annot.h -new file mode 100644 -index 0000000..0c3ae82 ---- /dev/null -+++ b/drivers/net/dpaa2/rte_eth_dpni_annot.h -@@ -0,0 +1,310 @@ -+/*- -+ * BSD LICENSE -+ * -+ * Copyright (c) 2014 Freescale Semiconductor, Inc. All rights reserved. 
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in -+ * the documentation and/or other materials provided with the -+ * distribution. -+ * * Neither the name of Freescale Semiconductor, Inc nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+/** -+ * @file -+ * -+ * DPNI packet parse results - implementation internal -+ */ -+ -+#ifndef RTE_ETH_DPNI_ANNOT_H_ -+#define RTE_ETH_DPNI_ANNOT_H_ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* Annotation valid bits in FD FRC */ -+#define DPAA2_FD_FRC_FASV 0x8000 -+#define DPAA2_FD_FRC_FAEADV 0x4000 -+#define DPAA2_FD_FRC_FAPRV 0x2000 -+#define DPAA2_FD_FRC_FAIADV 0x1000 -+#define DPAA2_FD_FRC_FASWOV 0x0800 -+#define DPAA2_FD_FRC_FAICFDV 0x0400 -+ -+/* Annotation bits in FD CTRL */ -+#define DPAA2_FD_CTRL_ASAL 0x00020000 /* ASAL = 128 */ -+#define DPAA2_FD_CTRL_PTA 0x00800000 -+#define DPAA2_FD_CTRL_PTV1 0x00400000 -+ -+/* Frame annotation status */ -+struct dpaa2_fas { -+ uint8_t reserved; -+ uint8_t ppid; -+ __le16 ifpid; -+ __le32 status; -+} __packed; -+ -+/** -+ * Internal Packet annotation header -+ */ -+struct pkt_annotation { -+ /**< word1: Frame Annotation Status (8 bytes)*/ -+ uint64_t word1; -+ /**< word2: Time Stamp (8 bytes)*/ -+ uint64_t word2; -+ /**< word3: Next Hdr + FAF Extension + FAF (2 + 2 + 4 bytes)*/ -+ uint64_t word3; -+ /**< word4: Frame Annotation Flags-FAF (8 bytes) */ -+ uint64_t word4; -+ /**< word5: -+ ShimOffset_1 + ShimOffset_2 + IPPIDOffset + EthOffset + -+ LLC+SNAPOffset + VLANTCIOffset_1 + VLANTCIOffset_n + -+ LastETypeOffset (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 bytes) -+ */ -+ uint64_t word5; -+ /**< word6: -+ PPPoEOffset + MPLSOffset_1 + MPLSOffset_n + ARPorIPOffset_1 -+ + IPOffset_norMInEncapO + GREOffset + L4Offset + -+ GTPorESPorIPSecOffset(1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 bytes) -+ */ -+ uint64_t word6; -+ /**< word7: -+ RoutingHdrOfset1 + RoutingHdrOfset2 + NxtHdrOffset + IPv6FragOffset + -+ GrossRunningSum + RunningSum(1 + 1 + 1 + 1 + 2 + 2 bytes) -+ */ -+ uint64_t word7; -+ /**< word8: -+ ParseErrorcode + Soft Parsing Context (1 + 7 bytes) -+ */ -+ uint64_t word8; /**< Layer 4 length */ -+}; -+ -+/** -+ * Internal Macros to get/set Packet annotation header -+ */ -+ -+/** General Macro to define a 
particular bit position*/ -+#define BIT_POS(x) ((uint64_t)1 << ((x))) -+/** Set a bit in the variable */ -+#define BIT_SET_AT_POS(var, pos) (var |= pos) -+/** Reset the bit in the variable */ -+#define BIT_RESET_AT_POS(var, pos) (var &= ~(pos)) -+/** Check the bit is set in the variable */ -+#define BIT_ISSET_AT_POS(var, pos) ((var & pos) ? 1 : 0) -+/** -+ * Macrso to define bit position in word3 -+ */ -+#define NEXT_HDR(var) ((uint64_t)var & 0xFFFF000000000000) -+#define FAF_EXTN_IPV6_ROUTE_HDR_PRESENT(var) BIT_POS(16) -+#define FAF_EXTN_RESERVED(var) ((uint64_t)var & 0x00007FFF00000000) -+#define FAF_USER_DEFINED_RESERVED(var) ((uint64_t)var & 0x00000000FF000000) -+#define SHIM_SHELL_SOFT_PARSING_ERRROR BIT_POS(23) -+#define PARSING_ERROR BIT_POS(22) -+#define L2_ETH_MAC_PRESENT BIT_POS(21) -+#define L2_ETH_MAC_UNICAST BIT_POS(20) -+#define L2_ETH_MAC_MULTICAST BIT_POS(19) -+#define L2_ETH_MAC_BROADCAST BIT_POS(18) -+#define L2_ETH_FRAME_IS_BPDU BIT_POS(17) -+#define L2_ETH_FCOE_PRESENT BIT_POS(16) -+#define L2_ETH_FIP_PRESENT BIT_POS(15) -+#define L2_ETH_PARSING_ERROR BIT_POS(14) -+#define L2_LLC_SNAP_PRESENT BIT_POS(13) -+#define L2_UNKNOWN_LLC_OUI BIT_POS(12) -+#define L2_LLC_SNAP_ERROR BIT_POS(11) -+#define L2_VLAN_1_PRESENT BIT_POS(10) -+#define L2_VLAN_N_PRESENT BIT_POS(9) -+#define L2_VLAN_CFI_BIT_PRESENT BIT_POS(8) -+#define L2_VLAN_PARSING_ERROR BIT_POS(7) -+#define L2_PPPOE_PPP_PRESENT BIT_POS(6) -+#define L2_PPPOE_PPP_PARSING_ERROR BIT_POS(5) -+#define L2_MPLS_1_PRESENT BIT_POS(4) -+#define L2_MPLS_N_PRESENT BIT_POS(3) -+#define L2_MPLS_PARSING_ERROR BIT_POS(2) -+#define L2_ARP_PRESENT BIT_POS(1) -+#define L2_ARP_PARSING_ERROR BIT_POS(0) -+/** -+ * Macrso to define bit position in word4 -+ */ -+#define L2_UNKNOWN_PROTOCOL BIT_POS(63) -+#define L2_SOFT_PARSING_ERROR BIT_POS(62) -+#define L3_IPV4_1_PRESENT BIT_POS(61) -+#define L3_IPV4_1_UNICAST BIT_POS(60) -+#define L3_IPV4_1_MULTICAST BIT_POS(59) -+#define L3_IPV4_1_BROADCAST BIT_POS(58) -+#define L3_IPV4_N_PRESENT BIT_POS(57) -+#define L3_IPV4_N_UNICAST BIT_POS(56) -+#define L3_IPV4_N_MULTICAST BIT_POS(55) -+#define L3_IPV4_N_BROADCAST BIT_POS(54) -+#define L3_IPV6_1_PRESENT BIT_POS(53) -+#define L3_IPV6_1_UNICAST BIT_POS(52) -+#define L3_IPV6_1_MULTICAST BIT_POS(51) -+#define L3_IPV6_N_PRESENT BIT_POS(50) -+#define L3_IPV6_N_UNICAST BIT_POS(49) -+#define L3_IPV6_N_MULTICAST BIT_POS(48) -+#define L3_IP_1_OPT_PRESENT BIT_POS(47) -+#define L3_IP_1_UNKNOWN_PROTOCOL BIT_POS(46) -+#define L3_IP_1_MORE_FRAGMENT BIT_POS(45) -+#define L3_IP_1_FIRST_FRAGMENT BIT_POS(44) -+#define L3_IP_1_PARSING_ERROR BIT_POS(43) -+#define L3_IP_N_OPT_PRESENT BIT_POS(42) -+#define L3_IP_N_UNKNOWN_PROTOCOL BIT_POS(41) -+#define L3_IP_N_MORE_FRAGMENT BIT_POS(40) -+#define L3_IP_N_FIRST_FRAGMENT BIT_POS(39) -+#define L3_PROTO_ICMP_PRESENT BIT_POS(38) -+#define L3_PROTO_IGMP_PRESENT BIT_POS(37) -+#define L3_PROTO_ICMPV6_PRESENT BIT_POS(36) -+#define L3_PROTO_UDP_LIGHT_PRESENT BIT_POS(35) -+#define L3_IP_N_PARSING_ERROR BIT_POS(34) -+#define L3_MIN_ENCAP_PRESENT BIT_POS(33) -+#define L3_MIN_ENCAP_SBIT_PRESENT BIT_POS(32) -+#define L3_MIN_ENCAP_PARSING_ERROR BIT_POS(31) -+#define L3_PROTO_GRE_PRESENT BIT_POS(30) -+#define L3_PROTO_GRE_RBIT_PRESENT BIT_POS(29) -+#define L3_PROTO_GRE_PARSING_ERROR BIT_POS(28) -+#define L3_IP_UNKNOWN_PROTOCOL BIT_POS(27) -+#define L3_SOFT_PARSING_ERROR BIT_POS(26) -+#define L3_PROTO_UDP_PRESENT BIT_POS(25) -+#define L3_PROTO_UDP_PARSING_ERROR BIT_POS(24) -+#define L3_PROTO_TCP_PRESENT BIT_POS(23) -+#define 
L3_PROTO_TCP_OPT_PRESENT BIT_POS(22) -+#define L3_PROTO_TCP_CTRL_BIT_6_TO_11_PRESENT BIT_POS(21) -+#define L3_PROTO_TCP_CTRL_BIT_3_TO_5_PRESENT BIT_POS(20) -+#define L3_PROTO_TCP_PARSING_ERROR BIT_POS(19) -+#define L3_PROTO_IPSEC_PRESENT BIT_POS(18) -+#define L3_PROTO_IPSEC_ESP_PRESENT BIT_POS(17) -+#define L3_PROTO_IPSEC_AH_PRESENT BIT_POS(16) -+#define L3_PROTO_IPSEC_PARSING_ERROR BIT_POS(15) -+#define L3_PROTO_SCTP_PRESENT BIT_POS(14) -+#define L3_PROTO_SCTP_PARSING_ERROR BIT_POS(13) -+#define L3_PROTO_DCCP_PRESENT BIT_POS(12) -+#define L3_PROTO_DCCP_PARSING_ERROR BIT_POS(11) -+#define L4_UNKNOWN_PROTOCOL BIT_POS(10) -+#define L4_SOFT_PARSING_ERROR BIT_POS(9) -+#define L3_PROTO_GTP_PRESENT BIT_POS(8) -+#define L3_PROTO_GTP_PARSING_ERROR BIT_POS(7) -+#define L3_PROTO_ESP_PRESENT BIT_POS(6) -+#define L3_PROTO_ESP_PARSING_ERROR BIT_POS(5) -+#define L3_PROTO_ISCSI_PRESENT BIT_POS(4) -+#define L3_PROTO_CAPWAN__CTRL_PRESENT BIT_POS(3) -+#define L3_PROTO_CAPWAN__DATA_PRESENT BIT_POS(2) -+#define L5_SOFT_PARSING_ERROR BIT_POS(1) -+#define L3_IPV6_ROUTE_HDR_PRESENT BIT_POS(0) -+ -+/** -+ * Macros to get values in word5 -+ */ -+#define SHIM_OFFSET_1(var) ((uint64_t)var & 0xFF00000000000000) -+#define SHIM_OFFSET_2(var) ((uint64_t)var & 0x00FF000000000000) -+#define IP_PID_OFFSET(var) ((uint64_t)var & 0x0000FF0000000000) -+#define ETH_OFFSET(var) ((uint64_t)var & 0x000000FF00000000) -+#define LLC_SNAP_OFFSET(var) ((uint64_t)var & 0x00000000FF000000) -+#define VLAN_TCI_OFFSET_1(var) ((uint64_t)var & 0x0000000000FF0000) -+#define VLAN_TCI_OFFSET_N(var) ((uint64_t)var & 0x000000000000FF00) -+#define LAST_ETYPE_OFFSET(var) ((uint64_t)var & 0x00000000000000FF) -+ -+/** -+ * Macros to get values in word6 -+ */ -+#define PPPOE_OFFSET(var) ((uint64_t)var & 0xFF00000000000000) -+#define MPLS_OFFSET_1(var) ((uint64_t)var & 0x00FF000000000000) -+#define MPLS_OFFSET_N(var) ((uint64_t)var & 0x0000FF0000000000) -+#define ARP_OR_IP_OFFSET_1(var) ((uint64_t)var & 0x000000FF00000000) -+#define IP_N_OR_MIN_ENCAP_OFFSET(var) ((uint64_t)var & 0x00000000FF000000) -+#define GRE_OFFSET(var) ((uint64_t)var & 0x0000000000FF0000) -+#define L4_OFFSET(var) ((uint64_t)var & 0x000000000000FF00) -+#define GTP_OR_ESP_OR_IPSEC_OFFSET(var) ((uint64_t)var & 0x00000000000000FF) -+ -+/** -+ * Macros to get values in word7 -+ */ -+#define IPV6_ROUTING_HDR_OFFSET_1(var) ((uint64_t)var & 0xFF00000000000000) -+#define IPV6_ROUTING_HDR_OFFSET_2(var) ((uint64_t)var & 0x00FF000000000000) -+#define NEXT_HDR_OFFSET(var) ((uint64_t)var & 0x0000FF0000000000) -+#define IPV6_FRAG_OFFSET(var) ((uint64_t)var & 0x000000FF00000000) -+#define GROSS_RUNNING_SUM(var) ((uint64_t)var & 0x00000000FFFF0000) -+#define RUNNING_SUM(var) ((uint64_t)var & 0x000000000000FFFF) -+ -+/** -+ * Macros to get values in word8 -+ */ -+#define PARSE_ERROR_CODE(var) ((uint64_t)var & 0xFF00000000000000) -+#define SOFT_PARSING_CONTEXT(var) ((uint64_t)var & 0x00FFFFFFFFFFFFFF) -+ -+/* Debug frame, otherwise supposed to be discarded */ -+#define DPAA2_ETH_FAS_DISC 0x80000000 -+/* MACSEC frame */ -+#define DPAA2_ETH_FAS_MS 0x40000000 -+#define DPAA2_ETH_FAS_PTP 0x08000000 -+/* Ethernet multicast frame */ -+#define DPAA2_ETH_FAS_MC 0x04000000 -+/* Ethernet broadcast frame */ -+#define DPAA2_ETH_FAS_BC 0x02000000 -+#define DPAA2_ETH_FAS_KSE 0x00040000 -+#define DPAA2_ETH_FAS_EOFHE 0x00020000 -+#define DPAA2_ETH_FAS_MNLE 0x00010000 -+#define DPAA2_ETH_FAS_TIDE 0x00008000 -+#define DPAA2_ETH_FAS_PIEE 0x00004000 -+/* Frame length error */ -+#define DPAA2_ETH_FAS_FLE 0x00002000 -+/* 
Frame physical error */ -+#define DPAA2_ETH_FAS_FPE 0x00001000 -+#define DPAA2_ETH_FAS_PTE 0x00000080 -+#define DPAA2_ETH_FAS_ISP 0x00000040 -+#define DPAA2_ETH_FAS_PHE 0x00000020 -+#define DPAA2_ETH_FAS_BLE 0x00000010 -+/* L3 csum validation performed */ -+#define DPAA2_ETH_FAS_L3CV 0x00000008 -+/* L3 csum error */ -+#define DPAA2_ETH_FAS_L3CE 0x00000004 -+/* L4 csum validation performed */ -+#define DPAA2_ETH_FAS_L4CV 0x00000002 -+/* L4 csum error */ -+#define DPAA2_ETH_FAS_L4CE 0x00000001 -+ -+/* These bits always signal errors */ -+#define DPAA2_ETH_RX_ERR_MASK (DPAA2_ETH_FAS_KSE | \ -+ DPAA2_ETH_FAS_EOFHE | \ -+ DPAA2_ETH_FAS_MNLE | \ -+ DPAA2_ETH_FAS_TIDE | \ -+ DPAA2_ETH_FAS_PIEE | \ -+ DPAA2_ETH_FAS_FLE | \ -+ DPAA2_ETH_FAS_FPE | \ -+ DPAA2_ETH_FAS_PTE | \ -+ DPAA2_ETH_FAS_ISP | \ -+ DPAA2_ETH_FAS_PHE | \ -+ DPAA2_ETH_FAS_BLE | \ -+ DPAA2_ETH_FAS_L3CE | \ -+ DPAA2_ETH_FAS_L4CE) -+/* Unsupported features in the ingress */ -+#define DPAA2_ETH_RX_UNSUPP_MASK DPAA2_ETH_FAS_MS -+/* Tx errors */ -+#define DPAA2_ETH_TXCONF_ERR_MASK (DPAA2_ETH_FAS_KSE | \ -+ DPAA2_ETH_FAS_EOFHE | \ -+ DPAA2_ETH_FAS_MNLE | \ -+ DPAA2_ETH_FAS_TIDE) -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif -diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h -index 857dc3e..3dc9544 100644 ---- a/lib/librte_eal/common/eal_private.h -+++ b/lib/librte_eal/common/eal_private.h -@@ -325,4 +325,11 @@ int rte_eal_hugepage_init(void); - */ - int rte_eal_hugepage_attach(void); - -+#ifdef RTE_LIBRTE_DPAA2_PMD -+/** -+ * Initialize any SoC init related functions if any before thread creation -+ */ -+int rte_eal_soc_pre_init(void); -+#endif -+ - #endif /* _EAL_PRIVATE_H_ */ -diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile -index 182729c..ed5be74 100644 ---- a/lib/librte_eal/linuxapp/eal/Makefile -+++ b/lib/librte_eal/linuxapp/eal/Makefile -@@ -76,6 +76,10 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c - SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c - SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c - SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c -+ifeq ($(CONFIG_RTE_LIBRTE_DPAA2_PMD),y) -+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_soc.c -+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_fsl_mc.c -+endif - ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y) - SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_ivshmem.c - endif -diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c -index 3fb2188..832c252 100644 ---- a/lib/librte_eal/linuxapp/eal/eal.c -+++ b/lib/librte_eal/linuxapp/eal/eal.c -@@ -814,6 +814,11 @@ rte_eal_init(int argc, char **argv) - if (rte_eal_tailqs_init() < 0) - rte_panic("Cannot init tail queues for objects\n"); - -+#ifdef RTE_LIBRTE_DPAA2_PMD -+ if (rte_eal_soc_pre_init() < 0) -+ rte_panic("Cannot pre init soc\n"); -+#endif -+ - #ifdef RTE_LIBRTE_IVSHMEM - if (rte_eal_ivshmem_obj_init() < 0) - rte_panic("Cannot init IVSHMEM objects\n"); -diff --git a/lib/librte_eal/linuxapp/eal/eal_soc.c b/lib/librte_eal/linuxapp/eal/eal_soc.c -new file mode 100644 -index 0000000..1595f68 ---- /dev/null -+++ b/lib/librte_eal/linuxapp/eal/eal_soc.c -@@ -0,0 +1,67 @@ -+/*- -+ * BSD LICENSE -+ * -+ * Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved.
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in -+ * the documentation and/or other materials provided with the -+ * distribution. -+ * * Neither the name of Freescale Semiconductor, Inc or the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "eal_private.h" -+ -+#ifdef RTE_LIBRTE_DPAA2_PMD -+#include "eal_vfio_fsl_mc.h" -+#endif -+ -+#if (defined RTE_LIBRTE_DPAA_PMD) -+extern int usdpaa_pre_rte_eal_init(void); -+#endif -+ -+/* Initialize any soc init related functions if any before thread creation*/ -+int -+rte_eal_soc_pre_init(void) -+{ -+#ifdef RTE_LIBRTE_DPAA2_PMD -+ if (rte_eal_dpaa2_init() < 0) -+ RTE_LOG(WARNING, EAL, "Cannot init FSL_MC SCAN\n"); -+#endif -+#if (defined RTE_LIBRTE_DPAA_PMD) -+ if (usdpaa_pre_rte_eal_init()) -+ RTE_LOG(WARNING, EAL, "Cannot init FSL_DPAA \n"); -+#endif -+ return 0; -+} -diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_fsl_mc.c b/lib/librte_eal/linuxapp/eal/eal_vfio_fsl_mc.c -new file mode 100644 -index 0000000..0ddaef9 ---- /dev/null -+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_fsl_mc.c -@@ -0,0 +1,650 @@ -+/*- -+ * BSD LICENSE -+ * -+ * Copyright (c) 2014 Freescale Semiconductor, Inc. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in -+ * the documentation and/or other materials provided with the -+ * distribution. -+ * * Neither the name of Freescale Semiconductor nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. 
-+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "rte_pci.h" -+#include "eal_vfio.h" -+ -+#include -+ -+#include "eal_vfio_fsl_mc.h" -+ -+#include "rte_pci_dev_ids.h" -+#include "eal_filesystem.h" -+#include "eal_private.h" -+ -+#ifndef VFIO_MAX_GROUPS -+#define VFIO_MAX_GROUPS 64 -+#endif -+ -+/* #define DPAA2_STAGE2_STASHING */ -+ -+/** Pathname of FSL-MC devices directory. */ -+#define SYSFS_FSL_MC_DEVICES "/sys/bus/fsl-mc/devices" -+ -+/* Number of VFIO containers & groups with in */ -+static struct fsl_vfio_group vfio_groups[VFIO_MAX_GRP]; -+static struct fsl_vfio_container vfio_containers[VFIO_MAX_CONTAINERS]; -+static char *ls2bus_container; -+static int container_device_fd; -+static uint32_t *msi_intr_vaddr; -+void *(*mcp_ptr_list); -+static uint32_t mcp_id; -+ -+static int vfio_connect_container(struct fsl_vfio_group *vfio_group) -+{ -+ struct fsl_vfio_container *container; -+ int i, fd, ret; -+ -+ /* Try connecting to vfio container already created */ -+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { -+ container = &vfio_containers[i]; -+ if (!ioctl(vfio_group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { -+ RTE_LOG(ERR, EAL, "Container pre-exists with FD[0x%x]" -+ " for this group\n", container->fd); -+ vfio_group->container = container; -+ return 0; -+ } -+ } -+ -+ /* Opens main vfio file descriptor which represents the "container" */ -+ fd = open("/dev/vfio/vfio", O_RDWR); -+ if (fd < 0) { -+ RTE_LOG(ERR, EAL, "vfio: failed to open /dev/vfio/vfio\n"); -+ return -errno; -+ } -+ -+ ret = ioctl(fd, VFIO_GET_API_VERSION); -+ if (ret != VFIO_API_VERSION) { -+ RTE_LOG(ERR, EAL, "vfio: supported vfio version: %d, " -+ "reported version: %d", VFIO_API_VERSION, ret); -+ close(fd); -+ return -EINVAL; -+ } -+#ifndef DPAA2_STAGE2_STASHING -+ /* Check whether support for SMMU type IOMMU prresent or not */ -+ if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) { -+ /* Connect group to container */ -+ ret = ioctl(vfio_group->fd, VFIO_GROUP_SET_CONTAINER, &fd); -+ if (ret) { -+ RTE_LOG(ERR, EAL, "vfio: failed to set group container:\n"); -+ close(fd); -+ return -errno; -+ } -+ -+ ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU); -+ if (ret) { -+ RTE_LOG(ERR, EAL, "vfio: failed to set iommu for container:\n"); -+ close(fd); -+ return -errno; -+ } -+ } else { -+ RTE_LOG(ERR, EAL, "vfio error: No supported IOMMU\n"); -+ close(fd); -+ return -EINVAL; -+ } -+#else -+ /* Check whether support for SMMU type IOMMU stage 2 present or not */ -+ if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_NESTING_IOMMU)) { -+ /* Connect group to container */ -+ ret = 
ioctl(vfio_group->fd, VFIO_GROUP_SET_CONTAINER, &fd); -+ if (ret) { -+ RTE_LOG(ERR, EAL, "vfio: failed to set group container:\n"); -+ close(fd); -+ return -errno; -+ } -+ -+ ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_NESTING_IOMMU); -+ if (ret) { -+ RTE_LOG(ERR, EAL, "vfio: failed to set iommu-2 for container:\n"); -+ close(fd); -+ return -errno; -+ } -+ } else { -+ RTE_LOG(ERR, EAL, "vfio error: No supported IOMMU-2\n"); -+ close(fd); -+ return -EINVAL; -+ } -+#endif -+ container = NULL; -+ for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { -+ if (vfio_containers[i].used) -+ continue; -+ RTE_LOG(ERR, EAL, "DPAA2-Unused container at index %d\n", i); -+ container = &vfio_containers[i]; -+ } -+ if (!container) { -+ RTE_LOG(ERR, EAL, "vfio error: No Free Container Found\n"); -+ close(fd); -+ return -ENOMEM; -+ } -+ -+ container->used = 1; -+ container->fd = fd; -+ container->group_list[container->index] = vfio_group; -+ vfio_group->container = container; -+ container->index++; -+ return 0; -+} -+ -+static int vfio_map_irq_region(struct fsl_vfio_group *group) -+{ -+ int ret; -+ unsigned long *vaddr = NULL; -+ struct vfio_iommu_type1_dma_map map = { -+ .argsz = sizeof(map), -+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, -+ .vaddr = 0x6030000, -+ .iova = 0x6030000, -+ .size = 0x1000, -+ }; -+ -+ vaddr = (unsigned long *)mmap(NULL, 0x1000, PROT_WRITE | -+ PROT_READ, MAP_SHARED, container_device_fd, 0x6030000); -+ if (vaddr == MAP_FAILED) { -+ RTE_LOG(ERR, EAL, " mapping GITS region (errno = %d)", errno); -+ return -errno; -+ } -+ -+ msi_intr_vaddr = (uint32_t *)((char *)(vaddr) + 64); -+ map.vaddr = (unsigned long)vaddr; -+ ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA, &map); -+ if (ret == 0) -+ return 0; -+ -+ RTE_LOG(ERR, EAL, "vfio_map_irq_region fails (errno = %d)", errno); -+ return -errno; -+} -+ -+int vfio_dmamap_mem_region(uint64_t vaddr, -+ uint64_t iova, -+ uint64_t size) -+{ -+ struct fsl_vfio_group *group; -+ struct vfio_iommu_type1_dma_map dma_map = { -+ .argsz = sizeof(dma_map), -+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, -+ }; -+ -+ dma_map.vaddr = vaddr; -+ dma_map.size = size; -+ dma_map.iova = iova; -+ -+ /* SET DMA MAP for IOMMU */ -+ group = &vfio_groups[0]; -+ if (ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA, &dma_map)) { -+ RTE_LOG(ERR, EAL, "SWP: VFIO_IOMMU_MAP_DMA API Error %d.\n", errno); -+ return -1; -+ } -+ return 0; -+} -+ -+static int32_t setup_dmamap(void) -+{ -+ int ret; -+ struct fsl_vfio_group *group; -+ struct vfio_iommu_type1_dma_map dma_map = { -+ .argsz = sizeof(struct vfio_iommu_type1_dma_map), -+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, -+ }; -+ -+ int i; -+ const struct rte_memseg *memseg; -+ -+ for (i = 0; i < RTE_MAX_MEMSEG; i++) { -+ memseg = rte_eal_get_physmem_layout(); -+ if (memseg == NULL) { -+ RTE_LOG(ERR, EAL, -+ "\nError Cannot get physical layout\n"); -+ return -ENODEV; -+ } -+ -+ if (memseg[i].addr == NULL && memseg[i].len == 0) { -+ break; -+ } -+ -+ dma_map.size = memseg[i].len; -+ dma_map.vaddr = memseg[i].addr_64; -+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA -+ dma_map.iova = memseg[i].phys_addr; -+#else -+ dma_map.iova = dma_map.vaddr; -+#endif -+ -+ /* SET DMA MAP for IOMMU */ -+ group = &vfio_groups[0]; -+ -+ printf("-->Initial SHM Virtual ADDR %llX\n", dma_map.vaddr); -+ printf("-----> DMA size 0x%llX\n", dma_map.size); -+ ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA, &dma_map); -+ if (ret) { -+ RTE_LOG(ERR, EAL, -+ "\nErr: VFIO_IOMMU_MAP_DMA API Error %d.\n", -+ errno); -+ 
return ret; -+ } -+ printf("-----> dma_map.vaddr = 0x%llX\n", dma_map.vaddr); -+ } -+ -+ /* TODO - This is a W.A. as VFIO currently does not add the mapping of -+ the interrupt region to SMMU. This should be removed once the -+ support is added in the Kernel. -+ */ -+ vfio_map_irq_region(group); -+ -+ return 0; -+} -+ -+static int vfio_set_group(struct fsl_vfio_group *group, int groupid) -+{ -+ char path[PATH_MAX]; -+ struct vfio_group_status status = { .argsz = sizeof(status) }; -+ -+ /* Open the VFIO file corresponding to the IOMMU group */ -+ snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); -+ -+ group->fd = open(path, O_RDWR); -+ if (group->fd < 0) { -+ RTE_LOG(ERR, EAL, "vfio: error opening %s\n", path); -+ return -1; -+ } -+ -+ /* Test & Verify that group is VIABLE & AVAILABLE */ -+ if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { -+ RTE_LOG(ERR, EAL, "vfio: error getting group status\n"); -+ close(group->fd); -+ return -1; -+ } -+ if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { -+ RTE_LOG(ERR, EAL, "vfio: group not viable\n"); -+ close(group->fd); -+ return -1; -+ } -+ /* Since Group is VIABLE, Store the groupid */ -+ group->groupid = groupid; -+ -+ /* Now connect this IOMMU group to given container */ -+ if (vfio_connect_container(group)) { -+ RTE_LOG(ERR, EAL, -+ "vfio: error sonnecting container with group %d\n", -+ groupid); -+ close(group->fd); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static int32_t setup_vfio_grp(char *vfio_container) -+{ -+ char path[PATH_MAX]; -+ char iommu_group_path[PATH_MAX], *group_name; -+ struct fsl_vfio_group *group = NULL; -+ struct stat st; -+ int groupid; -+ int ret, len, i; -+ -+ printf("\tProcessing Container = %s\n", vfio_container); -+ sprintf(path, "/sys/bus/fsl-mc/devices/%s", vfio_container); -+ /* Check whether ls-container exists or not */ -+ printf("\tcontainer device path = %s\n", path); -+ if (stat(path, &st) < 0) { -+ RTE_LOG(ERR, EAL, "vfio: Error (%d) getting FSL-MC device (%s)\n", -+ errno, path); -+ return -errno; -+ } -+ -+ /* DPRC container exists. 
NOw checkout the IOMMU Group */ -+ strncat(path, "/iommu_group", sizeof(path) - strlen(path) - 1); -+ -+ len = readlink(path, iommu_group_path, PATH_MAX); -+ if (len == -1) { -+ RTE_LOG(ERR, EAL, "\tvfio: error no iommu_group for device\n"); -+ RTE_LOG(ERR, EAL, "\t%s: len = %d, errno = %d\n", -+ path, len, errno); -+ return -errno; -+ } -+ -+ iommu_group_path[len] = 0; -+ group_name = basename(iommu_group_path); -+ if (sscanf(group_name, "%d", &groupid) != 1) { -+ RTE_LOG(ERR, EAL, "\tvfio: error reading %s: %m\n", path); -+ return -errno; -+ } -+ -+ RTE_LOG(INFO, EAL, "\tvfio: iommu group id = %d\n", groupid); -+ -+ /* Check if group already exists */ -+ for (i = 0; i < VFIO_MAX_GRP; i++) { -+ group = &vfio_groups[i]; -+ if (group->groupid == groupid) { -+ RTE_LOG(ERR, EAL, "groupid already exists %d\n", groupid); -+ return 0; -+ } -+ } -+ -+ if (vfio_set_group(group, groupid)) { -+ RTE_LOG(ERR, EAL, "group setup failure - %d\n", groupid); -+ return -ENODEV; -+ } -+ -+ /* Get Device information */ -+ ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, vfio_container); -+ if (ret < 0) { -+ RTE_LOG(ERR, EAL, "\tvfio: error getting device %s fd from group %d\n", -+ vfio_container, group->groupid); -+ return ret; -+ } -+ container_device_fd = ret; -+ RTE_LOG(INFO, EAL, "vfio: Container FD is [0x%X]\n", container_device_fd); -+ /* Set up SMMU */ -+ ret = setup_dmamap(); -+ if (ret) { -+ RTE_LOG(ERR, EAL, ": Setting dma map\n"); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int64_t vfio_map_mcp_obj(struct fsl_vfio_group *group, char *mcp_obj) -+{ -+ int64_t v_addr = (int64_t)MAP_FAILED; -+ int32_t ret, mc_fd; -+ -+ struct vfio_device_info d_info = { .argsz = sizeof(d_info) }; -+ struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; -+ -+ /* getting the mcp object's fd*/ -+ mc_fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, mcp_obj); -+ if (mc_fd < 0) { -+ RTE_LOG(ERR, EAL, "vfio: error getting device %s fd from group %d\n", -+ mcp_obj, group->fd); -+ return v_addr; -+ } -+ -+ /* getting device info*/ -+ ret = ioctl(mc_fd, VFIO_DEVICE_GET_INFO, &d_info); -+ if (ret < 0) { -+ RTE_LOG(ERR, EAL, "vfio: error getting DEVICE_INFO\n"); -+ goto MC_FAILURE; -+ } -+ -+ /* getting device region info*/ -+ ret = ioctl(mc_fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); -+ if (ret < 0) { -+ RTE_LOG(ERR, EAL, "vfio: error getting REGION_INFO\n"); -+ goto MC_FAILURE; -+ } -+ -+ RTE_LOG(INFO, EAL, "region offset = %llx , region size = %llx\n", -+ reg_info.offset, reg_info.size); -+ -+ v_addr = (uint64_t)mmap(NULL, reg_info.size, -+ PROT_WRITE | PROT_READ, MAP_SHARED, -+ mc_fd, reg_info.offset); -+ -+MC_FAILURE: -+ close(mc_fd); -+ -+ return v_addr; -+} -+ -+/* Following function shall fetch total available list of MC devices -+ * from VFIO container & populate private list of devices and other -+ * data structures -+ */ -+static int vfio_process_group_devices(void) -+{ -+ struct fsl_vfio_device *vdev; -+ struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; -+ char *temp_obj, *object_type, *mcp_obj, *dev_name; -+ int32_t object_id, i, dev_fd, ret; -+ DIR *d; -+ struct dirent *dir; -+ char path[PATH_MAX]; -+ int64_t v_addr; -+ int ndev_count; -+ struct fsl_vfio_group *group = &vfio_groups[0]; -+ -+ sprintf(path, "/sys/kernel/iommu_groups/%d/devices", group->groupid); -+ -+ d = opendir(path); -+ if (!d) { -+ RTE_LOG(ERR, EAL, "Unable to open directory %s\n", path); -+ return -1; -+ } -+ -+ /*Counting the number of devices in a group and getting the mcp ID*/ -+ ndev_count = 0; -+ mcp_obj 
= NULL; -+ while ((dir = readdir(d)) != NULL) { -+ if (dir->d_type == DT_LNK) { -+ ndev_count++; -+ if (!strncmp("dpmcp", dir->d_name, 5)) { -+ if (mcp_obj) -+ free(mcp_obj); -+ mcp_obj = malloc(sizeof(dir->d_name)); -+ if (!mcp_obj) { -+ RTE_LOG(ERR, EAL, -+ "Unable to allocate memory\n"); -+ return -ENOMEM; -+ } -+ strcpy(mcp_obj, dir->d_name); -+ temp_obj = strtok(dir->d_name, "."); -+ temp_obj = strtok(NULL, "."); -+ sscanf(temp_obj, "%d", &mcp_id); -+ } -+ } -+ } -+ closedir(d); -+ -+ if (!mcp_obj) { -+ RTE_LOG(ERR, EAL, "MCP Object not Found\n"); -+ return -ENODEV; -+ } -+ RTE_LOG(INFO, EAL, "Total devices in conatiner = %d, MCP ID = %d\n", -+ ndev_count, mcp_id); -+ -+ /* Allocate the memory depends upon number of objects in a group*/ -+ group->vfio_device = (struct fsl_vfio_device *)malloc(ndev_count * sizeof(struct fsl_vfio_device)); -+ if (!(group->vfio_device)) { -+ RTE_LOG(ERR, EAL, "Unable to allocate memory\n"); -+ free(mcp_obj); -+ return -ENOMEM; -+ } -+ -+ /* Allocate memory for MC Portal list */ -+ mcp_ptr_list = malloc(sizeof(void *) * 1); -+ if (!mcp_ptr_list) { -+ RTE_LOG(ERR, EAL, "NO Memory!\n"); -+ free(mcp_obj); -+ goto FAILURE; -+ } -+ -+ v_addr = vfio_map_mcp_obj(group, mcp_obj); -+ free(mcp_obj); -+ if (v_addr == (int64_t)MAP_FAILED) { -+ RTE_LOG(ERR, EAL, "mapping region (errno = %d)\n", errno); -+ goto FAILURE; -+ } -+ -+ RTE_LOG(INFO, EAL, "MC has VIR_ADD = 0x%ld\n", v_addr); -+ -+ mcp_ptr_list[0] = (void *)v_addr; -+ -+ d = opendir(path); -+ if (!d) { -+ RTE_LOG(ERR, EAL, "Directory %s not able to open\n", path); -+ goto FAILURE; -+ } -+ -+ i = 0; -+ printf("\nDPAA2 - Parsing MC Device Objects:\n"); -+ /* Parsing each object and initiating them*/ -+ while ((dir = readdir(d)) != NULL) { -+ if (dir->d_type != DT_LNK) -+ continue; -+ if (!strncmp("dprc", dir->d_name, 4) || !strncmp("dpmcp", dir->d_name, 5)) -+ continue; -+ dev_name = malloc(sizeof(dir->d_name)); -+ if (!dev_name) { -+ RTE_LOG(ERR, EAL, "Unable to allocate memory\n"); -+ goto FAILURE; -+ } -+ strcpy(dev_name, dir->d_name); -+ object_type = strtok(dir->d_name, "."); -+ temp_obj = strtok(NULL, "."); -+ sscanf(temp_obj, "%d", &object_id); -+ RTE_LOG(INFO, EAL, "%s ", dev_name); -+ -+ /* getting the device fd*/ -+ dev_fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, dev_name); -+ if (dev_fd < 0) { -+ RTE_LOG(ERR, EAL, "vfio getting device %s fd from group %d\n", -+ dev_name, group->fd); -+ free(dev_name); -+ goto FAILURE; -+ } -+ -+ free(dev_name); -+ vdev = &group->vfio_device[group->object_index++]; -+ vdev->fd = dev_fd; -+ vdev->index = i; -+ i++; -+ /* Get Device inofrmation */ -+ if (ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &device_info)) { -+ RTE_LOG(ERR, EAL, "VFIO_DEVICE_FSL_MC_GET_INFO failed\n"); -+ goto FAILURE; -+ } -+ -+ if (!strcmp(object_type, "dpni") || -+ !strcmp(object_type, "dpseci")) { -+ struct rte_pci_device *dev; -+ -+ dev = malloc(sizeof(struct rte_pci_device)); -+ if (dev == NULL) { -+ return -1; -+ } -+ memset(dev, 0, sizeof(*dev)); -+ /* store hw_id of dpni/dpseci device */ -+ dev->addr.devid = object_id; -+ dev->id.vendor_id = FSL_VENDOR_ID; -+ dev->id.device_id = (strcmp(object_type, "dpseci")) ? 
-+ FSL_MC_DPNI_DEVID : FSL_MC_DPSECI_DEVID; -+ -+ TAILQ_INSERT_TAIL(&pci_device_list, dev, next); -+ } -+ -+ if (!strcmp(object_type, "dpio")) { -+ dpaa2_create_dpio_device(vdev, &device_info, object_id); -+ } -+ -+ if (!strcmp(object_type, "dpbp")) { -+ dpaa2_create_dpbp_device(object_id); -+ } -+ } -+ closedir(d); -+ -+ ret = dpaa2_affine_qbman_swp(); -+ if (ret) -+ RTE_LOG(ERR, EAL, "%s(): Err in affining qbman swp\n", __func__); -+ -+ return 0; -+ -+FAILURE: -+ free(group->vfio_device); -+ group->vfio_device = NULL; -+ return -1; -+} -+ -+/* -+ * Scan the content of the PCI bus, and the devices in the devices -+ * list -+ */ -+static int -+fsl_mc_scan(void) -+{ -+ char path[PATH_MAX]; -+ struct stat st; -+ -+ ls2bus_container = getenv("DPRC"); -+ -+ if (ls2bus_container == NULL) { -+ RTE_LOG(WARNING, EAL, "vfio container not set in env DPRC\n"); -+ return -1; -+ } -+ -+ snprintf(path, sizeof(path), "%s/%s", SYSFS_FSL_MC_DEVICES, -+ ls2bus_container); -+ /* Check whether LS-Container exists or not */ -+ RTE_LOG(INFO, EAL, "\tcontainer device path = %s\n", path); -+ if (stat(path, &st) < 0) { -+ RTE_LOG(ERR, EAL, "vfio:fsl-mc device does not exists\n"); -+ return -1; -+ } -+ return 0; -+} -+ -+/* Init the FSL-MC- LS2 EAL subsystem */ -+int -+rte_eal_dpaa2_init(void) -+{ -+ if (fsl_mc_scan() < 0) -+ return -1; -+ -+#ifdef VFIO_PRESENT -+ if (setup_vfio_grp(ls2bus_container)) { -+ RTE_LOG(ERR, EAL, "setup_vfio_grp\n"); -+ return -1; -+ } -+ if (vfio_process_group_devices()) { -+ RTE_LOG(ERR, EAL, "vfio_process_group_devices\n"); -+ return -1; -+ } -+#endif -+ return 0; -+} -diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_fsl_mc.h b/lib/librte_eal/linuxapp/eal/eal_vfio_fsl_mc.h -new file mode 100644 -index 0000000..cf2bd38 ---- /dev/null -+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_fsl_mc.h -@@ -0,0 +1,98 @@ -+/*- -+ * BSD LICENSE -+ * -+ * Copyright (c) 2014 Freescale Semiconductor, Inc. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * * Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in -+ * the documentation and/or other materials provided with the -+ * distribution. -+ * * Neither the name of Freescale Semiconductor nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+ -+#ifndef _EAL_VFIO_FSL_MC_H_ -+#define _EAL_VFIO_FSL_MC_H_ -+ -+#include -+#include -+#include "eal_vfio.h" -+ -+#define FSL_VENDOR_ID 0x1957 -+#define FSL_MC_DPNI_DEVID 7 -+#define FSL_MC_DPSECI_DEVID 3 -+ -+#define VFIO_MAX_GRP 1 -+#define VFIO_MAX_CONTAINERS 1 -+ -+#define DPAA2_MBUF_HW_ANNOTATION 64 -+#define DPAA2_FD_PTA_SIZE 64 -+ -+#if (DPAA2_MBUF_HW_ANNOTATION + DPAA2_FD_PTA_SIZE) > RTE_PKTMBUF_HEADROOM -+#error "Annotation requirement is more than RTE_PKTMBUF_HEADROOM" -+#endif -+ -+/* we will re-use the HEADROOM for annotation in RX */ -+#define DPAA2_HW_BUF_RESERVE 0 -+#define DPAA2_PACKET_LAYOUT_ALIGN 64 /*changing from 256 */ -+ -+typedef struct fsl_vfio_device { -+ int fd; /* fsl_mc root container device ?? */ -+ int index; /*index of child object */ -+ struct fsl_vfio_device *child; /* Child object */ -+} fsl_vfio_device; -+ -+typedef struct fsl_vfio_group { -+ int fd; /* /dev/vfio/"groupid" */ -+ int groupid; -+ struct fsl_vfio_container *container; -+ int object_index; -+ struct fsl_vfio_device *vfio_device; -+} fsl_vfio_group; -+ -+typedef struct fsl_vfio_container { -+ int fd; /* /dev/vfio/vfio */ -+ int used; -+ int index; /* index in group list */ -+ struct fsl_vfio_group *group_list[VFIO_MAX_GRP]; -+} fsl_vfio_container; -+ -+int vfio_dmamap_mem_region( -+ uint64_t vaddr, -+ uint64_t iova, -+ uint64_t size); -+ -+/* initialize the NXP/FSL dpaa2 accelerators */ -+int rte_eal_dpaa2_init(void); -+ -+int dpaa2_create_dpio_device(struct fsl_vfio_device *vdev, -+ struct vfio_device_info *obj_info, -+ int object_id); -+ -+int dpaa2_create_dpbp_device(int dpbp_id); -+ -+int dpaa2_affine_qbman_swp(void); -+ -+int dpaa2_affine_qbman_swp_sec(void); -+ -+#endif -+ -diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h -index 059ad9e..d620ad1 100644 ---- a/lib/librte_mempool/rte_mempool.h -+++ b/lib/librte_mempool/rte_mempool.h -@@ -262,6 +262,14 @@ struct rte_mempool { - #define MEMPOOL_F_POOL_CREATED 0x0010 /**< Internal: pool is created. */ - #define MEMPOOL_F_NO_PHYS_CONTIG 0x0020 /**< Don't need physically contiguous objs. */ - -+#ifdef RTE_LIBRTE_DPAA2_PMD -+/* TODO: This should be removed once mempool integration is complete. Primary -+ * reason for this is identification of DPAA1/2 memory pool for forwarding -+ * case -+ */ -+#define MEMPOOL_F_HW_PKT_POOL 0x0080 -+#endif -+ - /** - * @internal When debug is enabled, store some statistics. - * -diff --git a/mk/rte.app.mk b/mk/rte.app.mk -index eb28e11..11ae122 100644 ---- a/mk/rte.app.mk -+++ b/mk/rte.app.mk -@@ -101,6 +101,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_CFGFILE) += -lrte_cfgfile - - _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += -lrte_pmd_bond - _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += -lrte_pmd_xenvirt -lxenstore -+_LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += -lrte_pmd_dpaa2 - - ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n) - # plugins (link only if static libraries) --- -2.5.0 - diff --git a/dpdk/dpdk-16.07_patches/0005-drivers-reset-packet_type-before-using-buffer.patch b/dpdk/dpdk-16.07_patches/0005-drivers-reset-packet_type-before-using-buffer.patch deleted file mode 100644 index d20ff28f..00000000 --- a/dpdk/dpdk-16.07_patches/0005-drivers-reset-packet_type-before-using-buffer.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 729a464f9a58fc77bf4e8f527a7848c6153e4b75 Mon Sep 17 00:00:00 2001 -From: Ray Kinsella -Date: Mon, 8 Aug 2016 19:41:59 +0100 -Subject: [PATCH] drivers: reset packet_type before using buffer - -Ensure the packet_type is reset before the buffer is used. 
Failing to do so can cause packets to be mishandled in systems with more than one type of driver in use. - -Signed-off-by: Ray Kinsella -Signed-off-by: Todd Foggoa (tfoggoa) ---- - drivers/net/e1000/em_rxtx.c | 2 ++ - drivers/net/virtio/virtio_rxtx.c | 2 ++ - drivers/net/vmxnet3/vmxnet3_rxtx.c | 1 + - 3 files changed, 5 insertions(+) - -diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c -index 6d8750a..693dd9d 100644 ---- a/drivers/net/e1000/em_rxtx.c -+++ b/drivers/net/e1000/em_rxtx.c -@@ -784,6 +784,7 @@ eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, - rxm->ol_flags = rx_desc_status_to_pkt_flags(status); - rxm->ol_flags = rxm->ol_flags | - rx_desc_error_to_pkt_flags(rxd.errors); -+ rxm->packet_type = RTE_PTYPE_UNKNOWN; - - /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */ - rxm->vlan_tci = rte_le_to_cpu_16(rxd.special); -@@ -1010,6 +1011,7 @@ eth_em_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, - first_seg->ol_flags = rx_desc_status_to_pkt_flags(status); - first_seg->ol_flags = first_seg->ol_flags | - rx_desc_error_to_pkt_flags(rxd.errors); -+ first_seg->packet_type = RTE_PTYPE_UNKNOWN; - - /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */ - rxm->vlan_tci = rte_le_to_cpu_16(rxd.special); -diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c -index 724517e..f1a572d 100644 ---- a/drivers/net/virtio/virtio_rxtx.c -+++ b/drivers/net/virtio/virtio_rxtx.c -@@ -677,6 +677,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) - rxm->data_off = RTE_PKTMBUF_HEADROOM; - rxm->ol_flags = 0; - rxm->vlan_tci = 0; -+ rxm->packet_type = RTE_PTYPE_UNKNOWN; - - rxm->nb_segs = 1; - rxm->next = NULL; -@@ -800,6 +801,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, - rxm->vlan_tci = 0; - rxm->pkt_len = (uint32_t)(len[0] - hdr_size); - rxm->data_len = (uint16_t)(len[0] - hdr_size); -+ rxm->packet_type = RTE_PTYPE_UNKNOWN; - - rxm->port = rxvq->port_id; - rx_pkts[nb_rx] = rxm; -diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c -index 9deeb3f..ac11d82 100644 ---- a/drivers/net/vmxnet3/vmxnet3_rxtx.c -+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c -@@ -686,6 +686,7 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) - rxm->data_off = RTE_PKTMBUF_HEADROOM; - rxm->ol_flags = 0; - rxm->vlan_tci = 0; -+ rxm->packet_type = RTE_PTYPE_UNKNOWN; - - /* - * If this is the first buffer of the received packet, --- -1.9.1 - diff --git a/dpdk/dpdk-16.07_patches/0006-Allow-applications-to-override-rte_delay_us.patch b/dpdk/dpdk-16.07_patches/0006-Allow-applications-to-override-rte_delay_us.patch deleted file mode 100644 index 8a32f600..00000000 --- a/dpdk/dpdk-16.07_patches/0006-Allow-applications-to-override-rte_delay_us.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 3432c140c9c51e671a4d58bb428d5852426add1f Mon Sep 17 00:00:00 2001 -From: "Todd Foggoa (tfoggoa)" -Date: Wed, 3 Feb 2016 08:35:27 -0800 -Subject: [PATCH 5/6] Allow applications to override rte_delay_us() - -Some applications may wish to define their own implementation of -usec delay other than the existing blocking one. The default -behavior remains unchanged.
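For illustration, this is how an application could claim the delay once the hunk below is in place; the sketch is not part of the patch, and the nanosleep-based policy is just one possible choice. rte_delay_us() skips its default busy-wait whenever the override returns non-zero:

#include <time.h>

/* Hypothetical application-side override of the weak
 * rte_delay_us_override() hook added by the hunk below. */
int
rte_delay_us_override(unsigned us)
{
	struct timespec ts = {
		.tv_sec = us / 1000000,
		.tv_nsec = (us % 1000000) * 1000,
	};

	nanosleep(&ts, NULL);	/* yield the core instead of spinning */
	return 1;		/* non-zero: default busy-wait is skipped */
}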
- -Signed-off-by: Todd Foggoa (tfoggoa) ---- - lib/librte_eal/common/eal_common_timer.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c -index c4227cd..cc26b91 100644 ---- a/lib/librte_eal/common/eal_common_timer.c -+++ b/lib/librte_eal/common/eal_common_timer.c -@@ -47,9 +47,21 @@ - /* The frequency of the RDTSC timer resolution */ - static uint64_t eal_tsc_resolution_hz; - -+/* Allow an override of the rte_delay_us function */ -+int rte_delay_us_override (unsigned us) __attribute__((weak)); -+ -+int -+rte_delay_us_override(__attribute__((unused)) unsigned us) -+{ -+ return 0; -+} -+ - void - rte_delay_us(unsigned us) - { -+ if (rte_delay_us_override(us)) -+ return; -+ - const uint64_t start = rte_get_timer_cycles(); - const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6; - while ((rte_get_timer_cycles() - start) < ticks) --- -2.7.4 - diff --git a/dpdk/dpdk-16.07_patches/0007-UIO-Fix-a-crash-in-igb_uio-driver-when-the-device-is.patch b/dpdk/dpdk-16.07_patches/0007-UIO-Fix-a-crash-in-igb_uio-driver-when-the-device-is.patch deleted file mode 100644 index 07e1c9c8..00000000 --- a/dpdk/dpdk-16.07_patches/0007-UIO-Fix-a-crash-in-igb_uio-driver-when-the-device-is.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 95c2d549d8d123aac37a372580122f1b043c6165 Mon Sep 17 00:00:00 2001 -From: Ray Kinsella -Date: Wed, 10 Aug 2016 11:59:07 +0100 -Subject: [PATCH] UIO: Fix a crash in igb_uio driver when the device is - removed. - -This crash happens because the device still has MSI configured, -the fix is to free the IRQ. - -Signed-off-by: Todd Foggoa (tfoggoa) -Signed-off-by: Ray Kinsella ---- - lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c -index df41e45..69873e7 100644 ---- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c -+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c -@@ -442,8 +442,15 @@ static void - igbuio_pci_remove(struct pci_dev *dev) - { - struct rte_uio_pci_dev *udev = pci_get_drvdata(dev); -+ struct uio_info *info = pci_get_drvdata(dev); - - sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); -+ -+ if (info->irq && (info->irq != UIO_IRQ_CUSTOM)){ -+ free_irq(info->irq, info->uio_dev); -+ info->irq = UIO_IRQ_NONE; -+ } -+ - uio_unregister_device(&udev->info); - igbuio_pci_release_iomem(&udev->info); - if (udev->mode == RTE_INTR_MODE_MSIX) --- -1.9.1 - diff --git a/dpdk/dpdk-16.07_patches/0008-Temporarily-disable-unthrottled-log-message.patch b/dpdk/dpdk-16.07_patches/0008-Temporarily-disable-unthrottled-log-message.patch deleted file mode 100644 index b637993c..00000000 --- a/dpdk/dpdk-16.07_patches/0008-Temporarily-disable-unthrottled-log-message.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 454e25ed57c17ec18ee76ead4a75f9abdf579608 Mon Sep 17 00:00:00 2001 -From: Dave Barach -Date: Tue, 9 Feb 2016 10:22:39 -0500 -Subject: [PATCH 6/6] Temporarily disable unthrottled log message. 
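The hunk below simply gates the RTE_LOG call behind if (0). A rate-limited variant would keep some visibility without flooding /var/log; the helper here is only an illustrative sketch (its name and the one-message-per-second policy are invented, not part of the patch):

#include <errno.h>
#include <string.h>
#include <rte_cycles.h>
#include <rte_log.h>

/* Hypothetical throttled replacement for the disabled log statement:
 * emit at most one message per second instead of none at all. */
static void
log_read_error_throttled(int fd)
{
	static uint64_t next_log_cycles;
	uint64_t now = rte_get_timer_cycles();

	if (now < next_log_cycles)
		return;
	next_log_cycles = now + rte_get_timer_hz();	/* ~1 second */
	RTE_LOG(ERR, EAL, "Error reading from file descriptor %d: %s\n",
		fd, strerror(errno));
}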
- -Signed-off-by: Dave Barach ---- - lib/librte_eal/linuxapp/eal/eal_interrupts.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c -index 06b26a9..8d918a4 100644 ---- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c -+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c -@@ -709,6 +709,8 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) - if (errno == EINTR || errno == EWOULDBLOCK) - continue; - -+ /* $$$ disable to avoid filling /var/log */ -+ if (0) - RTE_LOG(ERR, EAL, "Error reading from file " - "descriptor %d: %s\n", - events[n].data.fd, --- -2.7.4 - diff --git a/dpdk/dpdk-16.07_patches/0009-enic-bad-L4-checksum-ptype-set-on-ICMP-packets.patch b/dpdk/dpdk-16.07_patches/0009-enic-bad-L4-checksum-ptype-set-on-ICMP-packets.patch deleted file mode 100644 index 71a9d9b5..00000000 --- a/dpdk/dpdk-16.07_patches/0009-enic-bad-L4-checksum-ptype-set-on-ICMP-packets.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c -index 50f0b28..ad59613 100644 ---- a/drivers/net/enic/enic_rxtx.c -+++ b/drivers/net/enic/enic_rxtx.c -@@ -212,9 +212,12 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) - /* checksum flags */ - if (!enic_cq_rx_desc_csum_not_calc(cqrd) && - (mbuf->packet_type & RTE_PTYPE_L3_IPV4)) { -+ uint32_t l4_flags = mbuf->packet_type & RTE_PTYPE_L4_MASK; -+ - if (unlikely(!enic_cq_rx_desc_ipv4_csum_ok(cqrd))) - pkt_flags |= PKT_RX_IP_CKSUM_BAD; -- if (mbuf->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)) { -+ if (l4_flags == RTE_PTYPE_L4_UDP || -+ l4_flags == RTE_PTYPE_L4_TCP) { - if (unlikely(!enic_cq_rx_desc_tcp_udp_csum_ok(cqrd))) - pkt_flags |= PKT_RX_L4_CKSUM_BAD; - } diff --git a/dpdk/dpdk-16.07_patches/0010-virtio-enable-indirect-descriptors-feature.patch b/dpdk/dpdk-16.07_patches/0010-virtio-enable-indirect-descriptors-feature.patch deleted file mode 100644 index 80cd4bff..00000000 --- a/dpdk/dpdk-16.07_patches/0010-virtio-enable-indirect-descriptors-feature.patch +++ /dev/null @@ -1,34 +0,0 @@ -From be1210e77f0f9072ccb8e6970552596b6780a44c Mon Sep 17 00:00:00 2001 -From: Pierre Pfister -Date: Fri, 2 Sep 2016 16:24:57 +0200 -Subject: [PATCH] virtio: enable indirect descriptors feature - -Virtio indirect descriptors are supported by the data-path -but the feature bit is never set during feature negociation. - -This patch simply adds VIRTIO_RING_F_INDIRECT_DESC back to -the supported features bit mask, hence enabling the use of -indirect descriptors when the feature is negociated with the -device. 
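The fix below is a single bit in a compile-time mask, but it matters because of how virtio feature negotiation works: the usable feature set is the intersection of what the device offers and what the driver advertises, so a feature the data path already supports stays disabled if its bit is missing from the driver mask. A self-contained sketch of that intersection (not the DPDK driver code; the bit value 28 for `VIRTIO_RING_F_INDIRECT_DESC` is taken from the virtio specification, and the device mask here is illustrative):

```c
#include <stdint.h>
#include <stdio.h>

#define VIRTIO_RING_F_INDIRECT_DESC 28	/* per the virtio spec */

int
main (void)
{
  /* Device offers indirect descriptors plus some other bits. */
  uint64_t device_features =
    (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | 0x3fULL;
  /* Driver mask: without this bit, negotiation below drops the
   * feature no matter what the data path can do. */
  uint64_t driver_features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC;

  /* Negotiation keeps only the bits both sides support. */
  uint64_t negotiated = device_features & driver_features;

  if (negotiated & (1ULL << VIRTIO_RING_F_INDIRECT_DESC))
    printf ("indirect descriptors negotiated\n");
  else
    printf ("indirect descriptors unavailable\n");

  return 0;
}
```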
- -Signed-off-by: Pierre Pfister ---- - drivers/net/virtio/virtio_ethdev.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h -index 2ecec6e..31c91a5 100644 ---- a/drivers/net/virtio/virtio_ethdev.h -+++ b/drivers/net/virtio/virtio_ethdev.h -@@ -63,6 +63,7 @@ - 1u << VIRTIO_NET_F_CTRL_RX | \ - 1u << VIRTIO_NET_F_CTRL_VLAN | \ - 1u << VIRTIO_NET_F_MRG_RXBUF | \ -+ 1u << VIRTIO_RING_F_INDIRECT_DESC | \ - 1ULL << VIRTIO_F_VERSION_1) - - /* --- -2.7.4 (Apple Git-66) - - diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h index 1b54460e..79c694f7 100644 --- a/src/vnet/devices/dpdk/dpdk.h +++ b/src/vnet/devices/dpdk/dpdk.h @@ -66,7 +66,6 @@ extern vnet_device_class_t dpdk_device_class; extern vlib_node_registration_t dpdk_input_node; extern vlib_node_registration_t handoff_dispatch_node; -#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) #define foreach_dpdk_pmd \ _ ("net_thunderx", THUNDERX) \ _ ("net_e1000_em", E1000EM) \ @@ -85,25 +84,6 @@ extern vlib_node_registration_t handoff_dispatch_node; _ ("net_cxgbe", CXGBE) \ _ ("net_mlx5", MLX5) \ _ ("net_dpaa2", DPAA2) -#else -#define foreach_dpdk_pmd \ - _ ("rte_nicvf_pmd", THUNDERX) \ - _ ("rte_em_pmd", E1000EM) \ - _ ("rte_igb_pmd", IGB) \ - _ ("rte_igbvf_pmd", IGBVF) \ - _ ("rte_ixgbe_pmd", IXGBE) \ - _ ("rte_ixgbevf_pmd", IXGBEVF) \ - _ ("rte_i40e_pmd", I40E) \ - _ ("rte_i40evf_pmd", I40EVF) \ - _ ("rte_virtio_pmd", VIRTIO) \ - _ ("rte_enic_pmd", ENIC) \ - _ ("rte_vmxnet3_pmd", VMXNET3) \ - _ ("AF_PACKET PMD", AF_PACKET) \ - _ ("rte_bond_pmd", BOND) \ - _ ("rte_pmd_fm10k", FM10K) \ - _ ("rte_cxgbe_pmd", CXGBE) \ - _ ("rte_dpaa2_dpni", DPAA2) -#endif typedef enum { diff --git a/src/vnet/devices/dpdk/format.c b/src/vnet/devices/dpdk/format.c index cc0d71af..1558630c 100644 --- a/src/vnet/devices/dpdk/format.c +++ b/src/vnet/devices/dpdk/format.c @@ -79,12 +79,6 @@ _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \ _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert") -#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0) -/* New ol_flags bits added in DPDK-16.11 */ -#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) -#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8) -#endif - #define foreach_dpdk_pkt_rx_offload_flag \ _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \ _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \ @@ -98,12 +92,6 @@ _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \ _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped") -#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0) -/* PTYPE added in DPDK-16.11 */ -#define RTE_PTYPE_L2_ETHER_VLAN 0x00000006 -#define RTE_PTYPE_L2_ETHER_QINQ 0x00000007 -#endif - #define foreach_dpdk_pkt_type \ _ (L2, ETHER, "Ethernet packet") \ _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \ diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index ec008c20..f4700133 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -428,11 +428,8 @@ dpdk_lib_init (dpdk_main_t * dm) /* workaround for drivers not setting driver_name */ if ((!dev_info.driver_name) && (dev_info.pci_dev)) -#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0) - dev_info.driver_name = dev_info.pci_dev->driver->name; -#else dev_info.driver_name = dev_info.pci_dev->driver->driver.name; -#endif + ASSERT (dev_info.driver_name); if (!xd->pmd) @@ -1608,11 +1605,8 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) struct rte_eth_dev_info dev_info; rte_eth_dev_info_get (i, 
&dev_info); if (!dev_info.driver_name) -#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0) - dev_info.driver_name = dev_info.pci_dev->driver->name; -#else dev_info.driver_name = dev_info.pci_dev->driver->driver.name; -#endif + ASSERT (dev_info.driver_name); if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0) { diff --git a/src/vnet/devices/dpdk/main.c b/src/vnet/devices/dpdk/main.c index 1e6ec2f8..9ea3aa04 100644 --- a/src/vnet/devices/dpdk/main.c +++ b/src/vnet/devices/dpdk/main.c @@ -61,14 +61,12 @@ rte_delay_us_override (unsigned us) return 0; // no override } -#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) static void rte_delay_us_override_cb (unsigned us) { if (rte_delay_us_override (us) == 0) rte_delay_us_block (us); } -#endif static clib_error_t * dpdk_main_init (vlib_main_t * vm) { @@ -77,12 +75,9 @@ static clib_error_t * dpdk_main_init (vlib_main_t * vm) if ((error = vlib_call_init_function (vm, dpdk_init))) return error; -#if DPDK -#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) /* register custom delay function */ rte_delay_us_callback_register (rte_delay_us_override_cb); -#endif -#endif + return error; } diff --git a/src/vnet/devices/dpdk/node.c b/src/vnet/devices/dpdk/node.c index e541cdbc..bde9dfae 100644 --- a/src/vnet/devices/dpdk/node.c +++ b/src/vnet/devices/dpdk/node.c @@ -55,11 +55,6 @@ vlib_buffer_is_mpls (vlib_buffer_t * b) return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); } -#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0) -/* New ol_flags bits added in DPDK-16.11 */ -#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) -#endif - always_inline u32 dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0) { @@ -79,15 +74,8 @@ dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0) always_inline int dpdk_mbuf_is_vlan (struct rte_mbuf *mb) { -#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) == RTE_PTYPE_L2_ETHER_VLAN; -#else - return - (mb->ol_flags & - (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == - PKT_RX_VLAN_PKT; -#endif } always_inline int -- cgit 1.2.3-korg From b3bb10101ceffec1df0624c785acbd40858870ec Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 28 Feb 2017 21:55:28 +0100 Subject: devices: vnet_get_aggregate_rx_packets should not be dpdk specific Change-Id: I1152db4b7d1602653d7d8b2c6cb28cf5c526c4ca Signed-off-by: Damjan Marion --- src/vnet/devices/af_packet/node.c | 1 + src/vnet/devices/devices.c | 14 ++++++++++++++ src/vnet/devices/devices.h | 36 ++++++++++++++++++++++++++++++++++++ src/vnet/devices/dpdk/dpdk.h | 34 ---------------------------------- src/vnet/devices/dpdk/init.c | 8 -------- src/vnet/devices/dpdk/node.c | 3 +-- src/vnet/devices/netmap/node.c | 2 ++ src/vnet/devices/ssvm/node.c | 2 ++ src/vnet/devices/virtio/vhost-user.c | 2 ++ src/vpp/api/gmon.c | 10 +--------- 10 files changed, 59 insertions(+), 53 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index 476ccca9..69fc11c9 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -239,6 +239,7 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + VNET_INTERFACE_COUNTER_RX, os_get_cpu_number (), apif->hw_if_index, n_rx_packets, n_rx_bytes); + vnet_device_increment_rx_packets (cpu_index, n_rx_packets); return n_rx_packets; } diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index cd4386eb..c81043c6 100644 --- 
a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -19,6 +19,8 @@ #include #include +vnet_device_main_t vnet_device_main; + static uword device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -82,6 +84,18 @@ VNET_FEATURE_INIT (ethernet_input, static) = { }; /* *INDENT-ON* */ +static clib_error_t * +vnet_device_init (vlib_main_t * vm) +{ + vnet_device_main_t *vdm = &vnet_device_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + vec_validate_aligned (vdm->workers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + return 0; +} + +VLIB_INIT_FUNCTION (vnet_device_init); /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index c46dab90..a5cbc35e 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -39,9 +39,45 @@ typedef enum [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = "mpls-input", \ } +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /* total input packet counter */ + u64 aggregate_rx_packets; +} vnet_device_per_worker_data_t; + +typedef struct +{ + vnet_device_per_worker_data_t *workers; +} vnet_device_main_t; + +extern vnet_device_main_t vnet_device_main; extern vlib_node_registration_t device_input_node; extern const u32 device_input_next_node_advance[]; +static inline u64 +vnet_get_aggregate_rx_packets (void) +{ + vnet_device_main_t *vdm = &vnet_device_main; + u64 sum = 0; + vnet_device_per_worker_data_t *pwd; + + vec_foreach (pwd, vdm->workers) sum += pwd->aggregate_rx_packets; + + return sum; +} + +static inline void +vnet_device_increment_rx_packets (u32 cpu_index, u64 count) +{ + vnet_device_main_t *vdm = &vnet_device_main; + vnet_device_per_worker_data_t *pwd; + + pwd = vec_elt_at_index (vdm->workers, cpu_index); + pwd->aggregate_rx_packets += count; +} + #endif /* included_vnet_vnet_device_h */ /* diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h index 79c694f7..bf9f2768 100644 --- a/src/vnet/devices/dpdk/dpdk.h +++ b/src/vnet/devices/dpdk/dpdk.h @@ -223,22 +223,6 @@ typedef struct #define DPDK_LINK_POLL_INTERVAL (3.0) #define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */ -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - - /* total input packet counter */ - u64 aggregate_rx_packets; -} dpdk_worker_t; - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - - /* total input packet counter */ - u64 aggregate_rx_packets; -} dpdk_hqos_thread_t; - typedef struct { u32 device; @@ -360,12 +344,6 @@ typedef struct /* vlib buffer free list, must be same size as an rte_mbuf */ u32 vlib_buffer_free_list_index; - /* dpdk worker "threads" */ - dpdk_worker_t *workers; - - /* dpdk HQoS "threads" */ - dpdk_hqos_thread_t *hqos_threads; - /* Ethernet input node index */ u32 ethernet_input_node_index; @@ -475,18 +453,6 @@ void dpdk_update_link_state (dpdk_device_t * xd, f64 now); void dpdk_device_lock_init (dpdk_device_t * xd); void dpdk_device_lock_free (dpdk_device_t * xd); -static inline u64 -vnet_get_aggregate_rx_packets (void) -{ - dpdk_main_t *dm = &dpdk_main; - u64 sum = 0; - dpdk_worker_t *dw; - - vec_foreach (dw, dm->workers) sum += dw->aggregate_rx_packets; - - return sum; -} - void dpdk_rx_trace (dpdk_main_t * dm, vlib_node_runtime_t * node, dpdk_device_t * xd, diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index f4700133..29423e15 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -277,9 +277,6 @@ dpdk_lib_init (dpdk_main_t * dm) 
vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (dm->workers, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - dm->hqos_cpu_first_index = 0; dm->hqos_cpu_count = 0; @@ -296,9 +293,6 @@ dpdk_lib_init (dpdk_main_t * dm) vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (dm->hqos_threads, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - nports = rte_eth_dev_count (); if (nports < 1) { @@ -1756,8 +1750,6 @@ dpdk_init (vlib_main_t * vm) STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) == CLIB_CACHE_LINE_BYTES, "Data in cache line 0 is bigger than cache line size"); - STATIC_ASSERT (offsetof (dpdk_worker_t, cacheline0) == 0, - "Cache line marker must be 1st element in dpdk_worker_t"); STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, "Cache line marker must be 1st element in frame_queue_trace_t"); diff --git a/src/vnet/devices/dpdk/node.c b/src/vnet/devices/dpdk/node.c index bde9dfae..0d64ae08 100644 --- a/src/vnet/devices/dpdk/node.c +++ b/src/vnet/devices/dpdk/node.c @@ -556,8 +556,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, + VNET_INTERFACE_COUNTER_RX, cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); - dpdk_worker_t *dw = vec_elt_at_index (dm->workers, cpu_index); - dw->aggregate_rx_packets += mb_index; + vnet_device_increment_rx_packets (cpu_index, mb_index); return mb_index; } diff --git a/src/vnet/devices/netmap/node.c b/src/vnet/devices/netmap/node.c index 19895e47..835209a3 100644 --- a/src/vnet/devices/netmap/node.c +++ b/src/vnet/devices/netmap/node.c @@ -249,6 +249,8 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + VNET_INTERFACE_COUNTER_RX, os_get_cpu_number (), nif->hw_if_index, n_rx_packets, n_rx_bytes); + vnet_device_increment_rx_packets (cpu_index, n_rx_packets); + return n_rx_packets; } diff --git a/src/vnet/devices/ssvm/node.c b/src/vnet/devices/ssvm/node.c index 3a695b1d..a6c9dfd7 100644 --- a/src/vnet/devices/ssvm/node.c +++ b/src/vnet/devices/ssvm/node.c @@ -287,6 +287,8 @@ out: + VNET_INTERFACE_COUNTER_RX, cpu_index, intfc->vlib_hw_if_index, rx_queue_index, n_rx_bytes); + vnet_device_increment_rx_packets (cpu_index, rx_queue_index); + return rx_queue_index; } diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index c43f6e67..f490f0c1 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -1819,6 +1819,8 @@ vhost_user_if_input (vlib_main_t * vm, + VNET_INTERFACE_COUNTER_RX, os_get_cpu_number (), vui->sw_if_index, n_rx_packets, n_rx_bytes); + vnet_device_increment_rx_packets (cpu_index, n_rx_packets); + return n_rx_packets; } diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c index 20deb6a2..b28608f0 100644 --- a/src/vpp/api/gmon.c +++ b/src/vpp/api/gmon.c @@ -59,17 +59,9 @@ typedef struct } gmon_main_t; -#if DPDK == 0 -static inline u64 -vnet_get_aggregate_rx_packets (void) -{ - return 0; -} -#else #include #include -#include -#endif +#include gmon_main_t gmon_main; -- cgit 1.2.3-korg From c3a814be9dc769be942ff8029c7b6eccd4b3af05 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 28 Feb 2017 19:22:22 +0100 Subject: dpdk: be a plugin Change-Id: I238258cdeb77035adc5e88903d824593d0a1da90 Signed-off-by: Damjan Marion --- src/Makefile.am | 21 - src/plugins/Makefile.am | 5 + src/plugins/dpdk.am | 50 + src/plugins/dpdk/api/dpdk.api | 103 + src/plugins/dpdk/api/dpdk_all_api_h.h | 19 + 
src/plugins/dpdk/api/dpdk_msg_enum.h | 31 + src/plugins/dpdk/api/dpdk_test.c | 397 ++++ src/plugins/dpdk/buffer.c | 588 ++++++ src/plugins/dpdk/device/cli.c | 2079 ++++++++++++++++++++ src/plugins/dpdk/device/device.c | 852 ++++++++ src/plugins/dpdk/device/dpdk.h | 490 +++++ src/plugins/dpdk/device/dpdk_priv.h | 135 ++ src/plugins/dpdk/device/format.c | 754 +++++++ src/plugins/dpdk/device/node.c | 674 +++++++ src/plugins/dpdk/dir.dox | 27 + src/plugins/dpdk/hqos/hqos.c | 775 ++++++++ src/plugins/dpdk/hqos/qos_doc.md | 411 ++++ src/plugins/dpdk/init.c | 2074 +++++++++++++++++++ src/plugins/dpdk/ipsec/cli.c | 154 ++ src/plugins/dpdk/ipsec/crypto_node.c | 215 ++ src/plugins/dpdk/ipsec/dir.dox | 18 + src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md | 86 + src/plugins/dpdk/ipsec/esp.h | 249 +++ src/plugins/dpdk/ipsec/esp_decrypt.c | 594 ++++++ src/plugins/dpdk/ipsec/esp_encrypt.c | 609 ++++++ src/plugins/dpdk/ipsec/ipsec.c | 430 ++++ src/plugins/dpdk/ipsec/ipsec.h | 227 +++ src/plugins/dpdk/main.c | 95 + src/plugins/dpdk/thread.c | 85 + src/vat/api_format.c | 320 --- src/vnet.am | 41 +- src/vnet/devices/dpdk/buffer.c | 588 ------ src/vnet/devices/dpdk/cli.c | 2079 -------------------- src/vnet/devices/dpdk/device.c | 852 -------- src/vnet/devices/dpdk/dir.dox | 27 - src/vnet/devices/dpdk/dpdk.api | 103 - src/vnet/devices/dpdk/dpdk.h | 487 ----- src/vnet/devices/dpdk/dpdk_api.c | 246 --- src/vnet/devices/dpdk/dpdk_priv.h | 135 -- src/vnet/devices/dpdk/format.c | 754 ------- src/vnet/devices/dpdk/hqos.c | 775 -------- src/vnet/devices/dpdk/init.c | 1801 ----------------- src/vnet/devices/dpdk/ipsec/cli.c | 154 -- src/vnet/devices/dpdk/ipsec/crypto_node.c | 215 -- src/vnet/devices/dpdk/ipsec/dir.dox | 18 - .../devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md | 86 - src/vnet/devices/dpdk/ipsec/esp.h | 249 --- src/vnet/devices/dpdk/ipsec/esp_decrypt.c | 594 ------ src/vnet/devices/dpdk/ipsec/esp_encrypt.c | 609 ------ src/vnet/devices/dpdk/ipsec/ipsec.c | 430 ---- src/vnet/devices/dpdk/ipsec/ipsec.h | 227 --- src/vnet/devices/dpdk/main.c | 85 - src/vnet/devices/dpdk/node.c | 674 ------- src/vnet/devices/dpdk/qos_doc.md | 411 ---- src/vnet/devices/dpdk/thread.c | 85 - src/vnet/devices/virtio/vhost-user.h | 11 - src/vnet/ipsec/ipsec_api.c | 4 - src/vnet/pg/input.c | 11 +- src/vnet/pg/stream.c | 5 +- src/vnet/replication.c | 6 +- src/vnet/vnet_all_api_h.h | 3 - src/vpp/api/custom_dump.c | 64 - src/vpp/api/gmon.c | 3 +- src/vpp/api/vpe.api | 1 - src/vpp/app/l2t.c | 562 ------ src/vpp/app/l2t_l2.c | 267 --- 66 files changed, 12241 insertions(+), 12988 deletions(-) create mode 100644 src/plugins/dpdk.am create mode 100644 src/plugins/dpdk/api/dpdk.api create mode 100644 src/plugins/dpdk/api/dpdk_all_api_h.h create mode 100644 src/plugins/dpdk/api/dpdk_msg_enum.h create mode 100644 src/plugins/dpdk/api/dpdk_test.c create mode 100644 src/plugins/dpdk/buffer.c create mode 100644 src/plugins/dpdk/device/cli.c create mode 100644 src/plugins/dpdk/device/device.c create mode 100644 src/plugins/dpdk/device/dpdk.h create mode 100644 src/plugins/dpdk/device/dpdk_priv.h create mode 100644 src/plugins/dpdk/device/format.c create mode 100644 src/plugins/dpdk/device/node.c create mode 100644 src/plugins/dpdk/dir.dox create mode 100644 src/plugins/dpdk/hqos/hqos.c create mode 100644 src/plugins/dpdk/hqos/qos_doc.md create mode 100755 src/plugins/dpdk/init.c create mode 100644 src/plugins/dpdk/ipsec/cli.c create mode 100644 src/plugins/dpdk/ipsec/crypto_node.c create mode 100644 src/plugins/dpdk/ipsec/dir.dox create mode 
100644 src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md create mode 100644 src/plugins/dpdk/ipsec/esp.h create mode 100644 src/plugins/dpdk/ipsec/esp_decrypt.c create mode 100644 src/plugins/dpdk/ipsec/esp_encrypt.c create mode 100644 src/plugins/dpdk/ipsec/ipsec.c create mode 100644 src/plugins/dpdk/ipsec/ipsec.h create mode 100644 src/plugins/dpdk/main.c create mode 100644 src/plugins/dpdk/thread.c delete mode 100644 src/vnet/devices/dpdk/buffer.c delete mode 100644 src/vnet/devices/dpdk/cli.c delete mode 100644 src/vnet/devices/dpdk/device.c delete mode 100644 src/vnet/devices/dpdk/dir.dox delete mode 100644 src/vnet/devices/dpdk/dpdk.api delete mode 100644 src/vnet/devices/dpdk/dpdk.h delete mode 100644 src/vnet/devices/dpdk/dpdk_api.c delete mode 100644 src/vnet/devices/dpdk/dpdk_priv.h delete mode 100644 src/vnet/devices/dpdk/format.c delete mode 100644 src/vnet/devices/dpdk/hqos.c delete mode 100755 src/vnet/devices/dpdk/init.c delete mode 100644 src/vnet/devices/dpdk/ipsec/cli.c delete mode 100644 src/vnet/devices/dpdk/ipsec/crypto_node.c delete mode 100644 src/vnet/devices/dpdk/ipsec/dir.dox delete mode 100644 src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md delete mode 100644 src/vnet/devices/dpdk/ipsec/esp.h delete mode 100644 src/vnet/devices/dpdk/ipsec/esp_decrypt.c delete mode 100644 src/vnet/devices/dpdk/ipsec/esp_encrypt.c delete mode 100644 src/vnet/devices/dpdk/ipsec/ipsec.c delete mode 100644 src/vnet/devices/dpdk/ipsec/ipsec.h delete mode 100644 src/vnet/devices/dpdk/main.c delete mode 100644 src/vnet/devices/dpdk/node.c delete mode 100644 src/vnet/devices/dpdk/qos_doc.md delete mode 100644 src/vnet/devices/dpdk/thread.c delete mode 100644 src/vpp/app/l2t.c delete mode 100644 src/vpp/app/l2t_l2.c (limited to 'src/vnet/devices') diff --git a/src/Makefile.am b/src/Makefile.am index 641707ed..5daaa48e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -38,27 +38,6 @@ install-data-local: GREP_TIME=`echo $$GREP_TIME | awk '{print $$2}'` ; \ echo "Command list built, Time taken: $$GREP_TIME" -############################################################################### -# DPDK -############################################################################### - -if WITH_DPDK -if ENABLE_DPDK_SHARED -DPDK_LD_FLAGS = -Wl,--whole-archive,-ldpdk,--no-whole-archive -else -DPDK_LD_FLAGS = -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl -endif -if WITH_DPDK_CRYPTO_SW -DPDK_LD_ADD = -lIPSec_MB -lisal_crypto -endif -if WITH_DPDK_MLX5_PMD -DPDK_LD_FLAGS += -libverbs -lmlx5 -lnuma -endif -else -DPDK_LD_FLAGS = -DPDK_LD_ADD = -endif - ############################################################################### # Components ############################################################################### diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 06b575d1..c8877899 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -24,6 +24,7 @@ vppplugins_LTLIBRARIES = vppapitestplugins_LTLIBRARIES = noinst_HEADERS = nobase_apiinclude_HEADERS = +nobase_include_HEADERS = vppapitestpluginsdir = ${libdir}/vpp_api_test_plugins vpppluginsdir = ${libdir}/vpp_plugins @@ -32,6 +33,10 @@ if ENABLE_ACL_PLUGIN include acl.am endif +if WITH_DPDK +include dpdk.am +endif + if ENABLE_FLOWPERPKT_PLUGIN include flowperpkt.am endif diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am new file mode 100644 index 00000000..212bbb73 --- /dev/null +++ b/src/plugins/dpdk.am @@ -0,0 +1,50 @@ +# Copyright (c) 2016 Cisco Systems, Inc. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +vppapitestplugins_LTLIBRARIES += dpdk_test_plugin.la +vppplugins_LTLIBRARIES += dpdk_plugin.la + +dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl + +dpdk_plugin_la_SOURCES = \ + dpdk/init.c \ + dpdk/main.c \ + dpdk/buffer.c \ + dpdk/thread.c \ + dpdk/device/cli.c \ + dpdk/device/dpdk_priv.h \ + dpdk/device/device.c \ + dpdk/device/format.c \ + dpdk/device/node.c \ + dpdk/hqos/hqos.c \ + dpdk/ipsec/esp_encrypt.c \ + dpdk/ipsec/esp_decrypt.c \ + dpdk/ipsec/crypto_node.c \ + dpdk/ipsec/cli.c \ + dpdk/ipsec/ipsec.c \ + dpdk/api/dpdk_plugin.api.h + +API_FILES += dpdk/api/dpdk.api + +nobase_include_HEADERS += \ + dpdk/device/dpdk.h \ + dpdk/api/dpdk_all_api_h.h + +nobase_include_HEADERS += \ + dpdk/ipsec/ipsec.h \ + dpdk/ipsec/esp.h + +dpdk_test_plugin_la_SOURCES = \ + dpdk/api/dpdk_test.c dpdk/api/dpdk_plugin.api.h + +# vi:syntax=automake diff --git a/src/plugins/dpdk/api/dpdk.api b/src/plugins/dpdk/api/dpdk.api new file mode 100644 index 00000000..21215d45 --- /dev/null +++ b/src/plugins/dpdk/api/dpdk.api @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief DPDK interface HQoS pipe profile set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param pipe - pipe ID within its subport + @param profile - pipe profile ID +*/ +define sw_interface_set_dpdk_hqos_pipe { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 pipe; + u32 profile; +}; + +/** \brief DPDK interface HQoS pipe profile set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_pipe_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS subport parameters set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param tb_rate - subport token bucket rate (measured in bytes/second) + @param tb_size - subport token bucket size (measured in credits) + @param tc_rate - subport traffic class 0 .. 
3 rates (measured in bytes/second) + @param tc_period - enforcement period for rates (measured in milliseconds) +*/ +define sw_interface_set_dpdk_hqos_subport { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 tb_rate; + u32 tb_size; + u32 tc_rate[4]; + u32 tc_period; +}; + +/** \brief DPDK interface HQoS subport parameters set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_subport_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS tctbl entry set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param entry - entry index ID + @param tc - traffic class (0 .. 3) + @param queue - traffic class queue (0 .. 3) +*/ +define sw_interface_set_dpdk_hqos_tctbl { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 entry; + u32 tc; + u32 queue; +}; + +/** \brief DPDK interface HQoS tctbl entry set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_tctbl_reply { + u32 context; + i32 retval; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + \ No newline at end of file diff --git a/src/plugins/dpdk/api/dpdk_all_api_h.h b/src/plugins/dpdk/api/dpdk_all_api_h.h new file mode 100644 index 00000000..15eb98d6 --- /dev/null +++ b/src/plugins/dpdk/api/dpdk_all_api_h.h @@ -0,0 +1,19 @@ + +/* + * dpdk_all_api_h.h - skeleton vpp engine plug-in api #include file + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include diff --git a/src/plugins/dpdk/api/dpdk_msg_enum.h b/src/plugins/dpdk/api/dpdk_msg_enum.h new file mode 100644 index 00000000..952ce6ad --- /dev/null +++ b/src/plugins/dpdk/api/dpdk_msg_enum.h @@ -0,0 +1,31 @@ + +/* + * dpdk_msg_enum.h - skeleton vpp engine plug-in message enumeration + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_dpdk_msg_enum_h +#define included_dpdk_msg_enum_h + +#include + +#define vl_msg_id(n,h) n, +typedef enum { +#include + /* We'll want to know how many messages IDs we need... 
*/ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_dpdk_msg_enum_h */ diff --git a/src/plugins/dpdk/api/dpdk_test.c b/src/plugins/dpdk/api/dpdk_test.c new file mode 100644 index 00000000..9fe0f934 --- /dev/null +++ b/src/plugins/dpdk/api/dpdk_test.c @@ -0,0 +1,397 @@ + +/* + * dpdk_test.c - skeleton vpp-api-test plug-in + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include + +uword unformat_sw_if_index (unformat_input_t * input, va_list * args); + +/* Declare message IDs */ +#include + +/* define message structures */ +#define vl_typedefs +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include +#undef vl_api_version + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} dpdk_test_main_t; + +dpdk_test_main_t dpdk_test_main; + +#define foreach_standard_reply_retval_handler \ +_(sw_interface_set_dpdk_hqos_pipe_reply) \ +_(sw_interface_set_dpdk_hqos_subport_reply) \ +_(sw_interface_set_dpdk_hqos_tctbl_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = dpdk_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY, \ + sw_interface_set_dpdk_hqos_pipe_reply) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY, \ + sw_interface_set_dpdk_hqos_subport_reply) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \ + sw_interface_set_dpdk_hqos_tctbl_reply) + +/* M: construct, but don't yet send a message */ +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + dm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + dm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return 
(vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + +static int +api_sw_interface_set_dpdk_hqos_pipe (vat_main_t * vam) +{ + dpdk_test_main_t * dm = &dpdk_test_main; + unformat_input_t *i = vam->input; + vl_api_sw_interface_set_dpdk_hqos_pipe_t *mp; + f64 timeout; + u32 sw_if_index; + u8 sw_if_index_set = 0; + u32 subport; + u8 subport_set = 0; + u32 pipe; + u8 pipe_set = 0; + u32 profile; + u8 profile_set = 0; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "rx sw_if_index %u", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "subport %u", &subport)) + subport_set = 1; + else if (unformat (i, "pipe %u", &pipe)) + pipe_set = 1; + else if (unformat (i, "profile %u", &profile)) + profile_set = 1; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + if (subport_set == 0) + { + errmsg ("missing subport "); + return -99; + } + + if (pipe_set == 0) + { + errmsg ("missing pipe"); + return -99; + } + + if (profile_set == 0) + { + errmsg ("missing profile"); + return -99; + } + + M (SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe); + + mp->sw_if_index = ntohl (sw_if_index); + mp->subport = ntohl (subport); + mp->pipe = ntohl (pipe); + mp->profile = ntohl (profile); + + + S; + W; + /* NOTREACHED */ + return 0; +} + +static int +api_sw_interface_set_dpdk_hqos_subport (vat_main_t * vam) +{ + dpdk_test_main_t * dm = &dpdk_test_main; + unformat_input_t *i = vam->input; + vl_api_sw_interface_set_dpdk_hqos_subport_t *mp; + f64 timeout; + u32 sw_if_index; + u8 sw_if_index_set = 0; + u32 subport; + u8 subport_set = 0; + u32 tb_rate = 1250000000; /* 10GbE */ + u32 tb_size = 1000000; + u32 tc_rate[] = { 1250000000, 1250000000, 1250000000, 1250000000 }; + u32 tc_period = 10; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "rx sw_if_index %u", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "subport %u", &subport)) + subport_set = 1; + else if (unformat (i, "rate %u", &tb_rate)) + { + u32 tc_id; + + for (tc_id = 0; tc_id < (sizeof (tc_rate) / sizeof (tc_rate[0])); + tc_id++) + tc_rate[tc_id] = tb_rate; + } + else if (unformat (i, "bktsize %u", &tb_size)) + ; + else if (unformat (i, "tc0 %u", &tc_rate[0])) + ; + else if (unformat (i, "tc1 %u", &tc_rate[1])) + ; + else if (unformat (i, "tc2 %u", &tc_rate[2])) + ; + else if (unformat (i, "tc3 %u", &tc_rate[3])) + ; + else if (unformat (i, "period %u", &tc_period)) + ; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + if (subport_set == 0) + { + errmsg ("missing subport "); + return -99; + } + + M (SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport); + + mp->sw_if_index = ntohl (sw_if_index); + mp->subport = ntohl (subport); + mp->tb_rate = ntohl (tb_rate); + mp->tb_size = ntohl (tb_size); + mp->tc_rate[0] = ntohl (tc_rate[0]); + mp->tc_rate[1] = ntohl (tc_rate[1]); + mp->tc_rate[2] = ntohl (tc_rate[2]); + mp->tc_rate[3] = ntohl (tc_rate[3]); + mp->tc_period = ntohl (tc_period); + + S; + W; + /* NOTREACHED */ + return 0; +} + +static int +api_sw_interface_set_dpdk_hqos_tctbl (vat_main_t * vam) +{ + dpdk_test_main_t * dm = &dpdk_test_main; + unformat_input_t *i = vam->input; + vl_api_sw_interface_set_dpdk_hqos_tctbl_t *mp; + f64 timeout; + u32 sw_if_index; + u8 sw_if_index_set 
= 0; + u8 entry_set = 0; + u8 tc_set = 0; + u8 queue_set = 0; + u32 entry, tc, queue; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "rx sw_if_index %u", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "entry %d", &entry)) + entry_set = 1; + else if (unformat (i, "tc %d", &tc)) + tc_set = 1; + else if (unformat (i, "queue %d", &queue)) + queue_set = 1; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + if (entry_set == 0) + { + errmsg ("missing entry "); + return -99; + } + + if (tc_set == 0) + { + errmsg ("missing traffic class "); + return -99; + } + + if (queue_set == 0) + { + errmsg ("missing queue "); + return -99; + } + + M (SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl); + + mp->sw_if_index = ntohl (sw_if_index); + mp->entry = ntohl (entry); + mp->tc = ntohl (tc); + mp->queue = ntohl (queue); + + S; + W; + /* NOTREACHED */ + return 0; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(sw_interface_set_dpdk_hqos_pipe, \ + "rx sw_if_index subport pipe \n" \ + "profile \n") \ +_(sw_interface_set_dpdk_hqos_subport, \ + "rx sw_if_index subport [rate ]\n" \ + "[bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ]\n") \ +_(sw_interface_set_dpdk_hqos_tctbl, \ + "rx sw_if_index entry tc queue \n") + +void vat_api_hookup (vat_main_t *vam) +{ + dpdk_test_main_t * dm __attribute__((unused)) = &dpdk_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * vat_plugin_register (vat_main_t *vam) +{ + dpdk_test_main_t * dm = &dpdk_test_main; + u8 * name; + + dm->vat_main = vam; + + /* Ask the vpp engine for the first assigned message-id */ + name = format (0, "dpdk_%08x%c", api_version, 0); + dm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (dm->msg_id_base != (u16) ~0) + vat_api_hookup (vam); + + vec_free(name); + + return 0; +} diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c new file mode 100644 index 00000000..2765c292 --- /dev/null +++ b/src/plugins/dpdk/buffer.c @@ -0,0 +1,588 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer.c: allocate/free network buffers. 
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @file + * + * Allocate/free network buffers. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, + "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); + +static_always_inline void +dpdk_rte_pktmbuf_free (vlib_main_t * vm, vlib_buffer_t * b) +{ + vlib_buffer_t *hb = b; + struct rte_mbuf *mb; + u32 next, flags; + mb = rte_mbuf_from_vlib_buffer (hb); + +next: + flags = b->flags; + next = b->next_buffer; + mb = rte_mbuf_from_vlib_buffer (b); + + if (PREDICT_FALSE (b->n_add_refs)) + { + rte_mbuf_refcnt_update (mb, b->n_add_refs); + b->n_add_refs = 0; + } + + rte_pktmbuf_free_seg (mb); + + if (flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, next); + goto next; + } +} + +static void +del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) +{ + u32 i; + vlib_buffer_t *b; + + for (i = 0; i < vec_len (f->buffers); i++) + { + b = vlib_get_buffer (vm, f->buffers[i]); + dpdk_rte_pktmbuf_free (vm, b); + } + + vec_free (f->name); + vec_free (f->buffers); +} + +/* Add buffer free list. */ +static void +dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + u32 merge_index; + int i; + + ASSERT (os_get_cpu_number () == 0); + + f = vlib_buffer_get_free_list (vm, free_list_index); + + merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); + if (merge_index != ~0 && merge_index != free_list_index) + { + vlib_buffer_merge_free_lists (pool_elt_at_index + (bm->buffer_free_list_pool, merge_index), + f); + } + + del_free_list (vm, f); + + /* Poison it. */ + memset (f, 0xab, sizeof (f[0])); + + pool_put (bm->buffer_free_list_pool, f); + + for (i = 1; i < vec_len (vlib_mains); i++) + { + bm = vlib_mains[i]->buffer_main; + f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);; + memset (f, 0xab, sizeof (f[0])); + pool_put (bm->buffer_free_list_pool, f); + } +} + +/* Make sure free list has at least given number of free buffers. 
*/ +static uword +fill_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * fl, uword min_free_buffers) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_buffer_t *b0, *b1, *b2, *b3; + int n, i; + u32 bi0, bi1, bi2, bi3; + unsigned socket_id = rte_socket_id (); + struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id]; + struct rte_mbuf *mb0, *mb1, *mb2, *mb3; + + /* Too early? */ + if (PREDICT_FALSE (rmp == 0)) + return 0; + + /* Already have enough free buffers on free list? */ + n = min_free_buffers - vec_len (fl->buffers); + if (n <= 0) + return min_free_buffers; + + /* Always allocate round number of buffers. */ + n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32)); + + /* Always allocate new buffers in reasonably large sized chunks. */ + n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); + + vec_validate (vm->mbuf_alloc_list, n - 1); + + if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) + return 0; + + _vec_len (vm->mbuf_alloc_list) = n; + + i = 0; + + while (i < (n - 7)) + { + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 4]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 5]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 6]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 7]), STORE); + + mb0 = vm->mbuf_alloc_list[i]; + mb1 = vm->mbuf_alloc_list[i + 1]; + mb2 = vm->mbuf_alloc_list[i + 2]; + mb3 = vm->mbuf_alloc_list[i + 3]; + + ASSERT (rte_mbuf_refcnt_read (mb0) == 0); + ASSERT (rte_mbuf_refcnt_read (mb1) == 0); + ASSERT (rte_mbuf_refcnt_read (mb2) == 0); + ASSERT (rte_mbuf_refcnt_read (mb3) == 0); + + rte_mbuf_refcnt_set (mb0, 1); + rte_mbuf_refcnt_set (mb1, 1); + rte_mbuf_refcnt_set (mb2, 1); + rte_mbuf_refcnt_set (mb3, 1); + + b0 = vlib_buffer_from_rte_mbuf (mb0); + b1 = vlib_buffer_from_rte_mbuf (mb1); + b2 = vlib_buffer_from_rte_mbuf (mb2); + b3 = vlib_buffer_from_rte_mbuf (mb3); + + bi0 = vlib_get_buffer_index (vm, b0); + bi1 = vlib_get_buffer_index (vm, b1); + bi2 = vlib_get_buffer_index (vm, b2); + bi3 = vlib_get_buffer_index (vm, b3); + + vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi1, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi2, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi3, CLIB_CACHE_LINE_BYTES); + + vlib_buffer_init_for_free_list (b0, fl); + vlib_buffer_init_for_free_list (b1, fl); + vlib_buffer_init_for_free_list (b2, fl); + vlib_buffer_init_for_free_list (b3, fl); + + if (fl->buffer_init_function) + { + fl->buffer_init_function (vm, fl, &bi0, 1); + fl->buffer_init_function (vm, fl, &bi1, 1); + fl->buffer_init_function (vm, fl, &bi2, 1); + fl->buffer_init_function (vm, fl, &bi3, 1); + } + i += 4; + } + + while (i < n) + { + mb0 = vm->mbuf_alloc_list[i]; + + ASSERT (rte_mbuf_refcnt_read (mb0) == 0); + rte_mbuf_refcnt_set (mb0, 1); + + b0 = vlib_buffer_from_rte_mbuf (mb0); + bi0 = vlib_get_buffer_index (vm, b0); + + vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); + + vlib_buffer_init_for_free_list (b0, fl); + + if (fl->buffer_init_function) + fl->buffer_init_function (vm, fl, &bi0, 1); + i++; + } + + fl->n_alloc += n; + + return n; +} + +static u32 +alloc_from_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + u32 * alloc_buffers, u32 n_alloc_buffers) +{ + u32 *dst, *src; + uword len, n_filled; + + dst = alloc_buffers; + + n_filled = fill_free_list (vm, free_list, 
n_alloc_buffers); + if (n_filled == 0) + return 0; + + len = vec_len (free_list->buffers); + ASSERT (len >= n_alloc_buffers); + + src = free_list->buffers + len - n_alloc_buffers; + clib_memcpy (dst, src, n_alloc_buffers * sizeof (u32)); + + _vec_len (free_list->buffers) -= n_alloc_buffers; + + return n_alloc_buffers; +} + +/* Allocate a given number of buffers into given array. + Returns number actually allocated which will be either zero or + number requested. */ +u32 +dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + return alloc_from_free_list + (vm, + pool_elt_at_index (bm->buffer_free_list_pool, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX), + buffers, n_buffers); +} + + +u32 +dpdk_buffer_alloc_from_free_list (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); + return alloc_from_free_list (vm, f, buffers, n_buffers); +} + +static_always_inline void +vlib_buffer_free_inline (vlib_main_t * vm, + u32 * buffers, u32 n_buffers, u32 follow_buffer_next) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *fl; + u32 fi; + int i; + u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, + u32 follow_buffer_next); + + cb = bm->buffer_free_callback; + + if (PREDICT_FALSE (cb != 0)) + n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); + + if (!n_buffers) + return; + + for (i = 0; i < n_buffers; i++) + { + vlib_buffer_t *b; + + b = vlib_get_buffer (vm, buffers[i]); + + fl = vlib_buffer_get_buffer_free_list (vm, b, &fi); + + /* The only current use of this callback: multicast recycle */ + if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) + { + int j; + + vlib_buffer_add_to_free_list + (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); + + for (j = 0; j < vec_len (bm->announce_list); j++) + { + if (fl == bm->announce_list[j]) + goto already_announced; + } + vec_add1 (bm->announce_list, fl); + already_announced: + ; + } + else + { + if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) + dpdk_rte_pktmbuf_free (vm, b); + } + } + if (vec_len (bm->announce_list)) + { + vlib_buffer_free_list_t *fl; + for (i = 0; i < vec_len (bm->announce_list); i++) + { + fl = bm->announce_list[i]; + fl->buffers_added_to_freelist_function (vm, fl); + } + _vec_len (bm->announce_list) = 0; + } +} + +static void +dpdk_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 1); +} + +static void +dpdk_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 0); +} + +static void +dpdk_packet_template_init (vlib_main_t * vm, + void *vt, + void *packet_data, + uword n_packet_data_bytes, + uword min_n_buffers_each_physmem_alloc, u8 * name) +{ + vlib_packet_template_t *t = (vlib_packet_template_t *) vt; + + vlib_worker_thread_barrier_sync (vm); + memset (t, 0, sizeof (t[0])); + + vec_add (t->packet_data, packet_data, n_packet_data_bytes); + + vlib_worker_thread_barrier_release (vm); +} + +clib_error_t * +vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, + unsigned socket_id) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_physmem_main_t *vpm = &vm->physmem_main; + struct rte_mempool *rmp; + int i; + + vec_validate_aligned (dm->pktmbuf_pools, socket_id, 
CLIB_CACHE_LINE_BYTES); + + /* pool already exists, nothing to do */ + if (dm->pktmbuf_pools[socket_id]) + return 0; + + u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); + + rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ + num_mbufs, /* number of mbufs */ + 512, /* cache size */ + VLIB_BUFFER_HDR_SIZE, /* priv size */ + VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ + socket_id); /* cpu socket */ + + if (rmp) + { + { + uword this_pool_end; + uword this_pool_start; + uword this_pool_size; + uword save_vpm_start, save_vpm_end, save_vpm_size; + struct rte_mempool_memhdr *memhdr; + + this_pool_start = ~0ULL; + this_pool_end = 0LL; + + STAILQ_FOREACH (memhdr, &rmp->mem_list, next) + { + if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) + this_pool_end = (uword) (memhdr->addr + memhdr->len); + if (((uword) memhdr->addr) < this_pool_start) + this_pool_start = (uword) (memhdr->addr); + } + ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); + this_pool_size = this_pool_end - this_pool_start; + + if (CLIB_DEBUG > 1) + { + clib_warning ("%s: pool start %llx pool end %llx pool size %lld", + pool_name, this_pool_start, this_pool_end, + this_pool_size); + clib_warning + ("before: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + save_vpm_start = vpm->virtual.start; + save_vpm_end = vpm->virtual.end; + save_vpm_size = vpm->virtual.size; + + if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) + vpm->virtual.start = this_pool_start; + if (this_pool_end > vpm->virtual.end) + vpm->virtual.end = this_pool_end; + + vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; + + if (CLIB_DEBUG > 1) + { + clib_warning + ("after: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + /* check if fits into buffer index range */ + if ((u64) vpm->virtual.size > + ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) + { + clib_warning ("physmem: virtual size out of range!"); + vpm->virtual.start = save_vpm_start; + vpm->virtual.end = save_vpm_end; + vpm->virtual.size = save_vpm_size; + rmp = 0; + } + } + if (rmp) + { + dm->pktmbuf_pools[socket_id] = rmp; + vec_free (pool_name); + return 0; + } + } + + vec_free (pool_name); + + /* no usable pool for this socket, try to use pool from another one */ + for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) + { + if (dm->pktmbuf_pools[i]) + { + clib_warning + ("WARNING: Failed to allocate mempool for CPU socket %u. 
" + "Threads running on socket %u will use socket %u mempool.", + socket_id, socket_id, i); + dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i]; + return 0; + } + } + + return clib_error_return (0, "failed to allocate mempool on socket %u", + socket_id); +} + +#if CLIB_DEBUG > 0 + +u32 *vlib_buffer_state_validation_lock; +uword *vlib_buffer_state_validation_hash; +void *vlib_buffer_state_heap; + +static clib_error_t * +buffer_state_validation_init (vlib_main_t * vm) +{ + void *oldheap; + + vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); + + oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + + vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); + vec_validate_aligned (vlib_buffer_state_validation_lock, 0, + CLIB_CACHE_LINE_BYTES); + clib_mem_set_heap (oldheap); + return 0; +} + +VLIB_INIT_FUNCTION (buffer_state_validation_init); +#endif + +static vlib_buffer_callbacks_t callbacks = { + .vlib_buffer_alloc_cb = &dpdk_buffer_alloc, + .vlib_buffer_alloc_from_free_list_cb = &dpdk_buffer_alloc_from_free_list, + .vlib_buffer_free_cb = &dpdk_buffer_free, + .vlib_buffer_free_no_next_cb = &dpdk_buffer_free_no_next, + .vlib_packet_template_init_cb = &dpdk_packet_template_init, + .vlib_buffer_delete_free_list_cb = &dpdk_buffer_delete_free_list, +}; + +static clib_error_t * +dpdk_buffer_init (vlib_main_t * vm) +{ + vlib_buffer_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_buffer_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c new file mode 100644 index 00000000..d2def2fc --- /dev/null +++ b/src/plugins/dpdk/device/cli.c @@ -0,0 +1,2079 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +/** + * @file + * @brief CLI for DPDK Abstraction Layer and pcap Tx Trace. + * + * This file contains the source code for CLI for DPDK + * Abstraction Layer and pcap Tx Trace. 
+ */ + + +static clib_error_t * +get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd, + dpdk_device_config_t ** devconf) +{ + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + struct rte_eth_dev_info dev_info; + uword *p = 0; + clib_error_t *error = NULL; + + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + if (subport_id != 0) + { + error = clib_error_return (0, "Invalid subport"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + *xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get ((*xd)->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + (*devconf) = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + (*devconf) = &dm->conf->default_devconf; + +done: + return error; +} + +static clib_error_t * +pcap_trace_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ +#define PCAP_DEF_PKT_TO_CAPTURE (100) + + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + u8 *filename; + u8 *chroot_filename = 0; + u32 max = 0; + int enabled = 0; + int errorFlag = 0; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "on")) + { + if (dm->tx_pcap_enable == 0) + { + enabled = 1; + } + else + { + vlib_cli_output (vm, "pcap tx capture already on..."); + errorFlag = 1; + break; + } + } + else if (unformat (line_input, "off")) + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, "captured %d pkts...", + dm->pcap_main.n_packets_captured + 1); + if (dm->pcap_main.n_packets_captured) + { + dm->pcap_main.n_packets_to_capture = + dm->pcap_main.n_packets_captured; + error = pcap_write (&dm->pcap_main); + if (error) + clib_error_report (error); + else + vlib_cli_output (vm, "saved to %s...", dm->pcap_filename); + } + + dm->tx_pcap_enable = 0; + } + else + { + vlib_cli_output (vm, "pcap tx capture already off..."); + errorFlag = 1; + break; + } + } + else if (unformat (line_input, "max %d", &max)) + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, + "can't change max value while pcap tx capture active..."); + errorFlag = 1; + break; + } + } + else if (unformat (line_input, "intfc %U", + unformat_vnet_sw_interface, dm->vnet_main, + &dm->pcap_sw_if_index)) + ; + + else if (unformat (line_input, "intfc any")) + { + dm->pcap_sw_if_index = 0; + } + else if (unformat (line_input, "file %s", &filename)) + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, + "can't change file while pcap tx capture active..."); + errorFlag = 1; + break; + } + + /* Brain-police user path input */ + if (strstr ((char *) filename, "..") + || index ((char *) filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + vlib_cli_output (vm, + "Hint: Only filename, do not enter directory structure."); + vec_free (filename); + errorFlag = 1; + break; + } + + chroot_filename = format (0, "/tmp/%s%c", filename, 0); + vec_free 
(filename); + } + else if (unformat (line_input, "status")) + { + if (dm->pcap_sw_if_index == 0) + { + vlib_cli_output (vm, "max is %d for any interface to file %s", + dm-> + pcap_pkts_to_capture ? dm->pcap_pkts_to_capture + : PCAP_DEF_PKT_TO_CAPTURE, + dm-> + pcap_filename ? dm->pcap_filename : (u8 *) + "/tmp/vpe.pcap"); + } + else + { + vlib_cli_output (vm, "max is %d for interface %U to file %s", + dm-> + pcap_pkts_to_capture ? dm->pcap_pkts_to_capture + : PCAP_DEF_PKT_TO_CAPTURE, + format_vnet_sw_if_index_name, dm->vnet_main, + dm->pcap_sw_if_index, + dm-> + pcap_filename ? dm->pcap_filename : (u8 *) + "/tmp/vpe.pcap"); + } + + if (dm->tx_pcap_enable == 0) + { + vlib_cli_output (vm, "pcap tx capture is off..."); + } + else + { + vlib_cli_output (vm, "pcap tx capture is on: %d of %d pkts...", + dm->pcap_main.n_packets_captured, + dm->pcap_main.n_packets_to_capture); + } + break; + } + + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + errorFlag = 1; + break; + } + } + unformat_free (line_input); + + + if (errorFlag == 0) + { + /* Since no error, save configured values. */ + if (chroot_filename) + { + if (dm->pcap_filename) + vec_free (dm->pcap_filename); + vec_add1 (chroot_filename, 0); + dm->pcap_filename = chroot_filename; + } + + if (max) + dm->pcap_pkts_to_capture = max; + + + if (enabled) + { + if (dm->pcap_filename == 0) + dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0); + + memset (&dm->pcap_main, 0, sizeof (dm->pcap_main)); + dm->pcap_main.file_name = (char *) dm->pcap_filename; + dm->pcap_main.n_packets_to_capture = PCAP_DEF_PKT_TO_CAPTURE; + if (dm->pcap_pkts_to_capture) + dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture; + + dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; + dm->tx_pcap_enable = 1; + vlib_cli_output (vm, "pcap tx capture on..."); + } + } + else if (chroot_filename) + vec_free (chroot_filename); + + + return error; +} + +/*? + * This command is used to start or stop a packet capture, or show + * the status of packet capture. + * + * This command has the following optional parameters: + * + * - on|off - Used to start or stop a packet capture. + * + * - max - Depth of local buffer. Once 'nn' number + * of packets have been received, buffer is flushed to file. Once another + * 'nn' number of packets have been received, buffer is flushed + * to file, overwriting previous write. If not entered, value defaults + * to 100. Can only be updated if packet capture is off. + * + * - intfc |any - Used to specify a given interface, + * or use 'any' to run packet capture on all interfaces. + * 'any' is the default if not provided. Settings from a previous + * packet capture are preserved, so 'any' can be used to reset + * the interface setting. + * + * - file - Used to specify the output filename. The file will + * be placed in the '/tmp' directory, so only the filename is + * supported. Directory should not be entered. If file already exists, file + * will be overwritten. If no filename is provided, '/tmp/vpe.pcap' + * will be used. Can only be updated if packet capture is off. + * + * - status - Displays the current status and configured attributes + * associated with a packet capture. If packet capture is in progress, + * 'status' will also return the number of packets currently in + * the local buffer. All additional attributes entered on the command line + * with 'status' will be ignored and not applied.
+ * + * @cliexpar + * Example of how to display the status of a tx packet capture when off: + * @cliexstart{pcap tx trace status} + * max is 100, for any interface to file /tmp/vpe.pcap + * pcap tx capture is off... + * @cliexend + * Example of how to start a tx packet capture: + * @cliexstart{pcap tx trace on max 35 intfc GigabitEthernet0/8/0 file vppTest.pcap} + * pcap tx capture on... + * @cliexend + * Example of how to display the status of a tx packet capture in progress: + * @cliexstart{pcap tx trace status} + * max is 35, for interface GigabitEthernet0/8/0 to file /tmp/vppTest.pcap + * pcap tx capture is on: 20 of 35 pkts... + * @cliexend + * Example of how to stop a tx packet capture: + * @cliexstart{vppctl pcap tx trace off} + * captured 21 pkts... + * saved to /tmp/vppTest.pcap... + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (pcap_trace_command, static) = { + .path = "pcap tx trace", + .short_help = + "pcap tx trace [on|off] [max ] [intfc |any] [file ] [status]", + .function = pcap_trace_command_fn, +}; +/* *INDENT-ON* */ + + +static clib_error_t * +show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + struct rte_mempool *rmp; + int i; + + for (i = 0; i < vec_len (dpdk_main.pktmbuf_pools); i++) + { + rmp = dpdk_main.pktmbuf_pools[i]; + if (rmp) + { + unsigned count = rte_mempool_avail_count (rmp); + unsigned free_count = rte_mempool_in_use_count (rmp); + + vlib_cli_output (vm, + "name=\"%s\" available = %7d allocated = %7d total = %7d\n", + rmp->name, (u32) count, (u32) free_count, + (u32) (count + free_count)); + } + else + { + vlib_cli_output (vm, "rte_mempool is NULL (!)\n"); + } + } + return 0; +} + +/*? + * This command displays statistics of each DPDK mempool. + * + * @cliexpar + * Example of how to display DPDK buffer data: + * @cliexstart{show dpdk buffer} + * name="mbuf_pool_socket0" available = 15104 allocated = 1280 total = 16384 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_bufferr,static) = { + .path = "show dpdk buffer", + .short_help = "show dpdk buffer", + .function = show_dpdk_buffer, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static clib_error_t * +test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + static u32 *allocated_buffers; + u32 n_alloc = 0; + u32 n_free = 0; + u32 first, actual_alloc; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "allocate %d", &n_alloc)) + ; + else if (unformat (input, "free %d", &n_free)) + ; + else + break; + } + + if (n_free) + { + if (vec_len (allocated_buffers) < n_free) + return clib_error_return (0, "Can't free %d, only %d allocated", + n_free, vec_len (allocated_buffers)); + + first = vec_len (allocated_buffers) - n_free; + vlib_buffer_free (vm, allocated_buffers + first, n_free); + _vec_len (allocated_buffers) = first; + } + if (n_alloc) + { + first = vec_len (allocated_buffers); + vec_validate (allocated_buffers, + vec_len (allocated_buffers) + n_alloc - 1); + + actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first, + n_alloc); + _vec_len (allocated_buffers) = first + actual_alloc; + + if (actual_alloc < n_alloc) + vlib_cli_output (vm, "WARNING: only allocated %d buffers", + actual_alloc); + } + + vlib_cli_output (vm, "Currently %d buffers allocated", + vec_len (allocated_buffers)); + + if (allocated_buffers && vec_len (allocated_buffers) == 0) + vec_free (allocated_buffers); + + return 0; +} + +/*? 
+ * This command tests the allocation and freeing of DPDK buffers. + * If both 'allocate' and 'free' are entered on the + * same command, the 'free' is executed first. If no + * parameters are provided, this command displays how many DPDK buffers + * the test command has allocated. + * + * @cliexpar + * @parblock + * + * Example of how to display how many DPDK buffers the test command has allocated: + * @cliexstart{test dpdk buffer} + * Currently 0 buffers allocated + * @cliexend + * + * Example of how to allocate DPDK buffers using the test command: + * @cliexstart{test dpdk buffer allocate 10} + * Currently 10 buffers allocated + * @cliexend + * + * Example of how to free DPDK buffers allocated by the test command: + * @cliexstart{test dpdk buffer free 10} + * Currently 0 buffers allocated + * @cliexend + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = { + .path = "test dpdk buffer", + .short_help = "test dpdk buffer [allocate ] [free ]", + .function = test_dpdk_buffer, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 nb_rx_desc = (u32) ~ 0; + u32 nb_tx_desc = (u32) ~ 0; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "tx %d", &nb_tx_desc)) + ; + else if (unformat (line_input, "rx %d", &nb_rx_desc)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + { + error = + clib_error_return (0, + "number of descriptors can be set only for " + "physical devices"); + goto done; + } + + if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) && + (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc)) + { + error = clib_error_return (0, "nothing changed"); + goto done; + } + + if (nb_rx_desc != (u32) ~ 0) + xd->nb_rx_desc = nb_rx_desc; + + if (nb_tx_desc != (u32) ~ 0) + xd->nb_tx_desc = nb_tx_desc; + + error = dpdk_port_setup (dm, xd); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command sets the number of DPDK 'rx' and + * 'tx' descriptors for the given physical interface. Use + * the command 'show hardware-interface' to display the + * current descriptor allocation.
+ * + * @cliexpar + * Example of how to set the DPDK interface descriptors: + * @cliexcmd{set dpdk interface descriptors GigabitEthernet0/8/0 rx 512 tx 512} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = { + .path = "set dpdk interface descriptors", + .short_help = "set dpdk interface descriptors [rx ] [tx ]", + .function = set_dpdk_if_desc, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + int cpu; + + if (tm->n_vlib_mains == 1) + vlib_cli_output (vm, "All interfaces are handled by main thread"); + + for (cpu = 0; cpu < vec_len (dm->devices_by_cpu); cpu++) + { + if (cpu >= dm->input_cpu_first_index && + cpu < (dm->input_cpu_first_index + dm->input_cpu_count)) + vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, + vlib_worker_threads[cpu].name, + vlib_worker_threads[cpu].lcore_id); + + /* *INDENT-OFF* */ + vec_foreach(dq, dm->devices_by_cpu[cpu]) + { + u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; + vnet_hw_interface_t * hi = vnet_get_hw_interface(dm->vnet_main, hw_if_index); + vlib_cli_output(vm, " %v queue %u", hi->name, dq->queue_id); + } + /* *INDENT-ON* */ + } + return 0; +} + +/*? + * This command is used to display the thread and core each + * DPDK interface and queue is assigned to. + * + * @cliexpar + * Example of how to display the DPDK interface placement: + * @cliexstart{show dpdk interface placement} + * Thread 1 (vpp_wk_0 at lcore 1): + * GigabitEthernet0/8/0 queue 0 + * GigabitEthernet0/9/0 queue 0 + * Thread 2 (vpp_wk_1 at lcore 2): + * GigabitEthernet0/8/0 queue 1 + * GigabitEthernet0/9/0 queue 1 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_if_placement,static) = { + .path = "show dpdk interface placement", + .short_help = "show dpdk interface placement", + .function = show_dpdk_if_placement, +}; +/* *INDENT-ON* */ + +static int +dpdk_device_queue_sort (void *a1, void *a2) +{ + dpdk_device_and_queue_t *dq1 = a1; + dpdk_device_and_queue_t *dq2 = a2; + + if (dq1->device > dq2->device) + return 1; + else if (dq1->device < dq2->device) + return -1; + else if (dq1->queue_id > dq2->queue_id) + return 1; + else if (dq1->queue_id < dq2->queue_id) + return -1; + else + return 0; +} + +static clib_error_t * +set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 queue = (u32) 0; + u32 cpu = (u32) ~ 0; + int i; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "queue %d", &queue)) + ; + else if (unformat (line_input, "thread %d", &cpu)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + if (cpu < dm->input_cpu_first_index || + cpu >= (dm->input_cpu_first_index + dm->input_cpu_count)) + { + 
error = clib_error_return (0, "please specify valid thread id"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + for (i = 0; i < vec_len (dm->devices_by_cpu); i++) + { + /* *INDENT-OFF* */ + vec_foreach(dq, dm->devices_by_cpu[i]) + { + if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index && + queue == dq->queue_id) + { + if (cpu == i) /* nothing to do */ + goto done; + + vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]); + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->queue_id = queue; + dq->device = xd->device_index; + xd->cpu_socket_id_by_queue[queue] = + rte_lcore_to_socket_id(vlib_worker_threads[cpu].lcore_id); + + vec_sort_with_function(dm->devices_by_cpu[i], + dpdk_device_queue_sort); + + vec_sort_with_function(dm->devices_by_cpu[cpu], + dpdk_device_queue_sort); + + if (vec_len(dm->devices_by_cpu[i]) == 0) + vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, + VLIB_NODE_STATE_DISABLED); + + if (vec_len(dm->devices_by_cpu[cpu]) == 1) + vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + + goto done; + } + } + /* *INDENT-ON* */ + } + + error = clib_error_return (0, "not found"); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to assign a given interface, and optionally a + * given queue, to a different thread. This will not create a thread, + * so the thread must already exist. Use '/etc/vpp/startup.conf' + * for the initial thread creation. If the 'queue' is not provided, + * it defaults to 0. + * + * @cliexpar + * Example of how to display the DPDK interface placement: + * @cliexstart{show dpdk interface placement} + * Thread 1 (vpp_wk_0 at lcore 1): + * GigabitEthernet0/8/0 queue 0 + * GigabitEthernet0/9/0 queue 0 + * Thread 2 (vpp_wk_1 at lcore 2): + * GigabitEthernet0/8/0 queue 1 + * GigabitEthernet0/9/0 queue 1 + * @cliexend + * Example of how to assign a DPDK interface and queue to a thread: + * @cliexcmd{set dpdk interface placement GigabitEthernet0/8/0 queue 1 thread 1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { + .path = "set dpdk interface placement", + .short_help = "set dpdk interface placement [queue ] thread ", + .function = set_dpdk_if_placement, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + int cpu; + + if (tm->n_vlib_mains == 1) + vlib_cli_output (vm, "All interfaces are handled by main thread"); + + for (cpu = 0; cpu < vec_len (dm->devices_by_hqos_cpu); cpu++) + { + if (cpu >= dm->hqos_cpu_first_index && + cpu < (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) + vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, + vlib_worker_threads[cpu].name, + vlib_worker_threads[cpu].lcore_id); + + vec_foreach (dq, dm->devices_by_hqos_cpu[cpu]) + { + u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; + vnet_hw_interface_t *hi = + vnet_get_hw_interface (dm->vnet_main, hw_if_index); + vlib_cli_output (vm, " %v queue %u", hi->name, dq->queue_id); + } + } + return 0; +} + +/*? + * This command is used to display the thread and core each + * DPDK output interface and HQoS queue is assigned too. 
+ * + * @cliexpar + * Example of how to display the DPDK output interface and HQoS queue placement: + * @cliexstart{show dpdk interface hqos placement} + * Thread 1 (vpp_hqos-threads_0 at lcore 3): + * GigabitEthernet0/8/0 queue 0 + * Thread 2 (vpp_hqos-threads_1 at lcore 4): + * GigabitEthernet0/9/0 queue 0 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos_placement, static) = { + .path = "show dpdk interface hqos placement", + .short_help = "show dpdk interface hqos placement", + .function = show_dpdk_if_hqos_placement, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 cpu = (u32) ~ 0; + int i; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "thread %d", &cpu)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + return clib_error_return (0, "please specify valid interface name"); + + if (cpu < dm->hqos_cpu_first_index || + cpu >= (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) + { + error = clib_error_return (0, "please specify valid thread id"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + for (i = 0; i < vec_len (dm->devices_by_hqos_cpu); i++) + { + vec_foreach (dq, dm->devices_by_hqos_cpu[i]) + { + if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index) + { + if (cpu == i) /* nothing to do */ + goto done; + + vec_del1 (dm->devices_by_hqos_cpu[i], + dq - dm->devices_by_hqos_cpu[i]); + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->queue_id = 0; + dq->device = xd->device_index; + + vec_sort_with_function (dm->devices_by_hqos_cpu[i], + dpdk_device_queue_sort); + + vec_sort_with_function (dm->devices_by_hqos_cpu[cpu], + dpdk_device_queue_sort); + + goto done; + } + } + } + + error = clib_error_return (0, "not found"); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to assign a given DPDK output interface and + * HQoS queue to a different thread. This will not create a thread, + * so the thread must already exist. Use '/etc/vpp/startup.conf' + * for the initial thread creation. See @ref qos_doc for more details. 
+ * + * @cliexpar + * Example of how to display the DPDK output interface and HQoS queue placement: + * @cliexstart{show dpdk interface hqos placement} + * Thread 1 (vpp_hqos-threads_0 at lcore 3): + * GigabitEthernet0/8/0 queue 0 + * Thread 2 (vpp_hqos-threads_1 at lcore 4): + * GigabitEthernet0/9/0 queue 0 + * @cliexend + * Example of how to assign a DPDK output interface and HQoS queue to a thread: + * @cliexcmd{set dpdk interface hqos placement GigabitEthernet0/8/0 thread 2} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_placement, static) = { + .path = "set dpdk interface hqos placement", + .short_help = "set dpdk interface hqos placement thread ", + .function = set_dpdk_if_hqos_placement, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 subport_id = (u32) ~ 0; + u32 pipe_id = (u32) ~ 0; + u32 profile_id = (u32) ~ 0; + int rv; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "subport %d", &subport_id)) + ; + else if (unformat (line_input, "pipe %d", &pipe_id)) + ; + else if (unformat (line_input, "profile %d", &profile_id)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = + rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, + profile_id); + if (rv) + { + error = clib_error_return (0, "pipe configuration failed"); + goto done; + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to change the profile associated with a HQoS pipe. The + * '' is zero based. Use the command + * 'show dpdk interface hqos' to display the content of each profile. + * See @ref qos_doc for more details. + * + * @note + * Currently there is no API to create a new HQoS pipe profile. One is + * created by default in the code (search for 'hqos_pipe_params_default'). + * Additional profiles can be created in code and the code recompiled. Then use this + * command to assign it, as sketched below.
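+ *
+ * A rough sketch only (the profile name 'hqos_pipe_params_custom' is
+ * hypothetical; the field values mirror the default profile shown by
+ * 'show dpdk interface hqos'): an additional profile is simply another
+ * 'struct rte_sched_pipe_params' entry defined alongside
+ * 'hqos_pipe_params_default':
+ *
+ *   static struct rte_sched_pipe_params hqos_pipe_params_custom = {
+ *     .tb_rate = 305175, .tb_size = 1000000,
+ *     .tc_rate = { 305175, 305175, 305175, 305175 }, .tc_period = 40,
+ *     .wrr_weights = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ *   };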
+ * + * @cliexpar + * Example of how to assign a new profile to a HQoS pipe: + * @cliexcmd{set dpdk interface hqos pipe GigabitEthernet0/8/0 subport 0 pipe 2 profile 1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pipe, static) = +{ + .path = "set dpdk interface hqos pipe", + .short_help = "set dpdk interface hqos pipe subport pipe " + "profile ", + .function = set_dpdk_if_hqos_pipe, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = NULL; + u32 hw_if_index = (u32) ~ 0; + u32 subport_id = (u32) ~ 0; + struct rte_sched_subport_params p; + int rv; + clib_error_t *error = NULL; + u32 tb_rate = (u32) ~ 0; + u32 tb_size = (u32) ~ 0; + u32 tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = + { (u32) ~ 0, (u32) ~ 0, (u32) ~ 0, (u32) ~ 0 }; + u32 tc_period = (u32) ~ 0; + dpdk_device_config_t *devconf = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "subport %d", &subport_id)) + ; + else if (unformat (line_input, "rate %d", &tb_rate)) + ; + else if (unformat (line_input, "bktsize %d", &tb_size)) + ; + else if (unformat (line_input, "tc0 %d", &tc_rate[0])) + ; + else if (unformat (line_input, "tc1 %d", &tc_rate[1])) + ; + else if (unformat (line_input, "tc2 %d", &tc_rate[2])) + ; + else if (unformat (line_input, "tc3 %d", &tc_rate[3])) + ; + else if (unformat (line_input, "period %d", &tc_period)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + error = get_hqos (hw_if_index, subport_id, &xd, &devconf); + + if (error == NULL) + { + /* Copy the current values over to local structure. */ + memcpy (&p, &devconf->hqos.subport[subport_id], sizeof (p)); + + /* Update local structure with input values. */ + if (tb_rate != (u32) ~ 0) + { + p.tb_rate = tb_rate; + p.tc_rate[0] = tb_rate; + p.tc_rate[1] = tb_rate; + p.tc_rate[2] = tb_rate; + p.tc_rate[3] = tb_rate; + } + if (tb_size != (u32) ~ 0) + { + p.tb_size = tb_size; + } + if (tc_rate[0] != (u32) ~ 0) + { + p.tc_rate[0] = tc_rate[0]; + } + if (tc_rate[1] != (u32) ~ 0) + { + p.tc_rate[1] = tc_rate[1]; + } + if (tc_rate[2] != (u32) ~ 0) + { + p.tc_rate[2] = tc_rate[2]; + } + if (tc_rate[3] != (u32) ~ 0) + { + p.tc_rate[3] = tc_rate[3]; + } + if (tc_period != (u32) ~ 0) + { + p.tc_period = tc_period; + } + + /* Apply changes. */ + rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p); + if (rv) + { + error = clib_error_return (0, "subport configuration failed"); + goto done; + } + else + { + /* Successfully applied, so save the input values. */ + memcpy (&devconf->hqos.subport[subport_id], &p, sizeof (p)); + } + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to set the subport level parameters such as token + * bucket rate (bytes per second), token bucket size (bytes), traffic class + * rates (bytes per second) and token update period (milliseconds). + * + * By default, the 'rate' is set to 1250000000 bytes/second (10GbE + * rate) and each of the four traffic classes is set to 100% of the port rate.
+ * If the 'rate' is updated by this command, all four traffic classes + * are assigned the same value. Each of the four traffic classes can be updated + * individually. + * + * @cliexpar + * Example of how to modify the subport attributes for a 1GbE link: + * @cliexcmd{set dpdk interface hqos subport GigabitEthernet0/8/0 subport 0 rate 125000000} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_subport, static) = { + .path = "set dpdk interface hqos subport", + .short_help = "set dpdk interface hqos subport subport " + "[rate ] [bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] " + "[period ]", + .function = set_dpdk_if_hqos_subport, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 tc = (u32) ~ 0; + u32 queue = (u32) ~ 0; + u32 entry = (u32) ~ 0; + u32 val, i; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "entry %d", &entry)) + ; + else if (unformat (line_input, "tc %d", &tc)) + ; + else if (unformat (line_input, "queue %d", &queue)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + if (entry >= 64) + { + error = clib_error_return (0, "invalid entry"); + goto done; + } + if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + { + error = clib_error_return (0, "invalid traffic class"); + goto done; + } + if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + { + error = clib_error_return (0, "invalid traffic class queue"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + /* Should never happen, shut up Coverity warning */ + if (p == 0) + { + error = clib_error_return (0, "no worker registrations?"); + goto done; + } + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + for (i = 0; i < worker_thread_count; i++) + xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to set the traffic class translation table. The + * traffic class translation table is used to map 64 values (0-63) to one of + * four traffic classes and one of four HQoS input queues. Use the 'show + * dpdk interface hqos' command to display the traffic class translation + * table. See @ref qos_doc for more details. + * + * This command has the following parameters: + * + * - - Used to specify the output interface. + * + * - entry - Mapped value (0-63) to assign traffic class and queue to. + * + * - tc - Traffic class (0-3) to be used by the provided mapped value.
+ * + * - queue - HQoS input queue (0-3) to be used by the provided mapped value. + * + * @cliexpar + * Example of how to modify the traffic class translation table: + * @cliexcmd{set dpdk interface hqos tctbl GigabitEthernet0/8/0 entry 16 tc 2 queue 2} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_tctbl, static) = { + .path = "set dpdk interface hqos tctbl", + .short_help = "set dpdk interface hqos tctbl entry tc queue ", + .function = set_dpdk_if_hqos_tctbl, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + clib_error_t *error = NULL; + + /* Device specific data */ + struct rte_eth_dev_info dev_info; + dpdk_device_config_t *devconf = 0; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + /* Should never happen, shut up Coverity warning */ + if (p == 0) + return clib_error_return (0, "no worker registrations?"); + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + /* Packet field configuration */ + u64 mask = (u64) ~ 0; + u32 id = (u32) ~ 0; + u32 offset = (u32) ~ 0; + + /* HQoS params */ + u32 n_subports_per_port, n_pipes_per_subport, tctbl_size; + + u32 i; + + /* Parse input arguments */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "id subport")) + id = 0; + else if (unformat (line_input, "id pipe")) + id = 1; + else if (unformat (line_input, "id tc")) + id = 2; + else if (unformat (line_input, "id %d", &id)) + ; + else if (unformat (line_input, "offset %d", &offset)) + ; + else if (unformat (line_input, "mask %llx", &mask)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + /* Get interface */ + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get (xd->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + if (devconf->hqos_enabled == 0) + { + vlib_cli_output (vm, "HQoS disabled for this interface"); + goto done; + } + + n_subports_per_port = devconf->hqos.port.n_subports_per_port; + n_pipes_per_subport = devconf->hqos.port.n_pipes_per_subport; + tctbl_size = RTE_DIM (devconf->hqos.tc_table); + + /* Validate packet field configuration: id, offset and mask */ + if (id >= 3) + { + error =
clib_error_return (0, "invalid packet field id"); + goto done; + } + + switch (id) + { + case 0: + if (dpdk_hqos_validate_mask (mask, n_subports_per_port) != 0) + { + error = clib_error_return (0, "invalid subport ID mask " + "(n_subports_per_port = %u)", + n_subports_per_port); + goto done; + } + break; + case 1: + if (dpdk_hqos_validate_mask (mask, n_pipes_per_subport) != 0) + { + error = clib_error_return (0, "invalid pipe ID mask " + "(n_pipes_per_subport = %u)", + n_pipes_per_subport); + goto done; + } + break; + case 2: + default: + if (dpdk_hqos_validate_mask (mask, tctbl_size) != 0) + { + error = clib_error_return (0, "invalid TC table index mask " + "(TC table size = %u)", tctbl_size); + goto done; + } + } + + /* Propagate packet field configuration to all workers */ + for (i = 0; i < worker_thread_count; i++) + switch (id) + { + case 0: + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabpos = offset; + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabmask = mask; + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabshr = + __builtin_ctzll (mask); + break; + case 1: + xd->hqos_wt[worker_thread_first + i].hqos_field1_slabpos = offset; + xd->hqos_wt[worker_thread_first + i].hqos_field1_slabmask = mask; + xd->hqos_wt[worker_thread_first + i].hqos_field1_slabshr = + __builtin_ctzll (mask); + break; + case 2: + default: + xd->hqos_wt[worker_thread_first + i].hqos_field2_slabpos = offset; + xd->hqos_wt[worker_thread_first + i].hqos_field2_slabmask = mask; + xd->hqos_wt[worker_thread_first + i].hqos_field2_slabshr = + __builtin_ctzll (mask); + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to set the packet fields required for classifiying the + * incoming packet. As a result of classification process, packet field + * information will be mapped to 5 tuples (subport, pipe, traffic class, pipe, + * color) and stored in packet mbuf. + * + * This command has the following parameters: + * + * - - Used to specify the output interface. + * + * - id subport|pipe|tc - Classification occurs across three fields. + * This parameter indicates which of the three masks are being configured. Legacy + * code used 0-2 to represent these three fields, so 0-2 is still accepted. + * - subport|0 - Currently only one subport is supported, so only + * an empty mask is supported for the subport classification. + * - pipe|1 - Currently, 4096 pipes per subport are supported, so a + * 12-bit mask should be configure to map to the 0-4095 pipes. + * - tc|2 - The translation table (see 'set dpdk interface hqos + * tctbl' command) maps each value (0-63) into one of the 4 traffic classes + * per pipe. A 6-bit mask should be configure to map this field to a traffic class. + * + * - offset - Offset in the packet to apply the 64-bit mask for classification. + * The offset should be on an 8-byte boundary (0,8,16,24..). + * + * - mask - 64-bit mask to apply to packet at the given 'offset'. + * Bits must be contiguous and should not include '0x'. + * + * The default values for the 'pktfield' assumes Ethernet/IPv4/UDP packets with + * no VLAN. Adjust based on expected packet format and desired classification field. + * - 'subport' is always empty (offset 0 mask 0000000000000000) + * - By default, 'pipe' maps to the UDP payload bits 12 .. 
23 (offset 40 + * mask 0000000fff000000) + * - By default, 'tc' maps to the DSCP field in IP header (offset 48 mask + * 00000000000000fc) + * + * @cliexpar + * Example of how to modify the 'pipe' classification filter to match VLAN: + * @cliexcmd{set dpdk interface hqos pktfield GigabitEthernet0/8/0 id pipe offset 8 mask 0000000000000FFF} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pktfield, static) = { + .path = "set dpdk interface hqos pktfield", + .short_help = "set dpdk interface hqos pktfield id subport|pipe|tc offset " + "mask ", + .function = set_dpdk_if_hqos_pktfield, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + dpdk_device_config_hqos_t *cfg; + dpdk_device_hqos_per_hqos_thread_t *ht; + dpdk_device_hqos_per_worker_thread_t *wk; + u32 *tctbl; + u32 hw_if_index = (u32) ~ 0; + u32 profile_id, subport_id, i; + struct rte_eth_dev_info dev_info; + dpdk_device_config_t *devconf = 0; + vlib_thread_registration_t *tr; + uword *p = 0; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify interface name!!"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get (xd->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + if (devconf->hqos_enabled == 0) + { + vlib_cli_output (vm, "HQoS disabled for this interface"); + goto done; + } + + /* Detect the set of worker threads */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + + /* Should never happen, shut up Coverity warning */ + if (p == 0) + { + error = clib_error_return (0, "no worker registrations?"); + goto done; + } + + tr = (vlib_thread_registration_t *) p[0]; + + cfg = &devconf->hqos; + ht = xd->hqos_ht; + wk = &xd->hqos_wt[tr->first_index]; + tctbl = wk->hqos_tc_table; + + vlib_cli_output (vm, " Thread:"); + vlib_cli_output (vm, " Input SWQ size = %u packets", cfg->swq_size); + vlib_cli_output (vm, " Enqueue burst size = %u packets", + ht->hqos_burst_enq); + vlib_cli_output (vm, " Dequeue burst size = %u packets", + ht->hqos_burst_deq); + + vlib_cli_output (vm, + " Packet field 0: slab position = %4u, slab bitmask = 0x%016llx (subport)", + wk->hqos_field0_slabpos, wk->hqos_field0_slabmask); + vlib_cli_output (vm, + " Packet field 1: slab position = %4u, slab bitmask = 0x%016llx (pipe)", + wk->hqos_field1_slabpos,
wk->hqos_field1_slabmask); + vlib_cli_output (vm, + " Packet field 2: slab position = %4u, slab bitmask = 0x%016llx (tc)", + wk->hqos_field2_slabpos, wk->hqos_field2_slabmask); + vlib_cli_output (vm, + " Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)"); + vlib_cli_output (vm, + " [ 0 .. 15]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[0] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[0] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[1] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[1] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[2] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[2] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[3] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[3] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[4] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[4] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[5] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[5] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[6] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[6] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[7] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[7] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[8] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[8] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[9] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[9] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[10] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[10] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[11] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[11] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[12] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[12] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[13] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[13] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[14] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[14] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[15] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[15] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [16 .. 
31]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[16] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[16] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[17] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[17] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[18] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[18] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[19] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[19] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[20] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[20] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[21] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[21] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[22] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[22] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[23] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[23] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[24] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[24] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[25] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[25] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[26] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[26] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[27] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[27] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[28] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[28] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[29] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[29] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[30] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[30] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[31] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[31] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [32 .. 47]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[32] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[32] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[33] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[33] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[34] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[34] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[35] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[35] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[36] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[36] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[37] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[37] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[38] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[38] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[39] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[39] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[40] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[40] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[41] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[41] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[42] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[42] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[43] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[43] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[44] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[44] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[45] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[45] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[46] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[46] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[47] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[47] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [48 .. 
63]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[48] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[48] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[49] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[49] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[50] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[50] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[51] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[51] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[52] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[52] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[53] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[53] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[54] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[54] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[55] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[55] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[56] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[56] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[57] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[57] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[58] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[58] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[59] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[59] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[60] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[60] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[61] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[61] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[62] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[62] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[63] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[63] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, " Port:"); + vlib_cli_output (vm, " Rate = %u bytes/second", cfg->port.rate); + vlib_cli_output (vm, " MTU = %u bytes", cfg->port.mtu); + vlib_cli_output (vm, " Frame overhead = %u bytes", + cfg->port.frame_overhead); + vlib_cli_output (vm, " Number of subports = %u", + cfg->port.n_subports_per_port); + vlib_cli_output (vm, " Number of pipes per subport = %u", + cfg->port.n_pipes_per_subport); + vlib_cli_output (vm, + " Packet queue size: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u packets", + cfg->port.qsize[0], cfg->port.qsize[1], cfg->port.qsize[2], + cfg->port.qsize[3]); + vlib_cli_output (vm, " Number of pipe profiles = %u", + cfg->port.n_pipe_profiles); + + for (subport_id = 0; subport_id < vec_len (cfg->subport); subport_id++) + { + vlib_cli_output (vm, " Subport %u:", subport_id); + vlib_cli_output (vm, " Rate = %u bytes/second", + cfg->subport[subport_id].tb_rate); + vlib_cli_output (vm, " Token bucket size = %u bytes", + cfg->subport[subport_id].tb_size); + vlib_cli_output (vm, + " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", + cfg->subport[subport_id].tc_rate[0], + cfg->subport[subport_id].tc_rate[1], + cfg->subport[subport_id].tc_rate[2], + cfg->subport[subport_id].tc_rate[3]); + vlib_cli_output (vm, " TC period = %u milliseconds", + cfg->subport[subport_id].tc_period); + } + + for (profile_id = 0; profile_id < vec_len (cfg->pipe); profile_id++) + { + vlib_cli_output (vm, " Pipe profile %u:", profile_id); + vlib_cli_output (vm, " Rate = %u bytes/second", + cfg->pipe[profile_id].tb_rate); + vlib_cli_output (vm, " Token bucket size = %u bytes", + cfg->pipe[profile_id].tb_size); + vlib_cli_output (vm, + " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", + cfg->pipe[profile_id].tc_rate[0], + cfg->pipe[profile_id].tc_rate[1], + 
cfg->pipe[profile_id].tc_rate[2], + cfg->pipe[profile_id].tc_rate[3]); + vlib_cli_output (vm, " TC period = %u milliseconds", + cfg->pipe[profile_id].tc_period); +#ifdef RTE_SCHED_SUBPORT_TC_OV + vlib_cli_output (vm, " TC3 oversubscription_weight = %u", + cfg->pipe[profile_id].tc_ov_weight); +#endif + + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + { + vlib_cli_output (vm, + " TC%u WRR weights: Q0 = %u, Q1 = %u, Q2 = %u, Q3 = %u", + i, cfg->pipe[profile_id].wrr_weights[i * 4], + cfg->pipe[profile_id].wrr_weights[i * 4 + 1], + cfg->pipe[profile_id].wrr_weights[i * 4 + 2], + cfg->pipe[profile_id].wrr_weights[i * 4 + 3]); + } + } + +#ifdef RTE_SCHED_RED + vlib_cli_output (vm, " Weighted Random Early Detection (WRED):"); + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + { + vlib_cli_output (vm, " TC%u min: G = %u, Y = %u, R = %u", i, + cfg->port.red_params[i][e_RTE_METER_GREEN].min_th, + cfg->port.red_params[i][e_RTE_METER_YELLOW].min_th, + cfg->port.red_params[i][e_RTE_METER_RED].min_th); + + vlib_cli_output (vm, " TC%u max: G = %u, Y = %u, R = %u", i, + cfg->port.red_params[i][e_RTE_METER_GREEN].max_th, + cfg->port.red_params[i][e_RTE_METER_YELLOW].max_th, + cfg->port.red_params[i][e_RTE_METER_RED].max_th); + + vlib_cli_output (vm, + " TC%u inverted probability: G = %u, Y = %u, R = %u", + i, cfg->port.red_params[i][e_RTE_METER_GREEN].maxp_inv, + cfg->port.red_params[i][e_RTE_METER_YELLOW].maxp_inv, + cfg->port.red_params[i][e_RTE_METER_RED].maxp_inv); + + vlib_cli_output (vm, " TC%u weight: G = %u, Y = %u, R = %u", i, + cfg->port.red_params[i][e_RTE_METER_GREEN].wq_log2, + cfg->port.red_params[i][e_RTE_METER_YELLOW].wq_log2, + cfg->port.red_params[i][e_RTE_METER_RED].wq_log2); + } +#endif + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to display details of an output interface's HQoS + * settings. + * + * @cliexpar + * Example of how to display HQoS settings for an interface: + * @cliexstart{show dpdk interface hqos GigabitEthernet0/8/0} + * Thread: + * Input SWQ size = 4096 packets + * Enqueue burst size = 256 packets + * Dequeue burst size = 220 packets + * Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport) + * Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe) + * Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc) + * Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...) + * [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * [48 ..
63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * Port: + * Rate = 1250000000 bytes/second + * MTU = 1514 bytes + * Frame overhead = 24 bytes + * Number of subports = 1 + * Number of pipes per subport = 4096 + * Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets + * Number of pipe profiles = 2 + * Subport 0: + * Rate = 1250000000 bytes/second + * Token bucket size = 1000000 bytes + * Traffic class rate: TC0 = 1250000000, TC1 = 1250000000, TC2 = 1250000000, TC3 = 1250000000 bytes/second + * TC period = 10 milliseconds + * Pipe profile 0: + * Rate = 305175 bytes/second + * Token bucket size = 1000000 bytes + * Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second + * TC period = 40 milliseconds + * TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos, static) = { + .path = "show dpdk interface hqos", + .short_help = "show dpdk interface hqos ", + .function = show_dpdk_if_hqos, +}; + +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; +#ifdef RTE_SCHED_COLLECT_STATS + dpdk_main_t *dm = &dpdk_main; + u32 hw_if_index = (u32) ~ 0; + u32 subport = (u32) ~ 0; + u32 pipe = (u32) ~ 0; + u32 tc = (u32) ~ 0; + u32 tc_q = (u32) ~ 0; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + uword *p = 0; + struct rte_eth_dev_info dev_info; + dpdk_device_config_t *devconf = 0; + u32 qindex; + struct rte_sched_queue_stats stats; + u16 qlen; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + + else if (unformat (line_input, "subport %d", &subport)) + ; + + else if (unformat (line_input, "pipe %d", &pipe)) + ; + + else if (unformat (line_input, "tc %d", &tc)) + ; + + else if (unformat (line_input, "tc_q %d", &tc_q)) + ; + + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify interface name!!"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get (xd->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + if (devconf->hqos_enabled == 0) + { + vlib_cli_output (vm, "HQoS disabled for this interface"); + goto done; + } + + /* + * Figure out which queue to query. cf rte_sched_port_qindex. (Not sure why + * that method isn't made public by DPDK - how _should_ we get the queue ID?) 
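 + *
 + * For example (using the values from the doc example below, with the
 + * default 4096 pipes per subport, 4 traffic classes per pipe and 4
 + * queues per traffic class), subport 0, pipe 3181, tc 0, tc_q 0 maps to:
 + * qindex = ((0 * 4096 + 3181) * 4 + 0) * 4 + 0 = 50896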
+ */ + qindex = subport * devconf->hqos.port.n_pipes_per_subport + pipe; + qindex = qindex * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc; + qindex = qindex * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q; + + if (rte_sched_queue_read_stats (xd->hqos_ht->hqos, qindex, &stats, &qlen) != + 0) + { + error = clib_error_return (0, "failed to read stats"); + goto done; + } + + vlib_cli_output (vm, "%=24s%=16s", "Stats Parameter", "Value"); + vlib_cli_output (vm, "%=24s%=16d", "Packets", stats.n_pkts); + vlib_cli_output (vm, "%=24s%=16d", "Packets dropped", stats.n_pkts_dropped); +#ifdef RTE_SCHED_RED + vlib_cli_output (vm, "%=24s%=16d", "Packets dropped (RED)", + stats.n_pkts_red_dropped); +#endif + vlib_cli_output (vm, "%=24s%=16d", "Bytes", stats.n_bytes); + vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped); + +#else + + /* Get a line of input */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + vlib_cli_output (vm, "RTE_SCHED_COLLECT_STATS disabled in DPDK"); + goto done; + +#endif + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to display statistics associated with an HQoS traffic class + * queue. + * + * @note + * Statistic collection by the scheduler is disabled by default in DPDK. In order to + * turn it on, add the following line to '../vpp/dpdk/Makefile': + * - $(call set,RTE_SCHED_COLLECT_STATS,y) + * + * @cliexpar + * Example of how to display statistics for an HQoS traffic class queue: + * @cliexstart{show dpdk hqos queue GigabitEthernet0/9/0 subport 0 pipe 3181 tc 0 tc_q 0} + * Stats Parameter Value + * Packets 140 + * Packets dropped 0 + * Bytes 8400 + * Bytes dropped 0 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = { + .path = "show dpdk hqos queue", + .short_help = "show dpdk hqos queue <if-name> subport <subport> pipe <pipe> tc <tc> tc_q <tc_q>", + .function = show_dpdk_hqos_queue_stats, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_version_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ +#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c); + _("DPDK Version", "%s", rte_version ()); + _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str); +#undef _ + return 0; +} + +/*? + * This command is used to display the current DPDK version and + * the list of arguments passed to DPDK when started. + * + * @cliexpar + * Example of how to display the DPDK version and EAL init arguments: + * @cliexstart{show dpdk version} + * DPDK Version: DPDK 16.11.0 + * DPDK EAL init args: -c 1 -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -w 0000:00:08.0 -w 0000:00:09.0 --master-lcore 0 --socket-mem 256 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_vpe_version_command, static) = { + .path = "show dpdk version", + .short_help = "show dpdk version", + .function = show_dpdk_version_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +dpdk_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c new file mode 100644 index 00000000..50b26689 --- /dev/null +++ b/src/plugins/dpdk/device/device.c @@ -0,0 +1,852 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#define foreach_dpdk_tx_func_error \ + _(BAD_RETVAL, "DPDK tx function returned an error") \ + _(RING_FULL, "Tx packet drops (ring full)") \ + _(PKT_DROP, "Tx packet drops (dpdk tx failure)") \ + _(REPL_FAIL, "Tx packet drops (replication failure)") + +typedef enum +{ +#define _(f,s) DPDK_TX_FUNC_ERROR_##f, + foreach_dpdk_tx_func_error +#undef _ + DPDK_TX_FUNC_N_ERROR, +} dpdk_tx_func_error_t; + +static char *dpdk_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_dpdk_tx_func_error +#undef _ +}; + +clib_error_t * +dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address) +{ + int error; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + + error = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) address); + + if (error) + { + return clib_error_return (0, "mac address set failed: %d", error); + } + else + { + vec_reset_length (xd->default_mac_address); + vec_add (xd->default_mac_address, address, sizeof (address)); + return NULL; + } +} + +clib_error_t * +dpdk_set_mc_filter (vnet_hw_interface_t * hi, + struct ether_addr mc_addr_vec[], int naddr) +{ + int error; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + + error = rte_eth_dev_set_mc_addr_list (xd->device_index, mc_addr_vec, naddr); + + if (error) + { + return clib_error_return (0, "mc addr list failed: %d", error); + } + else + { + return NULL; + } +} + +struct rte_mbuf * +dpdk_replicate_packet_mb (vlib_buffer_t * b) +{ + dpdk_main_t *dm = &dpdk_main; + struct rte_mbuf **mbufs = 0, *s, *d; + u8 nb_segs; + unsigned socket_id = rte_socket_id (); + int i; + + ASSERT (dm->pktmbuf_pools[socket_id]); + s = rte_mbuf_from_vlib_buffer (b); + nb_segs = s->nb_segs; + vec_validate (mbufs, nb_segs - 1); + + if (rte_pktmbuf_alloc_bulk (dm->pktmbuf_pools[socket_id], mbufs, nb_segs)) + { + vec_free (mbufs); + return 0; + } + + d = mbufs[0]; + d->nb_segs = s->nb_segs; + d->data_len = s->data_len; + d->pkt_len = s->pkt_len; + d->data_off = s->data_off; + clib_memcpy (d->buf_addr, s->buf_addr, RTE_PKTMBUF_HEADROOM + s->data_len); + + for (i = 1; i < nb_segs; i++) + { + d->next = mbufs[i]; + d = mbufs[i]; + s = s->next; + d->data_len = s->data_len; + clib_memcpy (d->buf_addr, s->buf_addr, + RTE_PKTMBUF_HEADROOM + s->data_len); + } + + d = mbufs[0]; + vec_free (mbufs); + return d; +} + +static void +dpdk_tx_trace_buffer (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 buffer_index, vlib_buffer_t * buffer) +{ + vlib_main_t *vm = vlib_get_main (); + dpdk_tx_dma_trace_t *t0; + struct rte_mbuf *mb; + + mb = rte_mbuf_from_vlib_buffer (buffer); + + t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0])); + t0->queue_index = queue_id; + t0->device_index = xd->device_index; + t0->buffer_index = buffer_index; + clib_memcpy 
(&t0->mb, mb, sizeof (t0->mb)); + clib_memcpy (&t0->buffer, buffer, + sizeof (buffer[0]) - sizeof (buffer->pre_data)); + clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data, + sizeof (t0->buffer.pre_data)); +} + +static_always_inline void +dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b, + int maybe_multiseg) +{ + struct rte_mbuf *mb, *first_mb, *last_mb; + + /* buffer is coming from non-dpdk source so we need to init + rte_mbuf header */ + if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_EXT_HDR_VALID) == 0)) + { + vlib_buffer_t *b2 = b; + last_mb = mb = rte_mbuf_from_vlib_buffer (b2); + rte_pktmbuf_reset (mb); + while (maybe_multiseg && (b2->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + b2 = vlib_get_buffer (vm, b2->next_buffer); + mb = rte_mbuf_from_vlib_buffer (b2); + rte_pktmbuf_reset (mb); + } + } + + last_mb = first_mb = mb = rte_mbuf_from_vlib_buffer (b); + first_mb->nb_segs = 1; + mb->data_len = b->current_length; + mb->pkt_len = maybe_multiseg ? vlib_buffer_length_in_chain (vm, b) : + b->current_length; + mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; + + while (maybe_multiseg && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + b = vlib_get_buffer (vm, b->next_buffer); + mb = rte_mbuf_from_vlib_buffer (b); + last_mb->next = mb; + last_mb = mb; + mb->data_len = b->current_length; + mb->pkt_len = b->current_length; + mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; + first_mb->nb_segs++; + if (PREDICT_FALSE (b->n_add_refs)) + { + rte_mbuf_refcnt_update (mb, b->n_add_refs); + b->n_add_refs = 0; + } + } +} + +/* + * This function calls the dpdk's tx_burst function to transmit the packets + * on the tx_vector. It manages a lock per-device if the device does not + * support multiple queues. It returns the number of packets untransmitted + * on the tx_vector. If all packets are transmitted (the normal case), the + * function returns 0. + * + * The function assumes there is at least one packet on the tx_vector. + */ +static_always_inline + u32 tx_burst_vector_internal (vlib_main_t * vm, + dpdk_device_t * xd, + struct rte_mbuf **tx_vector) +{ + dpdk_main_t *dm = &dpdk_main; + u32 n_packets; + u32 tx_head; + u32 tx_tail; + u32 n_retry; + int rv; + int queue_id; + tx_ring_hdr_t *ring; + + ring = vec_header (tx_vector, sizeof (*ring)); + + n_packets = ring->tx_head - ring->tx_tail; + + tx_head = ring->tx_head % xd->nb_tx_desc; + + /* + * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to + * unpredictable results. + */ + ASSERT (n_packets > 0); + + /* + * Check for tx_vector overflow. If this fails it is a system configuration + * error. The ring should be sized big enough to handle the largest un-flowed + * off burst from a traffic manager. A larger size also helps performance + * a bit because it decreases the probability of having to issue two tx_burst + * calls due to a ring wrap. + */ + ASSERT (n_packets < xd->nb_tx_desc); + ASSERT (ring->tx_tail == 0); + + n_retry = 16; + queue_id = vm->cpu_index; + + do + { + /* start the burst at the tail */ + tx_tail = ring->tx_tail % xd->nb_tx_desc; + + /* + * This device only supports one TX queue, + * and we're running multi-threaded... 
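+ * so access to it must be serialized. Rather than spinning on a single
+ * lock, the acquisition below rotates to the next queue whenever a lock
+ * is busy; in sketch form:
+ *
+ *   queue_id = queue_id % xd->tx_q_used;
+ *   while (__sync_lock_test_and_set (xd->lockp[queue_id], 1))
+ *     queue_id = (queue_id + 1) % xd->tx_q_used;
+ *
+ * __sync_lock_test_and_set returns the previous lock value, so the loop
+ * exits as soon as this thread flips a lock from 0 to 1.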
+ */ + if (PREDICT_FALSE (xd->lockp != 0)) + { + queue_id = queue_id % xd->tx_q_used; + while (__sync_lock_test_and_set (xd->lockp[queue_id], 1)) + /* zzzz */ + queue_id = (queue_id + 1) % xd->tx_q_used; + } + + if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */ + { + /* no wrap, transmit in one burst */ + dpdk_device_hqos_per_worker_thread_t *hqos = + &xd->hqos_wt[vm->cpu_index]; + + ASSERT (hqos->swq != NULL); + + dpdk_hqos_metadata_set (hqos, + &tx_vector[tx_tail], tx_head - tx_tail); + rv = rte_ring_sp_enqueue_burst (hqos->swq, + (void **) &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); + } + else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) + { + /* no wrap, transmit in one burst */ + rv = rte_eth_tx_burst (xd->device_index, + (uint16_t) queue_id, + &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); + } + else + { + ASSERT (0); + rv = 0; + } + + if (PREDICT_FALSE (xd->lockp != 0)) + *xd->lockp[queue_id] = 0; + + if (PREDICT_FALSE (rv < 0)) + { + // emit non-fatal message, bump counter + vnet_main_t *vnm = dm->vnet_main; + vnet_interface_main_t *im = &vnm->interface_main; + u32 node_index; + + node_index = vec_elt_at_index (im->hw_interfaces, + xd->vlib_hw_if_index)->tx_node_index; + + vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1); + clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index, + rv); + return n_packets; // untransmitted packets + } + ring->tx_tail += (u16) rv; + n_packets -= (uint16_t) rv; + } + while (rv && n_packets && (n_retry > 0)); + + return n_packets; +} + +static_always_inline void +dpdk_prefetch_buffer_by_index (vlib_main_t * vm, u32 bi) +{ + vlib_buffer_t *b; + struct rte_mbuf *mb; + b = vlib_get_buffer (vm, bi); + mb = rte_mbuf_from_vlib_buffer (b); + CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); +} + +static_always_inline void +dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp) +{ + dpdk_main_t *dm = &dpdk_main; + u32 my_cpu = vm->cpu_index; + struct rte_mbuf *mb_new; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0) + return; + + mb_new = dpdk_replicate_packet_mb (b); + if (PREDICT_FALSE (mb_new == 0)) + { + vlib_error_count (vm, node->node_index, + DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); + b->flags |= VLIB_BUFFER_REPL_FAIL; + } + else + *mbp = mb_new; + + vec_add1 (dm->recycle[my_cpu], bi); +} + +/* + * Transmits the packets on the frame to the interface associated with the + * node. It first copies packets on the frame to a tx_vector containing the + * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal + * which calls the dpdk tx_burst function. 
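+ *
+ * Note that rte_eth_tx_burst() may accept fewer packets than offered and
+ * ownership of the remainder stays with the caller; the contract relied
+ * on here is, in sketch form (port/queue/mbufs/n are illustrative names):
+ *
+ *   u16 sent = rte_eth_tx_burst (port, queue, mbufs, n);
+ *   for (u16 j = sent; j < n; j++)
+ *     rte_pktmbuf_free (mbufs[j]);   // free whatever was not accepted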
+ */ +static uword +dpdk_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * f) +{ + dpdk_main_t *dm = &dpdk_main; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance); + u32 n_packets = f->n_vectors; + u32 n_left; + u32 *from; + struct rte_mbuf **tx_vector; + u16 i; + u16 nb_tx_desc = xd->nb_tx_desc; + int queue_id; + u32 my_cpu; + u32 tx_pkts = 0; + tx_ring_hdr_t *ring; + u32 n_on_ring; + + my_cpu = vm->cpu_index; + + queue_id = my_cpu; + + tx_vector = xd->tx_vectors[queue_id]; + ring = vec_header (tx_vector, sizeof (*ring)); + + n_on_ring = ring->tx_head - ring->tx_tail; + from = vlib_frame_vector_args (f); + + ASSERT (n_packets <= VLIB_FRAME_SIZE); + + if (PREDICT_FALSE (n_on_ring + n_packets > nb_tx_desc)) + { + /* + * Overflowing the ring should never happen. + * If it does then drop the whole frame. + */ + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL, + n_packets); + + while (n_packets--) + { + u32 bi0 = from[n_packets]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer (b0); + rte_pktmbuf_free (mb0); + } + return n_on_ring; + } + + if (PREDICT_FALSE (dm->tx_pcap_enable)) + { + n_left = n_packets; + while (n_left > 0) + { + u32 bi0 = from[0]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + if (dm->pcap_sw_if_index == 0 || + dm->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_TX]) + pcap_add_buffer (&dm->pcap_main, vm, bi0, 512); + from++; + n_left--; + } + } + + from = vlib_frame_vector_args (f); + n_left = n_packets; + i = ring->tx_head % nb_tx_desc; + + while (n_left >= 8) + { + u32 bi0, bi1, bi2, bi3; + struct rte_mbuf *mb0, *mb1, *mb2, *mb3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 or_flags; + + dpdk_prefetch_buffer_by_index (vm, from[4]); + dpdk_prefetch_buffer_by_index (vm, from[5]); + dpdk_prefetch_buffer_by_index (vm, from[6]); + dpdk_prefetch_buffer_by_index (vm, from[7]); + + bi0 = from[0]; + bi1 = from[1]; + bi2 = from[2]; + bi3 = from[3]; + from += 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + or_flags = b0->flags | b1->flags | b2->flags | b3->flags; + + if (or_flags & VLIB_BUFFER_NEXT_PRESENT) + { + dpdk_validate_rte_mbuf (vm, b0, 1); + dpdk_validate_rte_mbuf (vm, b1, 1); + dpdk_validate_rte_mbuf (vm, b2, 1); + dpdk_validate_rte_mbuf (vm, b3, 1); + } + else + { + dpdk_validate_rte_mbuf (vm, b0, 0); + dpdk_validate_rte_mbuf (vm, b1, 0); + dpdk_validate_rte_mbuf (vm, b2, 0); + dpdk_validate_rte_mbuf (vm, b3, 0); + } + + mb0 = rte_mbuf_from_vlib_buffer (b0); + mb1 = rte_mbuf_from_vlib_buffer (b1); + mb2 = rte_mbuf_from_vlib_buffer (b2); + mb3 = rte_mbuf_from_vlib_buffer (b3); + + if (PREDICT_FALSE (or_flags & VLIB_BUFFER_RECYCLE)) + { + dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); + dpdk_buffer_recycle (vm, node, b1, bi1, &mb1); + dpdk_buffer_recycle (vm, node, b2, bi2, &mb2); + dpdk_buffer_recycle (vm, node, b3, bi3, &mb3); + + /* dont enqueue packets if replication failed as they must + be sent back to recycle */ + if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb0; + if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb1; + if (PREDICT_TRUE ((b2->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb2; + if (PREDICT_TRUE ((b3->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + 
tx_vector[i++ % nb_tx_desc] = mb3; + } + else + { + if (PREDICT_FALSE (i + 3 >= nb_tx_desc)) + { + tx_vector[i++ % nb_tx_desc] = mb0; + tx_vector[i++ % nb_tx_desc] = mb1; + tx_vector[i++ % nb_tx_desc] = mb2; + tx_vector[i++ % nb_tx_desc] = mb3; + i %= nb_tx_desc; + } + else + { + tx_vector[i++] = mb0; + tx_vector[i++] = mb1; + tx_vector[i++] = mb2; + tx_vector[i++] = mb3; + } + } + + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); + if (b1->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1); + if (b2->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi2, b2); + if (b3->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi3, b3); + } + + n_left -= 4; + } + while (n_left > 0) + { + u32 bi0; + struct rte_mbuf *mb0; + vlib_buffer_t *b0; + + bi0 = from[0]; + from++; + + b0 = vlib_get_buffer (vm, bi0); + + dpdk_validate_rte_mbuf (vm, b0, 1); + + mb0 = rte_mbuf_from_vlib_buffer (b0); + dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + if (b0->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); + + if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + { + tx_vector[i % nb_tx_desc] = mb0; + i++; + } + n_left--; + } + + /* account for additional packets in the ring */ + ring->tx_head += n_packets; + n_on_ring = ring->tx_head - ring->tx_tail; + + /* transmit as many packets as possible */ + n_packets = tx_burst_vector_internal (vm, xd, tx_vector); + + /* + * tx_pkts is the number of packets successfully transmitted + * This is the number originally on ring minus the number remaining on ring + */ + tx_pkts = n_on_ring - n_packets; + + { + /* If there is no callback then drop any non-transmitted packets */ + if (PREDICT_FALSE (n_packets)) + { + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + n_packets); + + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, + n_packets); + + while (n_packets--) + rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); + } + + /* Reset head/tail to avoid unnecessary wrap */ + ring->tx_head = 0; + ring->tx_tail = 0; + } + + /* Recycle replicated buffers */ + if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu]))) + { + vlib_buffer_free (vm, dm->recycle[my_cpu], + vec_len (dm->recycle[my_cpu])); + _vec_len (dm->recycle[my_cpu]) = 0; + } + + ASSERT (ring->tx_head >= ring->tx_tail); + + return tx_pkts; +} + +static void +dpdk_clear_hw_interface_counters (u32 instance) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, instance); + + /* + * Set the "last_cleared_stats" to the current stats, so that + * things appear to clear from a display perspective. 
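+ * The hardware counters themselves are never zeroed; the display path
+ * subtracts the snapshot instead, e.g. for any stats field V:
+ *
+ *   shown = xd->stats.V - xd->last_cleared_stats.V;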
+ */ + dpdk_update_counters (xd, vlib_time_now (dm->vlib_main)); + + clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof (xd->stats)); + clib_memcpy (xd->last_cleared_xstats, xd->xstats, + vec_len (xd->last_cleared_xstats) * + sizeof (xd->last_cleared_xstats[0])); + +} + +static clib_error_t * +dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hif->dev_instance); + int rv = 0; + + if (is_up) + { + f64 now = vlib_time_now (dm->vlib_main); + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) + { + rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + } + + if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) + rte_eth_promiscuous_enable (xd->device_index); + else + rte_eth_promiscuous_disable (xd->device_index); + + rte_eth_allmulticast_enable (xd->device_index); + xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP; + dpdk_update_counters (xd, now); + dpdk_update_link_state (xd, now); + } + else + { + xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP; + + rte_eth_allmulticast_disable (xd->device_index); + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); + rte_eth_dev_stop (xd->device_index); + + /* For bonded interface, stop slave links */ + if (xd->pmd == VNET_DPDK_PMD_BOND) + { + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16); + while (nlink >= 1) + { + u8 dpdk_port = slink[--nlink]; + rte_eth_dev_stop (dpdk_port); + } + } + } + + if (rv < 0) + clib_warning ("rte_eth_dev_%s error: %d", is_up ? 
"start" : "stop", rv); + + return /* no error */ 0; +} + +/* + * Dynamically redirect all pkts from a specific interface + * to the specified node + */ +static void +dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + dpdk_main_t *xm = &dpdk_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + xd->per_interface_next_index = node_index; + return; + } + + xd->per_interface_next_index = + vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index); +} + + +static clib_error_t * +dpdk_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + dpdk_main_t *xm = &dpdk_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + vnet_sw_interface_t *t = (vnet_sw_interface_t *) st; + int r, vlan_offload; + u32 prev_subifs = xd->num_subifs; + clib_error_t *err = 0; + + if (is_add) + xd->num_subifs++; + else if (xd->num_subifs) + xd->num_subifs--; + + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + goto done; + + /* currently we program VLANS only for IXGBE VF and I40E VF */ + if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF)) + goto done; + + if (t->sub.eth.flags.no_tags == 1) + goto done; + + if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1)) + { + xd->num_subifs = prev_subifs; + err = clib_error_return (0, "unsupported VLAN setup"); + goto done; + } + + vlan_offload = rte_eth_dev_get_vlan_offload (xd->device_index); + vlan_offload |= ETH_VLAN_FILTER_OFFLOAD; + + if ((r = rte_eth_dev_set_vlan_offload (xd->device_index, vlan_offload))) + { + xd->num_subifs = prev_subifs; + err = clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d", + xd->device_index, r); + goto done; + } + + + if ((r = + rte_eth_dev_vlan_filter (xd->device_index, t->sub.eth.outer_vlan_id, + is_add))) + { + xd->num_subifs = prev_subifs; + err = clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d", + xd->device_index, r); + goto done; + } + +done: + if (xd->num_subifs) + xd->flags |= DPDK_DEVICE_FLAG_HAVE_SUBIF; + else + xd->flags &= ~DPDK_DEVICE_FLAG_HAVE_SUBIF; + + return err; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (dpdk_device_class) = { + .name = "dpdk", + .tx_function = dpdk_interface_tx, + .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR, + .tx_function_error_strings = dpdk_tx_func_error_strings, + .format_device_name = format_dpdk_device_name, + .format_device = format_dpdk_device, + .format_tx_trace = format_dpdk_tx_dma_trace, + .clear_counters = dpdk_clear_hw_interface_counters, + .admin_up_down_function = dpdk_interface_admin_up_down, + .subif_add_del_function = dpdk_subif_add_del_function, + .rx_redirect_to_node = dpdk_set_interface_next_node, + .mac_addr_change_function = dpdk_set_mac_address, +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx) +/* *INDENT-ON* */ + +#define UP_DOWN_FLAG_EVENT 1 + +uword +admin_up_down_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + clib_error_t *error = 0; + uword event_type; + uword *event_data = 0; + u32 sw_if_index; + u32 flags; + + while (1) + { + vlib_process_wait_for_event (vm); + + event_type = vlib_process_get_events (vm, &event_data); + + dpdk_main.admin_up_down_in_progress = 1; + + switch (event_type) + { + case 
UP_DOWN_FLAG_EVENT: + { + if (vec_len (event_data) == 2) + { + sw_if_index = event_data[0]; + flags = event_data[1]; + error = + vnet_sw_interface_set_flags (vnet_get_main (), sw_if_index, + flags); + clib_error_report (error); + } + } + break; + } + + vec_reset_length (event_data); + + dpdk_main.admin_up_down_in_progress = 0; + + } + return 0; /* or not */ +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (admin_up_down_process_node,static) = { + .function = admin_up_down_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "admin-up-down-process", + .process_log2_n_stack_bytes = 17, // 256KB +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h new file mode 100644 index 00000000..2a1a6205 --- /dev/null +++ b/src/plugins/dpdk/device/dpdk.h @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_dpdk_h__ +#define __included_dpdk_h__ + +/* $$$$ We should rename always_inline -> clib_always_inline */ +#undef always_inline + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if CLIB_DEBUG > 0 +#define always_inline static inline +#else +#define always_inline static inline __attribute__ ((__always_inline__)) +#endif + +#include + +#define NB_MBUF (16<<10) + +extern vnet_device_class_t dpdk_device_class; +extern vlib_node_registration_t dpdk_input_node; +extern vlib_node_registration_t handoff_dispatch_node; + +#define foreach_dpdk_pmd \ + _ ("net_thunderx", THUNDERX) \ + _ ("net_e1000_em", E1000EM) \ + _ ("net_e1000_igb", IGB) \ + _ ("net_e1000_igb_vf", IGBVF) \ + _ ("net_ixgbe", IXGBE) \ + _ ("net_ixgbe_vf", IXGBEVF) \ + _ ("net_i40e", I40E) \ + _ ("net_i40e_vf", I40EVF) \ + _ ("net_virtio", VIRTIO) \ + _ ("net_enic", ENIC) \ + _ ("net_vmxnet3", VMXNET3) \ + _ ("AF_PACKET PMD", AF_PACKET) \ + _ ("rte_bond_pmd", BOND) \ + _ ("net_fm10k", FM10K) \ + _ ("net_cxgbe", CXGBE) \ + _ ("net_mlx5", MLX5) \ + _ ("net_dpaa2", DPAA2) + +typedef enum +{ + VNET_DPDK_PMD_NONE, +#define _(s,f) VNET_DPDK_PMD_##f, + foreach_dpdk_pmd +#undef _ + VNET_DPDK_PMD_UNKNOWN, /* must be last */ +} dpdk_pmd_t; + +typedef enum +{ + VNET_DPDK_PORT_TYPE_ETH_1G, + VNET_DPDK_PORT_TYPE_ETH_10G, + VNET_DPDK_PORT_TYPE_ETH_40G, + VNET_DPDK_PORT_TYPE_ETH_100G, + VNET_DPDK_PORT_TYPE_ETH_BOND, + VNET_DPDK_PORT_TYPE_ETH_SWITCH, + VNET_DPDK_PORT_TYPE_AF_PACKET, + VNET_DPDK_PORT_TYPE_UNKNOWN, +} dpdk_port_type_t; + +/* + * The header for the tx_vector in dpdk_device_t. + * Head and tail are indexes into the tx_vector and are of type + * u64 so they never overflow. 
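+ * Occupancy is therefore simply (tx_head - tx_tail), and a slot index is
+ * recovered with a modulo over the descriptor count, e.g.:
+ *
+ *   n_on_ring = ring->tx_head - ring->tx_tail;
+ *   slot = ring->tx_head % xd->nb_tx_desc;
+ *
+ * Even at 100 Mpps a u64 counter takes over 5000 years to wrap, so no
+ * wrap handling is required.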
+ */ +typedef struct +{ + u64 tx_head; + u64 tx_tail; +} tx_ring_hdr_t; + +typedef struct +{ + struct rte_ring *swq; + + u64 hqos_field0_slabmask; + u32 hqos_field0_slabpos; + u32 hqos_field0_slabshr; + u64 hqos_field1_slabmask; + u32 hqos_field1_slabpos; + u32 hqos_field1_slabshr; + u64 hqos_field2_slabmask; + u32 hqos_field2_slabpos; + u32 hqos_field2_slabshr; + u32 hqos_tc_table[64]; +} dpdk_device_hqos_per_worker_thread_t; + +typedef struct +{ + struct rte_ring **swq; + struct rte_mbuf **pkts_enq; + struct rte_mbuf **pkts_deq; + struct rte_sched_port *hqos; + u32 hqos_burst_enq; + u32 hqos_burst_deq; + u32 pkts_enq_len; + u32 swq_pos; + u32 flush_count; +} dpdk_device_hqos_per_hqos_thread_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + volatile u32 **lockp; + + /* Instance ID */ + u32 device_index; + + u32 vlib_hw_if_index; + u32 vlib_sw_if_index; + + /* next node index if we decide to steal the rx graph arc */ + u32 per_interface_next_index; + + /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */ + struct rte_mbuf ***tx_vectors; /* one per worker thread */ + struct rte_mbuf ***rx_vectors; + + /* vector of traced contexts, per device */ + u32 **d_trace_buffers; + + dpdk_pmd_t pmd:8; + i8 cpu_socket; + + u16 flags; +#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0) +#define DPDK_DEVICE_FLAG_PROMISC (1 << 1) +#define DPDK_DEVICE_FLAG_PMD (1 << 2) +#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3) +#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4) +#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5) +#define DPDK_DEVICE_FLAG_HQOS (1 << 6) + + u16 nb_tx_desc; + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); + + u8 *interface_name_suffix; + + /* number of sub-interfaces */ + u16 num_subifs; + + /* PMD related */ + u16 tx_q_used; + u16 rx_q_used; + u16 nb_rx_desc; + u16 *cpu_socket_id_by_queue; + struct rte_eth_conf port_conf; + struct rte_eth_txconf tx_conf; + + /* HQoS related */ + dpdk_device_hqos_per_worker_thread_t *hqos_wt; + dpdk_device_hqos_per_hqos_thread_t *hqos_ht; + + /* af_packet */ + u8 af_packet_port_id; + + struct rte_eth_link link; + f64 time_last_link_update; + + struct rte_eth_stats stats; + struct rte_eth_stats last_stats; + struct rte_eth_stats last_cleared_stats; + struct rte_eth_xstat *xstats; + struct rte_eth_xstat *last_cleared_xstats; + f64 time_last_stats_update; + dpdk_port_type_t port_type; + + /* mac address */ + u8 *default_mac_address; +} dpdk_device_t; + +#define DPDK_STATS_POLL_INTERVAL (10.0) +#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */ + +#define DPDK_LINK_POLL_INTERVAL (3.0) +#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */ + +typedef struct +{ + u32 device; + u16 queue_id; +} dpdk_device_and_queue_t; + +#ifndef DPDK_HQOS_DBG_BYPASS +#define DPDK_HQOS_DBG_BYPASS 0 +#endif + +#ifndef HQOS_FLUSH_COUNT_THRESHOLD +#define HQOS_FLUSH_COUNT_THRESHOLD 100000 +#endif + +typedef struct dpdk_device_config_hqos_t +{ + u32 hqos_thread; + u32 hqos_thread_valid; + + u32 swq_size; + u32 burst_enq; + u32 burst_deq; + + u32 pktfield0_slabpos; + u32 pktfield1_slabpos; + u32 pktfield2_slabpos; + u64 pktfield0_slabmask; + u64 pktfield1_slabmask; + u64 pktfield2_slabmask; + u32 tc_table[64]; + + struct rte_sched_port_params port; + struct rte_sched_subport_params *subport; + struct rte_sched_pipe_params *pipe; + uint32_t *pipe_map; +} dpdk_device_config_hqos_t; + +int dpdk_hqos_validate_mask (u64 mask, u32 n); +void dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * + hqos, u32 pipe_profile_id); +void 
dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos); +clib_error_t *dpdk_port_setup_hqos (dpdk_device_t * xd, + dpdk_device_config_hqos_t * hqos); +void dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, + struct rte_mbuf **pkts, u32 n_pkts); + +#define foreach_dpdk_device_config_item \ + _ (num_rx_queues) \ + _ (num_tx_queues) \ + _ (num_rx_desc) \ + _ (num_tx_desc) \ + _ (rss_fn) + +typedef struct +{ + vlib_pci_addr_t pci_addr; + u8 is_blacklisted; + u8 vlan_strip_offload; +#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0 +#define DPDK_DEVICE_VLAN_STRIP_OFF 1 +#define DPDK_DEVICE_VLAN_STRIP_ON 2 + +#define _(x) uword x; + foreach_dpdk_device_config_item +#undef _ + clib_bitmap_t * workers; + u32 hqos_enabled; + dpdk_device_config_hqos_t hqos; +} dpdk_device_config_t; + +typedef struct +{ + + /* Config stuff */ + u8 **eal_init_args; + u8 *eal_init_args_str; + u8 *uio_driver_name; + u8 no_multi_seg; + u8 enable_tcp_udp_checksum; + u8 cryptodev; + + /* Required config parameters */ + u8 coremask_set_manually; + u8 nchannels_set_manually; + u32 coremask; + u32 nchannels; + u32 num_mbufs; + u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */ + + /* + * format interface names ala xxxEthernet%d/%d/%d instead of + * xxxEthernet%x/%x/%x. + */ + u8 interface_name_format_decimal; + + /* per-device config */ + dpdk_device_config_t default_devconf; + dpdk_device_config_t *dev_confs; + uword *device_config_index_by_pci_addr; + +} dpdk_config_main_t; + +dpdk_config_main_t dpdk_config_main; + +typedef struct +{ + + /* Devices */ + dpdk_device_t *devices; + dpdk_device_and_queue_t **devices_by_cpu; + dpdk_device_and_queue_t **devices_by_hqos_cpu; + + /* per-thread recycle lists */ + u32 **recycle; + + /* buffer flags template, configurable to enable/disable tcp / udp cksum */ + u32 buffer_flags_template; + + /* vlib buffer free list, must be same size as an rte_mbuf */ + u32 vlib_buffer_free_list_index; + + /* Ethernet input node index */ + u32 ethernet_input_node_index; + + /* pcap tracing [only works if (CLIB_DEBUG > 0)] */ + int tx_pcap_enable; + pcap_main_t pcap_main; + u8 *pcap_filename; + u32 pcap_sw_if_index; + u32 pcap_pkts_to_capture; + + /* hashes */ + uword *dpdk_device_by_kni_port_id; + uword *vu_sw_if_index_by_listener_fd; + uword *vu_sw_if_index_by_sock_fd; + u32 *vu_inactive_interfaces_device_index; + + /* + * flag indicating that a posted admin up/down + * (via post_sw_interface_set_flags) is in progress + */ + u8 admin_up_down_in_progress; + + u8 use_rss; + + /* which cpus are running dpdk-input */ + int input_cpu_first_index; + int input_cpu_count; + + /* which cpus are running I/O TX */ + int hqos_cpu_first_index; + int hqos_cpu_count; + + /* control interval of dpdk link state and stat polling */ + f64 link_state_poll_interval; + f64 stat_poll_interval; + + /* Sleep for this many MS after each device poll */ + u32 poll_sleep; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + dpdk_config_main_t *conf; + + /* mempool */ + struct rte_mempool **pktmbuf_pools; + + /* API message ID base */ + u16 msg_id_base; +} dpdk_main_t; + +dpdk_main_t dpdk_main; + +typedef struct +{ + u32 buffer_index; + u16 device_index; + u8 queue_index; + struct rte_mbuf mb; + /* Copy of VLIB buffer; packet data stored in pre_data. 
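+ * The trace path snapshots the start of the packet into pre_data, along
+ * the lines of:
+ *
+ *   clib_memcpy (t->buffer.pre_data, b->data + b->current_data,
+ *                sizeof (t->buffer.pre_data));
+ *
+ * so format functions can still decode headers after the original
+ * buffer has been freed or recycled.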
*/ + vlib_buffer_t buffer; +} dpdk_tx_dma_trace_t; + +typedef struct +{ + u32 buffer_index; + u16 device_index; + u16 queue_index; + struct rte_mbuf mb; + vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */ + u8 data[256]; /* First 256 data bytes, used for hexdump */ +} dpdk_rx_dma_trace_t; + +void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b); + +clib_error_t *dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address); + +clib_error_t *dpdk_set_mc_filter (vnet_hw_interface_t * hi, + struct ether_addr mc_addr_vec[], int naddr); + +void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd); + +clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); + +u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance); + +struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b); +struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); + +#define foreach_dpdk_error \ + _(NONE, "no error") \ + _(RX_PACKET_ERROR, "Rx packet errors") \ + _(RX_BAD_FCS, "Rx bad fcs") \ + _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \ + _(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \ + _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \ + _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error") + +typedef enum +{ +#define _(f,s) DPDK_ERROR_##f, + foreach_dpdk_error +#undef _ + DPDK_N_ERROR, +} dpdk_error_t; + +int dpdk_set_stat_poll_interval (f64 interval); +int dpdk_set_link_state_poll_interval (f64 interval); +void dpdk_update_link_state (dpdk_device_t * xd, f64 now); +void dpdk_device_lock_init (dpdk_device_t * xd); +void dpdk_device_lock_free (dpdk_device_t * xd); + +void dpdk_rx_trace (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 * buffers, uword n_buffers); + +#define EFD_OPERATION_LESS_THAN 0 +#define EFD_OPERATION_GREATER_OR_EQUAL 1 + +format_function_t format_dpdk_device_name; +format_function_t format_dpdk_device; +format_function_t format_dpdk_tx_dma_trace; +format_function_t format_dpdk_rx_dma_trace; +format_function_t format_dpdk_rte_mbuf; +format_function_t format_dpdk_rx_rte_mbuf; +unformat_function_t unformat_socket_mem; +clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn); +clib_error_t *unformat_hqos (unformat_input_t * input, + dpdk_device_config_hqos_t * hqos); + +uword +admin_up_down_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f); + +#endif /* __included_dpdk_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h new file mode 100644 index 00000000..dd40ff48 --- /dev/null +++ b/src/plugins/dpdk/device/dpdk_priv.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) +#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) + +#define DPDK_NB_RX_DESC_DEFAULT 1024 +#define DPDK_NB_TX_DESC_DEFAULT 1024 +#define DPDK_NB_RX_DESC_VIRTIO 256 +#define DPDK_NB_TX_DESC_VIRTIO 256 + +#define I40E_DEV_ID_SFP_XL710 0x1572 +#define I40E_DEV_ID_QSFP_A 0x1583 +#define I40E_DEV_ID_QSFP_B 0x1584 +#define I40E_DEV_ID_QSFP_C 0x1585 +#define I40E_DEV_ID_10G_BASE_T 0x1586 +#define I40E_DEV_ID_VF 0x154C + +/* These args appear by themselves */ +#define foreach_eal_double_hyphen_predicate_arg \ +_(no-shconf) \ +_(no-hpet) \ +_(no-huge) \ +_(vmware-tsc-map) + +#define foreach_eal_single_hyphen_mandatory_arg \ +_(coremask, c) \ +_(nchannels, n) \ + +#define foreach_eal_single_hyphen_arg \ +_(blacklist, b) \ +_(mem-alloc-request, m) \ +_(force-ranks, r) + +/* These args are preceeded by "--" and followed by a single string */ +#define foreach_eal_double_hyphen_arg \ +_(huge-dir) \ +_(proc-type) \ +_(file-prefix) \ +_(vdev) + +static inline void +dpdk_get_xstats (dpdk_device_t * xd) +{ + int len; + if ((len = rte_eth_xstats_get (xd->device_index, NULL, 0)) > 0) + { + vec_validate (xd->xstats, len - 1); + vec_validate (xd->last_cleared_xstats, len - 1); + + len = + rte_eth_xstats_get (xd->device_index, xd->xstats, + vec_len (xd->xstats)); + + ASSERT (vec_len (xd->xstats) == len); + ASSERT (vec_len (xd->last_cleared_xstats) == len); + + _vec_len (xd->xstats) = len; + _vec_len (xd->last_cleared_xstats) = len; + + } +} + + +static inline void +dpdk_update_counters (dpdk_device_t * xd, f64 now) +{ + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); + u32 my_cpu = os_get_cpu_number (); + u64 rxerrors, last_rxerrors; + + /* only update counters for PMD interfaces */ + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + return; + + xd->time_last_stats_update = now ? now : xd->time_last_stats_update; + clib_memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats)); + rte_eth_stats_get (xd->device_index, &xd->stats); + + /* maybe bump interface rx no buffer counter */ + if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_NO_BUF); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + xd->stats.rx_nombuf - + xd->last_stats.rx_nombuf); + } + + /* missed pkt counter */ + if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_MISS); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + xd->stats.imissed - + xd->last_stats.imissed); + } + rxerrors = xd->stats.ierrors; + last_rxerrors = xd->last_stats.ierrors; + + if (PREDICT_FALSE (rxerrors != last_rxerrors)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_ERROR); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + rxerrors - last_rxerrors); + } + + dpdk_get_xstats (xd); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/format.c b/src/plugins/dpdk/device/format.c new file mode 100644 index 00000000..25a8c5cb --- /dev/null +++ b/src/plugins/dpdk/device/format.c @@ -0,0 +1,754 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#define foreach_dpdk_counter \ + _ (tx_frames_ok, opackets) \ + _ (tx_bytes_ok, obytes) \ + _ (tx_errors, oerrors) \ + _ (rx_frames_ok, ipackets) \ + _ (rx_bytes_ok, ibytes) \ + _ (rx_errors, ierrors) \ + _ (rx_missed, imissed) \ + _ (rx_no_bufs, rx_nombuf) + +#define foreach_dpdk_q_counter \ + _ (rx_frames_ok, q_ipackets) \ + _ (tx_frames_ok, q_opackets) \ + _ (rx_bytes_ok, q_ibytes) \ + _ (tx_bytes_ok, q_obytes) \ + _ (rx_errors, q_errors) + +#define foreach_dpdk_rss_hf \ + _(ETH_RSS_FRAG_IPV4, "ipv4-frag") \ + _(ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \ + _(ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \ + _(ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \ + _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \ + _(ETH_RSS_IPV4, "ipv4") \ + _(ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \ + _(ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \ + _(ETH_RSS_FRAG_IPV6, "ipv6-frag") \ + _(ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \ + _(ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \ + _(ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \ + _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \ + _(ETH_RSS_L2_PAYLOAD, "l2-payload") \ + _(ETH_RSS_IPV6_EX, "ipv6-ex") \ + _(ETH_RSS_IPV6, "ipv6") + + +#define foreach_dpdk_rx_offload_caps \ + _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip") \ + _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ + _(DEV_RX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ + _(DEV_RX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ + _(DEV_RX_OFFLOAD_TCP_LRO , "rcp-lro") \ + _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip") + +#define foreach_dpdk_tx_offload_caps \ + _(DEV_TX_OFFLOAD_VLAN_INSERT, "vlan-insert") \ + _(DEV_TX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ + _(DEV_TX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ + _(DEV_TX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ + _(DEV_TX_OFFLOAD_SCTP_CKSUM , "sctp-cksum") \ + _(DEV_TX_OFFLOAD_TCP_TSO , "tcp-tso") \ + _(DEV_TX_OFFLOAD_UDP_TSO , "udp-tso") \ + _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \ + _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert") + +#define foreach_dpdk_pkt_rx_offload_flag \ + _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \ + _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \ + _ (PKT_RX_FDIR, "RX packet with FDIR infos") \ + _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \ + _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \ + _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \ + _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \ + _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. 
is valid") \ + _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \ + _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \ + _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped") + +#define foreach_dpdk_pkt_type \ + _ (L2, ETHER, "Ethernet packet") \ + _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \ + _ (L2, ETHER_ARP, "ARP packet") \ + _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet") \ + _ (L2, ETHER_NSH, "NSH (Network Service Header) packet") \ + _ (L2, ETHER_VLAN, "VLAN packet") \ + _ (L2, ETHER_QINQ, "QinQ packet") \ + _ (L3, IPV4, "IPv4 packet without extension headers") \ + _ (L3, IPV4_EXT, "IPv4 packet with extension headers") \ + _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \ + _ (L3, IPV6, "IPv6 packet without extension headers") \ + _ (L3, IPV6_EXT, "IPv6 packet with extension headers") \ + _ (L3, IPV6_EXT_UNKNOWN, "IPv6 packet with or without extension headers") \ + _ (L4, TCP, "TCP packet") \ + _ (L4, UDP, "UDP packet") \ + _ (L4, FRAG, "Fragmented IP packet") \ + _ (L4, SCTP, "SCTP (Stream Control Transmission Protocol) packet") \ + _ (L4, ICMP, "ICMP packet") \ + _ (L4, NONFRAG, "Non-fragmented IP packet") \ + _ (TUNNEL, GRE, "GRE tunneling packet") \ + _ (TUNNEL, VXLAN, "VXLAN tunneling packet") \ + _ (TUNNEL, NVGRE, "NVGRE Tunneling packet") \ + _ (TUNNEL, GENEVE, "GENEVE Tunneling packet") \ + _ (TUNNEL, GRENAT, "Teredo, VXLAN or GRE Tunneling packet") \ + _ (INNER_L2, ETHER, "Inner Ethernet packet") \ + _ (INNER_L2, ETHER_VLAN, "Inner Ethernet packet with VLAN") \ + _ (INNER_L3, IPV4, "Inner IPv4 packet without extension headers") \ + _ (INNER_L3, IPV4_EXT, "Inner IPv4 packet with extension headers") \ + _ (INNER_L3, IPV4_EXT_UNKNOWN, "Inner IPv4 packet with or without extension headers") \ + _ (INNER_L3, IPV6, "Inner IPv6 packet without extension headers") \ + _ (INNER_L3, IPV6_EXT, "Inner IPv6 packet with extension headers") \ + _ (INNER_L3, IPV6_EXT_UNKNOWN, "Inner IPv6 packet with or without extension headers") \ + _ (INNER_L4, TCP, "Inner TCP packet") \ + _ (INNER_L4, UDP, "Inner UDP packet") \ + _ (INNER_L4, FRAG, "Inner fagmented IP packet") \ + _ (INNER_L4, SCTP, "Inner SCTP (Stream Control Transmission Protocol) packet") \ + _ (INNER_L4, ICMP, "Inner ICMP packet") \ + _ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet") + +#define foreach_dpdk_pkt_tx_offload_flag \ + _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \ + _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \ + _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \ + _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. 
computed by NIC") \ + _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp") + +#define foreach_dpdk_pkt_offload_flag \ + foreach_dpdk_pkt_rx_offload_flag \ + foreach_dpdk_pkt_tx_offload_flag + +u8 * +format_dpdk_device_name (u8 * s, va_list * args) +{ + dpdk_main_t *dm = &dpdk_main; + char *devname_format; + char *device_name; + u32 i = va_arg (*args, u32); + struct rte_eth_dev_info dev_info; + u8 *ret; + + if (dm->conf->interface_name_format_decimal) + devname_format = "%s%d/%d/%d"; + else + devname_format = "%s%x/%x/%x"; + + switch (dm->devices[i].port_type) + { + case VNET_DPDK_PORT_TYPE_ETH_1G: + device_name = "GigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_10G: + device_name = "TenGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_40G: + device_name = "FortyGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_100G: + device_name = "HundredGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_BOND: + return format (s, "BondEthernet%d", dm->devices[i].device_index); + + case VNET_DPDK_PORT_TYPE_ETH_SWITCH: + device_name = "EthernetSwitch"; + break; + + case VNET_DPDK_PORT_TYPE_AF_PACKET: + rte_eth_dev_info_get (i, &dev_info); + return format (s, "af_packet%d", dm->devices[i].af_packet_port_id); + + default: + case VNET_DPDK_PORT_TYPE_UNKNOWN: + device_name = "UnknownEthernet"; + break; + } + + rte_eth_dev_info_get (i, &dev_info); + + if (dev_info.pci_dev) + ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus, + dev_info.pci_dev->addr.devid, + dev_info.pci_dev->addr.function); + else + ret = format (s, "%s%d", device_name, dm->devices[i].device_index); + + if (dm->devices[i].interface_name_suffix) + return format (ret, "/%s", dm->devices[i].interface_name_suffix); + return ret; +} + +static u8 * +format_dpdk_device_type (u8 * s, va_list * args) +{ + dpdk_main_t *dm = &dpdk_main; + char *dev_type; + u32 i = va_arg (*args, u32); + + switch (dm->devices[i].pmd) + { + case VNET_DPDK_PMD_E1000EM: + dev_type = "Intel 82540EM (e1000)"; + break; + + case VNET_DPDK_PMD_IGB: + dev_type = "Intel e1000"; + break; + + case VNET_DPDK_PMD_I40E: + dev_type = "Intel X710/XL710 Family"; + break; + + case VNET_DPDK_PMD_I40EVF: + dev_type = "Intel X710/XL710 Family VF"; + break; + + case VNET_DPDK_PMD_FM10K: + dev_type = "Intel FM10000 Family Ethernet Switch"; + break; + + case VNET_DPDK_PMD_IGBVF: + dev_type = "Intel e1000 VF"; + break; + + case VNET_DPDK_PMD_VIRTIO: + dev_type = "Red Hat Virtio"; + break; + + case VNET_DPDK_PMD_IXGBEVF: + dev_type = "Intel 82599 VF"; + break; + + case VNET_DPDK_PMD_IXGBE: + dev_type = "Intel 82599"; + break; + + case VNET_DPDK_PMD_ENIC: + dev_type = "Cisco VIC"; + break; + + case VNET_DPDK_PMD_CXGBE: + dev_type = "Chelsio T4/T5"; + break; + + case VNET_DPDK_PMD_MLX5: + dev_type = "Mellanox ConnectX-4 Family"; + break; + + case VNET_DPDK_PMD_VMXNET3: + dev_type = "VMware VMXNET3"; + break; + + case VNET_DPDK_PMD_AF_PACKET: + dev_type = "af_packet"; + break; + + case VNET_DPDK_PMD_BOND: + dev_type = "Ethernet Bonding"; + break; + + case VNET_DPDK_PMD_DPAA2: + dev_type = "NXP DPAA2 Mac"; + break; + + default: + case VNET_DPDK_PMD_UNKNOWN: + dev_type = "### UNKNOWN ###"; + break; + } + + return format (s, dev_type); +} + +static u8 * +format_dpdk_link_status (u8 * s, va_list * args) +{ + dpdk_device_t *xd = va_arg (*args, dpdk_device_t *); + struct rte_eth_link *l = &xd->link; + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); + + s = format (s, "%s 
", l->link_status ? "up" : "down"); + if (l->link_status) + { + u32 promisc = rte_eth_promiscuous_get (xd->device_index); + + s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ? + "full" : "half"); + s = format (s, "speed %u mtu %d %s\n", l->link_speed, + hi->max_packet_bytes, promisc ? " promisc" : ""); + } + else + s = format (s, "\n"); + + return s; +} + +#define _line_len 72 +#define _(v, str) \ +if (bitmap & v) { \ + if (format_get_indent (s) > next_split ) { \ + next_split += _line_len; \ + s = format(s,"\n%U", format_white_space, indent); \ + } \ + s = format(s, "%s ", str); \ +} + +static u8 * +format_dpdk_rss_hf_name (u8 * s, va_list * args) +{ + u64 bitmap = va_arg (*args, u64); + int next_split = _line_len; + int indent = format_get_indent (s); + + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_rss_hf return s; +} + +static u8 * +format_dpdk_rx_offload_caps (u8 * s, va_list * args) +{ + u32 bitmap = va_arg (*args, u32); + int next_split = _line_len; + int indent = format_get_indent (s); + + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_rx_offload_caps return s; +} + +static u8 * +format_dpdk_tx_offload_caps (u8 * s, va_list * args) +{ + u32 bitmap = va_arg (*args, u32); + int next_split = _line_len; + int indent = format_get_indent (s); + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_tx_offload_caps return s; +} + +#undef _line_len +#undef _ + +u8 * +format_dpdk_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance); + uword indent = format_get_indent (s); + f64 now = vlib_time_now (dm->vlib_main); + struct rte_eth_dev_info di; + + dpdk_update_counters (xd, now); + dpdk_update_link_state (xd, now); + + s = format (s, "%U\n%Ucarrier %U", + format_dpdk_device_type, xd->device_index, + format_white_space, indent + 2, format_dpdk_link_status, xd); + + rte_eth_dev_info_get (xd->device_index, &di); + + if (verbose > 1 && xd->flags & DPDK_DEVICE_FLAG_PMD) + { + struct rte_pci_device *pci; + struct rte_eth_rss_conf rss_conf; + int vlan_off; + int retval; + + rss_conf.rss_key = 0; + retval = rte_eth_dev_rss_hash_conf_get (xd->device_index, &rss_conf); + if (retval < 0) + clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval); + pci = di.pci_dev; + + if (pci) + s = + format (s, + "%Upci id: device %04x:%04x subsystem %04x:%04x\n" + "%Upci address: %04x:%02x:%02x.%02x\n", + format_white_space, indent + 2, pci->id.vendor_id, + pci->id.device_id, pci->id.subsystem_vendor_id, + pci->id.subsystem_device_id, format_white_space, indent + 2, + pci->addr.domain, pci->addr.bus, pci->addr.devid, + pci->addr.function); + s = + format (s, "%Umax rx packet len: %d\n", format_white_space, + indent + 2, di.max_rx_pktlen); + s = + format (s, "%Umax num of queues: rx %d tx %d\n", format_white_space, + indent + 2, di.max_rx_queues, di.max_tx_queues); + s = + format (s, "%Upromiscuous: unicast %s all-multicast %s\n", + format_white_space, indent + 2, + rte_eth_promiscuous_get (xd->device_index) ? "on" : "off", + rte_eth_promiscuous_get (xd->device_index) ? "on" : "off"); + vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); + s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n", + format_white_space, indent + 2, + vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? 
"on" : "off"); + s = format (s, "%Urx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_rx_offload_caps, di.rx_offload_capa); + s = format (s, "%Utx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_tx_offload_caps, di.tx_offload_capa); + s = format (s, "%Urss active: %U\n" + "%Urss supported: %U\n", + format_white_space, indent + 2, + format_dpdk_rss_hf_name, rss_conf.rss_hf, + format_white_space, indent + 2, + format_dpdk_rss_hf_name, di.flow_type_rss_offloads); + } + + s = format (s, "%Urx queues %d, rx desc %d, tx queues %d, tx desc %d\n", + format_white_space, indent + 2, + xd->rx_q_used, xd->nb_rx_desc, xd->tx_q_used, xd->nb_tx_desc); + + if (xd->cpu_socket > -1) + s = format (s, "%Ucpu socket %d\n", + format_white_space, indent + 2, xd->cpu_socket); + + /* $$$ MIB counters */ + { +#define _(N, V) \ + if ((xd->stats.V - xd->last_cleared_stats.V) != 0) { \ + s = format (s, "\n%U%-40U%16Ld", \ + format_white_space, indent + 2, \ + format_c_identifier, #N, \ + xd->stats.V - xd->last_cleared_stats.V); \ + } \ + + foreach_dpdk_counter +#undef _ + } + + u8 *xs = 0; + u32 i = 0; + struct rte_eth_xstat *xstat, *last_xstat; + struct rte_eth_xstat_name *xstat_names = 0; + int len = rte_eth_xstats_get_names (xd->device_index, NULL, 0); + vec_validate (xstat_names, len - 1); + rte_eth_xstats_get_names (xd->device_index, xstat_names, len); + + ASSERT (vec_len (xd->xstats) == vec_len (xd->last_cleared_xstats)); + + /* *INDENT-OFF* */ + vec_foreach_index(i, xd->xstats) + { + u64 delta = 0; + xstat = vec_elt_at_index(xd->xstats, i); + last_xstat = vec_elt_at_index(xd->last_cleared_xstats, i); + + delta = xstat->value - last_xstat->value; + if (verbose == 2 || (verbose && delta)) + { + /* format_c_identifier doesn't like c strings inside vector */ + u8 * name = format(0,"%s", xstat_names[i].name); + xs = format(xs, "\n%U%-38U%16Ld", + format_white_space, indent + 4, + format_c_identifier, name, delta); + vec_free(name); + } + } + /* *INDENT-ON* */ + + vec_free (xstat_names); + + if (xs) + { + s = format (s, "\n%Uextended stats:%v", + format_white_space, indent + 2, xs); + vec_free (xs); + } + + return s; +} + +u8 * +format_dpdk_tx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); + dpdk_tx_dma_trace_t *t = va_arg (*va, dpdk_tx_dma_trace_t *); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); + uword indent = format_get_indent (s); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + + s = format (s, "%U tx queue %d", + format_vnet_sw_interface_name, vnm, sw, t->queue_index); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U%U", format_white_space, indent, + format_ethernet_header_with_length, t->buffer.pre_data, + sizeof (t->buffer.pre_data)); + + return s; +} + +u8 * +format_dpdk_rx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); + dpdk_rx_dma_trace_t *t = va_arg (*va, dpdk_rx_dma_trace_t *); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); + format_function_t *f; + uword indent = 
format_get_indent (s); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + + s = format (s, "%U rx queue %d", + format_vnet_sw_interface_name, vnm, sw, t->queue_index); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U%U", + format_white_space, indent, + format_dpdk_rte_mbuf, &t->mb, &t->data); + + if (vm->trace_main.verbose) + { + s = format (s, "\n%UPacket Dump%s", format_white_space, indent + 2, + t->mb.data_len > sizeof (t->data) ? " (truncated)" : ""); + s = format (s, "\n%U%U", format_white_space, indent + 4, + format_hexdump, &t->data, + t->mb.data_len > + sizeof (t->data) ? sizeof (t->data) : t->mb.data_len); + } + f = node->format_buffer; + if (!f) + f = format_hex_bytes; + s = format (s, "\n%U%U", format_white_space, indent, + f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); + + return s; +} + + +static inline u8 * +format_dpdk_pkt_types (u8 * s, va_list * va) +{ + u32 *pkt_types = va_arg (*va, u32 *); + uword indent __attribute__ ((unused)) = format_get_indent (s) + 2; + + if (!*pkt_types) + return s; + + s = format (s, "Packet Types"); + +#define _(L, F, S) \ + if ((*pkt_types & RTE_PTYPE_##L##_MASK) == RTE_PTYPE_##L##_##F) \ + { \ + s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \ + "RTE_PTYPE_" #L "_" #F, RTE_PTYPE_##L##_##F, S); \ + } + + foreach_dpdk_pkt_type +#undef _ + return s; +} + +static inline u8 * +format_dpdk_pkt_offload_flags (u8 * s, va_list * va) +{ + u64 *ol_flags = va_arg (*va, u64 *); + uword indent = format_get_indent (s) + 2; + + if (!*ol_flags) + return s; + + s = format (s, "Packet Offload Flags"); + +#define _(F, S) \ + if (*ol_flags & F) \ + { \ + s = format (s, "\n%U%s (0x%04x) %s", \ + format_white_space, indent, #F, F, S); \ + } + + foreach_dpdk_pkt_offload_flag +#undef _ + return s; +} + +u8 * +format_dpdk_rte_mbuf_vlan (u8 * s, va_list * va) +{ + ethernet_vlan_header_tv_t *vlan_hdr = + va_arg (*va, ethernet_vlan_header_tv_t *); + + if (clib_net_to_host_u16 (vlan_hdr->type) == ETHERNET_TYPE_DOT1AD) + { + s = format (s, "%U 802.1q vlan ", + format_ethernet_vlan_tci, + clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); + vlan_hdr++; + } + + s = format (s, "%U", + format_ethernet_vlan_tci, + clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); + + return s; +} + +u8 * +format_dpdk_rte_mbuf (u8 * s, va_list * va) +{ + struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *); + ethernet_header_t *eth_hdr = va_arg (*va, ethernet_header_t *); + uword indent = format_get_indent (s) + 2; + + s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d" + "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x, data_off %d, phys_addr 0x%x" + "\n%Upacket_type 0x%x", + mb->port, mb->nb_segs, mb->pkt_len, + format_white_space, indent, + mb->buf_len, mb->data_len, mb->ol_flags, mb->data_off, + mb->buf_physaddr, format_white_space, indent, mb->packet_type); + + if (mb->ol_flags) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_offload_flags, &mb->ol_flags); + + if ((mb->ol_flags & PKT_RX_VLAN_PKT) && + ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0)) + { + ethernet_vlan_header_tv_t *vlan_hdr = + ((ethernet_vlan_header_tv_t *) & (eth_hdr->type)); + s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr); + } + + if (mb->packet_type) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_types, &mb->packet_type); + + return s; +} + +/* FIXME is this function used? 
*/ +#if 0 +uword +unformat_socket_mem (unformat_input_t * input, va_list * va) +{ + uword **r = va_arg (*va, uword **); + int i = 0; + u32 mem; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, ",")) + hash_set (*r, i, 1024); + else if (unformat (input, "%u,", &mem)) + hash_set (*r, i, mem); + else if (unformat (input, "%u", &mem)) + hash_set (*r, i, mem); + else + { + unformat_put_input (input); + goto done; + } + i++; + } + +done: + return 1; +} +#endif + +clib_error_t * +unformat_rss_fn (unformat_input_t * input, uword * rss_fn) +{ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (0) + ; +#undef _ +#define _(f, s) \ + else if (unformat (input, s)) \ + *rss_fn |= f; + + foreach_dpdk_rss_hf +#undef _ + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + } + return 0; +} + +clib_error_t * +unformat_hqos (unformat_input_t * input, dpdk_device_config_hqos_t * hqos) +{ + clib_error_t *error = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "hqos-thread %u", &hqos->hqos_thread)) + hqos->hqos_thread_valid = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + break; + } + } + + return error; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c new file mode 100644 index 00000000..8824d789 --- /dev/null +++ b/src/plugins/dpdk/device/node.c @@ -0,0 +1,674 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +static char *dpdk_error_strings[] = { +#define _(n,s) s, + foreach_dpdk_error +#undef _ +}; + +always_inline int +vlib_buffer_is_ip4 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); +} + +always_inline int +vlib_buffer_is_ip6 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); +} + +always_inline int +vlib_buffer_is_mpls (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); +} + +always_inline u32 +dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0) +{ + if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0))) + if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0)) + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + else + return VNET_DEVICE_INPUT_NEXT_IP4_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0))) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; + else + return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; +} + +always_inline int +dpdk_mbuf_is_vlan (struct rte_mbuf *mb) +{ + return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) == + RTE_PTYPE_L2_ETHER_VLAN; +} + +always_inline int +dpdk_mbuf_is_ip4 (struct rte_mbuf *mb) +{ + return RTE_ETH_IS_IPV4_HDR (mb->packet_type) != 0; +} + +always_inline int +dpdk_mbuf_is_ip6 (struct rte_mbuf *mb) +{ + return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0; +} + +always_inline u32 +dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0) +{ + if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb))) + return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb))) + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb))) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; + else + return dpdk_rx_next_from_etype (mb, b0); +} + +always_inline void +dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error) +{ + if (mb->ol_flags & PKT_RX_IP_CKSUM_BAD) + { + *error = DPDK_ERROR_IP_CHECKSUM_ERROR; + *next = VNET_DEVICE_INPUT_NEXT_DROP; + } + else + *error = DPDK_ERROR_NONE; +} + +void +dpdk_rx_trace (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 * buffers, uword n_buffers) +{ + vlib_main_t *vm = vlib_get_main (); + u32 *b, n_left; + u32 next0; + + n_left = n_buffers; + b = buffers; + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t *b0; + dpdk_rx_dma_trace_t *t0; + struct rte_mbuf *mb; + u8 error0; + + bi0 = b[0]; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + mb = rte_mbuf_from_vlib_buffer (b0); + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + next0 = xd->per_interface_next_index; + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + next0 = dpdk_rx_next_from_mb (mb, b0); + else + next0 = dpdk_rx_next_from_etype (mb, b0); + + dpdk_rx_error_from_mb (mb, &next0, &error0); + + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->queue_index = queue_id; + t0->device_index = xd->device_index; + t0->buffer_index = bi0; + + 
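+      /* Snapshot the rte_mbuf header, the vlib_buffer_t metadata and the
+       * first bytes of packet data into the trace record, so that
+       * format_dpdk_rx_dma_trace can render them later. */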
clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); + clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + clib_memcpy (t0->buffer.pre_data, b0->data, + sizeof (t0->buffer.pre_data)); + clib_memcpy (&t0->data, mb->buf_addr + mb->data_off, sizeof (t0->data)); + + b += 1; + } +} + +static inline u32 +dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id) +{ + u32 n_buffers; + u32 n_left; + u32 n_this_chunk; + + n_left = VLIB_FRAME_SIZE; + n_buffers = 0; + + if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) + { + while (n_left) + { + n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id, + xd->rx_vectors[queue_id] + + n_buffers, n_left); + n_buffers += n_this_chunk; + n_left -= n_this_chunk; + + /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */ + if (n_this_chunk < 32) + break; + } + } + else + { + ASSERT (0); + } + + return n_buffers; +} + + +static_always_inline void +dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, + struct rte_mbuf *mb, vlib_buffer_free_list_t * fl) +{ + u8 nb_seg = 1; + struct rte_mbuf *mb_seg = 0; + vlib_buffer_t *b_seg, *b_chain = 0; + mb_seg = mb->next; + b_chain = b; + + while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) + { + ASSERT (mb_seg != 0); + + b_seg = vlib_buffer_from_rte_mbuf (mb_seg); + vlib_buffer_init_for_free_list (b_seg, fl); + + ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + ASSERT (b_seg->current_data == 0); + + /* + * The driver (e.g. virtio) may not put the packet data at the start + * of the segment, so don't assume b_seg->current_data == 0 is correct. + */ + b_seg->current_data = + (mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data; + + b_seg->current_length = mb_seg->data_len; + b->total_length_not_including_first_buffer += mb_seg->data_len; + + b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT; + b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg); + + b_chain = b_seg; + mb_seg = mb_seg->next; + nb_seg++; + } +} + +static_always_inline void +dpdk_prefetch_buffer (struct rte_mbuf *mb) +{ + vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); + CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE); +} + +/* + * This function is used when there are no worker threads. + * The main thread performs IO and forwards the packets. 
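+ * Returns the number of packets received, which is also used to bump the
+ * per-interface rx counters.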
+ */ +static_always_inline u32 +dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, + vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id) +{ + u32 n_buffers; + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + u32 n_left_to_next, *to_next; + u32 mb_index; + vlib_main_t *vm = vlib_get_main (); + uword n_rx_bytes = 0; + u32 n_trace, trace_cnt __attribute__ ((unused)); + vlib_buffer_free_list_t *fl; + u32 buffer_flags_template; + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) + return 0; + + n_buffers = dpdk_rx_burst (dm, xd, queue_id); + + if (n_buffers == 0) + { + return 0; + } + + buffer_flags_template = dm->buffer_flags_template; + + vec_reset_length (xd->d_trace_buffers[cpu_index]); + trace_cnt = n_trace = vlib_get_trace_count (vm, node); + + if (n_trace > 0) + { + u32 n = clib_min (n_trace, n_buffers); + mb_index = 0; + + while (n--) + { + struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++]; + vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); + vec_add1 (xd->d_trace_buffers[cpu_index], + vlib_get_buffer_index (vm, b)); + } + } + + fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + mb_index = 0; + + while (n_buffers > 0) + { + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 bi0, next0, l3_offset0; + u32 bi1, next1, l3_offset1; + u32 bi2, next2, l3_offset2; + u32 bi3, next3, l3_offset3; + u8 error0, error1, error2, error3; + u64 or_ol_flags; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_buffers > 8 && n_left_to_next > 4) + { + struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; + struct rte_mbuf *mb1 = xd->rx_vectors[queue_id][mb_index + 1]; + struct rte_mbuf *mb2 = xd->rx_vectors[queue_id][mb_index + 2]; + struct rte_mbuf *mb3 = xd->rx_vectors[queue_id][mb_index + 3]; + + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 4]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 5]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 6]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 7]); + + if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) + { + if (PREDICT_FALSE (mb0->nb_segs > 1)) + dpdk_prefetch_buffer (mb0->next); + if (PREDICT_FALSE (mb1->nb_segs > 1)) + dpdk_prefetch_buffer (mb1->next); + if (PREDICT_FALSE (mb2->nb_segs > 1)) + dpdk_prefetch_buffer (mb2->next); + if (PREDICT_FALSE (mb3->nb_segs > 1)) + dpdk_prefetch_buffer (mb3->next); + } + + ASSERT (mb0); + ASSERT (mb1); + ASSERT (mb2); + ASSERT (mb3); + + or_ol_flags = (mb0->ol_flags | mb1->ol_flags | + mb2->ol_flags | mb3->ol_flags); + b0 = vlib_buffer_from_rte_mbuf (mb0); + b1 = vlib_buffer_from_rte_mbuf (mb1); + b2 = vlib_buffer_from_rte_mbuf (mb2); + b3 = vlib_buffer_from_rte_mbuf (mb3); + + vlib_buffer_init_for_free_list (b0, fl); + vlib_buffer_init_for_free_list (b1, fl); + vlib_buffer_init_for_free_list (b2, fl); + vlib_buffer_init_for_free_list (b3, fl); + + bi0 = vlib_get_buffer_index (vm, b0); + bi1 = vlib_get_buffer_index (vm, b1); + bi2 = vlib_get_buffer_index (vm, b2); + bi3 = vlib_get_buffer_index (vm, b3); + + to_next[0] = bi0; + to_next[1] = bi1; + to_next[2] = bi2; + to_next[3] = bi3; + to_next += 4; + n_left_to_next -= 4; + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + { + next0 = next1 = next2 = next3 = xd->per_interface_next_index; + } + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + { + next0 = dpdk_rx_next_from_mb (mb0, b0); + next1 = dpdk_rx_next_from_mb (mb1, b1); + next2 = dpdk_rx_next_from_mb (mb2, b2); + next3 = 
dpdk_rx_next_from_mb (mb3, b3); + } + else + { + next0 = dpdk_rx_next_from_etype (mb0, b0); + next1 = dpdk_rx_next_from_etype (mb1, b1); + next2 = dpdk_rx_next_from_etype (mb2, b2); + next3 = dpdk_rx_next_from_etype (mb3, b3); + } + + if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD)) + { + dpdk_rx_error_from_mb (mb0, &next0, &error0); + dpdk_rx_error_from_mb (mb1, &next1, &error1); + dpdk_rx_error_from_mb (mb2, &next2, &error2); + dpdk_rx_error_from_mb (mb3, &next3, &error3); + b0->error = node->errors[error0]; + b1->error = node->errors[error1]; + b2->error = node->errors[error2]; + b3->error = node->errors[error3]; + } + else + { + b0->error = b1->error = node->errors[DPDK_ERROR_NONE]; + b2->error = b3->error = node->errors[DPDK_ERROR_NONE]; + } + + l3_offset0 = device_input_next_node_advance[next0]; + l3_offset1 = device_input_next_node_advance[next1]; + l3_offset2 = device_input_next_node_advance[next2]; + l3_offset3 = device_input_next_node_advance[next3]; + + b0->current_data = l3_offset0 + mb0->data_off; + b1->current_data = l3_offset1 + mb1->data_off; + b2->current_data = l3_offset2 + mb2->data_off; + b3->current_data = l3_offset3 + mb3->data_off; + + b0->current_data -= RTE_PKTMBUF_HEADROOM; + b1->current_data -= RTE_PKTMBUF_HEADROOM; + b2->current_data -= RTE_PKTMBUF_HEADROOM; + b3->current_data -= RTE_PKTMBUF_HEADROOM; + + b0->current_length = mb0->data_len - l3_offset0; + b1->current_length = mb1->data_len - l3_offset1; + b2->current_length = mb2->data_len - l3_offset2; + b3->current_length = mb3->data_len - l3_offset3; + + b0->flags = buffer_flags_template; + b1->flags = buffer_flags_template; + b2->flags = buffer_flags_template; + b3->flags = buffer_flags_template; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b2)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b3)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b2)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b3)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + n_rx_bytes += mb0->pkt_len; + n_rx_bytes += mb1->pkt_len; + n_rx_bytes += mb2->pkt_len; + n_rx_bytes += mb3->pkt_len; + + /* Process subsequent segments of multi-segment packets */ + if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) + { + dpdk_process_subseq_segs (vm, b0, mb0, fl); + dpdk_process_subseq_segs (vm, b1, mb1, fl); + dpdk_process_subseq_segs (vm, b2, mb2, fl); + dpdk_process_subseq_segs (vm, b3, mb3, fl); + } + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); + + /* Do we have any driver RX features configured on the interface? 
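+	   * If so, next0 .. next3 are rewritten so the buffers enter the
+	   * device-input feature arc before their normal next node.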
*/ + vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index, + &next0, &next1, &next2, &next3, + b0, b1, b2, b3, + l3_offset0, l3_offset1, + l3_offset2, l3_offset3); + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + n_buffers -= 4; + mb_index += 4; + } + while (n_buffers > 0 && n_left_to_next > 0) + { + struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; + + ASSERT (mb0); + + b0 = vlib_buffer_from_rte_mbuf (mb0); + + /* Prefetch one next segment if it exists. */ + if (PREDICT_FALSE (mb0->nb_segs > 1)) + dpdk_prefetch_buffer (mb0->next); + + vlib_buffer_init_for_free_list (b0, fl); + + bi0 = vlib_get_buffer_index (vm, b0); + + to_next[0] = bi0; + to_next++; + n_left_to_next--; + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + next0 = xd->per_interface_next_index; + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + next0 = dpdk_rx_next_from_mb (mb0, b0); + else + next0 = dpdk_rx_next_from_etype (mb0, b0); + + dpdk_rx_error_from_mb (mb0, &next0, &error0); + b0->error = node->errors[error0]; + + l3_offset0 = device_input_next_node_advance[next0]; + + b0->current_data = l3_offset0; + b0->current_data += mb0->data_off - RTE_PKTMBUF_HEADROOM; + b0->current_length = mb0->data_len - l3_offset0; + + b0->flags = buffer_flags_template; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + n_rx_bytes += mb0->pkt_len; + + /* Process subsequent segments of multi-segment packets */ + dpdk_process_subseq_segs (vm, b0, mb0, fl); + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + /* Do we have any driver RX features configured on the interface? */ + vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0, + b0, l3_offset0); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + n_buffers--; + mb_index++; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0)) + { + dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index], + vec_len (xd->d_trace_buffers[cpu_index])); + vlib_set_trace_count (vm, node, n_trace - + vec_len (xd->d_trace_buffers[cpu_index])); + } + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); + + vnet_device_increment_rx_packets (cpu_index, mb_index); + + return mb_index; +} + +static inline void +poll_rate_limit (dpdk_main_t * dm) +{ + /* Limit the poll rate by sleeping for N msec between polls */ + if (PREDICT_FALSE (dm->poll_sleep != 0)) + { + struct timespec ts, tsrem; + + ts.tv_sec = 0; + ts.tv_nsec = 1000 * 1000 * dm->poll_sleep; /* 1ms */ + + while (nanosleep (&ts, &tsrem) < 0) + { + ts = tsrem; + } + } +} + +/** \brief Main DPDK input node + @node dpdk-input + + This is the main DPDK input node: across each assigned interface, + call rte_eth_rx_burst(...) or similar to obtain a vector of + packets to process. Handle early packet discard. Derive @c + vlib_buffer_t metadata from struct rte_mbuf metadata, + Depending on the resulting metadata: adjust b->current_data, + b->current_length and dispatch directly to + ip4-input-no-checksum, or ip6-input. 
Trace the packet if required. + + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param f vlib_frame_t input-node, not used. + + @par Graph mechanics: buffer metadata, next index usage + + @em Uses: + - struct rte_mbuf mb->ol_flags + - PKT_RX_IP_CKSUM_BAD + - RTE_ETH_IS_xxx_HDR(mb->packet_type) + - packet classification result + + @em Sets: + - b->error if the packet is to be dropped immediately + - b->current_data, b->current_length + - adjusted as needed to skip the L2 header in direct-dispatch cases + - vnet_buffer(b)->sw_if_index[VLIB_RX] + - rx interface sw_if_index + - vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0 + - required by ipX-lookup + - b->flags + - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc. + + Next Nodes: + - Static arcs to: error-drop, ethernet-input, + ip4-input-no-checksum, ip6-input, mpls-input + - per-interface redirection, controlled by + xd->per_interface_next_index +*/ + +static uword +dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + uword n_rx_packets = 0; + dpdk_device_and_queue_t *dq; + u32 cpu_index = os_get_cpu_number (); + + /* + * Poll all devices on this cpu for input/interrupts. + */ + /* *INDENT-OFF* */ + vec_foreach (dq, dm->devices_by_cpu[cpu_index]) + { + xd = vec_elt_at_index(dm->devices, dq->device); + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id); + } + /* *INDENT-ON* */ + + poll_rate_limit (dm); + + return n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_input_node) = { + .function = dpdk_input, + .type = VLIB_NODE_TYPE_INPUT, + .name = "dpdk-input", + .sibling_of = "device-input", + + /* Will be enabled if/when hardware is detected. */ + .state = VLIB_NODE_STATE_DISABLED, + + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_dpdk_rx_dma_trace, + + .n_errors = DPDK_N_ERROR, + .error_strings = dpdk_error_strings, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input); +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/dir.dox b/src/plugins/dpdk/dir.dox new file mode 100644 index 00000000..43e36753 --- /dev/null +++ b/src/plugins/dpdk/dir.dox @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Doxygen directory documentation */ + +/** +@dir +@brief DPDK Abstraction Layer. + +This directory contains the source code for the DPDK abstraction layer. + +*/ +/*? %%clicmd:group_label DPDK and pcap tx %% ?*/ +/*? %%syscfg:group_label DPDK and pcap tx %% ?*/ diff --git a/src/plugins/dpdk/hqos/hqos.c b/src/plugins/dpdk/hqos/hqos.c new file mode 100644 index 00000000..a288fca7 --- /dev/null +++ b/src/plugins/dpdk/hqos/hqos.c @@ -0,0 +1,775 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include /* enumerate all vlib messages */ + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +dpdk_main_t dpdk_main; + +/*** + * + * HQoS default configuration values + * + ***/ + +static dpdk_device_config_hqos_t hqos_params_default = { + .hqos_thread_valid = 0, + + .swq_size = 4096, + .burst_enq = 256, + .burst_deq = 220, + + /* + * Packet field to identify the subport. + * + * Default value: Since only one subport is defined by default (see below: + * n_subports_per_port = 1), the subport ID is hardcoded to 0. + */ + .pktfield0_slabpos = 0, + .pktfield0_slabmask = 0, + + /* + * Packet field to identify the pipe. + * + * Default value: Assuming Ethernet/IPv4/UDP packets, UDP payload bits 12 .. 23 + */ + .pktfield1_slabpos = 40, + .pktfield1_slabmask = 0x0000000FFF000000LLU, + + /* Packet field used as index into TC translation table to identify the traffic + * class and queue. 
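+ * (The BITFIELD macro below reads a u64 slab at the given byte offset and
+ * byte-swaps it, so slabpos 8 with mask 0xFC selects the top six bits of
+ * packet byte 15, i.e. the DSCP bits of the IPv4 TOS byte in an untagged
+ * Ethernet frame.)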
+ * + * Default value: Assuming Ethernet/IPv4 packets, IPv4 DSCP field + */ + .pktfield2_slabpos = 8, + .pktfield2_slabmask = 0x00000000000000FCLLU, + .tc_table = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + }, + + /* port */ + .port = { + .name = NULL, /* Set at init */ + .socket = 0, /* Set at init */ + .rate = 1250000000, /* Assuming 10GbE port */ + .mtu = 14 + 1500, /* Assuming Ethernet/IPv4 pkt (Ethernet FCS not included) */ + .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT, + .n_subports_per_port = 1, + .n_pipes_per_subport = 4096, + .qsize = {64, 64, 64, 64}, + .pipe_profiles = NULL, /* Set at config */ + .n_pipe_profiles = 1, + +#ifdef RTE_SCHED_RED + .red_params = { + /* Traffic Class 0 Colors Green / Yellow / Red */ + [0][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [0][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [0][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 1 - Colors Green / Yellow / Red */ + [1][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [1][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [1][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 2 - Colors Green / Yellow / Red */ + [2][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [2][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [2][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 3 - Colors Green / Yellow / Red */ + [3][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [3][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [3][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9} + }, +#endif /* RTE_SCHED_RED */ + }, +}; + +static struct rte_sched_subport_params hqos_subport_params_default = { + .tb_rate = 1250000000, /* 10GbE line rate (measured in bytes/second) */ + .tb_size = 1000000, + .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000}, + .tc_period = 10, +}; + +static struct rte_sched_pipe_params hqos_pipe_params_default = { + .tb_rate = 305175, /* 10GbE line rate divided by 4K pipes */ + .tb_size = 1000000, + .tc_rate = {305175, 305175, 305175, 305175}, + .tc_period = 40, +#ifdef RTE_SCHED_SUBPORT_TC_OV + .tc_ov_weight = 1, +#endif + .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, +}; + +/*** + * + * HQoS configuration + * + ***/ + +int +dpdk_hqos_validate_mask (u64 mask, u32 n) +{ + int count = __builtin_popcountll (mask); + int pos_lead = sizeof (u64) * 8 - __builtin_clzll (mask); + int pos_trail = __builtin_ctzll (mask); + int count_expected = __builtin_popcount (n - 1); + + /* Handle the exceptions */ + if (n == 0) + return -1; /* Error */ + + if ((mask == 0) && (n == 1)) + return 0; /* OK */ + + if (((mask == 0) && (n != 1)) || ((mask != 0) && (n == 1))) + return -2; /* Error */ + + /* Check that mask is contiguous */ + if ((pos_lead - pos_trail) != count) + return -3; /* Error */ + + /* Check that mask contains the expected number of bits set */ + if (count != count_expected) + return -4; /* Error */ + + return 0; /* OK */ +} + +void +dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * + hqos, u32 pipe_profile_id) +{ + memcpy (&hqos->pipe[pipe_profile_id], &hqos_pipe_params_default, + sizeof 
(hqos_pipe_params_default)); +} + +void +dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos) +{ + struct rte_sched_subport_params *subport_params; + struct rte_sched_pipe_params *pipe_params; + u32 *pipe_map; + u32 i; + + memcpy (hqos, &hqos_params_default, sizeof (hqos_params_default)); + + /* pipe */ + vec_add2 (hqos->pipe, pipe_params, hqos->port.n_pipe_profiles); + + for (i = 0; i < vec_len (hqos->pipe); i++) + memcpy (&pipe_params[i], + &hqos_pipe_params_default, sizeof (hqos_pipe_params_default)); + + hqos->port.pipe_profiles = hqos->pipe; + + /* subport */ + vec_add2 (hqos->subport, subport_params, hqos->port.n_subports_per_port); + + for (i = 0; i < vec_len (hqos->subport); i++) + memcpy (&subport_params[i], + &hqos_subport_params_default, + sizeof (hqos_subport_params_default)); + + /* pipe profile */ + vec_add2 (hqos->pipe_map, + pipe_map, + hqos->port.n_subports_per_port * hqos->port.n_pipes_per_subport); + + for (i = 0; i < vec_len (hqos->pipe_map); i++) + pipe_map[i] = 0; +} + +/*** + * + * HQoS init + * + ***/ + +clib_error_t * +dpdk_port_setup_hqos (dpdk_device_t * xd, dpdk_device_config_hqos_t * hqos) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + char name[32]; + u32 subport_id, i; + int rv; + + /* Detect the set of worker threads */ + int worker_thread_first = 0; + int worker_thread_count = 0; + + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + vlib_thread_registration_t *tr = + p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + worker_thread_first = tr->first_index; + worker_thread_count = tr->count; + } + + /* Allocate the per-thread device data array */ + vec_validate_aligned (xd->hqos_wt, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + memset (xd->hqos_wt, 0, tm->n_vlib_mains * sizeof (xd->hqos_wt[0])); + + vec_validate_aligned (xd->hqos_ht, 0, CLIB_CACHE_LINE_BYTES); + memset (xd->hqos_ht, 0, sizeof (xd->hqos_ht[0])); + + /* Allocate space for one SWQ per worker thread in the I/O TX thread data structure */ + vec_validate (xd->hqos_ht->swq, worker_thread_count); + + /* SWQ */ + for (i = 0; i < worker_thread_count + 1; i++) + { + u32 swq_flags = RING_F_SP_ENQ | RING_F_SC_DEQ; + + snprintf (name, sizeof (name), "SWQ-worker%u-to-device%u", i, + xd->device_index); + xd->hqos_ht->swq[i] = + rte_ring_create (name, hqos->swq_size, xd->cpu_socket, swq_flags); + if (xd->hqos_ht->swq[i] == NULL) + return clib_error_return (0, + "SWQ-worker%u-to-device%u: rte_ring_create err", + i, xd->device_index); + } + + /* + * HQoS + */ + + /* HQoS port */ + snprintf (name, sizeof (name), "HQoS%u", xd->device_index); + hqos->port.name = strdup (name); + if (hqos->port.name == NULL) + return clib_error_return (0, "HQoS%u: strdup err", xd->device_index); + + hqos->port.socket = rte_eth_dev_socket_id (xd->device_index); + if (hqos->port.socket == SOCKET_ID_ANY) + hqos->port.socket = 0; + + xd->hqos_ht->hqos = rte_sched_port_config (&hqos->port); + if (xd->hqos_ht->hqos == NULL) + return clib_error_return (0, "HQoS%u: rte_sched_port_config err", + xd->device_index); + + /* HQoS subport */ + for (subport_id = 0; subport_id < hqos->port.n_subports_per_port; + subport_id++) + { + u32 pipe_id; + + rv = + rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, + &hqos->subport[subport_id]); + if (rv) + return clib_error_return (0, + "HQoS%u subport %u: rte_sched_subport_config err (%d)", + xd->device_index, subport_id, rv); + + /* HQoS pipe */ + for (pipe_id = 0; pipe_id < hqos->port.n_pipes_per_subport; pipe_id++) + 
{ + u32 pos = subport_id * hqos->port.n_pipes_per_subport + pipe_id; + u32 profile_id = hqos->pipe_map[pos]; + + rv = + rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, + profile_id); + if (rv) + return clib_error_return (0, + "HQoS%u subport %u pipe %u: rte_sched_pipe_config err (%d)", + xd->device_index, subport_id, pipe_id, + rv); + } + } + + /* Set up per-thread device data for the I/O TX thread */ + xd->hqos_ht->hqos_burst_enq = hqos->burst_enq; + xd->hqos_ht->hqos_burst_deq = hqos->burst_deq; + vec_validate (xd->hqos_ht->pkts_enq, 2 * hqos->burst_enq - 1); + vec_validate (xd->hqos_ht->pkts_deq, hqos->burst_deq - 1); + xd->hqos_ht->pkts_enq_len = 0; + xd->hqos_ht->swq_pos = 0; + xd->hqos_ht->flush_count = 0; + + /* Set up per-thread device data for each worker thread */ + for (i = 0; i < worker_thread_count + 1; i++) + { + u32 tid; + if (i) + tid = worker_thread_first + (i - 1); + else + tid = i; + + xd->hqos_wt[tid].swq = xd->hqos_ht->swq[i]; + xd->hqos_wt[tid].hqos_field0_slabpos = hqos->pktfield0_slabpos; + xd->hqos_wt[tid].hqos_field0_slabmask = hqos->pktfield0_slabmask; + xd->hqos_wt[tid].hqos_field0_slabshr = + __builtin_ctzll (hqos->pktfield0_slabmask); + xd->hqos_wt[tid].hqos_field1_slabpos = hqos->pktfield1_slabpos; + xd->hqos_wt[tid].hqos_field1_slabmask = hqos->pktfield1_slabmask; + xd->hqos_wt[tid].hqos_field1_slabshr = + __builtin_ctzll (hqos->pktfield1_slabmask); + xd->hqos_wt[tid].hqos_field2_slabpos = hqos->pktfield2_slabpos; + xd->hqos_wt[tid].hqos_field2_slabmask = hqos->pktfield2_slabmask; + xd->hqos_wt[tid].hqos_field2_slabshr = + __builtin_ctzll (hqos->pktfield2_slabmask); + memcpy (xd->hqos_wt[tid].hqos_tc_table, hqos->tc_table, + sizeof (hqos->tc_table)); + } + + return 0; +} + +/*** + * + * HQoS run-time + * + ***/ +/* + * dpdk_hqos_thread - Contains the main loop of an HQoS thread. 
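+ * Each iteration drains the per-worker software queues into the HQoS
+ * scheduler port (rte_sched_port_enqueue), then dequeues scheduled packets
+ * and hands them to the NIC TX queue (rte_eth_tx_burst).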
+ * + * w + * Information for the current thread + */ +static_always_inline void +dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + u32 cpu_index = vm->cpu_index; + u32 dev_pos; + + dev_pos = 0; + while (1) + { + vlib_worker_thread_barrier_check (); + + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + if (dev_pos >= n_devs) + dev_pos = 0; + + dpdk_device_and_queue_t *dq = + vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); + + dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; + u32 device_index = xd->device_index; + u16 queue_id = dq->queue_id; + + struct rte_mbuf **pkts_enq = hqos->pkts_enq; + u32 pkts_enq_len = hqos->pkts_enq_len; + u32 swq_pos = hqos->swq_pos; + u32 n_swq = vec_len (hqos->swq), i; + u32 flush_count = hqos->flush_count; + + for (i = 0; i < n_swq; i++) + { + /* Get current SWQ for this device */ + struct rte_ring *swq = hqos->swq[swq_pos]; + + /* Read SWQ burst to packet buffer of this device */ + pkts_enq_len += rte_ring_sc_dequeue_burst (swq, + (void **) + &pkts_enq[pkts_enq_len], + hqos->hqos_burst_enq); + + /* Get next SWQ for this device */ + swq_pos++; + if (swq_pos >= n_swq) + swq_pos = 0; + hqos->swq_pos = swq_pos; + + /* HWQ TX enqueue when burst available */ + if (pkts_enq_len >= hqos->hqos_burst_enq) + { + u32 n_pkts = rte_eth_tx_burst (device_index, + (uint16_t) queue_id, + pkts_enq, + (uint16_t) pkts_enq_len); + + for (; n_pkts < pkts_enq_len; n_pkts++) + rte_pktmbuf_free (pkts_enq[n_pkts]); + + pkts_enq_len = 0; + flush_count = 0; + break; + } + } + if (pkts_enq_len) + { + flush_count++; + if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + } + } + hqos->pkts_enq_len = pkts_enq_len; + hqos->flush_count = flush_count; + + /* Advance to next device */ + dev_pos++; + } +} + +static_always_inline void +dpdk_hqos_thread_internal (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + u32 cpu_index = vm->cpu_index; + u32 dev_pos; + + dev_pos = 0; + while (1) + { + vlib_worker_thread_barrier_check (); + + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + if (PREDICT_FALSE (n_devs == 0)) + { + dev_pos = 0; + continue; + } + if (dev_pos >= n_devs) + dev_pos = 0; + + dpdk_device_and_queue_t *dq = + vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); + + dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; + u32 device_index = xd->device_index; + u16 queue_id = dq->queue_id; + + struct rte_mbuf **pkts_enq = hqos->pkts_enq; + struct rte_mbuf **pkts_deq = hqos->pkts_deq; + u32 pkts_enq_len = hqos->pkts_enq_len; + u32 swq_pos = hqos->swq_pos; + u32 n_swq = vec_len (hqos->swq), i; + u32 flush_count = hqos->flush_count; + + /* + * SWQ dequeue and HQoS enqueue for current device + */ + for (i = 0; i < n_swq; i++) + { + /* Get current SWQ for this device */ + struct rte_ring *swq = hqos->swq[swq_pos]; + + /* Read SWQ burst to packet buffer of this device */ + pkts_enq_len += rte_ring_sc_dequeue_burst (swq, + (void **) + &pkts_enq[pkts_enq_len], + hqos->hqos_burst_enq); + + /* Get next SWQ for this device */ + swq_pos++; + if (swq_pos >= n_swq) + swq_pos = 0; + hqos->swq_pos = swq_pos; + + /* HQoS enqueue when burst available */ + if (pkts_enq_len >= hqos->hqos_burst_enq) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, 
pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + break; + } + } + if (pkts_enq_len) + { + flush_count++; + if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + } + } + hqos->pkts_enq_len = pkts_enq_len; + hqos->flush_count = flush_count; + + /* + * HQoS dequeue and HWQ TX enqueue for current device + */ + { + u32 pkts_deq_len, n_pkts; + + pkts_deq_len = rte_sched_port_dequeue (hqos->hqos, + pkts_deq, + hqos->hqos_burst_deq); + + for (n_pkts = 0; n_pkts < pkts_deq_len;) + n_pkts += rte_eth_tx_burst (device_index, + (uint16_t) queue_id, + &pkts_deq[n_pkts], + (uint16_t) (pkts_deq_len - n_pkts)); + } + + /* Advance to next device */ + dev_pos++; + } +} + +void +dpdk_hqos_thread (vlib_worker_thread_t * w) +{ + vlib_main_t *vm; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + + vm = vlib_get_main (); + + ASSERT (vm->cpu_index == os_get_cpu_number ()); + + clib_time_init (&vm->clib_time); + clib_mem_set_heap (w->thread_mheap); + + /* Wait until the dpdk init sequence is complete */ + while (tm->worker_thread_release == 0) + vlib_worker_thread_barrier_check (); + + if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0) + return + clib_error + ("current I/O TX thread does not have any devices assigned to it"); + + if (DPDK_HQOS_DBG_BYPASS) + dpdk_hqos_thread_internal_hqos_dbg_bypass (vm); + else + dpdk_hqos_thread_internal (vm); +} + +void +dpdk_hqos_thread_fn (void *arg) +{ + vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; + vlib_worker_thread_init (w); + dpdk_hqos_thread (w); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_THREAD (hqos_thread_reg, static) = +{ + .name = "hqos-threads", + .short_name = "hqos-threads", + .function = dpdk_hqos_thread_fn, +}; +/* *INDENT-ON* */ + +/* + * HQoS run-time code to be called by the worker threads + */ +#define BITFIELD(byte_array, slab_pos, slab_mask, slab_shr) \ +({ \ + u64 slab = *((u64 *) &byte_array[slab_pos]); \ + u64 val = (rte_be_to_cpu_64(slab) & slab_mask) >> slab_shr; \ + val; \ +}) + +#define RTE_SCHED_PORT_HIERARCHY(subport, pipe, traffic_class, queue, color) \ + ((((u64) (queue)) & 0x3) | \ + ((((u64) (traffic_class)) & 0x3) << 2) | \ + ((((u64) (color)) & 0x3) << 4) | \ + ((((u64) (subport)) & 0xFFFF) << 16) | \ + ((((u64) (pipe)) & 0xFFFFFFFF) << 32)) + +void +dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, + struct rte_mbuf **pkts, u32 n_pkts) +{ + u32 i; + + for (i = 0; i < (n_pkts & (~0x3)); i += 4) + { + struct rte_mbuf *pkt0 = pkts[i]; + struct rte_mbuf *pkt1 = pkts[i + 1]; + struct rte_mbuf *pkt2 = pkts[i + 2]; + struct rte_mbuf *pkt3 = pkts[i + 3]; + + u8 *pkt0_data = rte_pktmbuf_mtod (pkt0, u8 *); + u8 *pkt1_data = rte_pktmbuf_mtod (pkt1, u8 *); + u8 *pkt2_data = rte_pktmbuf_mtod (pkt2, u8 *); + u8 *pkt3_data = rte_pktmbuf_mtod (pkt3, u8 *); + + u64 pkt0_subport = BITFIELD (pkt0_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt0_pipe = BITFIELD (pkt0_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt0_dscp = BITFIELD (pkt0_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt0_tc = hqos->hqos_tc_table[pkt0_dscp & 0x3F] >> 2; + u32 pkt0_tc_q = hqos->hqos_tc_table[pkt0_dscp & 0x3F] & 0x3; + + u64 pkt1_subport = BITFIELD (pkt1_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + 
hqos->hqos_field0_slabshr);
+      u64 pkt1_pipe = BITFIELD (pkt1_data, hqos->hqos_field1_slabpos,
+                                hqos->hqos_field1_slabmask,
+                                hqos->hqos_field1_slabshr);
+      u64 pkt1_dscp = BITFIELD (pkt1_data, hqos->hqos_field2_slabpos,
+                                hqos->hqos_field2_slabmask,
+                                hqos->hqos_field2_slabshr);
+      u32 pkt1_tc = hqos->hqos_tc_table[pkt1_dscp & 0x3F] >> 2;
+      u32 pkt1_tc_q = hqos->hqos_tc_table[pkt1_dscp & 0x3F] & 0x3;
+
+      u64 pkt2_subport = BITFIELD (pkt2_data, hqos->hqos_field0_slabpos,
+                                   hqos->hqos_field0_slabmask,
+                                   hqos->hqos_field0_slabshr);
+      u64 pkt2_pipe = BITFIELD (pkt2_data, hqos->hqos_field1_slabpos,
+                                hqos->hqos_field1_slabmask,
+                                hqos->hqos_field1_slabshr);
+      u64 pkt2_dscp = BITFIELD (pkt2_data, hqos->hqos_field2_slabpos,
+                                hqos->hqos_field2_slabmask,
+                                hqos->hqos_field2_slabshr);
+      u32 pkt2_tc = hqos->hqos_tc_table[pkt2_dscp & 0x3F] >> 2;
+      u32 pkt2_tc_q = hqos->hqos_tc_table[pkt2_dscp & 0x3F] & 0x3;
+
+      u64 pkt3_subport = BITFIELD (pkt3_data, hqos->hqos_field0_slabpos,
+                                   hqos->hqos_field0_slabmask,
+                                   hqos->hqos_field0_slabshr);
+      u64 pkt3_pipe = BITFIELD (pkt3_data, hqos->hqos_field1_slabpos,
+                                hqos->hqos_field1_slabmask,
+                                hqos->hqos_field1_slabshr);
+      u64 pkt3_dscp = BITFIELD (pkt3_data, hqos->hqos_field2_slabpos,
+                                hqos->hqos_field2_slabmask,
+                                hqos->hqos_field2_slabshr);
+      u32 pkt3_tc = hqos->hqos_tc_table[pkt3_dscp & 0x3F] >> 2;
+      u32 pkt3_tc_q = hqos->hqos_tc_table[pkt3_dscp & 0x3F] & 0x3;
+
+      u64 pkt0_sched = RTE_SCHED_PORT_HIERARCHY (pkt0_subport,
+                                                 pkt0_pipe,
+                                                 pkt0_tc, pkt0_tc_q, 0);
+      u64 pkt1_sched = RTE_SCHED_PORT_HIERARCHY (pkt1_subport,
+                                                 pkt1_pipe,
+                                                 pkt1_tc, pkt1_tc_q, 0);
+      u64 pkt2_sched = RTE_SCHED_PORT_HIERARCHY (pkt2_subport,
+                                                 pkt2_pipe,
+                                                 pkt2_tc, pkt2_tc_q, 0);
+      u64 pkt3_sched = RTE_SCHED_PORT_HIERARCHY (pkt3_subport,
+                                                 pkt3_pipe,
+                                                 pkt3_tc, pkt3_tc_q, 0);
+
+      pkt0->hash.sched.lo = pkt0_sched & 0xFFFFFFFF;
+      pkt0->hash.sched.hi = pkt0_sched >> 32;
+      pkt1->hash.sched.lo = pkt1_sched & 0xFFFFFFFF;
+      pkt1->hash.sched.hi = pkt1_sched >> 32;
+      pkt2->hash.sched.lo = pkt2_sched & 0xFFFFFFFF;
+      pkt2->hash.sched.hi = pkt2_sched >> 32;
+      pkt3->hash.sched.lo = pkt3_sched & 0xFFFFFFFF;
+      pkt3->hash.sched.hi = pkt3_sched >> 32;
+    }
+
+  for (; i < n_pkts; i++)
+    {
+      struct rte_mbuf *pkt = pkts[i];
+
+      u8 *pkt_data = rte_pktmbuf_mtod (pkt, u8 *);
+
+      u64 pkt_subport = BITFIELD (pkt_data, hqos->hqos_field0_slabpos,
+                                  hqos->hqos_field0_slabmask,
+                                  hqos->hqos_field0_slabshr);
+      u64 pkt_pipe = BITFIELD (pkt_data, hqos->hqos_field1_slabpos,
+                               hqos->hqos_field1_slabmask,
+                               hqos->hqos_field1_slabshr);
+      u64 pkt_dscp = BITFIELD (pkt_data, hqos->hqos_field2_slabpos,
+                               hqos->hqos_field2_slabmask,
+                               hqos->hqos_field2_slabshr);
+      u32 pkt_tc = hqos->hqos_tc_table[pkt_dscp & 0x3F] >> 2;
+      u32 pkt_tc_q = hqos->hqos_tc_table[pkt_dscp & 0x3F] & 0x3;
+
+      u64 pkt_sched = RTE_SCHED_PORT_HIERARCHY (pkt_subport,
+                                                pkt_pipe,
+                                                pkt_tc, pkt_tc_q, 0);
+
+      pkt->hash.sched.lo = pkt_sched & 0xFFFFFFFF;
+      pkt->hash.sched.hi = pkt_sched >> 32;
+    }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/hqos/qos_doc.md b/src/plugins/dpdk/hqos/qos_doc.md
new file mode 100644
index 00000000..7c064246
--- /dev/null
+++ b/src/plugins/dpdk/hqos/qos_doc.md
@@ -0,0 +1,411 @@
+# QoS Hierarchical Scheduler {#qos_doc}
+
+The Quality-of-Service (QoS) scheduler performs egress traffic management by
+prioritizing the transmission of packets of different service types and
+subscribers based on Service Level Agreements (SLAs). The QoS scheduler can
+be enabled on one or more NIC output interfaces as required.
+
+
+## Overview
+
+The QoS scheduler supports a number of scheduling and shaping levels which
+construct a hierarchical tree. The first level in the hierarchy is the port
+(i.e. the physical interface), which constitutes the root node of the tree.
+The subsequent level is the subport, which represents a group of
+users/subscribers. The individual user/subscriber is represented by the pipe
+at the next level. Each user can carry different traffic types, distinguished
+by specific loss rate, jitter, and latency requirements. These traffic types
+are represented at the next level as different traffic classes. The last
+level contains a number of queues, which are grouped together to host the
+packets of the specific traffic class.
+
+The QoS scheduler implementation requires flow classification, enqueue and
+dequeue operations. Flow classification is a mandatory stage for HQoS where
+incoming packets are classified by mapping packet field information to a
+5-tuple (HQoS subport, pipe, traffic class, queue within traffic class, and
+color) and storing that information in the mbuf sched field. The enqueue
+operation uses this information to determine the queue for storing the
+packet; at this stage, if the specific queue is full, QoS drops the packet.
+The dequeue operation consists of scheduling the packet based on its length
+and available credits, and handing the scheduled packet over to the output
+interface.
+
+For more information on the QoS scheduler, please refer to the DPDK
+Programmer's Guide:
+http://dpdk.org/doc/guides/prog_guide/qos_framework.html
+
+
+### QoS Scheduler Parameters
+
+The following illustrates the default HQoS configuration for each 10GbE
+output port:
+
+Single subport (subport 0):
+   - Subport rate set to 100% of port rate
+   - Each of the 4 traffic classes has rate set to 100% of port rate
+
+4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration:
+   - Pipe rate set to 1/4K of port rate
+   - Each of the 4 traffic classes has rate set to 100% of pipe rate
+   - Within each traffic class, the byte-level WRR weights for the 4 queues
+     are set to 1:1:1:1
+
+
+#### Port configuration
+
+```
+port {
+  rate 1250000000           /* Assuming 10GbE port */
+  frame_overhead 24         /* Overhead fields per Ethernet frame:
+                             * 7B (Preamble) +
+                             * 1B (Start of Frame Delimiter (SFD)) +
+                             * 4B (Frame Check Sequence (FCS)) +
+                             * 12B (Inter Frame Gap (IFG))
+                             */
+  mtu 1522                  /* Assuming Ethernet/IPv4 pkt (FCS not included) */
+  n_subports_per_port 1     /* Number of subports per output interface */
+  n_pipes_per_subport 4096  /* Number of pipes (users/subscribers) */
+  queue_sizes 64 64 64 64   /* Packet queue size for each traffic class.
+                             * All queues within the same pipe traffic class
+                             * have the same size. Queues from different
+                             * pipes serving the same traffic class have
+                             * the same size.
*/ +} +``` + + +#### Subport configuration + +``` +subport 0 { + tb_rate 1250000000 /* Subport level token bucket rate (bytes per second) */ + tb_size 1000000 /* Subport level token bucket size (bytes) */ + tc0_rate 1250000000 /* Subport level token bucket rate for traffic class 0 (bytes per second) */ + tc1_rate 1250000000 /* Subport level token bucket rate for traffic class 1 (bytes per second) */ + tc2_rate 1250000000 /* Subport level token bucket rate for traffic class 2 (bytes per second) */ + tc3_rate 1250000000 /* Subport level token bucket rate for traffic class 3 (bytes per second) */ + tc_period 10 /* Time interval for refilling the token bucket associated with traffic class (Milliseconds) */ + pipe 0 4095 profile 0 /* pipes (users/subscribers) configured with pipe profile 0 */ +} +``` + + +#### Pipe configuration + +``` +pipe_profile 0 { + tb_rate 305175 /* Pipe level token bucket rate (bytes per second) */ + tb_size 1000000 /* Pipe level token bucket size (bytes) */ + tc0_rate 305175 /* Pipe level token bucket rate for traffic class 0 (bytes per second) */ + tc1_rate 305175 /* Pipe level token bucket rate for traffic class 1 (bytes per second) */ + tc2_rate 305175 /* Pipe level token bucket rate for traffic class 2 (bytes per second) */ + tc3_rate 305175 /* Pipe level token bucket rate for traffic class 3 (bytes per second) */ + tc_period 40 /* Time interval for refilling the token bucket associated with traffic class at pipe level (Milliseconds) */ + tc3_oversubscription_weight 1 /* Weight traffic class 3 oversubscription */ + tc0_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 0 */ + tc1_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 1 */ + tc2_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 2 */ + tc3_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 3 */ +} +``` + + +#### Random Early Detection (RED) parameters per traffic class and color (Green / Yellow / Red) + +``` +red { + tc0_wred_min 48 40 32 /* Minimum threshold for traffic class 0 queue (min_th) in number of packets */ + tc0_wred_max 64 64 64 /* Maximum threshold for traffic class 0 queue (max_th) in number of packets */ + tc0_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 0 queue (maxp = 1 / maxp_inv) */ + tc0_wred_weight 9 9 9 /* Traffic Class 0 queue weight */ + tc1_wred_min 48 40 32 /* Minimum threshold for traffic class 1 queue (min_th) in number of packets */ + tc1_wred_max 64 64 64 /* Maximum threshold for traffic class 1 queue (max_th) in number of packets */ + tc1_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 1 queue (maxp = 1 / maxp_inv) */ + tc1_wred_weight 9 9 9 /* Traffic Class 1 queue weight */ + tc2_wred_min 48 40 32 /* Minimum threshold for traffic class 2 queue (min_th) in number of packets */ + tc2_wred_max 64 64 64 /* Maximum threshold for traffic class 2 queue (max_th) in number of packets */ + tc2_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 2 queue (maxp = 1 / maxp_inv) */ + tc2_wred_weight 9 9 9 /* Traffic Class 2 queue weight */ + tc3_wred_min 48 40 32 /* Minimum threshold for traffic class 3 queue (min_th) in number of packets */ + tc3_wred_max 64 64 64 /* Maximum threshold for traffic class 3 queue (max_th) in number of packets */ + tc3_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 3 queue (maxp = 1 / maxp_inv) */ + tc3_wred_weight 9 9 9 /* Traffic Class 3 queue weight */ +} +``` + + 
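+The default pipe rate above follows from dividing the port rate evenly
+across the 4096 pipes of the single default subport ("Pipe rate set to
+1/4K of port rate"). A minimal standalone C sketch of that arithmetic
+(illustrative only, not part of the VPP sources):
+
+```
+#include <stdint.h>
+#include <stdio.h>
+
+int
+main (void)
+{
+  uint64_t port_rate = 1250000000;  /* 10GbE port rate, bytes/second */
+  uint32_t n_pipes = 4096;          /* n_pipes_per_subport */
+
+  /* Integer division yields 305175, the default pipe tb_rate and
+   * per-traffic-class rate shown in the pipe profile above. */
+  printf ("pipe rate = %llu bytes/second\n",
+          (unsigned long long) (port_rate / n_pipes));
+  return 0;
+}
+```
+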
+### DPDK QoS Scheduler Integration in VPP
+
+The Hierarchical Quality-of-Service (HQoS) scheduler object can be seen as
+part of the logical NIC output interface. To enable HQoS on a specific output
+interface, the VPP startup.conf file has to be configured accordingly. The
+output interface that requires HQoS should have the "hqos" parameter
+specified in the dpdk section. Another optional parameter, "hqos-thread", can
+be used to associate the output interface with a specific HQoS thread. In the
+cpu section of the configuration file, "corelist-hqos-threads" is introduced
+to assign logical CPU cores to run the HQoS threads. An HQoS thread can run
+multiple HQoS objects, each associated with a different output interface.
+Instead of writing packets directly to the NIC TX queue, worker threads write
+them to software queues. The HQoS threads read the software queues and
+enqueue the packets to HQoS objects, as well as dequeue packets from HQoS
+objects and write them to the NIC output interfaces. The worker threads need
+to be able to send packets to any output interface; therefore, each HQoS
+object associated with a NIC output interface should have as many software
+queues as there are worker threads.
+
+The following sample startup configuration has 4 worker threads feeding 2
+HQoS threads, each HQoS thread handling the QoS scheduler of one output
+interface.
+
+```
+dpdk {
+  socket-mem 16384,16384
+
+  dev 0000:02:00.0 {
+    num-rx-queues 2
+    hqos
+  }
+  dev 0000:06:00.0 {
+    num-rx-queues 2
+    hqos
+  }
+
+  num-mbufs 1000000
+}
+
+cpu {
+  main-core 0
+  corelist-workers 1, 2, 3, 4
+  corelist-hqos-threads 5, 6
+}
+```
+
+
+### QoS scheduler CLI Commands
+
+Each QoS scheduler instance is initialised with the default parameters
+required to configure the HQoS port, subport, pipe and queues. Some of the
+parameters can be reconfigured at run time through CLI commands.
+
+
+#### Configuration
+
+The following commands can be used to configure QoS scheduler parameters.
+
+The command below can be used to set the subport level parameters such as
+token bucket rate (bytes per second), token bucket size (bytes), traffic
+class rates (bytes per second) and token update period (milliseconds).
+
+```
+set dpdk interface hqos subport subport [rate ]
+ [bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ]
+```
+
+For setting the pipe profile, the following command can be used.
+
+```
+set dpdk interface hqos pipe subport pipe
+ profile
+```
+
+To assign a QoS scheduler instance to a specific thread, the following
+command can be used.
+
+```
+set dpdk interface hqos placement thread
+```
+
+The command below is used to set the packet fields required for classifying
+the incoming packet. As a result of the classification process, packet field
+information will be mapped to a 5-tuple (subport, pipe, traffic class, queue,
+color) and stored in the packet mbuf.
+
+```
+set dpdk interface hqos pktfield id subport|pipe|tc offset
+ mask
+```
+
+The DSCP table entries used for identifying the traffic class and queue can
+be set using the command below:
+
+```
+set dpdk interface hqos tctbl entry tc queue
+```
+
+
+#### Show Command
+
+The QoS Scheduler configuration can be displayed using the command below.
+
+```
+ vpp# show dpdk interface hqos TenGigabitEthernet2/0/0
+ Thread:
+   Input SWQ size = 4096 packets
+   Enqueue burst size = 256 packets
+   Dequeue burst size = 220 packets
+   Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport)
+   Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe)
+   Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc)
+   Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)
+     [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+     [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+     [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+     [48 .. 63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+ Port:
+   Rate = 1250000000 bytes/second
+   MTU = 1514 bytes
+   Frame overhead = 24 bytes
+   Number of subports = 1
+   Number of pipes per subport = 4096
+   Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets
+   Number of pipe profiles = 1
+ Subport 0:
+   Rate = 120000000 bytes/second
+   Token bucket size = 1000000 bytes
+   Traffic class rate: TC0 = 120000000, TC1 = 120000000, TC2 = 120000000, TC3 = 120000000 bytes/second
+   TC period = 10 milliseconds
+ Pipe profile 0:
+   Rate = 305175 bytes/second
+   Token bucket size = 1000000 bytes
+   Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second
+   TC period = 40 milliseconds
+   TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+   TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+   TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+   TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+```
+
+The QoS Scheduler placement over the logical CPU cores can be displayed using
+the command below.
+
+```
+ vpp# show dpdk interface hqos placement
+ Thread 5 (vpp_hqos-threads_0 at lcore 5):
+   TenGigabitEthernet2/0/0 queue 0
+ Thread 6 (vpp_hqos-threads_1 at lcore 6):
+   TenGigabitEthernet4/0/1 queue 0
+```
+
+
+### QoS Scheduler Binary APIs
+
+This section explains the available binary APIs for configuring QoS scheduler
+parameters at run time.
+
+The following API can be used to set the pipe profile of a pipe that belongs
+to a given subport:
+
+```
+sw_interface_set_dpdk_hqos_pipe rx | sw_if_index
+ subport pipe profile
+```
+
+The data structures used to set the pipe profile parameters are as follows:
+
+```
+  /** \\brief DPDK interface HQoS pipe profile set request
+      @param client_index - opaque cookie to identify the sender
+      @param context - sender context, to match reply w/ request
+      @param sw_if_index - the interface
+      @param subport - subport ID
+      @param pipe - pipe ID within its subport
+      @param profile - pipe profile ID
+  */
+  define sw_interface_set_dpdk_hqos_pipe {
+    u32 client_index;
+    u32 context;
+    u32 sw_if_index;
+    u32 subport;
+    u32 pipe;
+    u32 profile;
+  };
+
+  /** \\brief DPDK interface HQoS pipe profile set reply
+      @param context - sender context, to match reply w/ request
+      @param retval - request return code
+  */
+  define sw_interface_set_dpdk_hqos_pipe_reply {
+    u32 context;
+    i32 retval;
+  };
+```
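+
+For example, from a vpp_api_test session, the request to assign pipe 2 of
+subport 0 on an interface to pipe profile 0 might look as follows (the
+sw_if_index value 5 is purely illustrative):
+
+```
+sw_interface_set_dpdk_hqos_pipe sw_if_index 5 subport 0 pipe 2 profile 0
+```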
+The following API can be used to set the subport level parameters, for
+example token bucket rate (bytes per second), token bucket size (bytes),
+traffic class rates (bytes per second) and token update period:
+
+```
+sw_interface_set_dpdk_hqos_subport rx <intfc> | sw_if_index <id>
+    subport <subport-id> [rate <n>] [bktsize <n>]
+    [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]
+```
+
+The data structures used to set the subport level parameters are as
+follows:
+
+```
+  /** \\brief DPDK interface HQoS subport parameters set request
+      @param client_index - opaque cookie to identify the sender
+      @param context - sender context, to match reply w/ request
+      @param sw_if_index - the interface
+      @param subport - subport ID
+      @param tb_rate - subport token bucket rate (measured in bytes/second)
+      @param tb_size - subport token bucket size (measured in credits)
+      @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second)
+      @param tc_period - enforcement period for rates (measured in milliseconds)
+  */
+  define sw_interface_set_dpdk_hqos_subport {
+    u32 client_index;
+    u32 context;
+    u32 sw_if_index;
+    u32 subport;
+    u32 tb_rate;
+    u32 tb_size;
+    u32 tc_rate[4];
+    u32 tc_period;
+  };
+
+  /** \\brief DPDK interface HQoS subport parameters set reply
+      @param context - sender context, to match reply w/ request
+      @param retval - request return code
+  */
+  define sw_interface_set_dpdk_hqos_subport_reply {
+    u32 context;
+    i32 retval;
+  };
+```
+
+The following API can be used to set a DSCP table entry. The DSCP table
+has 64 entries that map the packet DSCP field onto a traffic class and
+HQoS input queue.
+
+```
+sw_interface_set_dpdk_hqos_tctbl rx <intfc> | sw_if_index <id>
+    entry <map-val> tc <tc-id> queue <queue-id>
+```
+
+The data structures used for setting DSCP table entries are given below.
+
+```
+  /** \\brief DPDK interface HQoS tctbl entry set request
+      @param client_index - opaque cookie to identify the sender
+      @param context - sender context, to match reply w/ request
+      @param sw_if_index - the interface
+      @param entry - entry index ID
+      @param tc - traffic class (0 .. 3)
+      @param queue - traffic class queue (0 .. 3)
+  */
+  define sw_interface_set_dpdk_hqos_tctbl {
+    u32 client_index;
+    u32 context;
+    u32 sw_if_index;
+    u32 entry;
+    u32 tc;
+    u32 queue;
+  };
+
+  /** \\brief DPDK interface HQoS tctbl entry set reply
+      @param context - sender context, to match reply w/ request
+      @param retval - request return code
+  */
+  define sw_interface_set_dpdk_hqos_tctbl_reply {
+    u32 context;
+    i32 retval;
+  };
+```
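+For reference, the handler in the DPDK plugin (shown later in this patch)
+flattens each (tc, queue) pair into a single table value. A minimal sketch
+of that mapping, with a hypothetical helper name:
+
+```
+  /* tc and queue are both in 0..3; RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS
+     (from rte_sched.h) is 4, so each DSCP entry stores tc * 4 + queue. */
+  static inline u32
+  hqos_tctbl_val (u32 tc, u32 queue)
+  {
+    return tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
+  }
+```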
diff --git a/src/plugins/dpdk/init.c b/src/plugins/dpdk/init.c
new file mode 100755
index 00000000..e009ef3e
--- /dev/null
+++ b/src/plugins/dpdk/init.c
@@ -0,0 +1,2074 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/bitmap.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <dpdk/device/dpdk.h>
+#include <vlib/unix/physmem.h>
+#include <vlib/pci/pci.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include <dpdk/device/dpdk_priv.h>
+
+dpdk_main_t dpdk_main;
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <dpdk/api/dpdk_msg_enum.h>
+
+#define vl_typedefs		/* define message structures */
+#include <dpdk/api/dpdk_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun		/* define message structures */
+#include <dpdk/api/dpdk_all_api_h.h>
+#undef vl_endianfun
+
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <dpdk/api/dpdk_all_api_h.h>
+#undef vl_api_version
+
+/* Macro to finish up custom dump fns */
+#define FINISH                                  \
+    vec_add1 (s, 0);                            \
+    vl_print (handle, (char *)s);               \
+    vec_free (s);                               \
+    return handle;
+
+#include <vlibapi/api_helper_macros.h>
+
+static void
+  vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler
+  (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp)
+{
+  vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp;
+  int rv = 0;
+
+  dpdk_main_t *dm = &dpdk_main;
+  dpdk_device_t *xd;
+
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+  u32 subport = ntohl (mp->subport);
+  u32 pipe = ntohl (mp->pipe);
+  u32 profile = ntohl (mp->profile);
+  vnet_hw_interface_t *hw;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  /* hw_if & dpdk device */
+  hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index);
+
+  xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+  rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile);
+
+  BAD_SW_IF_INDEX_LABEL;
+
+  REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY);
+}
+
+static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print
+  (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe ");
+
+  s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index));
+
+  s = format (s, "subport %u pipe %u profile %u ",
+              ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile));
+
+  FINISH;
+}
+
+static void
+  vl_api_sw_interface_set_dpdk_hqos_subport_t_handler
+  (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp)
+{
+  vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp;
+  int rv = 0;
+
+  dpdk_main_t *dm = &dpdk_main;
+  dpdk_device_t *xd;
+  struct rte_sched_subport_params p;
+
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+  u32 subport = ntohl (mp->subport);
+  p.tb_rate = ntohl (mp->tb_rate);
+  p.tb_size = ntohl (mp->tb_size);
+  p.tc_rate[0] = ntohl (mp->tc_rate[0]);
+  p.tc_rate[1] = ntohl (mp->tc_rate[1]);
+  p.tc_rate[2] = ntohl (mp->tc_rate[2]);
+  p.tc_rate[3] = ntohl (mp->tc_rate[3]);
+  p.tc_period = ntohl (mp->tc_period);
+
+  vnet_hw_interface_t *hw;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  /* hw_if & dpdk device */
+  hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index);
+
+  xd = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+  rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p);
+
+  BAD_SW_IF_INDEX_LABEL;
+
+  REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY);
+}
+
+static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print
+  (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport ");
+
+  s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index));
+
+  s =
+    format (s,
+            "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u",
+            ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size),
+            ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]),
+            ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]),
+            ntohl (mp->tc_period));
+
+  FINISH;
+}
+
+static void
+  vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler
+  (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp)
+{
+  vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp;
+  int rv = 0;
+
+  dpdk_main_t *dm = &dpdk_main;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  dpdk_device_t *xd;
+
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+  u32 entry = ntohl (mp->entry);
+  u32 tc = ntohl (mp->tc);
+  u32 queue = ntohl (mp->queue);
+  u32 val, i;
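+  /* Below: validate tc and queue (both 0..3), then program every worker
+     thread's DSCP translation table with
+     entry -> tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue. */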
+ + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + { + clib_warning ("invalid traffic class !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + { + clib_warning ("invalid queue !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + + if (p == 0) + { + clib_warning ("worker thread registration AWOL !!"); + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto done; + } + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + for (i = 0; i < worker_thread_count; i++) + xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; + + BAD_SW_IF_INDEX_LABEL; +done: + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = format (s, "entry %u tc %u queue %u", + ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue)); + + FINISH; +} + +#define foreach_dpdk_plugin_api_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) + +/* Set up the API message handling tables */ +static clib_error_t * +dpdk_plugin_api_hookup (vlib_main_t * vm) +{ + dpdk_main_t *dm __attribute__ ((unused)) = &dpdk_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_dpdk_plugin_api_msg; +#undef _ + return 0; +} + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (dpdk_main_t * dm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + dm->msg_id_base); + foreach_vl_msg_name_crc_dpdk; +#undef _ +} + +// TODO +/* +static void plugin_custom_dump_configure (dpdk_main_t * dm) +{ +#define _(n,f) dm->api_main->msg_print_handlers \ + [VL_API_##n + dm->msg_id_base] \ + = (void *) vl_api_##f##_t_print; + foreach_dpdk_plugin_api_msg; +#undef _ +} +*/ +/* force linker to link functions used by vlib and declared weak */ +void *vlib_weakly_linked_functions[] = { + &rte_pktmbuf_init, + &rte_pktmbuf_pool_init, +}; + +#define LINK_STATE_ELOGS 0 + +#define DEFAULT_HUGE_DIR "/run/vpp/hugepages" +#define VPP_RUN_DIR "/run/vpp" + +/* Port configuration, mildly modified Intel app values */ + +static struct rte_eth_conf port_conf_template = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +clib_error_t * 
+dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) +{ + int rv; + int j; + + ASSERT (os_get_cpu_number () == 0); + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); + rte_eth_dev_stop (xd->device_index); + } + + rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, + xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + /* Set up one TX-queue per worker thread */ + for (j = 0; j < xd->tx_q_used; j++) + { + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + xd->cpu_socket, &xd->tx_conf); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + SOCKET_ID_ANY, &xd->tx_conf); + if (rv < 0) + break; + } + + if (rv < 0) + return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", + xd->device_index, rv); + + for (j = 0; j < xd->rx_q_used; j++) + { + + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + xd->cpu_socket, 0, + dm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + SOCKET_ID_ANY, 0, + dm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); + if (rv < 0) + return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", + xd->device_index, rv); + } + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + int rv; + rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + if (rv < 0) + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } + return 0; +} + +static u32 +dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + u32 old = 0; + + if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags)) + { + old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0; + + if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) + xd->flags |= DPDK_DEVICE_FLAG_PROMISC; + else + xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC; + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) + rte_eth_promiscuous_enable (xd->device_index); + else + rte_eth_promiscuous_disable (xd->device_index); + } + } + else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) + { + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_ENIC) + { + struct rte_eth_dev_info dev_info; + + /* + * Restore mtu to what has been set by CIMC in the firmware cfg. 
+ */ + rte_eth_dev_info_get (xd->device_index, &dev_info); + hi->max_packet_bytes = dev_info.max_rx_pktlen; + + vlib_cli_output (vlib_get_main (), + "Cisco VIC mtu can only be changed " + "using CIMC then rebooting the server!"); + } + else + { + int rv; + + xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + rte_eth_dev_stop (xd->device_index); + + rv = rte_eth_dev_configure + (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + vlib_cli_output (vlib_get_main (), + "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + int rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + if (rv < 0) + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } + } + } + return old; +} + +void +dpdk_device_lock_init (dpdk_device_t * xd) +{ + int q; + vec_validate (xd->lockp, xd->tx_q_used - 1); + for (q = 0; q < xd->tx_q_used; q++) + { + xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); + } +} + +void +dpdk_device_lock_free (dpdk_device_t * xd) +{ + int q; + + for (q = 0; q < vec_len (xd->lockp); q++) + clib_mem_free ((void *) xd->lockp[q]); + vec_free (xd->lockp); + xd->lockp = 0; +} + +static clib_error_t * +dpdk_lib_init (dpdk_main_t * dm) +{ + u32 nports; + u32 nb_desc = 0; + int i; + clib_error_t *error; + vlib_main_t *vm = vlib_get_main (); + vlib_thread_main_t *tm = vlib_get_thread_main (); + vnet_sw_interface_t *sw; + vnet_hw_interface_t *hi; + dpdk_device_t *xd; + vlib_pci_addr_t last_pci_addr; + u32 last_pci_addr_port = 0; + vlib_thread_registration_t *tr, *tr_hqos; + uword *p, *p_hqos; + + u32 next_cpu = 0, next_hqos_cpu = 0; + u8 af_packet_port_id = 0; + last_pci_addr.as_u32 = ~0; + + dm->input_cpu_first_index = 0; + dm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + dm->input_cpu_first_index = tr->first_index; + dm->input_cpu_count = tr->count; + } + + vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + dm->hqos_cpu_first_index = 0; + dm->hqos_cpu_count = 0; + + /* find out which cpus will be used for I/O TX */ + p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads"); + tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0; + + if (tr_hqos && tr_hqos->count > 0) + { + dm->hqos_cpu_first_index = tr_hqos->first_index; + dm->hqos_cpu_count = tr_hqos->count; + } + + vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + nports = rte_eth_dev_count (); + if (nports < 1) + { + clib_warning ("DPDK drivers found no ports..."); + } + + if (CLIB_DEBUG > 0) + clib_warning ("DPDK drivers found %d ports...", nports); + + /* + * All buffers are all allocated from the same rte_mempool. + * Thus they all have the same number of data bytes. 
+ */ + dm->vlib_buffer_free_list_index = + vlib_buffer_get_or_create_free_list (vm, + VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, + "dpdk rx"); + + if (dm->conf->enable_tcp_udp_checksum) + dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT + | IP_BUFFER_L4_CHECKSUM_COMPUTED); + + for (i = 0; i < nports; i++) + { + u8 addr[6]; + u8 vlan_strip = 0; + int j; + struct rte_eth_dev_info dev_info; + clib_error_t *rv; + struct rte_eth_link l; + dpdk_device_config_t *devconf = 0; + vlib_pci_addr_t pci_addr; + uword *p = 0; + + rte_eth_dev_info_get (i, &dev_info); + if (dev_info.pci_dev) /* bonded interface has no pci info */ + { + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + p = + hash_get (dm->conf->device_config_index_by_pci_addr, + pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + /* Create vnet interface */ + vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); + xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; + xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; + xd->cpu_socket = (i8) rte_eth_dev_socket_id (i); + + /* Handle interface naming for devices with multiple ports sharing same PCI ID */ + if (dev_info.pci_dev) + { + struct rte_eth_dev_info di = { 0 }; + rte_eth_dev_info_get (i + 1, &di); + if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 && + memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr, + sizeof (struct rte_pci_addr)) == 0) + { + xd->interface_name_suffix = format (0, "0"); + last_pci_addr.as_u32 = pci_addr.as_u32; + last_pci_addr_port = i; + } + else if (pci_addr.as_u32 == last_pci_addr.as_u32) + { + xd->interface_name_suffix = + format (0, "%u", i - last_pci_addr_port); + } + else + { + last_pci_addr.as_u32 = ~0; + } + } + else + last_pci_addr.as_u32 = ~0; + + clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, + sizeof (struct rte_eth_txconf)); + if (dm->conf->no_multi_seg) + { + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 0; + } + else + { + xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 1; + xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG; + } + + clib_memcpy (&xd->port_conf, &port_conf_template, + sizeof (struct rte_eth_conf)); + + xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains); + + if (devconf->num_tx_queues > 0 + && devconf->num_tx_queues < xd->tx_q_used) + xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues); + + if (devconf->num_rx_queues > 1 && dm->use_rss == 0) + { + dm->use_rss = 1; + } + + if (devconf->num_rx_queues > 1 + && dev_info.max_rx_queues >= devconf->num_rx_queues) + { + xd->rx_q_used = devconf->num_rx_queues; + xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + if (devconf->rss_fn == 0) + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = + ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; + else + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; + } + else + xd->rx_q_used = 1; + + xd->flags |= DPDK_DEVICE_FLAG_PMD; + + /* workaround for drivers not setting driver_name */ + if ((!dev_info.driver_name) && (dev_info.pci_dev)) + dev_info.driver_name = dev_info.pci_dev->driver->driver.name; + + ASSERT (dev_info.driver_name); + + if (!xd->pmd) + { + + +#define _(s,f) else if (dev_info.driver_name && \ + !strcmp(dev_info.driver_name, s)) \ + xd->pmd = VNET_DPDK_PMD_##f; + if (0) + ; + 
foreach_dpdk_pmd +#undef _ + else + xd->pmd = VNET_DPDK_PMD_UNKNOWN; + + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; + xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; + + switch (xd->pmd) + { + /* 1G adapters */ + case VNET_DPDK_PMD_E1000EM: + case VNET_DPDK_PMD_IGB: + case VNET_DPDK_PMD_IGBVF: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + break; + + /* 10G adapters */ + case VNET_DPDK_PMD_IXGBE: + case VNET_DPDK_PMD_IXGBEVF: + case VNET_DPDK_PMD_THUNDERX: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case VNET_DPDK_PMD_DPAA2: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + + /* Cisco VIC */ + case VNET_DPDK_PMD_ENIC: + rte_eth_link_get_nowait (i, &l); + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + if (l.link_speed == 40000) + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + else + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + + /* Intel Fortville */ + case VNET_DPDK_PMD_I40E: + case VNET_DPDK_PMD_I40EVF: + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + + switch (dev_info.pci_dev->id.device_id) + { + case I40E_DEV_ID_10G_BASE_T: + case I40E_DEV_ID_SFP_XL710: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case I40E_DEV_ID_QSFP_A: + case I40E_DEV_ID_QSFP_B: + case I40E_DEV_ID_QSFP_C: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case I40E_DEV_ID_VF: + rte_eth_link_get_nowait (i, &l); + xd->port_type = l.link_speed == 10000 ? + VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; + break; + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + + case VNET_DPDK_PMD_CXGBE: + switch (dev_info.pci_dev->id.device_id) + { + case 0x540d: /* T580-CR */ + case 0x5410: /* T580-LP-cr */ + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case 0x5403: /* T540-CR */ + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + + case VNET_DPDK_PMD_MLX5: + { + char *pn_100g[] = { "MCX415A-CCAT", "MCX416A-CCAT", 0 }; + char *pn_40g[] = { "MCX413A-BCAT", "MCX414A-BCAT", + "MCX415A-BCAT", "MCX416A-BCAT", "MCX4131A-BCAT", 0 + }; + char *pn_10g[] = { "MCX4111A-XCAT", "MCX4121A-XCAT", 0 }; + + vlib_pci_device_t *pd = vlib_get_pci_device (&pci_addr); + u8 *pn = 0; + char **c; + int found = 0; + pn = format (0, "%U%c", + format_vlib_pci_vpd, pd->vpd_r, "PN", 0); + + if (!pn) + break; + + c = pn_100g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_100G; + break; + } + c++; + } + + c = pn_40g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + } + c++; + } + + c = pn_10g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + } + c++; + } + + vec_free (pn); + } + + break; + /* Intel Red Rock Canyon */ + case VNET_DPDK_PMD_FM10K: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; + break; + + /* virtio */ + case VNET_DPDK_PMD_VIRTIO: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO; + xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO; + break; + + /* vmxnet3 */ + case VNET_DPDK_PMD_VMXNET3: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + break; + + case VNET_DPDK_PMD_AF_PACKET: + xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET; + 
xd->af_packet_port_id = af_packet_port_id++; + break; + + case VNET_DPDK_PMD_BOND: + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; + break; + + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + + if (devconf->num_rx_desc) + xd->nb_rx_desc = devconf->num_rx_desc; + + if (devconf->num_tx_desc) + xd->nb_tx_desc = devconf->num_tx_desc; + } + + /* + * Ensure default mtu is not > the mtu read from the hardware. + * Otherwise rte_eth_dev_configure() will fail and the port will + * not be available. + */ + if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen) + { + /* + * This device does not support the platforms's max frame + * size. Use it's advertised mru instead. + */ + xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen; + } + else + { + xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES; + + /* + * Some platforms do not account for Ethernet FCS (4 bytes) in + * MTU calculations. To interop with them increase mru but only + * if the device's settings can support it. + */ + if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) && + xd->port_conf.rxmode.hw_strip_crc) + { + /* + * Allow additional 4 bytes (for Ethernet FCS). These bytes are + * stripped by h/w and so will not consume any buffer memory. + */ + xd->port_conf.rxmode.max_rx_pkt_len += 4; + } + } + + if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + clib_memcpy (addr + 2, &rnd, sizeof (rnd)); + addr[0] = 2; + addr[1] = 0xfe; + } + else + rte_eth_macaddr_get (i, (struct ether_addr *) addr); + + if (xd->tx_q_used < tm->n_vlib_mains) + dpdk_device_lock_init (xd); + + xd->device_index = xd - dm->devices; + ASSERT (i == xd->device_index); + xd->per_interface_next_index = ~0; + + /* assign interface to input thread */ + dpdk_device_and_queue_t *dq; + int q; + + if (devconf->workers) + { + int i; + q = 0; + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, devconf->workers, ({ + int cpu = dm->input_cpu_first_index + i; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; + vec_validate(xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q++; + })); + /* *INDENT-ON* */ + } + else + for (q = 0; q < xd->rx_q_used; q++) + { + int cpu = dm->input_cpu_first_index + next_cpu; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; + + /* + * numa node for worker thread handling this queue + * needed for taking buffers from the right mempool + */ + vec_validate (xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore); + + /* + * construct vector of (device,queue) pairs for each worker thread + */ + vec_add2 (dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q; + + next_cpu++; + if (next_cpu == dm->input_cpu_count) + next_cpu = 0; + } + + + if (devconf->hqos_enabled) + { + xd->flags |= DPDK_DEVICE_FLAG_HQOS; + + if (devconf->hqos.hqos_thread_valid) + { + int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; + + if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count) + return clib_error_return (0, "invalid HQoS thread index"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + } + else + { + int cpu = dm->hqos_cpu_first_index + next_hqos_cpu; + + if (dm->hqos_cpu_count == 0) + return 
clib_error_return (0, "no HQoS threads available"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + + next_hqos_cpu++; + if (next_hqos_cpu == dm->hqos_cpu_count) + next_hqos_cpu = 0; + + devconf->hqos.hqos_thread_valid = 1; + devconf->hqos.hqos_thread = cpu; + } + } + + vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < tm->n_vlib_mains; j++) + { + vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc, + sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } + + vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < xd->rx_q_used; j++) + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } + + vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + + rv = dpdk_port_setup (dm, xd); + + if (rv) + return rv; + + if (devconf->hqos_enabled) + { + rv = dpdk_port_setup_hqos (xd, &devconf->hqos); + if (rv) + return rv; + } + + /* count the number of descriptors used for this device */ + nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; + + error = ethernet_register_interface + (dm->vnet_main, dpdk_device_class.index, xd->device_index, + /* ethernet address */ addr, + &xd->vlib_hw_if_index, dpdk_flag_change); + if (error) + return error; + + sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); + xd->vlib_sw_if_index = sw->sw_if_index; + hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); + + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_ENIC) + { + /* + * Initialize mtu to what has been set by CIMC in the firmware cfg. 
+ */ + hi->max_packet_bytes = dev_info.max_rx_pktlen; + if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF) + vlan_strip = 1; /* remove vlan tag from VIC port by default */ + else + clib_warning ("VLAN strip disabled for interface\n"); + } + else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON) + vlan_strip = 1; + + if (vlan_strip) + { + int vlan_off; + vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); + vlan_off |= ETH_VLAN_STRIP_OFFLOAD; + xd->port_conf.rxmode.hw_vlan_strip = vlan_off; + if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0) + clib_warning ("VLAN strip enabled for interface\n"); + else + clib_warning ("VLAN strip cannot be supported by interface\n"); + } + + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = + xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); + + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); + } + + if (nb_desc > dm->conf->num_mbufs) + clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", + dm->conf->num_mbufs, nb_desc); + + return 0; +} + +static void +dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) +{ + vlib_pci_main_t *pm = &pci_main; + clib_error_t *error; + vlib_pci_device_t *d; + u8 *pci_addr = 0; + int num_whitelisted = vec_len (conf->dev_confs); + + /* *INDENT-OFF* */ + pool_foreach (d, pm->pci_devs, ({ + dpdk_device_config_t * devconf = 0; + vec_reset_length (pci_addr); + pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); + + if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO) + continue; + + if (num_whitelisted) + { + uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32); + + if (!p) + continue; + + devconf = pool_elt_at_index (conf->dev_confs, p[0]); + } + + /* virtio */ + if (d->vendor_id == 0x1af4 && d->device_id == 0x1000) + ; + /* vmxnet3 */ + else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0) + ; + /* all Intel devices */ + else if (d->vendor_id == 0x8086) + ; + /* Cisco VIC */ + else if (d->vendor_id == 0x1137 && d->device_id == 0x0043) + ; + /* Chelsio T4/T5 */ + else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000) + ; + else + { + clib_warning ("Unsupported Ethernet PCI device 0x%04x:0x%04x found " + "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id, + pci_addr); + continue; + } + + error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name); + + if (error) + { + if (devconf == 0) + { + pool_get (conf->dev_confs, devconf); + hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32, + devconf - conf->dev_confs); + devconf->pci_addr.as_u32 = d->bus_address.as_u32; + } + devconf->is_blacklisted = 1; + clib_error_report (error); + } + })); + /* *INDENT-ON* */ + vec_free (pci_addr); +} + +static clib_error_t * +dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, + unformat_input_t * input, u8 is_default) +{ + clib_error_t *error = 0; + uword *p; + dpdk_device_config_t *devconf; + unformat_input_t sub_input; + + if (is_default) + { + devconf = &conf->default_devconf; + } + else + { + p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32); + + if (!p) + { + pool_get (conf->dev_confs, devconf); + hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32, + devconf - conf->dev_confs); + } + else + return clib_error_return (0, + "duplicate configuration for PCI address %U", + format_vlib_pci_addr, &pci_addr); + } + + 
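+  /* Record the device identity and HQoS defaults before parsing the
+     optional per-device sub-input below. */
+ 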
devconf->pci_addr.as_u32 = pci_addr.as_u32; + devconf->hqos_enabled = 0; + dpdk_device_config_hqos_default (&devconf->hqos); + + if (!input) + return 0; + + unformat_skip_white_space (input); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues)) + ; + else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues)) + ; + else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc)) + ; + else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc)) + ; + else if (unformat (input, "workers %U", unformat_bitmap_list, + &devconf->workers)) + ; + else + if (unformat + (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input)) + { + error = unformat_rss_fn (&sub_input, &devconf->rss_fn); + if (error) + break; + } + else if (unformat (input, "vlan-strip-offload off")) + devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF; + else if (unformat (input, "vlan-strip-offload on")) + devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON; + else + if (unformat + (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input)) + { + devconf->hqos_enabled = 1; + error = unformat_hqos (&sub_input, &devconf->hqos); + if (error) + break; + } + else if (unformat (input, "hqos")) + { + devconf->hqos_enabled = 1; + } + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + break; + } + } + + if (error) + return error; + + if (devconf->workers && devconf->num_rx_queues == 0) + devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers); + else if (devconf->workers && + clib_bitmap_count_set_bits (devconf->workers) != + devconf->num_rx_queues) + error = + clib_error_return (0, + "%U: number of worker threadds must be " + "equal to number of rx queues", format_vlib_pci_addr, + &pci_addr); + + return error; +} + +static clib_error_t * +dpdk_config (vlib_main_t * vm, unformat_input_t * input) +{ + clib_error_t *error = 0; + dpdk_main_t *dm = &dpdk_main; + dpdk_config_main_t *conf = &dpdk_config_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_config_t *devconf; + vlib_pci_addr_t pci_addr; + unformat_input_t sub_input; + u8 *s, *tmp = 0; + u8 *rte_cmd = 0, *ethname = 0; + u32 log_level; + int ret, i; + int num_whitelisted = 0; + u8 no_pci = 0; + u8 no_huge = 0; + u8 huge_dir = 0; + u8 file_prefix = 0; + u8 *socket_mem = 0; + + conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + /* Prime the pump */ + if (unformat (input, "no-hugetlb")) + { + vec_add1 (conf->eal_init_args, (u8 *) "no-huge"); + no_huge = 1; + } + + else if (unformat (input, "enable-tcp-udp-checksum")) + conf->enable_tcp_udp_checksum = 1; + + else if (unformat (input, "decimal-interface-names")) + conf->interface_name_format_decimal = 1; + + else if (unformat (input, "no-multi-seg")) + conf->no_multi_seg = 1; + + else if (unformat (input, "enable-cryptodev")) + conf->cryptodev = 1; + + else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input, + &sub_input)) + { + error = + dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input, + 1); + + if (error) + return error; + } + else + if (unformat + (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, + unformat_vlib_cli_sub_input, &sub_input)) + { + error = dpdk_device_config (conf, pci_addr, &sub_input, 0); + + if (error) + return error; + + num_whitelisted++; + } + else if (unformat (input, "dev %U", 
unformat_vlib_pci_addr, &pci_addr)) + { + error = dpdk_device_config (conf, pci_addr, 0, 0); + + if (error) + return error; + + num_whitelisted++; + } + else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) + ; + else if (unformat (input, "kni %d", &conf->num_kni)) + ; + else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) + ; + else if (unformat (input, "socket-mem %s", &socket_mem)) + ; + else if (unformat (input, "no-pci")) + { + no_pci = 1; + tmp = format (0, "--no-pci%c", 0); + vec_add1 (conf->eal_init_args, tmp); + } + else if (unformat (input, "poll-sleep %d", &dm->poll_sleep)) + ; + +#define _(a) \ + else if (unformat(input, #a)) \ + { \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + } + foreach_eal_double_hyphen_predicate_arg +#undef _ +#define _(a) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + if (!strncmp(#a, "huge-dir", 8)) \ + huge_dir = 1; \ + else if (!strncmp(#a, "file-prefix", 11)) \ + file_prefix = 1; \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + if (!strncmp(#a, "vdev", 4)) \ + if (strstr((char*)s, "af_packet")) \ + clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \ + vec_add1 (conf->eal_init_args, s); \ + } + foreach_eal_double_hyphen_arg +#undef _ +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (conf->eal_init_args, s); \ + } + foreach_eal_single_hyphen_arg +#undef _ +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (conf->eal_init_args, s); \ + conf->a##_set_manually = 1; \ + } + foreach_eal_single_hyphen_mandatory_arg +#undef _ + else if (unformat (input, "default")) + ; + + else if (unformat_skip_white_space (input)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (!conf->uio_driver_name) + conf->uio_driver_name = format (0, "uio_pci_generic%c", 0); + + /* + * Use 1G huge pages if available. + */ + if (!no_huge && !huge_dir) + { + u32 x, *mem_by_socket = 0; + uword c = 0; + u8 use_1g = 1; + u8 use_2m = 1; + u8 less_than_1g = 1; + int rv; + + umount (DEFAULT_HUGE_DIR); + + /* Process "socket-mem" parameter value */ + if (vec_len (socket_mem)) + { + unformat_input_t in; + unformat_init_vector (&in, socket_mem); + while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT) + { + if (unformat (&in, "%u,", &x)) + ; + else if (unformat (&in, "%u", &x)) + ; + else if (unformat (&in, ",")) + x = 0; + else + break; + + vec_add1 (mem_by_socket, x); + + if (x > 1023) + less_than_1g = 0; + } + /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... 
*/ + unformat_free (&in); + socket_mem = 0; + } + else + { + /* *INDENT-OFF* */ + clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( + { + vec_validate(mem_by_socket, c); + mem_by_socket[c] = 256; /* default per-socket mem */ + } + )); + /* *INDENT-ON* */ + } + + /* check if available enough 1GB pages for each socket */ + /* *INDENT-OFF* */ + clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( + { + int pages_avail, page_size, mem; + + vec_validate(mem_by_socket, c); + mem = mem_by_socket[c]; + + page_size = 1024; + pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); + + if (pages_avail < 0 || page_size * pages_avail < mem) + use_1g = 0; + + page_size = 2; + pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); + + if (pages_avail < 0 || page_size * pages_avail < mem) + use_2m = 0; + })); + /* *INDENT-ON* */ + + if (mem_by_socket == 0) + { + error = clib_error_return (0, "mem_by_socket NULL"); + goto done; + } + _vec_len (mem_by_socket) = c + 1; + + /* regenerate socket_mem string */ + vec_foreach_index (x, mem_by_socket) + socket_mem = format (socket_mem, "%s%u", + socket_mem ? "," : "", mem_by_socket[x]); + socket_mem = format (socket_mem, "%c", 0); + + vec_free (mem_by_socket); + + rv = mkdir (VPP_RUN_DIR, 0755); + if (rv && errno != EEXIST) + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + VPP_RUN_DIR, errno); + goto done; + } + + rv = mkdir (DEFAULT_HUGE_DIR, 0755); + if (rv && errno != EEXIST) + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + DEFAULT_HUGE_DIR, errno); + goto done; + } + + if (use_1g && !(less_than_1g && use_2m)) + { + rv = + mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); + } + else if (use_2m) + { + rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); + } + else + { + return clib_error_return (0, "not enough free huge pages"); + } + + if (rv) + { + error = clib_error_return (0, "mount failed %d", errno); + goto done; + } + + tmp = format (0, "--huge-dir%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); + vec_add1 (conf->eal_init_args, tmp); + if (!file_prefix) + { + tmp = format (0, "--file-prefix%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "vpp%c", 0); + vec_add1 (conf->eal_init_args, tmp); + } + } + + vec_free (rte_cmd); + vec_free (ethname); + + if (error) + return error; + + /* I'll bet that -c and -n must be the first and second args... 
*/ + if (!conf->coremask_set_manually) + { + vlib_thread_registration_t *tr; + uword *coremask = 0; + int i; + + /* main thread core */ + coremask = clib_bitmap_set (coremask, tm->main_lcore, 1); + + for (i = 0; i < vec_len (tm->registrations); i++) + { + tr = tm->registrations[i]; + coremask = clib_bitmap_or (coremask, tr->coremask); + } + + vec_insert (conf->eal_init_args, 2, 1); + conf->eal_init_args[1] = (u8 *) "-c"; + tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); + conf->eal_init_args[2] = tmp; + clib_bitmap_free (coremask); + } + + if (!conf->nchannels_set_manually) + { + vec_insert (conf->eal_init_args, 2, 3); + conf->eal_init_args[3] = (u8 *) "-n"; + tmp = format (0, "%d", conf->nchannels); + conf->eal_init_args[4] = tmp; + } + + if (no_pci == 0 && geteuid () == 0) + dpdk_bind_devices_to_uio (conf); + +#define _(x) \ + if (devconf->x == 0 && conf->default_devconf.x > 0) \ + devconf->x = conf->default_devconf.x ; + + /* *INDENT-OFF* */ + pool_foreach (devconf, conf->dev_confs, ({ + + /* default per-device config items */ + foreach_dpdk_device_config_item + + /* add DPDK EAL whitelist/blacklist entry */ + if (num_whitelisted > 0 && devconf->is_blacklisted == 0) + { + tmp = format (0, "-w%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + else if (num_whitelisted == 0 && devconf->is_blacklisted != 0) + { + tmp = format (0, "-b%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + })); + /* *INDENT-ON* */ + +#undef _ + + /* set master-lcore */ + tmp = format (0, "--master-lcore%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%u%c", tm->main_lcore, 0); + vec_add1 (conf->eal_init_args, tmp); + + /* set socket-mem */ + tmp = format (0, "--socket-mem%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%s%c", socket_mem, 0); + vec_add1 (conf->eal_init_args, tmp); + + /* NULL terminate the "argv" vector, in case of stupidity */ + vec_add1 (conf->eal_init_args, 0); + _vec_len (conf->eal_init_args) -= 1; + + /* Set up DPDK eal and packet mbuf pool early. */ + + log_level = (CLIB_DEBUG > 0) ? 
RTE_LOG_DEBUG : RTE_LOG_NOTICE; + + rte_set_log_level (log_level); + + vm = vlib_get_main (); + + /* make copy of args as rte_eal_init tends to mess up with arg array */ + for (i = 1; i < vec_len (conf->eal_init_args); i++) + conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", + conf->eal_init_args[i]); + + ret = + rte_eal_init (vec_len (conf->eal_init_args), + (char **) conf->eal_init_args); + + /* lazy umount hugepages */ + umount2 (DEFAULT_HUGE_DIR, MNT_DETACH); + + if (ret < 0) + return clib_error_return (0, "rte_eal_init returned %d", ret); + + /* Dump the physical memory layout prior to creating the mbuf_pool */ + fprintf (stdout, "DPDK physical memory layout:\n"); + rte_dump_physmem_layout (stdout); + + /* main thread 1st */ + error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); + if (error) + return error; + + for (i = 0; i < RTE_MAX_LCORE; i++) + { + error = vlib_buffer_pool_create (vm, conf->num_mbufs, + rte_lcore_to_socket_id (i)); + if (error) + return error; + } + +done: + return error; +} + +VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk"); + +void +dpdk_update_link_state (dpdk_device_t * xd, f64 now) +{ + vnet_main_t *vnm = vnet_get_main (); + struct rte_eth_link prev_link = xd->link; + u32 hw_flags = 0; + u8 hw_flags_chg = 0; + + /* only update link state for PMD interfaces */ + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + return; + + xd->time_last_link_update = now ? now : xd->time_last_link_update; + memset (&xd->link, 0, sizeof (xd->link)); + rte_eth_link_get_nowait (xd->device_index, &xd->link); + + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + ELOG_TYPE_DECLARE (e) = + { + .format = + "update-link-state: sw_if_index %d, admin_up %d," + "old link_state %d new link_state %d",.format_args = "i4i1i1i1",}; + + struct + { + u32 sw_if_index; + u8 admin_up; + u8 old_link_state; + u8 new_link_state; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0; + ed->old_link_state = (u8) + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); + ed->new_link_state = (u8) xd->link.link_status; + } + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) && + ((xd->link.link_status != 0) ^ + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) + { + hw_flags_chg = 1; + hw_flags |= (xd->link.link_status ? 
VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + } + + if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) + { + hw_flags_chg = 1; + switch (xd->link.link_duplex) + { + case ETH_LINK_HALF_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; + break; + case ETH_LINK_FULL_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; + break; + default: + break; + } + } + if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) + { + hw_flags_chg = 1; + switch (xd->link.link_speed) + { + case ETH_SPEED_NUM_10M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; + break; + case ETH_SPEED_NUM_100M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; + break; + case ETH_SPEED_NUM_1G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; + break; + case ETH_SPEED_NUM_10G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; + break; + case ETH_SPEED_NUM_40G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; + break; + case 0: + break; + default: + clib_warning ("unknown link speed %d", xd->link.link_speed); + break; + } + } + if (hw_flags_chg) + { + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + + ELOG_TYPE_DECLARE (e) = + { + .format = + "update-link-state: sw_if_index %d, new flags %d",.format_args + = "i4i4",}; + + struct + { + u32 sw_if_index; + u32 flags; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->flags = hw_flags; + } + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); + } +} + +static uword +dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + clib_error_t *error; + vnet_main_t *vnm = vnet_get_main (); + dpdk_main_t *dm = &dpdk_main; + ethernet_main_t *em = ðernet_main; + dpdk_device_t *xd; + vlib_thread_main_t *tm = vlib_get_thread_main (); + int i; + + error = dpdk_lib_init (dm); + + /* + * Turn on the input node if we found some devices to drive + * and we're not running worker threads or i/o threads + */ + + if (error == 0 && vec_len (dm->devices) > 0) + { + if (tm->n_vlib_mains == 1) + vlib_node_set_state (vm, dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + else + for (i = 0; i < tm->n_vlib_mains; i++) + if (vec_len (dm->devices_by_cpu[i]) > 0) + vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + } + + if (error) + clib_error_report (error); + + tm->worker_thread_release = 1; + + f64 now = vlib_time_now (vm); + vec_foreach (xd, dm->devices) + { + dpdk_update_link_state (xd, now); + } + + { + /* + * Extra set up for bond interfaces: + * 1. Setup MACs for bond interfaces and their slave links which was set + * in dpdk_port_setup() but needs to be done again here to take effect. + * 2. Set up info for bond interface related CLI support. 
+ */ + int nports = rte_eth_dev_count (); + if (nports > 0) + { + for (i = 0; i < nports; i++) + { + struct rte_eth_dev_info dev_info; + rte_eth_dev_info_get (i, &dev_info); + if (!dev_info.driver_name) + dev_info.driver_name = dev_info.pci_dev->driver->driver.name; + + ASSERT (dev_info.driver_name); + if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0) + { + u8 addr[6]; + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get (i, slink, 16); + if (nlink > 0) + { + vnet_hw_interface_t *bhi; + ethernet_interface_t *bei; + int rv; + + /* Get MAC of 1st slave link */ + rte_eth_macaddr_get (slink[0], + (struct ether_addr *) addr); + /* Set MAC of bounded interface to that of 1st slave link */ + rv = + rte_eth_bond_mac_address_set (i, + (struct ether_addr *) + addr); + if (rv < 0) + clib_warning ("Failed to set MAC address"); + + /* Populate MAC of bonded interface in VPP hw tables */ + bhi = + vnet_get_hw_interface (vnm, + dm->devices[i].vlib_hw_if_index); + bei = + pool_elt_at_index (em->interfaces, bhi->hw_instance); + clib_memcpy (bhi->hw_address, addr, 6); + clib_memcpy (bei->address, addr, 6); + /* Init l3 packet size allowed on bonded interface */ + bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES; + bhi->max_l3_packet_bytes[VLIB_RX] = + bhi->max_l3_packet_bytes[VLIB_TX] = + ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t); + while (nlink >= 1) + { /* for all slave links */ + int slave = slink[--nlink]; + dpdk_device_t *sdev = &dm->devices[slave]; + vnet_hw_interface_t *shi; + vnet_sw_interface_t *ssi; + /* Add MAC to all slave links except the first one */ + if (nlink) + rte_eth_dev_mac_addr_add (slave, + (struct ether_addr *) + addr, 0); + /* Set slaves bitmap for bonded interface */ + bhi->bond_info = + clib_bitmap_set (bhi->bond_info, + sdev->vlib_hw_if_index, 1); + /* Set slave link flags on slave interface */ + shi = + vnet_get_hw_interface (vnm, sdev->vlib_hw_if_index); + ssi = + vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index); + shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; + ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; + + /* Set l3 packet size allowed as the lowest of slave */ + if (bhi->max_l3_packet_bytes[VLIB_RX] > + shi->max_l3_packet_bytes[VLIB_RX]) + bhi->max_l3_packet_bytes[VLIB_RX] = + bhi->max_l3_packet_bytes[VLIB_TX] = + shi->max_l3_packet_bytes[VLIB_RX]; + + /* Set max packet size allowed as the lowest of slave */ + if (bhi->max_packet_bytes > shi->max_packet_bytes) + bhi->max_packet_bytes = shi->max_packet_bytes; + } + } + } + } + } + } + + while (1) + { + /* + * check each time through the loop in case intervals are changed + */ + f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ? 
+ dm->link_state_poll_interval : dm->stat_poll_interval; + + vlib_process_wait_for_event_or_clock (vm, min_wait); + + if (dm->admin_up_down_in_progress) + /* skip the poll if an admin up down is in progress (on any interface) */ + continue; + + vec_foreach (xd, dm->devices) + { + f64 now = vlib_time_now (vm); + if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) + dpdk_update_counters (xd, now); + if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) + dpdk_update_link_state (xd, now); + + } + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_process_node,static) = { + .function = dpdk_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "dpdk-process", + .process_log2_n_stack_bytes = 17, +}; +/* *INDENT-ON* */ + +int +dpdk_set_stat_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_STATS_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.stat_poll_interval = interval; + + return 0; +} + +int +dpdk_set_link_state_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_LINK_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.link_state_poll_interval = interval; + + return 0; +} + +clib_error_t * +dpdk_init (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_node_t *ei; + clib_error_t *error = 0; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + /* verify that structs are cacheline aligned */ + STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0, + "Cache line marker must be 1st element in dpdk_device_t"); + STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) == + CLIB_CACHE_LINE_BYTES, + "Data in cache line 0 is bigger than cache line size"); + STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, + "Cache line marker must be 1st element in frame_queue_trace_t"); + + u8 *name; + name = format (0, "dpdk_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + dm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + vec_free (name); + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main (); + dm->conf = &dpdk_config_main; + + error = dpdk_plugin_api_hookup (vm); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (dm, &api_main); + +// TODO +// plugin_custom_dump_configure (dm); + + ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); + if (ei == 0) + return clib_error_return (0, "ethernet-input node AWOL"); + + dm->ethernet_input_node_index = ei->index; + + dm->conf->nchannels = 4; + dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; + vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); + + dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword)); + + /* $$$ use n_thread_stacks since it's known-good at this point */ + vec_validate (dm->recycle, tm->n_thread_stacks - 1); + + /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... 
*/ + dm->buffer_flags_template = + (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID + | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); + + dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; + dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; + + /* init CLI */ + if ((error = vlib_call_init_function (vm, dpdk_cli_init))) + return error; + + return error; +} + +VLIB_INIT_FUNCTION (dpdk_init); + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/cli.c b/src/plugins/dpdk/ipsec/cli.c new file mode 100644 index 00000000..40cee39b --- /dev/null +++ b/src/plugins/dpdk/ipsec/cli.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +static void +dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) +{ + dpdk_config_main_t *conf = &dpdk_config_main; + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 i, skip_master; + + if (!conf->cryptodev) + { + vlib_cli_output (vm, "DPDK Cryptodev support is disabled\n"); + return; + } + + if (detail_display) + vlib_cli_output (vm, "worker\t%10s\t%15s\tdir\tdev\tqp\n", + "cipher", "auth"); + else + vlib_cli_output (vm, "worker\tcrypto device id(type)\n"); + + skip_master = vlib_num_workers () > 0; + + for (i = 0; i < tm->n_vlib_mains; i++) + { + uword key, data; + u32 cpu_index = vlib_mains[i]->cpu_index; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + u8 *s = 0; + + if (skip_master) + { + skip_master = 0; + continue; + } + + if (!detail_display) + { + i32 last_cdev = -1; + crypto_qp_data_t *qpd; + + s = format (s, "%u\t", cpu_index); + + /* *INDENT-OFF* */ + vec_foreach (qpd, cwm->qp_data) + { + u32 dev_id = qpd->dev_id; + + if ((u16) last_cdev != dev_id) + { + struct rte_cryptodev_info cdev_info; + + rte_cryptodev_info_get (dev_id, &cdev_info); + + s = format(s, "%u(%s)\t", dev_id, cdev_info.feature_flags & + RTE_CRYPTODEV_FF_HW_ACCELERATED ? "HW" : "SW"); + } + last_cdev = dev_id; + } + /* *INDENT-ON* */ + vlib_cli_output (vm, "%s", s); + } + else + { + char cipher_str[15], auth_str[15]; + struct rte_cryptodev_capabilities cap; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; + /* *INDENT-OFF* */ + hash_foreach (key, data, cwm->algo_qp_map, + ({ + cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER; + cap.sym.cipher.algo = p_key->cipher_algo; + check_algo_is_supported (&cap, cipher_str); + cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AUTH; + cap.sym.auth.algo = p_key->auth_algo; + check_algo_is_supported (&cap, auth_str); + vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n", + vlib_mains[i]->cpu_index, cipher_str, auth_str, + p_key->is_outbound ? 
"out" : "in", + cwm->qp_data[data].dev_id, + cwm->qp_data[data].qp_id); + })); + /* *INDENT-ON* */ + } + } +} + +static clib_error_t * +lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u16 detail = 0; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "verbose")) + detail = 1; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + dpdk_ipsec_show_mapping (vm, detail); + +done: + unformat_free (line_input); + + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (lcore_cryptodev_map, static) = { + .path = "show crypto device mapping", + .short_help = + "show cryptodev device mapping ", + .function = lcore_cryptodev_map_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/crypto_node.c b/src/plugins/dpdk/ipsec/crypto_node.c new file mode 100644 index 00000000..dc3452b2 --- /dev/null +++ b/src/plugins/dpdk/ipsec/crypto_node.c @@ -0,0 +1,215 @@ +/* + *------------------------------------------------------------------ + * crypto_node.c - DPDK Cryptodev input node + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include +#include +#include + +#include +#include +#include + +#define foreach_dpdk_crypto_input_next \ + _(DROP, "error-drop") \ + _(ENCRYPT_POST, "dpdk-esp-encrypt-post") \ + _(DECRYPT_POST, "dpdk-esp-decrypt-post") + +typedef enum +{ +#define _(f,s) DPDK_CRYPTO_INPUT_NEXT_##f, + foreach_dpdk_crypto_input_next +#undef _ + DPDK_CRYPTO_INPUT_N_NEXT, +} dpdk_crypto_input_next_t; + +#define foreach_dpdk_crypto_input_error \ + _(DQ_COPS, "Crypto ops dequeued") \ + _(COP_FAILED, "Crypto op failed") + +typedef enum +{ +#define _(f,s) DPDK_CRYPTO_INPUT_ERROR_##f, + foreach_dpdk_crypto_input_error +#undef _ + DPDK_CRYPTO_INPUT_N_ERROR, +} dpdk_crypto_input_error_t; + +static char *dpdk_crypto_input_error_strings[] = { +#define _(n, s) s, + foreach_dpdk_crypto_input_error +#undef _ +}; + +vlib_node_registration_t dpdk_crypto_input_node; + +typedef struct +{ + u32 cdev; + u32 qp; + u32 status; + u32 sa_idx; + u32 next_index; +} dpdk_crypto_input_trace_t; + +static u8 * +format_dpdk_crypto_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + dpdk_crypto_input_trace_t *t = va_arg (*args, dpdk_crypto_input_trace_t *); + + s = format (s, "dpdk_crypto: cryptodev-id %u queue-pair %u next-index %d", + t->cdev, t->qp, t->next_index); + + s = format (s, " status %u sa-idx %u\n", t->status, t->sa_idx); + + return s; +} + +static_always_inline u32 +dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, + crypto_qp_data_t * qpd) +{ + u32 n_deq, *to_next = 0, next_index, n_cops, def_next_index; + struct rte_crypto_op **cops = qpd->cops; + + if (qpd->inflights == 0) + return 0; + + if (qpd->is_outbound) + def_next_index = DPDK_CRYPTO_INPUT_NEXT_ENCRYPT_POST; + else + def_next_index = DPDK_CRYPTO_INPUT_NEXT_DECRYPT_POST; + + n_cops = rte_cryptodev_dequeue_burst (qpd->dev_id, qpd->qp_id, + cops, VLIB_FRAME_SIZE); + n_deq = n_cops; + next_index = def_next_index; + + qpd->inflights -= n_cops; + ASSERT (qpd->inflights >= 0); + + while (n_cops > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_cops > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + struct rte_crypto_op *cop; + struct rte_crypto_sym_op *sym_cop; + + cop = cops[0]; + cops += 1; + n_cops -= 1; + n_left_to_next -= 1; + + next0 = def_next_index; + + if (PREDICT_FALSE (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) + { + next0 = DPDK_CRYPTO_INPUT_NEXT_DROP; + vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, + DPDK_CRYPTO_INPUT_ERROR_COP_FAILED, + 1); + } + cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; + + sym_cop = (struct rte_crypto_sym_op *) (cop + 1); + b0 = vlib_buffer_from_rte_mbuf (sym_cop->m_src); + bi0 = vlib_get_buffer_index (vm, b0); + + to_next[0] = bi0; + to_next += 1; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_next_frame (vm, node, next0); + dpdk_crypto_input_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->cdev = qpd->dev_id; + tr->qp = qpd->qp_id; + tr->status = cop->status; + tr->next_index = next0; + tr->sa_idx = vnet_buffer (b0)->ipsec.sad_index; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + crypto_free_cop (qpd, qpd->cops, n_deq); + + 
vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, + DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq); + return n_deq; +} + +static uword +dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + crypto_qp_data_t *qpd; + u32 n_deq = 0; + + /* *INDENT-OFF* */ + vec_foreach (qpd, cwm->qp_data) + n_deq += dpdk_crypto_dequeue(vm, node, qpd); + /* *INDENT-ON* */ + + return n_deq; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_crypto_input_node) = +{ + .function = dpdk_crypto_input_fn, + .name = "dpdk-crypto-input", + .format_trace = format_dpdk_crypto_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, + .n_errors = DPDK_CRYPTO_INPUT_N_ERROR, + .error_strings = dpdk_crypto_input_error_strings, + .n_next_nodes = DPDK_CRYPTO_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [DPDK_CRYPTO_INPUT_NEXT_##s] = n, + foreach_dpdk_crypto_input_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_crypto_input_node, dpdk_crypto_input_fn) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/dir.dox b/src/plugins/dpdk/ipsec/dir.dox new file mode 100644 index 00000000..ffebfc4d --- /dev/null +++ b/src/plugins/dpdk/ipsec/dir.dox @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + @dir vnet/vnet/devices/dpdk/ipsec + @brief IPSec ESP encrypt/decrypt using DPDK Cryptodev API +*/ diff --git a/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md b/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md new file mode 100644 index 00000000..fed2fe0e --- /dev/null +++ b/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md @@ -0,0 +1,86 @@ +# VPP IPSec implementation using DPDK Cryptodev API {#dpdk_crypto_ipsec_doc} + +This document is meant to contain all related information about implementation and usability. + + +## VPP IPsec with DPDK Cryptodev + +DPDK Cryptodev is an asynchronous crypto API that supports both Hardware and Software implementations (for more details refer to [DPDK Cryptography Device Library documentation](http://dpdk.org/doc/guides/prog_guide/cryptodev_lib.html)). + +When DPDK support is enabled and there are enough Cryptodev resources for all workers, the node graph is reconfigured by adding and changing default next nodes. + +The following nodes are added: +* dpdk-crypto-input : polling input node, basically dequeuing from crypto devices. +* dpdk-esp-encrypt : internal node. +* dpdk-esp-decrypt : internal node. +* dpdk-esp-encrypt-post : internal node. +* dpdk-esp-decrypt-post : internal node. 
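+
+The default next nodes listed below are re-pointed at plugin init time with vlib_node_add_next(). A minimal sketch of the encrypt side, extracted from this patch's ipsec.c (im points at ipsec_main; error handling omitted):
+
+```c
+/* re-point ipsec-output-ip4 at dpdk-esp-encrypt and remember the new arc */
+vlib_node_t *node, *next_node;
+
+next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt");
+node = vlib_get_node_by_name (vm, (u8 *) "ipsec-output-ip4");
+im->esp_encrypt_node_index = next_node->index;
+im->esp_encrypt_next_index =
+  vlib_node_add_next (vm, node->index, next_node->index);
+```
+
+The decrypt side is wired the same way, from ipsec-input-ip4 to dpdk-esp-decrypt.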
+
+Set new default next nodes:
+* for esp encryption: esp-encrypt -> dpdk-esp-encrypt
+* for esp decryption: esp-decrypt -> dpdk-esp-decrypt
+
+
+### How to enable VPP IPSec with DPDK Cryptodev support
+
+DPDK Cryptodev is supported in DPDK-enabled VPP.
+By default, only HW Cryptodev is supported, but it needs to be explicitly enabled with the following config option:
+
+```
+dpdk {
+  enable-cryptodev
+}
+```
+
+To enable SW Cryptodev support (AESNI-MB-PMD and GCM-PMD), we need the following env option:
+
+    vpp_uses_dpdk_cryptodev_sw=yes
+
+A couple of ways to achieve this:
+* uncomment/add it in the platforms config (i.e. build-data/platforms/vpp.mk)
+* set the option when building vpp (i.e. make vpp_uses_dpdk_cryptodev_sw=yes build-release)
+
+Enabling SW Cryptodev support means that the crypto libraries required by those SW Cryptodev PMDs must be pre-built.
+
+
+### Crypto resources allocation
+
+VPP allocates crypto resources based on a best-effort approach:
+* first allocate Hardware crypto resources, then Software.
+* if there are not enough crypto resources for all workers, the node graph is not modified, and the default OpenSSL-based VPP IPsec implementation is used. The following message is displayed:
+
+      0: dpdk_ipsec_init: not enough cryptodevs for ipsec
+
+
+### Configuration example
+
+To enable DPDK Cryptodev, the user just needs to provide the startup.conf option
+mentioned previously.
+
+Example startup.conf:
+
+```
+dpdk {
+  socket-mem 1024,1024
+  num-mbufs 131072
+  dev 0000:81:00.0
+  dev 0000:81:00.1
+  enable-cryptodev
+  dev 0000:85:01.0
+  dev 0000:85:01.1
+  vdev cryptodev_aesni_mb_pmd,socket_id=1
+  vdev cryptodev_aesni_mb_pmd,socket_id=1
+}
+```
+
+In the above configuration:
+* 0000:85:01.0 and 0000:85:01.1 are crypto BDFs; they require the same driver binding as DPDK Ethernet devices, but they do not support any extra configuration options.
+* Two AESNI-MB Software Cryptodev PMDs are created in NUMA node 1.
+
+For further details refer to the [DPDK Crypto Device Driver documentation](http://dpdk.org/doc/guides/cryptodevs/index.html).
+
+### Operational data
+
+The following CLI command displays the Cryptodev/Worker mapping:
+
+    show crypto device mapping [verbose]
diff --git a/src/plugins/dpdk/ipsec/esp.h b/src/plugins/dpdk/ipsec/esp.h
new file mode 100644
index 00000000..320295b1
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/esp.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#ifndef __DPDK_ESP_H__ +#define __DPDK_ESP_H__ + +#include +#include +#include + +typedef struct +{ + enum rte_crypto_cipher_algorithm algo; + u8 key_len; + u8 iv_len; +} dpdk_esp_crypto_alg_t; + +typedef struct +{ + enum rte_crypto_auth_algorithm algo; + u8 trunc_size; +} dpdk_esp_integ_alg_t; + +typedef struct +{ + dpdk_esp_crypto_alg_t *esp_crypto_algs; + dpdk_esp_integ_alg_t *esp_integ_algs; +} dpdk_esp_main_t; + +dpdk_esp_main_t dpdk_esp_main; + +static_always_inline void +dpdk_esp_init () +{ + dpdk_esp_main_t *em = &dpdk_esp_main; + dpdk_esp_integ_alg_t *i; + dpdk_esp_crypto_alg_t *c; + + vec_validate (em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1); + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 16; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 24; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 32; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_GCM_128]; + c->algo = RTE_CRYPTO_CIPHER_AES_GCM; + c->key_len = 16; + c->iv_len = 8; + + vec_validate (em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1); + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96]; + i->algo = RTE_CRYPTO_AUTH_SHA1_HMAC; + i->trunc_size = 12; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96]; + i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + i->trunc_size = 12; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128]; + i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + i->trunc_size = 16; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192]; + i->algo = RTE_CRYPTO_AUTH_SHA384_HMAC; + i->trunc_size = 24; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256]; + i->algo = RTE_CRYPTO_AUTH_SHA512_HMAC; + i->trunc_size = 32; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_AES_GCM_128]; + i->algo = RTE_CRYPTO_AUTH_AES_GCM; + i->trunc_size = 16; +} + +static_always_inline int +translate_crypto_algo (ipsec_crypto_alg_t crypto_algo, + struct rte_crypto_sym_xform *cipher_xform) +{ + switch (crypto_algo) + { + case IPSEC_CRYPTO_ALG_NONE: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL; + break; + case IPSEC_CRYPTO_ALG_AES_CBC_128: + case IPSEC_CRYPTO_ALG_AES_CBC_192: + case IPSEC_CRYPTO_ALG_AES_CBC_256: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; + break; + case IPSEC_CRYPTO_ALG_AES_GCM_128: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM; + break; + default: + return -1; + } + + cipher_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + + return 0; +} + +static_always_inline int +translate_integ_algo (ipsec_integ_alg_t integ_alg, + struct rte_crypto_sym_xform *auth_xform, int use_esn) +{ + switch (integ_alg) + { + case IPSEC_INTEG_ALG_NONE: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_NULL; + auth_xform->auth.digest_length = 0; + break; + case IPSEC_INTEG_ALG_SHA1_96: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC; + auth_xform->auth.digest_length = 12; + break; + case IPSEC_INTEG_ALG_SHA_256_96: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + auth_xform->auth.digest_length = 12; + break; + case IPSEC_INTEG_ALG_SHA_256_128: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + auth_xform->auth.digest_length = 16; + break; + case IPSEC_INTEG_ALG_SHA_384_192: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA384_HMAC; + auth_xform->auth.digest_length = 24; + break; + case IPSEC_INTEG_ALG_SHA_512_256: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA512_HMAC; + 
auth_xform->auth.digest_length = 32; + break; + case IPSEC_INTEG_ALG_AES_GCM_128: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM; + auth_xform->auth.digest_length = 16; + auth_xform->auth.add_auth_data_length = use_esn ? 12 : 8; + break; + default: + return -1; + } + + auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; + + return 0; +} + +static_always_inline int +create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, + u8 is_outbound) +{ + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + struct rte_crypto_sym_xform cipher_xform = { 0 }; + struct rte_crypto_sym_xform auth_xform = { 0 }; + struct rte_crypto_sym_xform *xfs; + uword key = 0, *data; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; + + if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + sa->crypto_key_len -= 4; + clib_memcpy (&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4); + } + else + { + u32 seed = (u32) clib_cpu_time_now (); + sa->salt = random_u32 (&seed); + } + + cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; + cipher_xform.cipher.key.data = sa->crypto_key; + cipher_xform.cipher.key.length = sa->crypto_key_len; + + auth_xform.type = RTE_CRYPTO_SYM_XFORM_AUTH; + auth_xform.auth.key.data = sa->integ_key; + auth_xform.auth.key.length = sa->integ_key_len; + + if (translate_crypto_algo (sa->crypto_alg, &cipher_xform) < 0) + return -1; + p_key->cipher_algo = cipher_xform.cipher.algo; + + if (translate_integ_algo (sa->integ_alg, &auth_xform, sa->use_esn) < 0) + return -1; + p_key->auth_algo = auth_xform.auth.algo; + + if (is_outbound) + { + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; + cipher_xform.next = &auth_xform; + xfs = &cipher_xform; + } + else + { + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; + auth_xform.next = &cipher_xform; + xfs = &auth_xform; + } + + p_key->is_outbound = is_outbound; + + data = hash_get (cwm->algo_qp_map, key); + if (!data) + return -1; + + sa_sess->sess = + rte_cryptodev_sym_session_create (cwm->qp_data[*data].dev_id, xfs); + + if (!sa_sess->sess) + return -1; + + sa_sess->qp_index = (u8) * data; + + return 0; +} + +#endif /* __DPDK_ESP_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/esp_decrypt.c b/src/plugins/dpdk/ipsec/esp_decrypt.c new file mode 100644 index 00000000..286e03f8 --- /dev/null +++ b/src/plugins/dpdk/ipsec/esp_decrypt.c @@ -0,0 +1,594 @@ +/* + * esp_decrypt.c : IPSec ESP Decrypt node using DPDK Cryptodev + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define foreach_esp_decrypt_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(IP6_INPUT, "ip6-input") + +#define _(v, s) ESP_DECRYPT_NEXT_##v, +typedef enum { + foreach_esp_decrypt_next +#undef _ + ESP_DECRYPT_N_NEXT, +} esp_decrypt_next_t; + +#define foreach_esp_decrypt_error \ + _(RX_PKTS, "ESP pkts received") \ + _(DECRYPTION_FAILED, "ESP decryption failed") \ + _(REPLAY, "SA replayed packet") \ + _(NOT_IP, "Not IP packet (dropped)") \ + _(ENQ_FAIL, "Enqueue failed (buffer full)") \ + _(NO_CRYPTODEV, "Cryptodev not configured") \ + _(BAD_LEN, "Invalid ciphertext length") \ + _(UNSUPPORTED, "Cipher/Auth not supported") + + +typedef enum { +#define _(sym,str) ESP_DECRYPT_ERROR_##sym, + foreach_esp_decrypt_error +#undef _ + ESP_DECRYPT_N_ERROR, +} esp_decrypt_error_t; + +static char * esp_decrypt_error_strings[] = { +#define _(sym,string) string, + foreach_esp_decrypt_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_decrypt_node; + +typedef struct { + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; +} esp_decrypt_trace_t; + +/* packet trace format function */ +static u8 * format_esp_decrypt_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *); + + s = format (s, "esp: crypto %U integrity %U", + format_ipsec_crypto_alg, t->crypto_alg, + format_ipsec_integ_alg, t->integ_alg); + return s; +} + +static uword +dpdk_esp_decrypt_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next, next_index; + ipsec_main_t *im = &ipsec_main; + u32 cpu_index = os_get_cpu_number(); + dpdk_crypto_main_t * dcm = &dpdk_crypto_main; + dpdk_esp_main_t * em = &dpdk_esp_main; + u32 i; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (PREDICT_FALSE(!dcm->workers_main)) + { + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_NO_CRYPTODEV, n_left_from); + vlib_buffer_free(vm, from, n_left_from); + return n_left_from; + } + + crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index); + u32 n_qps = vec_len(cwm->qp_data); + struct rte_crypto_op ** cops_to_enq[n_qps]; + u32 n_cop_qp[n_qps], * bi_to_enq[n_qps]; + + for (i = 0; i < n_qps; i++) + { + bi_to_enq[i] = cwm->qp_data[i].bi; + cops_to_enq[i] = cwm->qp_data[i].cops; + } + + memset(n_cop_qp, 0, n_qps * sizeof(u32)); + + crypto_alloc_cops(); + + next_index = ESP_DECRYPT_NEXT_DROP; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, sa_index0 = ~0, seq, icv_size, iv_size; + vlib_buffer_t * b0; + esp_header_t * esp0; + ipsec_sa_t * sa0; + struct rte_mbuf * mb0 = 0; + const int BLOCK_SIZE = 16; + crypto_sa_session_t * sa_sess; + void * sess; + u16 qp_index; + struct rte_crypto_op * cop = 0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + esp0 = vlib_buffer_get_current (b0); + + sa_index0 = vnet_buffer(b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, sa_index0); + + seq = clib_host_to_net_u32(esp0->seq); + + /* anti-replay check */ + if (sa0->use_anti_replay) + { + int rv = 0; + + if (PREDICT_TRUE(sa0->use_esn)) + rv = 
esp_replay_check_esn(sa0, seq); + else + rv = esp_replay_check(sa0, seq); + + if (PREDICT_FALSE(rv)) + { + clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_REPLAY, 1); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + } + + sa0->total_data_size += b0->current_length; + + if (PREDICT_FALSE(sa0->integ_alg == IPSEC_INTEG_ALG_NONE) || + PREDICT_FALSE(sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE)) + { + clib_warning ("SPI %u : only cipher + auth supported", sa0->spi); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_UNSUPPORTED, 1); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0); + + if (PREDICT_FALSE(!sa_sess->sess)) + { + int ret = create_sym_sess(sa0, sa_sess, 0); + + if (PREDICT_FALSE (ret)) + { + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + } + + sess = sa_sess->sess; + qp_index = sa_sess->qp_index; + + ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); + cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); + ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); + + cops_to_enq[qp_index][0] = cop; + cops_to_enq[qp_index] += 1; + n_cop_qp[qp_index] += 1; + bi_to_enq[qp_index][0] = bi0; + bi_to_enq[qp_index] += 1; + + rte_crypto_op_attach_sym_session(cop, sess); + + icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + + /* Convert vlib buffer to mbuf */ + mb0 = rte_mbuf_from_vlib_buffer(b0); + mb0->data_len = b0->current_length; + mb0->pkt_len = b0->current_length; + mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; + + /* Outer IP header has already been stripped */ + u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) - + iv_size - icv_size; + + if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0)) + { + clib_warning ("payload %u not multiple of %d\n", + payload_len, BLOCK_SIZE); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_BAD_LEN, 1); + vec_add (vec_elt (cwm->qp_data, qp_index).free_cops, &cop, 1); + bi_to_enq[qp_index] -= 1; + cops_to_enq[qp_index] -= 1; + n_cop_qp[qp_index] -= 1; + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1); + + sym_cop->m_src = mb0; + sym_cop->cipher.data.offset = sizeof (esp_header_t) + iv_size; + sym_cop->cipher.data.length = payload_len; + + u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t)); + dpdk_cop_priv_t * priv = (dpdk_cop_priv_t *)(sym_cop + 1); + + if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + dpdk_gcm_cnt_blk *icb = &priv->cb; + icb->salt = sa0->salt; + clib_memcpy(icb->iv, iv, 8); + icb->cnt = clib_host_to_net_u32(1); + sym_cop->cipher.iv.data = (u8 *)icb; + sym_cop->cipher.iv.phys_addr = cop->phys_addr + + (uintptr_t)icb - (uintptr_t)cop; + sym_cop->cipher.iv.length = 16; + + u8 *aad = priv->aad; + clib_memcpy(aad, iv - sizeof(esp_header_t), 8); + sym_cop->auth.aad.data = aad; + sym_cop->auth.aad.phys_addr = cop->phys_addr + + (uintptr_t)aad - (uintptr_t)cop; + if (sa0->use_esn) + { + *((u32*)&aad[8]) = sa0->seq_hi; + sym_cop->auth.aad.length = 12; + } + else + { + sym_cop->auth.aad.length = 8; + } + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, + 
rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.length = icv_size; + + } + else + { + sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(mb0, void*, + sizeof (esp_header_t)); + sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + sizeof (esp_header_t)); + sym_cop->cipher.iv.length = iv_size; + + if (sa0->use_esn) + { + dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1); + u8* payload_end = rte_pktmbuf_mtod_offset( + mb0, u8*, sizeof(esp_header_t) + iv_size + payload_len); + + clib_memcpy (priv->icv, payload_end, icv_size); + *((u32*) payload_end) = sa0->seq_hi; + sym_cop->auth.data.offset = 0; + sym_cop->auth.data.length = sizeof(esp_header_t) + iv_size + + payload_len + sizeof(sa0->seq_hi); + sym_cop->auth.digest.data = priv->icv; + sym_cop->auth.digest.phys_addr = cop->phys_addr + + (uintptr_t) priv->icv - (uintptr_t) cop; + sym_cop->auth.digest.length = icv_size; + } + else + { + sym_cop->auth.data.offset = 0; + sym_cop->auth.data.length = sizeof(esp_header_t) + + iv_size + payload_len; + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.length = icv_size; + } + } + +trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_RX_PKTS, + from_frame->n_vectors); + crypto_qp_data_t *qpd; + /* *INDENT-OFF* */ + vec_foreach_index (i, cwm->qp_data) + { + u32 enq; + + qpd = vec_elt_at_index(cwm->qp_data, i); + enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, + qpd->cops, n_cop_qp[i]); + qpd->inflights += enq; + + if (PREDICT_FALSE(enq < n_cop_qp[i])) + { + crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); + vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); + + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_ENQ_FAIL, + n_cop_qp[i] - enq); + } + } + /* *INDENT-ON* */ + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_decrypt_node) = { + .function = dpdk_esp_decrypt_node_fn, + .name = "dpdk-esp-decrypt", + .vector_size = sizeof (u32), + .format_trace = format_esp_decrypt_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(esp_decrypt_error_strings), + .error_strings = esp_decrypt_error_strings, + + .n_next_nodes = ESP_DECRYPT_N_NEXT, + .next_nodes = { +#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, + foreach_esp_decrypt_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_node, dpdk_esp_decrypt_node_fn) + +/* + * Decrypt Post Node + */ + +#define foreach_esp_decrypt_post_error \ + _(PKTS, "ESP post pkts") + +typedef enum { +#define _(sym,str) ESP_DECRYPT_POST_ERROR_##sym, + foreach_esp_decrypt_post_error +#undef _ + ESP_DECRYPT_POST_N_ERROR, +} esp_decrypt_post_error_t; + +static char * esp_decrypt_post_error_strings[] = { +#define _(sym,string) string, + foreach_esp_decrypt_post_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_decrypt_post_node; + +static u8 * format_esp_decrypt_post_trace (u8 * s, va_list * args) +{ + return s; +} + +static uword 
+dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next = 0, next_index; + ipsec_sa_t * sa0; + u32 sa_index0 = ~0; + ipsec_main_t *im = &ipsec_main; + dpdk_esp_main_t *em = &dpdk_esp_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + esp_footer_t * f0; + u32 bi0, next0, icv_size, iv_size; + vlib_buffer_t * b0 = 0; + ip4_header_t *ih4 = 0, *oh4 = 0; + ip6_header_t *ih6 = 0, *oh6 = 0; + u8 tunnel_mode = 1; + u8 transport_ip6 = 0; + + next0 = ESP_DECRYPT_NEXT_DROP; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sa_index0 = vnet_buffer(b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, sa_index0); + + to_next[0] = bi0; + to_next += 1; + + icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + + if (sa0->use_anti_replay) + { + esp_header_t * esp0 = vlib_buffer_get_current (b0); + u32 seq; + seq = clib_host_to_net_u32(esp0->seq); + if (PREDICT_TRUE(sa0->use_esn)) + esp_replay_advance_esn(sa0, seq); + else + esp_replay_advance(sa0, seq); + } + + ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t)); + vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size); + + b0->current_length -= (icv_size + 2); + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) + + b0->current_length); + b0->current_length -= f0->pad_length; + + /* transport mode */ + if (PREDICT_FALSE(!sa0->is_tunnel && !sa0->is_tunnel_ip6)) + { + tunnel_mode = 0; + + if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) != 0x40)) + { + if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) == 0x60)) + transport_ip6 = 1; + else + { + clib_warning("next header: 0x%x", f0->next_header); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_NOT_IP, 1); + goto trace; + } + } + } + + if (PREDICT_TRUE (tunnel_mode)) + { + if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP)) + next0 = ESP_DECRYPT_NEXT_IP4_INPUT; + else if (f0->next_header == IP_PROTOCOL_IPV6) + next0 = ESP_DECRYPT_NEXT_IP6_INPUT; + else + { + clib_warning("next header: 0x%x", f0->next_header); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_DECRYPTION_FAILED, + 1); + goto trace; + } + } + /* transport mode */ + else + { + if (PREDICT_FALSE(transport_ip6)) + { + ih6 = (ip6_header_t *) (b0->data + sizeof(ethernet_header_t)); + vlib_buffer_advance (b0, -sizeof(ip6_header_t)); + oh6 = vlib_buffer_get_current (b0); + memmove(oh6, ih6, sizeof(ip6_header_t)); + + next0 = ESP_DECRYPT_NEXT_IP6_INPUT; + oh6->protocol = f0->next_header; + oh6->payload_length = + clib_host_to_net_u16 ( + vlib_buffer_length_in_chain(vm, b0) - + sizeof (ip6_header_t)); + } + else + { + vlib_buffer_advance (b0, -sizeof(ip4_header_t)); + oh4 = vlib_buffer_get_current (b0); + memmove(oh4, ih4, sizeof(ip4_header_t)); + + next0 = ESP_DECRYPT_NEXT_IP4_INPUT; + oh4->ip_version_and_header_length = 0x45; + oh4->fragment_id = 0; + oh4->flags_and_fragment_offset = 0; + oh4->protocol = f0->next_header; + oh4->length = clib_host_to_net_u16 ( + vlib_buffer_length_in_chain (vm, b0)); + 
oh4->checksum = ip4_header_checksum (oh4); + } + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0; + +trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_decrypt_post_node.index, + ESP_DECRYPT_POST_ERROR_PKTS, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_decrypt_post_node) = { + .function = dpdk_esp_decrypt_post_node_fn, + .name = "dpdk-esp-decrypt-post", + .vector_size = sizeof (u32), + .format_trace = format_esp_decrypt_post_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(esp_decrypt_post_error_strings), + .error_strings = esp_decrypt_post_error_strings, + + .n_next_nodes = ESP_DECRYPT_N_NEXT, + .next_nodes = { +#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, + foreach_esp_decrypt_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_post_node, dpdk_esp_decrypt_post_node_fn) diff --git a/src/plugins/dpdk/ipsec/esp_encrypt.c b/src/plugins/dpdk/ipsec/esp_encrypt.c new file mode 100644 index 00000000..5b03de73 --- /dev/null +++ b/src/plugins/dpdk/ipsec/esp_encrypt.c @@ -0,0 +1,609 @@ +/* + * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define foreach_esp_encrypt_next \ +_(DROP, "error-drop") \ +_(IP4_LOOKUP, "ip4-lookup") \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(INTERFACE_OUTPUT, "interface-output") + +#define _(v, s) ESP_ENCRYPT_NEXT_##v, +typedef enum +{ + foreach_esp_encrypt_next +#undef _ + ESP_ENCRYPT_N_NEXT, +} esp_encrypt_next_t; + +#define foreach_esp_encrypt_error \ + _(RX_PKTS, "ESP pkts received") \ + _(SEQ_CYCLED, "sequence number cycled") \ + _(ENQ_FAIL, "Enqueue failed (buffer full)") \ + _(NO_CRYPTODEV, "Cryptodev not configured") \ + _(UNSUPPORTED, "Cipher/Auth not supported") + + +typedef enum +{ +#define _(sym,str) ESP_ENCRYPT_ERROR_##sym, + foreach_esp_encrypt_error +#undef _ + ESP_ENCRYPT_N_ERROR, +} esp_encrypt_error_t; + +static char *esp_encrypt_error_strings[] = { +#define _(sym,string) string, + foreach_esp_encrypt_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_encrypt_node; + +typedef struct +{ + u32 spi; + u32 seq; + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; +} esp_encrypt_trace_t; + +/* packet trace format function */ +static u8 * +format_esp_encrypt_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *); + + s = format (s, "esp: spi %u seq %u crypto %U integrity %U", + t->spi, t->seq, + format_ipsec_crypto_alg, t->crypto_alg, + format_ipsec_integ_alg, t->integ_alg); + return s; +} + +static uword +dpdk_esp_encrypt_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next, next_index; + ipsec_main_t *im = &ipsec_main; + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + dpdk_esp_main_t *em = &dpdk_esp_main; + u32 i; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (PREDICT_FALSE (!dcm->workers_main)) + { + /* Likely there are not enough cryptodevs, so drop frame */ + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_NO_CRYPTODEV, + n_left_from); + vlib_buffer_free (vm, from, n_left_from); + return n_left_from; + } + + crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index); + u32 n_qps = vec_len (cwm->qp_data); + struct rte_crypto_op **cops_to_enq[n_qps]; + u32 n_cop_qp[n_qps], *bi_to_enq[n_qps]; + + for (i = 0; i < n_qps; i++) + { + bi_to_enq[i] = cwm->qp_data[i].bi; + cops_to_enq[i] = cwm->qp_data[i].cops; + } + + memset (n_cop_qp, 0, n_qps * sizeof (u32)); + + crypto_alloc_cops (); + + next_index = ESP_ENCRYPT_NEXT_DROP; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + u32 sa_index0; + ipsec_sa_t *sa0; + ip4_and_esp_header_t *ih0, *oh0 = 0; + ip6_and_esp_header_t *ih6_0, *oh6_0 = 0; + struct rte_mbuf *mb0 = 0; + esp_footer_t *f0; + u8 is_ipv6; + u8 ip_hdr_size; + u8 next_hdr_type; + u8 transport_mode = 0; + const int BLOCK_SIZE = 16; + u32 iv_size; + u16 orig_sz; + crypto_sa_session_t *sa_sess; + void *sess; + struct rte_crypto_op *cop = 0; + u16 qp_index; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sa_index0 = vnet_buffer (b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, 
sa_index0); + + if (PREDICT_FALSE (esp_seq_advance (sa0))) + { + clib_warning ("sequence number counter has cycled SPI %u", + sa0->spi); + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1); + //TODO: rekey SA + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + sa0->total_data_size += b0->current_length; + + sa_sess = pool_elt_at_index (cwm->sa_sess_d[1], sa_index0); + if (PREDICT_FALSE (!sa_sess->sess)) + { + int ret = create_sym_sess (sa0, sa_sess, 1); + + if (PREDICT_FALSE (ret)) + { + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + } + + qp_index = sa_sess->qp_index; + sess = sa_sess->sess; + + ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); + cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); + ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); + + cops_to_enq[qp_index][0] = cop; + cops_to_enq[qp_index] += 1; + n_cop_qp[qp_index] += 1; + bi_to_enq[qp_index][0] = bi0; + bi_to_enq[qp_index] += 1; + + ssize_t adv; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + ih0 = vlib_buffer_get_current (b0); + orig_sz = b0->current_length; + is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60; + /* is ipv6 */ + if (PREDICT_TRUE (sa0->is_tunnel)) + { + if (PREDICT_TRUE (!is_ipv6)) + adv = -sizeof (ip4_and_esp_header_t); + else + adv = -sizeof (ip6_and_esp_header_t); + } + else + { + adv = -sizeof (esp_header_t); + if (PREDICT_TRUE (!is_ipv6)) + orig_sz -= sizeof (ip4_header_t); + else + orig_sz -= sizeof (ip6_header_t); + } + + /*transport mode save the eth header before it is overwritten */ + if (PREDICT_FALSE (!sa0->is_tunnel)) + { + ethernet_header_t *ieh0 = (ethernet_header_t *) + ((u8 *) vlib_buffer_get_current (b0) - + sizeof (ethernet_header_t)); + ethernet_header_t *oeh0 = + (ethernet_header_t *) ((u8 *) ieh0 + (adv - iv_size)); + clib_memcpy (oeh0, ieh0, sizeof (ethernet_header_t)); + } + + vlib_buffer_advance (b0, adv - iv_size); + + /* XXX IP6/ip4 and IP4/IP6 not supported, only IP4/IP4 and IP6/IP6 */ + + /* is ipv6 */ + if (PREDICT_FALSE (is_ipv6)) + { + ih6_0 = (ip6_and_esp_header_t *) ih0; + ip_hdr_size = sizeof (ip6_header_t); + oh6_0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (sa0->is_tunnel)) + { + next_hdr_type = IP_PROTOCOL_IPV6; + oh6_0->ip6.ip_version_traffic_class_and_flow_label = + ih6_0->ip6.ip_version_traffic_class_and_flow_label; + } + else + { + next_hdr_type = ih6_0->ip6.protocol; + memmove (oh6_0, ih6_0, sizeof (ip6_header_t)); + } + + oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP; + oh6_0->ip6.hop_limit = 254; + oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi); + oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq); + } + else + { + ip_hdr_size = sizeof (ip4_header_t); + oh0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (sa0->is_tunnel)) + { + next_hdr_type = IP_PROTOCOL_IP_IN_IP; + oh0->ip4.tos = ih0->ip4.tos; + } + else + { + next_hdr_type = ih0->ip4.protocol; + memmove (oh0, ih0, sizeof (ip4_header_t)); + } + + oh0->ip4.ip_version_and_header_length = 0x45; + oh0->ip4.fragment_id = 0; + oh0->ip4.flags_and_fragment_offset = 0; + oh0->ip4.ttl = 254; + oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP; + oh0->esp.spi = clib_net_to_host_u32 (sa0->spi); + oh0->esp.seq = clib_net_to_host_u32 (sa0->seq); + } + + if (PREDICT_TRUE (sa0->is_tunnel && !sa0->is_tunnel_ip6)) + { + oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32; + oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32; + + /* in 
tunnel mode send it back to FIB */ + next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + } + else if (sa0->is_tunnel && sa0->is_tunnel_ip6) + { + oh6_0->ip6.src_address.as_u64[0] = + sa0->tunnel_src_addr.ip6.as_u64[0]; + oh6_0->ip6.src_address.as_u64[1] = + sa0->tunnel_src_addr.ip6.as_u64[1]; + oh6_0->ip6.dst_address.as_u64[0] = + sa0->tunnel_dst_addr.ip6.as_u64[0]; + oh6_0->ip6.dst_address.as_u64[1] = + sa0->tunnel_dst_addr.ip6.as_u64[1]; + + /* in tunnel mode send it back to FIB */ + next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + } + else + { + next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT; + transport_mode = 1; + } + + ASSERT (sa0->crypto_alg < IPSEC_CRYPTO_N_ALG); + ASSERT (sa0->crypto_alg != IPSEC_CRYPTO_ALG_NONE); + + int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE; + + /* pad packet in input buffer */ + u8 pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz; + u8 i; + u8 *padding = vlib_buffer_get_current (b0) + b0->current_length; + + for (i = 0; i < pad_bytes; ++i) + padding[i] = i + 1; + + f0 = vlib_buffer_get_current (b0) + b0->current_length + pad_bytes; + f0->pad_length = pad_bytes; + f0->next_header = next_hdr_type; + b0->current_length += pad_bytes + 2 + + em->esp_integ_algs[sa0->integ_alg].trunc_size; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + vnet_buffer (b0)->sw_if_index[VLIB_RX]; + b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + struct rte_crypto_sym_op *sym_cop; + sym_cop = (struct rte_crypto_sym_op *) (cop + 1); + + dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *) (sym_cop + 1); + + vnet_buffer (b0)->unused[0] = next0; + + mb0 = rte_mbuf_from_vlib_buffer (b0); + mb0->data_len = b0->current_length; + mb0->pkt_len = b0->current_length; + mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; + + rte_crypto_op_attach_sym_session (cop, sess); + + sym_cop->m_src = mb0; + + dpdk_gcm_cnt_blk *icb = &priv->cb; + icb->salt = sa0->salt; + icb->iv[0] = sa0->seq; + icb->iv[1] = sa0->seq_hi; + + if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + icb->cnt = clib_host_to_net_u32 (1); + clib_memcpy (vlib_buffer_get_current (b0) + ip_hdr_size + + sizeof (esp_header_t), icb->iv, 8); + sym_cop->cipher.data.offset = + ip_hdr_size + sizeof (esp_header_t) + iv_size; + sym_cop->cipher.data.length = BLOCK_SIZE * blocks; + sym_cop->cipher.iv.length = 16; + } + else + { + sym_cop->cipher.data.offset = + ip_hdr_size + sizeof (esp_header_t); + sym_cop->cipher.data.length = BLOCK_SIZE * blocks + iv_size; + sym_cop->cipher.iv.length = iv_size; + } + + sym_cop->cipher.iv.data = (u8 *) icb; + sym_cop->cipher.iv.phys_addr = cop->phys_addr + (uintptr_t) icb + - (uintptr_t) cop; + + + ASSERT (sa0->integ_alg < IPSEC_INTEG_N_ALG); + ASSERT (sa0->integ_alg != IPSEC_INTEG_ALG_NONE); + + if (PREDICT_FALSE (sa0->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)) + { + u8 *aad = priv->aad; + clib_memcpy (aad, vlib_buffer_get_current (b0) + ip_hdr_size, + 8); + sym_cop->auth.aad.data = aad; + sym_cop->auth.aad.phys_addr = cop->phys_addr + + (uintptr_t) aad - (uintptr_t) cop; + + if (PREDICT_FALSE (sa0->use_esn)) + { + *((u32 *) & aad[8]) = sa0->seq_hi; + sym_cop->auth.aad.length = 12; + } + else + { + sym_cop->auth.aad.length = 8; + } + } + else + { + sym_cop->auth.data.offset = ip_hdr_size; + sym_cop->auth.data.length = b0->current_length - ip_hdr_size + - em->esp_integ_algs[sa0->integ_alg].trunc_size; + + if (PREDICT_FALSE (sa0->use_esn)) + { + u8 *payload_end = + vlib_buffer_get_current (b0) + b0->current_length; + *((u32 *) 
payload_end) = sa0->seq_hi; + sym_cop->auth.data.length += sizeof (sa0->seq_hi); + } + } + sym_cop->auth.digest.data = vlib_buffer_get_current (b0) + + b0->current_length - + em->esp_integ_algs[sa0->integ_alg].trunc_size; + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset (mb0, + b0->current_length + - + em->esp_integ_algs + [sa0->integ_alg].trunc_size); + sym_cop->auth.digest.length = + em->esp_integ_algs[sa0->integ_alg].trunc_size; + + + if (PREDICT_FALSE (is_ipv6)) + { + oh6_0->ip6.payload_length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - + sizeof (ip6_header_t)); + } + else + { + oh0->ip4.length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4); + } + + if (transport_mode) + vlib_buffer_advance (b0, -sizeof (ethernet_header_t)); + + trace: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_encrypt_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->spi = sa0->spi; + tr->seq = sa0->seq - 1; + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_RX_PKTS, + from_frame->n_vectors); + crypto_qp_data_t *qpd; + /* *INDENT-OFF* */ + vec_foreach_index (i, cwm->qp_data) + { + u32 enq; + + qpd = vec_elt_at_index(cwm->qp_data, i); + enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, + qpd->cops, n_cop_qp[i]); + qpd->inflights += enq; + + if (PREDICT_FALSE(enq < n_cop_qp[i])) + { + crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); + vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); + + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_ENQ_FAIL, + n_cop_qp[i] - enq); + } + } + /* *INDENT-ON* */ + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_encrypt_node) = +{ + .function = dpdk_esp_encrypt_node_fn,.name = "dpdk-esp-encrypt",.flags = + VLIB_NODE_FLAG_IS_OUTPUT,.vector_size = sizeof (u32),.format_trace = + format_esp_encrypt_trace,.n_errors = + ARRAY_LEN (esp_encrypt_error_strings),.error_strings = + esp_encrypt_error_strings,.n_next_nodes = 1,.next_nodes = + { + [ESP_ENCRYPT_NEXT_DROP] = "error-drop",} +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_node, dpdk_esp_encrypt_node_fn) +/* + * ESP Encrypt Post Node + */ +#define foreach_esp_encrypt_post_error \ + _(PKTS, "ESP post pkts") + typedef enum + { +#define _(sym,str) ESP_ENCRYPT_POST_ERROR_##sym, + foreach_esp_encrypt_post_error +#undef _ + ESP_ENCRYPT_POST_N_ERROR, + } esp_encrypt_post_error_t; + + static char *esp_encrypt_post_error_strings[] = { +#define _(sym,string) string, + foreach_esp_encrypt_post_error +#undef _ + }; + +vlib_node_registration_t dpdk_esp_encrypt_post_node; + +static u8 * +format_esp_encrypt_post_trace (u8 * s, va_list * args) +{ + return s; +} + +static uword +dpdk_esp_encrypt_post_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next = 0, next_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + n_left_to_next -= 1; 
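+	  /* dpdk-esp-encrypt stored the post-crypto next node index in
+	     vnet_buffer (b0)->unused[0]; simply forward the buffer there */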
+ + b0 = vlib_get_buffer (vm, bi0); + + to_next[0] = bi0; + to_next += 1; + + next0 = vnet_buffer (b0)->unused[0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, bi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, dpdk_esp_encrypt_post_node.index, + ESP_ENCRYPT_POST_ERROR_PKTS, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_encrypt_post_node) = +{ + .function = dpdk_esp_encrypt_post_node_fn,.name = + "dpdk-esp-encrypt-post",.vector_size = sizeof (u32),.format_trace = + format_esp_encrypt_post_trace,.type = VLIB_NODE_TYPE_INTERNAL,.n_errors = + ARRAY_LEN (esp_encrypt_post_error_strings),.error_strings = + esp_encrypt_post_error_strings,.n_next_nodes = + ESP_ENCRYPT_N_NEXT,.next_nodes = + { +#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n, + foreach_esp_encrypt_next +#undef _ + } +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_post_node, + dpdk_esp_encrypt_post_node_fn) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c new file mode 100644 index 00000000..16bec20a --- /dev/null +++ b/src/plugins/dpdk/ipsec/ipsec.c @@ -0,0 +1,430 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#define DPDK_CRYPTO_NB_SESS_OBJS 20000 +#define DPDK_CRYPTO_CACHE_SIZE 512 +#define DPDK_CRYPTO_PRIV_SIZE 128 +#define DPDK_CRYPTO_N_QUEUE_DESC 1024 +#define DPDK_CRYPTO_NB_COPS (1024 * 4) + +static int +add_del_sa_sess (u32 sa_index, u8 is_add) +{ + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm; + u8 skip_master = vlib_num_workers () > 0; + + /* *INDENT-OFF* */ + vec_foreach (cwm, dcm->workers_main) + { + crypto_sa_session_t *sa_sess; + u8 is_outbound; + + if (skip_master) + { + skip_master = 0; + continue; + } + + for (is_outbound = 0; is_outbound < 2; is_outbound++) + { + if (is_add) + { + pool_get (cwm->sa_sess_d[is_outbound], sa_sess); + } + else + { + u8 dev_id; + + sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index); + dev_id = cwm->qp_data[sa_sess->qp_index].dev_id; + + if (!sa_sess->sess) + continue; + + if (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess)) + { + clib_warning("failed to free session"); + return -1; + } + memset(sa_sess, 0, sizeof(sa_sess[0])); + } + } + } + /* *INDENT-OFF* */ + + return 0; +} + +static void +update_qp_data (crypto_worker_main_t * cwm, + u8 cdev_id, u16 qp_id, u8 is_outbound, u16 * idx) +{ + crypto_qp_data_t *qpd; + + /* *INDENT-OFF* */ + vec_foreach_index (*idx, cwm->qp_data) + { + qpd = vec_elt_at_index(cwm->qp_data, *idx); + + if (qpd->dev_id == cdev_id && qpd->qp_id == qp_id && + qpd->is_outbound == is_outbound) + return; + } + /* *INDENT-ON* */ + + vec_add2 (cwm->qp_data, qpd, 1); + + qpd->dev_id = cdev_id; + qpd->qp_id = qp_id; + qpd->is_outbound = is_outbound; +} + +/* + * return: + * 0: already exist + * 1: mapped + */ +static int +add_mapping (crypto_worker_main_t * cwm, + u8 cdev_id, u16 qp, u8 is_outbound, + const struct rte_cryptodev_capabilities *cipher_cap, + const struct rte_cryptodev_capabilities *auth_cap) +{ + u16 qp_index; + uword key = 0, data, *ret; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; + + p_key->cipher_algo = (u8) cipher_cap->sym.cipher.algo; + p_key->auth_algo = (u8) auth_cap->sym.auth.algo; + p_key->is_outbound = is_outbound; + + ret = hash_get (cwm->algo_qp_map, key); + if (ret) + return 0; + + update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index); + + data = (uword) qp_index; + hash_set (cwm->algo_qp_map, key, data); + + return 1; +} + +/* + * return: + * 0: already exist + * 1: mapped + */ +static int +add_cdev_mapping (crypto_worker_main_t * cwm, + struct rte_cryptodev_info *dev_info, u8 cdev_id, + u16 qp, u8 is_outbound) +{ + const struct rte_cryptodev_capabilities *i, *j; + u32 mapped = 0; + + for (i = dev_info->capabilities; i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++) + { + if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER) + continue; + + if (check_algo_is_supported (i, NULL) != 0) + continue; + + for (j = dev_info->capabilities; j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; + j++) + { + if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH) + continue; + + if (check_algo_is_supported (j, NULL) != 0) + continue; + + mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, j); + } + } + + return mapped; +} + +static int +check_cryptodev_queues () +{ + u32 n_qs = 0; + u8 cdev_id; + u32 n_req_qs = 2; + + if (vlib_num_workers () > 0) + n_req_qs = vlib_num_workers () * 2; + + for (cdev_id = 0; cdev_id < rte_cryptodev_count (); cdev_id++) + { + struct rte_cryptodev_info cdev_info; + + rte_cryptodev_info_get (cdev_id, &cdev_info); + + if (! 
+ (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) + continue; + + n_qs += cdev_info.max_nb_queue_pairs; + } + + if (n_qs >= n_req_qs) + return 0; + else + return -1; +} + +static clib_error_t * +dpdk_ipsec_check_support (ipsec_sa_t * sa) +{ + if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) + return clib_error_return (0, "unsupported integ-alg %U with " + "crypto-algo aes-gcm-128", + format_ipsec_integ_alg, sa->integ_alg); + sa->integ_alg = IPSEC_INTEG_ALG_AES_GCM_128; + } + else + { + if (sa->integ_alg == IPSEC_INTEG_ALG_NONE || + sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) + return clib_error_return (0, "unsupported integ-alg %U", + format_ipsec_integ_alg, sa->integ_alg); + } + + return 0; +} + +static uword +dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + dpdk_config_main_t *conf = &dpdk_config_main; + ipsec_main_t *im = &ipsec_main; + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + struct rte_cryptodev_config dev_conf; + struct rte_cryptodev_qp_conf qp_conf; + struct rte_cryptodev_info cdev_info; + struct rte_mempool *rmp; + i32 dev_id, ret; + u32 i, skip_master; + + if (!conf->cryptodev) + { + clib_warning ("DPDK Cryptodev support is disabled, " + "default to OpenSSL IPsec"); + return 0; + } + + if (check_cryptodev_queues () < 0) + { + conf->cryptodev = 0; + clib_warning ("not enough Cryptodevs, default to OpenSSL IPsec"); + return 0; + } + + vec_alloc (dcm->workers_main, tm->n_vlib_mains); + _vec_len (dcm->workers_main) = tm->n_vlib_mains; + + fprintf (stdout, "DPDK Cryptodevs info:\n"); + fprintf (stdout, "dev_id\tn_qp\tnb_obj\tcache_size\n"); + /* HW cryptodevs have higher dev_id, use HW first */ + for (dev_id = rte_cryptodev_count () - 1; dev_id >= 0; dev_id--) + { + u16 max_nb_qp, qp = 0; + skip_master = vlib_num_workers () > 0; + + rte_cryptodev_info_get (dev_id, &cdev_info); + + if (! 
+ (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) + continue; + + max_nb_qp = cdev_info.max_nb_queue_pairs; + + for (i = 0; i < tm->n_vlib_mains; i++) + { + u8 is_outbound; + crypto_worker_main_t *cwm; + uword *map; + + if (skip_master) + { + skip_master = 0; + continue; + } + + cwm = vec_elt_at_index (dcm->workers_main, i); + map = cwm->algo_qp_map; + + if (!map) + { + map = hash_create (0, sizeof (crypto_worker_qp_key_t)); + if (!map) + { + clib_warning ("unable to create hash table for worker %u", + vlib_mains[i]->cpu_index); + goto error; + } + cwm->algo_qp_map = map; + } + + for (is_outbound = 0; is_outbound < 2 && qp < max_nb_qp; + is_outbound++) + qp += add_cdev_mapping (cwm, &cdev_info, dev_id, qp, is_outbound); + } + + if (qp == 0) + continue; + + dev_conf.socket_id = rte_cryptodev_socket_id (dev_id); + dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs; + dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_SESS_OBJS; + dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE; + + ret = rte_cryptodev_configure (dev_id, &dev_conf); + if (ret < 0) + { + clib_warning ("cryptodev %u config error", dev_id); + goto error; + } + + qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC; + for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++) + { + ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf, + dev_conf.socket_id); + if (ret < 0) + { + clib_warning ("cryptodev %u qp %u setup error", dev_id, qp); + goto error; + } + } + vec_validate_aligned (dcm->cop_pools, dev_conf.socket_id, + CLIB_CACHE_LINE_BYTES); + + if (!vec_elt (dcm->cop_pools, dev_conf.socket_id)) + { + u8 *pool_name = format (0, "crypto_op_pool_socket%u%c", + dev_conf.socket_id, 0); + + rmp = rte_crypto_op_pool_create ((char *) pool_name, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + DPDK_CRYPTO_NB_COPS * + (1 + vlib_num_workers ()), + DPDK_CRYPTO_CACHE_SIZE, + DPDK_CRYPTO_PRIV_SIZE, + dev_conf.socket_id); + vec_free (pool_name); + + if (!rmp) + { + clib_warning ("failed to allocate mempool on socket %u", + dev_conf.socket_id); + goto error; + } + vec_elt (dcm->cop_pools, dev_conf.socket_id) = rmp; + } + + fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs, + DPDK_CRYPTO_NB_SESS_OBJS, DPDK_CRYPTO_CACHE_SIZE); + } + + dpdk_esp_init (); + + /* Add new next node and set as default */ + vlib_node_t *node, *next_node; + + next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt"); + ASSERT (next_node); + node = vlib_get_node_by_name (vm, (u8 *) "ipsec-output-ip4"); + ASSERT (node); + im->esp_encrypt_node_index = next_node->index; + im->esp_encrypt_next_index = + vlib_node_add_next (vm, node->index, next_node->index); + + next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-decrypt"); + ASSERT (next_node); + node = vlib_get_node_by_name (vm, (u8 *) "ipsec-input-ip4"); + ASSERT (node); + im->esp_decrypt_node_index = next_node->index; + im->esp_decrypt_next_index = + vlib_node_add_next (vm, node->index, next_node->index); + + im->cb.check_support_cb = dpdk_ipsec_check_support; + im->cb.add_del_sa_sess_cb = add_del_sa_sess; + + if (vec_len (vlib_mains) == 0) + vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index, + VLIB_NODE_STATE_POLLING); + else + for (i = 1; i < tm->n_vlib_mains; i++) + vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, + VLIB_NODE_STATE_POLLING); + + /* TODO cryptodev counters */ + + return 0; + +error: + ; + crypto_worker_main_t *cwm; + struct rte_mempool **mp; + /* *INDENT-OFF* */ + vec_foreach (cwm, dcm->workers_main) + hash_free 
(cwm->algo_qp_map);
+
+  vec_foreach (mp, dcm->cop_pools)
+    {
+      if (mp[0])
+        rte_mempool_free (mp[0]);
+    }
+  /* *INDENT-ON* */
+  vec_free (dcm->workers_main);
+  vec_free (dcm->cop_pools);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (dpdk_ipsec_process_node, static) = {
+    .function = dpdk_ipsec_process,
+    .type = VLIB_NODE_TYPE_PROCESS,
+    .name = "dpdk-ipsec-process",
+    .process_log2_n_stack_bytes = 17,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/ipsec/ipsec.h b/src/plugins/dpdk/ipsec/ipsec.h
new file mode 100644
index 00000000..3465b361
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/ipsec.h
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __DPDK_IPSEC_H__
+#define __DPDK_IPSEC_H__
+
+#include <vnet/vnet.h>
+
+#undef always_inline
+#include <rte_crypto.h>
+#include <rte_cryptodev.h>
+
+#if CLIB_DEBUG > 0
+#define always_inline static inline
+#else
+#define always_inline static inline __attribute__ ((__always_inline__))
+#endif
+
+
+#define MAX_QP_PER_LCORE 16
+
+typedef struct
+{
+  u32 salt;
+  u32 iv[2];
+  u32 cnt;
+} dpdk_gcm_cnt_blk;
+
+typedef struct
+{
+  dpdk_gcm_cnt_blk cb;
+  union
+  {
+    u8 aad[12];
+    u8 icv[64];
+  };
+} dpdk_cop_priv_t;
+
+typedef struct
+{
+  u8 cipher_algo;
+  u8 auth_algo;
+  u8 is_outbound;
+} crypto_worker_qp_key_t;
+
+typedef struct
+{
+  u16 dev_id;
+  u16 qp_id;
+  u16 is_outbound;
+  i16 inflights;
+  u32 bi[VLIB_FRAME_SIZE];
+  struct rte_crypto_op *cops[VLIB_FRAME_SIZE];
+  struct rte_crypto_op **free_cops;
+} crypto_qp_data_t;
+
+typedef struct
+{
+  u8 qp_index;
+  void *sess;
+} crypto_sa_session_t;
+
+typedef struct
+{
+  crypto_sa_session_t *sa_sess_d[2];
+  crypto_qp_data_t *qp_data;
+  uword *algo_qp_map;
+} crypto_worker_main_t;
+
+typedef struct
+{
+  struct rte_mempool **cop_pools;
+  crypto_worker_main_t *workers_main;
+} dpdk_crypto_main_t;
+
+dpdk_crypto_main_t dpdk_crypto_main;
+
+extern vlib_node_registration_t dpdk_crypto_input_node;
+
+#define CRYPTO_N_FREE_COPS (VLIB_FRAME_SIZE * 3)
+
+static_always_inline void
+crypto_alloc_cops ()
+{
+  dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
+  u32 cpu_index = os_get_cpu_number ();
+  crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
+  unsigned socket_id = rte_socket_id ();
+  crypto_qp_data_t *qpd;
+
+  /* *INDENT-OFF* */
+  vec_foreach (qpd, cwm->qp_data)
+    {
+      u32 l = vec_len (qpd->free_cops);
+
+      if (PREDICT_FALSE (l < VLIB_FRAME_SIZE))
+        {
+          u32 n_alloc;
+
+          if (PREDICT_FALSE (!qpd->free_cops))
+            vec_alloc (qpd->free_cops, CRYPTO_N_FREE_COPS);
+
+          n_alloc = rte_crypto_op_bulk_alloc (dcm->cop_pools[socket_id],
+                                              RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+                                              &qpd->free_cops[l],
+                                              CRYPTO_N_FREE_COPS - l - 1);
+
+          _vec_len (qpd->free_cops) = l + n_alloc;
+        }
+    }
+  /* *INDENT-ON* */
+}
+
+static_always_inline void
+crypto_free_cop (crypto_qp_data_t * qpd, struct rte_crypto_op **cops, u32 n)
+{
+  u32 l = vec_len (qpd->free_cops);
+
+  if (l + n >= CRYPTO_N_FREE_COPS)
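+    /*
+     * Editor's note, illustrative and not part of the original patch:
+     * free_cops is a per-queue-pair cache of recycled crypto ops, capped
+     * at CRYPTO_N_FREE_COPS = 3 * VLIB_FRAME_SIZE.  When returning n more
+     * ops would overflow the cache, the branch below spills one full
+     * VLIB_FRAME_SIZE batch back to the rte_mempool and keeps the rest
+     * cached for the next crypto_alloc_cops () call.
+     */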
+    {
+      l -= VLIB_FRAME_SIZE;
+      rte_mempool_put_bulk (cops[0]->mempool,
+                            (void **) &qpd->free_cops[l], VLIB_FRAME_SIZE);
+    }
+  clib_memcpy (&qpd->free_cops[l], cops, sizeof (*cops) * n);
+
+  _vec_len (qpd->free_cops) = l + n;
+}
+
+static_always_inline int
+check_algo_is_supported (const struct rte_cryptodev_capabilities *cap,
+                         char *name)
+{
+  struct
+  {
+    uint8_t cipher_algo;
+    enum rte_crypto_sym_xform_type type;
+    union
+    {
+      enum rte_crypto_auth_algorithm auth;
+      enum rte_crypto_cipher_algorithm cipher;
+    };
+    char *name;
+  } supported_algo[] = {
+    {.type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+     .cipher = RTE_CRYPTO_CIPHER_NULL, .name = "NULL"},
+    {.type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+     .cipher = RTE_CRYPTO_CIPHER_AES_CBC, .name = "AES_CBC"},
+    {.type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+     .cipher = RTE_CRYPTO_CIPHER_AES_CTR, .name = "AES_CTR"},
+    {.type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+     .cipher = RTE_CRYPTO_CIPHER_3DES_CBC, .name = "3DES-CBC"},
+    {.type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+     .cipher = RTE_CRYPTO_CIPHER_AES_GCM, .name = "AES-GCM"},
+    {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+     .auth = RTE_CRYPTO_AUTH_SHA1_HMAC, .name = "HMAC-SHA1"},
+    {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+     .auth = RTE_CRYPTO_AUTH_SHA256_HMAC, .name = "HMAC-SHA256"},
+    {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+     .auth = RTE_CRYPTO_AUTH_SHA384_HMAC, .name = "HMAC-SHA384"},
+    {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+     .auth = RTE_CRYPTO_AUTH_SHA512_HMAC, .name = "HMAC-SHA512"},
+    {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+     .auth = RTE_CRYPTO_AUTH_AES_XCBC_MAC, .name = "AES-XCBC-MAC"},
+    {.type = RTE_CRYPTO_SYM_XFORM_AUTH,
+     .auth = RTE_CRYPTO_AUTH_AES_GCM, .name = "AES-GCM"},
+    /* tail */
+    {.type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED},
+  };
+  uint32_t i = 0;
+
+  if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC)
+    return -1;
+
+  while (supported_algo[i].type != RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED)
+    {
+      if (cap->sym.xform_type == supported_algo[i].type)
+        {
+          if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER &&
+               cap->sym.cipher.algo == supported_algo[i].cipher) ||
+              (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH &&
+               cap->sym.auth.algo == supported_algo[i].auth))
+            {
+              if (name)
+                strcpy (name, supported_algo[i].name);
+              return 0;
+            }
+        }
+
+      i++;
+    }
+
+  return -1;
+}
+
+#endif /* __DPDK_IPSEC_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c
new file mode 100644
index 00000000..8073a50a
--- /dev/null
+++ b/src/plugins/dpdk/main.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+
+/*
+ * Called by the dpdk driver's rte_delay_us() function.
+ * Return 0 to have DPDK do a regular delay loop.
+ * Return 1 to skip the delay loop because we are suspending
+ * the calling vlib process instead.
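+ *
+ * Illustrative sketch added by the editor, not in the original patch
+ * (assumes DPDK >= 16.11, which provides rte_delay_us_callback_register
+ * and rte_delay_us_block): a conforming callback looks like
+ *
+ *   static void my_delay_cb (unsigned us)
+ *   {
+ *     if (rte_delay_us_override (us) == 0)
+ *       rte_delay_us_block (us);
+ *   }
+ *   ...
+ *   rte_delay_us_callback_register (my_delay_cb);
+ *
+ * which is exactly the shape of rte_delay_us_override_cb () below.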
+ */ +int +rte_delay_us_override (unsigned us) +{ + vlib_main_t *vm; + + /* Don't bother intercepting for short delays */ + if (us < 10) + return 0; + + /* + * Only intercept if we are in a vlib process. + * If we are called from a vlib worker thread or the vlib main + * thread then do not intercept. (Must not be called from an + * independent pthread). + */ + if (os_get_cpu_number () == 0) + { + /* + * We're in the vlib main thread or a vlib process. Make sure + * the process is running and we're not still initializing. + */ + vm = vlib_get_main (); + if (vlib_in_process_context (vm)) + { + /* Only suspend for the admin_down_process */ + vlib_process_t *proc = vlib_get_current_process (vm); + if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) || + (proc->node_runtime.function != admin_up_down_process)) + return 0; + + f64 delay = 1e-6 * us; + vlib_process_suspend (vm, delay); + return 1; + } + } + return 0; // no override +} + +static void +rte_delay_us_override_cb (unsigned us) +{ + if (rte_delay_us_override (us) == 0) + rte_delay_us_block (us); +} + +static clib_error_t * dpdk_main_init (vlib_main_t * vm) +{ + dpdk_main_t * dm = &dpdk_main; + clib_error_t * error = 0; + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main (); + + if ((error = vlib_call_init_function (vm, dpdk_init))) + return error; + + /* register custom delay function */ + rte_delay_us_callback_register (rte_delay_us_override_cb); + + return error; +} + +VLIB_INIT_FUNCTION (dpdk_main_init); + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, +}; +/* *INDENT-ON* */ diff --git a/src/plugins/dpdk/thread.c b/src/plugins/dpdk/thread.c new file mode 100644 index 00000000..3a3fcc6c --- /dev/null +++ b/src/plugins/dpdk/thread.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static clib_error_t * +dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) +{ + int r; + r = rte_eal_remote_launch (fp, (void *) w, lcore_id); + if (r) + return clib_error_return (0, "Failed to launch thread %u", lcore_id); + return 0; +} + +static clib_error_t * +dpdk_thread_set_lcore (u32 thread, u16 lcore) +{ + return 0; +} + +static vlib_thread_callbacks_t callbacks = { + .vlib_launch_thread_cb = &dpdk_launch_thread, + .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore, +}; + +static clib_error_t * +dpdk_thread_init (vlib_main_t * vm) +{ + vlib_thread_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_thread_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 999f9869..14e78817 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -3970,13 +3970,6 @@ _(feature_enable_disable_reply) \ _(sw_interface_tag_add_del_reply) \ _(sw_interface_set_mtu_reply) -#if DPDK > 0 -#define foreach_standard_dpdk_reply_retval_handler \ -_(sw_interface_set_dpdk_hqos_pipe_reply) \ -_(sw_interface_set_dpdk_hqos_subport_reply) \ -_(sw_interface_set_dpdk_hqos_tctbl_reply) -#endif - #define _(n) \ static void vl_api_##n##_t_handler \ (vl_api_##n##_t * mp) \ @@ -4008,39 +4001,6 @@ foreach_standard_reply_retval_handler; foreach_standard_reply_retval_handler; #undef _ -#if DPDK > 0 -#define _(n) \ - static void vl_api_##n##_t_handler \ - (vl_api_##n##_t * mp) \ - { \ - vat_main_t * vam = &vat_main; \ - i32 retval = ntohl(mp->retval); \ - if (vam->async_mode) { \ - vam->async_errors += (retval < 0); \ - } else { \ - vam->retval = retval; \ - vam->result_ready = 1; \ - } \ - } -foreach_standard_dpdk_reply_retval_handler; -#undef _ - -#define _(n) \ - static void vl_api_##n##_t_handler_json \ - (vl_api_##n##_t * mp) \ - { \ - vat_main_t * vam = &vat_main; \ - vat_json_node_t node; \ - vat_json_init_object(&node); \ - vat_json_object_add_int(&node, "retval", ntohl(mp->retval)); \ - vat_json_print(vam->ofp, &node); \ - vam->retval = ntohl(mp->retval); \ - vam->result_ready = 1; \ - } -foreach_standard_dpdk_reply_retval_handler; -#undef _ -#endif - /* * Table of message reply handlers, must include boilerplate handlers * we just generated @@ -4272,16 +4232,6 @@ _(SW_INTERFACE_SET_MTU_REPLY, sw_interface_set_mtu_reply) \ _(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) -#if DPDK > 0 -#define foreach_vpe_dpdk_api_reply_msg \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY, \ - sw_interface_set_dpdk_hqos_pipe_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY, \ - sw_interface_set_dpdk_hqos_subport_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \ - sw_interface_set_dpdk_hqos_tctbl_reply) -#endif - typedef struct { u8 *name; @@ -5081,226 +5031,6 @@ api_sw_interface_clear_stats (vat_main_t * vam) return ret; } -#if DPDK >0 -static int -api_sw_interface_set_dpdk_hqos_pipe (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_sw_interface_set_dpdk_hqos_pipe_t *mp; - u32 sw_if_index; - u8 sw_if_index_set = 0; - u32 subport; - u8 subport_set = 0; - u32 
pipe; - u8 pipe_set = 0; - u32 profile; - u8 profile_set = 0; - int ret; - - /* Parse args required to build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "rx %U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "sw_if_index %u", &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "subport %u", &subport)) - subport_set = 1; - else - if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "pipe %u", &pipe)) - pipe_set = 1; - else if (unformat (i, "profile %u", &profile)) - profile_set = 1; - else - break; - } - - if (sw_if_index_set == 0) - { - errmsg ("missing interface name or sw_if_index"); - return -99; - } - - if (subport_set == 0) - { - errmsg ("missing subport "); - return -99; - } - - if (pipe_set == 0) - { - errmsg ("missing pipe"); - return -99; - } - - if (profile_set == 0) - { - errmsg ("missing profile"); - return -99; - } - - M (SW_INTERFACE_SET_DPDK_HQOS_PIPE, mp); - - mp->sw_if_index = ntohl (sw_if_index); - mp->subport = ntohl (subport); - mp->pipe = ntohl (pipe); - mp->profile = ntohl (profile); - - - S (mp); - W (ret); - return ret; -} - -static int -api_sw_interface_set_dpdk_hqos_subport (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_sw_interface_set_dpdk_hqos_subport_t *mp; - u32 sw_if_index; - u8 sw_if_index_set = 0; - u32 subport; - u8 subport_set = 0; - u32 tb_rate = 1250000000; /* 10GbE */ - u32 tb_size = 1000000; - u32 tc_rate[] = { 1250000000, 1250000000, 1250000000, 1250000000 }; - u32 tc_period = 10; - int ret; - - /* Parse args required to build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "rx %U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "sw_if_index %u", &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "subport %u", &subport)) - subport_set = 1; - else - if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "rate %u", &tb_rate)) - { - u32 tc_id; - - for (tc_id = 0; tc_id < (sizeof (tc_rate) / sizeof (tc_rate[0])); - tc_id++) - tc_rate[tc_id] = tb_rate; - } - else if (unformat (i, "bktsize %u", &tb_size)) - ; - else if (unformat (i, "tc0 %u", &tc_rate[0])) - ; - else if (unformat (i, "tc1 %u", &tc_rate[1])) - ; - else if (unformat (i, "tc2 %u", &tc_rate[2])) - ; - else if (unformat (i, "tc3 %u", &tc_rate[3])) - ; - else if (unformat (i, "period %u", &tc_period)) - ; - else - break; - } - - if (sw_if_index_set == 0) - { - errmsg ("missing interface name or sw_if_index"); - return -99; - } - - if (subport_set == 0) - { - errmsg ("missing subport "); - return -99; - } - - M (SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, mp); - - mp->sw_if_index = ntohl (sw_if_index); - mp->subport = ntohl (subport); - mp->tb_rate = ntohl (tb_rate); - mp->tb_size = ntohl (tb_size); - mp->tc_rate[0] = ntohl (tc_rate[0]); - mp->tc_rate[1] = ntohl (tc_rate[1]); - mp->tc_rate[2] = ntohl (tc_rate[2]); - mp->tc_rate[3] = ntohl (tc_rate[3]); - mp->tc_period = ntohl (tc_period); - - S (mp); - W (ret); - return ret; -} - -static int -api_sw_interface_set_dpdk_hqos_tctbl (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_sw_interface_set_dpdk_hqos_tctbl_t *mp; - u32 sw_if_index; - u8 sw_if_index_set = 0; - u8 entry_set = 0; - u8 tc_set = 0; - u8 queue_set = 0; - u32 entry, tc, queue; - int ret; - - /* Parse args required to 
build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "rx %U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "sw_if_index %u", &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "entry %d", &entry)) - entry_set = 1; - else if (unformat (i, "tc %d", &tc)) - tc_set = 1; - else if (unformat (i, "queue %d", &queue)) - queue_set = 1; - else - break; - } - - if (sw_if_index_set == 0) - { - errmsg ("missing interface name or sw_if_index"); - return -99; - } - - if (entry_set == 0) - { - errmsg ("missing entry "); - return -99; - } - - if (tc_set == 0) - { - errmsg ("missing traffic class "); - return -99; - } - - if (queue_set == 0) - { - errmsg ("missing queue "); - return -99; - } - - M (SW_INTERFACE_SET_DPDK_HQOS_TCTBL, mp); - - mp->sw_if_index = ntohl (sw_if_index); - mp->entry = ntohl (entry); - mp->tc = ntohl (tc); - mp->queue = ntohl (queue); - - S (mp); - W (ret); - return ret; -} -#endif - static int api_sw_interface_add_del_address (vat_main_t * vam) { @@ -18656,18 +18386,6 @@ _(sw_interface_set_mtu, " | sw_if_index mtu ") \ _(ip_neighbor_dump, "[ip6] | sw_if_index ") \ _(sw_interface_get_table, " | sw_if_index [ipv6]") -#if DPDK > 0 -#define foreach_vpe_dpdk_api_msg \ -_(sw_interface_set_dpdk_hqos_pipe, \ - "rx | sw_if_index subport pipe \n" \ - "profile \n") \ -_(sw_interface_set_dpdk_hqos_subport, \ - "rx | sw_if_index subport [rate ]\n" \ - "[bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ]\n") \ -_(sw_interface_set_dpdk_hqos_tctbl, \ - "rx | sw_if_index entry tc queue \n") -#endif - /* List of command functions, CLI names map directly to functions */ #define foreach_cli_function \ _(comment, "usage: comment ") \ @@ -18705,22 +18423,6 @@ _(unset, "usage: unset ") foreach_vpe_api_reply_msg; #undef _ -#if DPDK > 0 -#define _(N,n) \ - static void vl_api_##n##_t_handler_uni \ - (vl_api_##n##_t * mp) \ - { \ - vat_main_t * vam = &vat_main; \ - if (vam->json_output) { \ - vl_api_##n##_t_handler_json(mp); \ - } else { \ - vl_api_##n##_t_handler(mp); \ - } \ - } -foreach_vpe_dpdk_api_reply_msg; -#undef _ -#endif - void vat_api_hookup (vat_main_t * vam) { @@ -18734,18 +18436,6 @@ vat_api_hookup (vat_main_t * vam) foreach_vpe_api_reply_msg; #undef _ -#if DPDK > 0 -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler_uni, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vpe_dpdk_api_reply_msg; -#undef _ -#endif - #if (VPP_API_TEST_BUILTIN==0) vl_msg_api_set_first_available_msg_id (VL_MSG_FIRST_AVAILABLE); #endif @@ -18760,21 +18450,11 @@ vat_api_hookup (vat_main_t * vam) #define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); foreach_vpe_api_msg; #undef _ -#if DPDK >0 -#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); - foreach_vpe_dpdk_api_msg; -#undef _ -#endif /* Help strings */ #define _(n,h) hash_set_mem (vam->help_by_name, #n, h); foreach_vpe_api_msg; #undef _ -#if DPDK >0 -#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); - foreach_vpe_dpdk_api_msg; -#undef _ -#endif /* CLI functions */ #define _(n,h) hash_set_mem (vam->function_by_name, #n, n); diff --git a/src/vnet.am b/src/vnet.am index 923f61d8..84930f05 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -23,8 +23,7 @@ libvnet_la_DEPENDENCIES = \ libvlibmemory.la \ libvlibmemoryclient.la -libvnet_la_LIBADD = $(libvnet_la_DEPENDENCIES) -lm -lpthread -ldl -lrt $(DPDK_LD_ADD) -libvnet_la_LDFLAGS = 
$(DPDK_LD_FLAGS) +libvnet_la_LIBADD = $(libvnet_la_DEPENDENCIES) -lm -lpthread -ldl -lrt if WITH_LIBSSL libvnet_la_LIBADD += -lcrypto @@ -396,15 +395,6 @@ libvnet_la_SOURCES += \ vnet/ipsec/ipsec_api.c API_FILES += vnet/ipsec/ipsec.api - -if WITH_DPDK -libvnet_la_SOURCES += \ - vnet/devices/dpdk/ipsec/esp_encrypt.c \ - vnet/devices/dpdk/ipsec/esp_decrypt.c \ - vnet/devices/dpdk/ipsec/crypto_node.c \ - vnet/devices/dpdk/ipsec/cli.c \ - vnet/devices/dpdk/ipsec/ipsec.c -endif endif libvnet_la_SOURCES += \ @@ -416,11 +406,6 @@ nobase_include_HEADERS += \ vnet/ipsec/ikev2.h \ vnet/ipsec/ikev2_priv.h \ vnet/ipsec/ipsec.api.h -if WITH_DPDK -nobase_include_HEADERS += \ - vnet/devices/dpdk/ipsec/ipsec.h \ - vnet/devices/dpdk/ipsec/esp.h -endif ######################################## # Layer 3 protocol: osi @@ -803,29 +788,7 @@ nobase_include_HEADERS += \ vnet/pg/pg.h \ vnet/pg/edit.h -######################################## -# DPDK -######################################## -if WITH_DPDK -libvnet_la_SOURCES += \ - vnet/devices/dpdk/buffer.c \ - vnet/devices/dpdk/dpdk_priv.h \ - vnet/devices/dpdk/device.c \ - vnet/devices/dpdk/format.c \ - vnet/devices/dpdk/init.c \ - vnet/devices/dpdk/main.c \ - vnet/devices/dpdk/node.c \ - vnet/devices/dpdk/thread.c \ - vnet/devices/dpdk/hqos.c \ - vnet/devices/dpdk/cli.c \ - vnet/devices/dpdk/dpdk_api.c - -nobase_include_HEADERS += \ - vnet/devices/dpdk/dpdk.h \ - vnet/devices/dpdk/dpdk.api.h - -API_FILES += vnet/devices/dpdk/dpdk.api -else +if !WITH_DPDK libvnet_la_SOURCES += \ vnet/devices/nic/ixge.c \ vnet/devices/nic/ixge.h \ diff --git a/src/vnet/devices/dpdk/buffer.c b/src/vnet/devices/dpdk/buffer.c deleted file mode 100644 index f95d4cb5..00000000 --- a/src/vnet/devices/dpdk/buffer.c +++ /dev/null @@ -1,588 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * buffer.c: allocate/free network buffers. - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @file - * - * Allocate/free network buffers. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - -STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, - "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); - -static_always_inline void -dpdk_rte_pktmbuf_free (vlib_main_t * vm, vlib_buffer_t * b) -{ - vlib_buffer_t *hb = b; - struct rte_mbuf *mb; - u32 next, flags; - mb = rte_mbuf_from_vlib_buffer (hb); - -next: - flags = b->flags; - next = b->next_buffer; - mb = rte_mbuf_from_vlib_buffer (b); - - if (PREDICT_FALSE (b->n_add_refs)) - { - rte_mbuf_refcnt_update (mb, b->n_add_refs); - b->n_add_refs = 0; - } - - rte_pktmbuf_free_seg (mb); - - if (flags & VLIB_BUFFER_NEXT_PRESENT) - { - b = vlib_get_buffer (vm, next); - goto next; - } -} - -static void -del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) -{ - u32 i; - vlib_buffer_t *b; - - for (i = 0; i < vec_len (f->buffers); i++) - { - b = vlib_get_buffer (vm, f->buffers[i]); - dpdk_rte_pktmbuf_free (vm, b); - } - - vec_free (f->name); - vec_free (f->buffers); -} - -/* Add buffer free list. */ -static void -dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *f; - u32 merge_index; - int i; - - ASSERT (os_get_cpu_number () == 0); - - f = vlib_buffer_get_free_list (vm, free_list_index); - - merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); - if (merge_index != ~0 && merge_index != free_list_index) - { - vlib_buffer_merge_free_lists (pool_elt_at_index - (bm->buffer_free_list_pool, merge_index), - f); - } - - del_free_list (vm, f); - - /* Poison it. */ - memset (f, 0xab, sizeof (f[0])); - - pool_put (bm->buffer_free_list_pool, f); - - for (i = 1; i < vec_len (vlib_mains); i++) - { - bm = vlib_mains[i]->buffer_main; - f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);; - memset (f, 0xab, sizeof (f[0])); - pool_put (bm->buffer_free_list_pool, f); - } -} - -/* Make sure free list has at least given number of free buffers. */ -static uword -fill_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * fl, uword min_free_buffers) -{ - dpdk_main_t *dm = &dpdk_main; - vlib_buffer_t *b0, *b1, *b2, *b3; - int n, i; - u32 bi0, bi1, bi2, bi3; - unsigned socket_id = rte_socket_id (); - struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id]; - struct rte_mbuf *mb0, *mb1, *mb2, *mb3; - - /* Too early? */ - if (PREDICT_FALSE (rmp == 0)) - return 0; - - /* Already have enough free buffers on free list? */ - n = min_free_buffers - vec_len (fl->buffers); - if (n <= 0) - return min_free_buffers; - - /* Always allocate round number of buffers. */ - n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32)); - - /* Always allocate new buffers in reasonably large sized chunks. 
*/ - n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); - - vec_validate (vm->mbuf_alloc_list, n - 1); - - if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) - return 0; - - _vec_len (vm->mbuf_alloc_list) = n; - - i = 0; - - while (i < (n - 7)) - { - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 4]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 5]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 6]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 7]), STORE); - - mb0 = vm->mbuf_alloc_list[i]; - mb1 = vm->mbuf_alloc_list[i + 1]; - mb2 = vm->mbuf_alloc_list[i + 2]; - mb3 = vm->mbuf_alloc_list[i + 3]; - - ASSERT (rte_mbuf_refcnt_read (mb0) == 0); - ASSERT (rte_mbuf_refcnt_read (mb1) == 0); - ASSERT (rte_mbuf_refcnt_read (mb2) == 0); - ASSERT (rte_mbuf_refcnt_read (mb3) == 0); - - rte_mbuf_refcnt_set (mb0, 1); - rte_mbuf_refcnt_set (mb1, 1); - rte_mbuf_refcnt_set (mb2, 1); - rte_mbuf_refcnt_set (mb3, 1); - - b0 = vlib_buffer_from_rte_mbuf (mb0); - b1 = vlib_buffer_from_rte_mbuf (mb1); - b2 = vlib_buffer_from_rte_mbuf (mb2); - b3 = vlib_buffer_from_rte_mbuf (mb3); - - bi0 = vlib_get_buffer_index (vm, b0); - bi1 = vlib_get_buffer_index (vm, b1); - bi2 = vlib_get_buffer_index (vm, b2); - bi3 = vlib_get_buffer_index (vm, b3); - - vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); - vec_add1_aligned (fl->buffers, bi1, CLIB_CACHE_LINE_BYTES); - vec_add1_aligned (fl->buffers, bi2, CLIB_CACHE_LINE_BYTES); - vec_add1_aligned (fl->buffers, bi3, CLIB_CACHE_LINE_BYTES); - - vlib_buffer_init_for_free_list (b0, fl); - vlib_buffer_init_for_free_list (b1, fl); - vlib_buffer_init_for_free_list (b2, fl); - vlib_buffer_init_for_free_list (b3, fl); - - if (fl->buffer_init_function) - { - fl->buffer_init_function (vm, fl, &bi0, 1); - fl->buffer_init_function (vm, fl, &bi1, 1); - fl->buffer_init_function (vm, fl, &bi2, 1); - fl->buffer_init_function (vm, fl, &bi3, 1); - } - i += 4; - } - - while (i < n) - { - mb0 = vm->mbuf_alloc_list[i]; - - ASSERT (rte_mbuf_refcnt_read (mb0) == 0); - rte_mbuf_refcnt_set (mb0, 1); - - b0 = vlib_buffer_from_rte_mbuf (mb0); - bi0 = vlib_get_buffer_index (vm, b0); - - vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); - - vlib_buffer_init_for_free_list (b0, fl); - - if (fl->buffer_init_function) - fl->buffer_init_function (vm, fl, &bi0, 1); - i++; - } - - fl->n_alloc += n; - - return n; -} - -static u32 -alloc_from_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * free_list, - u32 * alloc_buffers, u32 n_alloc_buffers) -{ - u32 *dst, *src; - uword len, n_filled; - - dst = alloc_buffers; - - n_filled = fill_free_list (vm, free_list, n_alloc_buffers); - if (n_filled == 0) - return 0; - - len = vec_len (free_list->buffers); - ASSERT (len >= n_alloc_buffers); - - src = free_list->buffers + len - n_alloc_buffers; - clib_memcpy (dst, src, n_alloc_buffers * sizeof (u32)); - - _vec_len (free_list->buffers) -= n_alloc_buffers; - - return n_alloc_buffers; -} - -/* Allocate a given number of buffers into given array. - Returns number actually allocated which will be either zero or - number requested. 
*/ -u32 -dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - - return alloc_from_free_list - (vm, - pool_elt_at_index (bm->buffer_free_list_pool, - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX), - buffers, n_buffers); -} - - -u32 -dpdk_buffer_alloc_from_free_list (vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, u32 free_list_index) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *f; - f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); - return alloc_from_free_list (vm, f, buffers, n_buffers); -} - -static_always_inline void -vlib_buffer_free_inline (vlib_main_t * vm, - u32 * buffers, u32 n_buffers, u32 follow_buffer_next) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *fl; - u32 fi; - int i; - u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, - u32 follow_buffer_next); - - cb = bm->buffer_free_callback; - - if (PREDICT_FALSE (cb != 0)) - n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); - - if (!n_buffers) - return; - - for (i = 0; i < n_buffers; i++) - { - vlib_buffer_t *b; - - b = vlib_get_buffer (vm, buffers[i]); - - fl = vlib_buffer_get_buffer_free_list (vm, b, &fi); - - /* The only current use of this callback: multicast recycle */ - if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) - { - int j; - - vlib_buffer_add_to_free_list - (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); - - for (j = 0; j < vec_len (bm->announce_list); j++) - { - if (fl == bm->announce_list[j]) - goto already_announced; - } - vec_add1 (bm->announce_list, fl); - already_announced: - ; - } - else - { - if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) - dpdk_rte_pktmbuf_free (vm, b); - } - } - if (vec_len (bm->announce_list)) - { - vlib_buffer_free_list_t *fl; - for (i = 0; i < vec_len (bm->announce_list); i++) - { - fl = bm->announce_list[i]; - fl->buffers_added_to_freelist_function (vm, fl); - } - _vec_len (bm->announce_list) = 0; - } -} - -static void -dpdk_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ - 1); -} - -static void -dpdk_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ - 0); -} - -static void -dpdk_packet_template_init (vlib_main_t * vm, - void *vt, - void *packet_data, - uword n_packet_data_bytes, - uword min_n_buffers_each_physmem_alloc, u8 * name) -{ - vlib_packet_template_t *t = (vlib_packet_template_t *) vt; - - vlib_worker_thread_barrier_sync (vm); - memset (t, 0, sizeof (t[0])); - - vec_add (t->packet_data, packet_data, n_packet_data_bytes); - - vlib_worker_thread_barrier_release (vm); -} - -clib_error_t * -vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, - unsigned socket_id) -{ - dpdk_main_t *dm = &dpdk_main; - vlib_physmem_main_t *vpm = &vm->physmem_main; - struct rte_mempool *rmp; - int i; - - vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); - - /* pool already exists, nothing to do */ - if (dm->pktmbuf_pools[socket_id]) - return 0; - - u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); - - rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ - num_mbufs, /* number of mbufs */ - 512, /* cache size */ - VLIB_BUFFER_HDR_SIZE, /* priv size */ - VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ - socket_id); /* cpu socket */ - - if 
(rmp) - { - { - uword this_pool_end; - uword this_pool_start; - uword this_pool_size; - uword save_vpm_start, save_vpm_end, save_vpm_size; - struct rte_mempool_memhdr *memhdr; - - this_pool_start = ~0ULL; - this_pool_end = 0LL; - - STAILQ_FOREACH (memhdr, &rmp->mem_list, next) - { - if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) - this_pool_end = (uword) (memhdr->addr + memhdr->len); - if (((uword) memhdr->addr) < this_pool_start) - this_pool_start = (uword) (memhdr->addr); - } - ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); - this_pool_size = this_pool_end - this_pool_start; - - if (CLIB_DEBUG > 1) - { - clib_warning ("%s: pool start %llx pool end %llx pool size %lld", - pool_name, this_pool_start, this_pool_end, - this_pool_size); - clib_warning - ("before: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - save_vpm_start = vpm->virtual.start; - save_vpm_end = vpm->virtual.end; - save_vpm_size = vpm->virtual.size; - - if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) - vpm->virtual.start = this_pool_start; - if (this_pool_end > vpm->virtual.end) - vpm->virtual.end = this_pool_end; - - vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; - - if (CLIB_DEBUG > 1) - { - clib_warning - ("after: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - /* check if fits into buffer index range */ - if ((u64) vpm->virtual.size > - ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) - { - clib_warning ("physmem: virtual size out of range!"); - vpm->virtual.start = save_vpm_start; - vpm->virtual.end = save_vpm_end; - vpm->virtual.size = save_vpm_size; - rmp = 0; - } - } - if (rmp) - { - dm->pktmbuf_pools[socket_id] = rmp; - vec_free (pool_name); - return 0; - } - } - - vec_free (pool_name); - - /* no usable pool for this socket, try to use pool from another one */ - for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) - { - if (dm->pktmbuf_pools[i]) - { - clib_warning - ("WARNING: Failed to allocate mempool for CPU socket %u. 
" - "Threads running on socket %u will use socket %u mempool.", - socket_id, socket_id, i); - dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i]; - return 0; - } - } - - return clib_error_return (0, "failed to allocate mempool on socket %u", - socket_id); -} - -#if CLIB_DEBUG > 0 - -u32 *vlib_buffer_state_validation_lock; -uword *vlib_buffer_state_validation_hash; -void *vlib_buffer_state_heap; - -static clib_error_t * -buffer_state_validation_init (vlib_main_t * vm) -{ - void *oldheap; - - vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); - - oldheap = clib_mem_set_heap (vlib_buffer_state_heap); - - vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); - vec_validate_aligned (vlib_buffer_state_validation_lock, 0, - CLIB_CACHE_LINE_BYTES); - clib_mem_set_heap (oldheap); - return 0; -} - -VLIB_INIT_FUNCTION (buffer_state_validation_init); -#endif - -static vlib_buffer_callbacks_t callbacks = { - .vlib_buffer_alloc_cb = &dpdk_buffer_alloc, - .vlib_buffer_alloc_from_free_list_cb = &dpdk_buffer_alloc_from_free_list, - .vlib_buffer_free_cb = &dpdk_buffer_free, - .vlib_buffer_free_no_next_cb = &dpdk_buffer_free_no_next, - .vlib_packet_template_init_cb = &dpdk_packet_template_init, - .vlib_buffer_delete_free_list_cb = &dpdk_buffer_delete_free_list, -}; - -static clib_error_t * -dpdk_buffer_init (vlib_main_t * vm) -{ - vlib_buffer_cb_register (vm, &callbacks); - return 0; -} - -VLIB_INIT_FUNCTION (dpdk_buffer_init); - -/** @endcond */ -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c deleted file mode 100644 index 99998862..00000000 --- a/src/vnet/devices/dpdk/cli.c +++ /dev/null @@ -1,2079 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "dpdk_priv.h" - -/** - * @file - * @brief CLI for DPDK Abstraction Layer and pcap Tx Trace. - * - * This file contains the source code for CLI for DPDK - * Abstraction Layer and pcap Tx Trace. 
- */ - - -static clib_error_t * -get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd, - dpdk_device_config_t ** devconf) -{ - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - struct rte_eth_dev_info dev_info; - uword *p = 0; - clib_error_t *error = NULL; - - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - if (subport_id != 0) - { - error = clib_error_return (0, "Invalid subport"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - *xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get ((*xd)->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - (*devconf) = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - (*devconf) = &dm->conf->default_devconf; - -done: - return error; -} - -static clib_error_t * -pcap_trace_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ -#define PCAP_DEF_PKT_TO_CAPTURE (100) - - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - u8 *filename; - u8 *chroot_filename = 0; - u32 max = 0; - int enabled = 0; - int errorFlag = 0; - clib_error_t *error = 0; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "on")) - { - if (dm->tx_pcap_enable == 0) - { - enabled = 1; - } - else - { - vlib_cli_output (vm, "pcap tx capture already on..."); - errorFlag = 1; - break; - } - } - else if (unformat (line_input, "off")) - { - if (dm->tx_pcap_enable) - { - vlib_cli_output (vm, "captured %d pkts...", - dm->pcap_main.n_packets_captured + 1); - if (dm->pcap_main.n_packets_captured) - { - dm->pcap_main.n_packets_to_capture = - dm->pcap_main.n_packets_captured; - error = pcap_write (&dm->pcap_main); - if (error) - clib_error_report (error); - else - vlib_cli_output (vm, "saved to %s...", dm->pcap_filename); - } - - dm->tx_pcap_enable = 0; - } - else - { - vlib_cli_output (vm, "pcap tx capture already off..."); - errorFlag = 1; - break; - } - } - else if (unformat (line_input, "max %d", &max)) - { - if (dm->tx_pcap_enable) - { - vlib_cli_output (vm, - "can't change max value while pcap tx capture active..."); - errorFlag = 1; - break; - } - } - else if (unformat (line_input, "intfc %U", - unformat_vnet_sw_interface, dm->vnet_main, - &dm->pcap_sw_if_index)) - ; - - else if (unformat (line_input, "intfc any")) - { - dm->pcap_sw_if_index = 0; - } - else if (unformat (line_input, "file %s", &filename)) - { - if (dm->tx_pcap_enable) - { - vlib_cli_output (vm, - "can't change file while pcap tx capture active..."); - errorFlag = 1; - break; - } - - /* Brain-police user path input */ - if (strstr ((char *) filename, "..") - || index ((char *) filename, '/')) - { - vlib_cli_output (vm, "illegal characters in filename '%s'", - filename); - vlib_cli_output (vm, - "Hint: Only filename, do not enter directory structure."); - vec_free (filename); - errorFlag = 1; - break; - } - - chroot_filename = format (0, "/tmp/%s%c", filename, 0); - vec_free 
(filename);
-	}
-      else if (unformat (line_input, "status"))
-	{
-	  if (dm->pcap_sw_if_index == 0)
-	    {
-	      vlib_cli_output (vm, "max is %d for any interface to file %s",
-			       dm->pcap_pkts_to_capture ?
-			       dm->pcap_pkts_to_capture :
-			       PCAP_DEF_PKT_TO_CAPTURE,
-			       dm->pcap_filename ?
-			       dm->pcap_filename : (u8 *) "/tmp/vpe.pcap");
-	    }
-	  else
-	    {
-	      vlib_cli_output (vm, "max is %d for interface %U to file %s",
-			       dm->pcap_pkts_to_capture ?
-			       dm->pcap_pkts_to_capture :
-			       PCAP_DEF_PKT_TO_CAPTURE,
-			       format_vnet_sw_if_index_name, dm->vnet_main,
-			       dm->pcap_sw_if_index,
-			       dm->pcap_filename ?
-			       dm->pcap_filename : (u8 *) "/tmp/vpe.pcap");
-	    }
-
-	  if (dm->tx_pcap_enable == 0)
-	    {
-	      vlib_cli_output (vm, "pcap tx capture is off...");
-	    }
-	  else
-	    {
-	      vlib_cli_output (vm, "pcap tx capture is on: %d of %d pkts...",
-			       dm->pcap_main.n_packets_captured,
-			       dm->pcap_main.n_packets_to_capture);
-	    }
-	  break;
-	}
-
-      else
-	{
-	  error = clib_error_return (0, "unknown input `%U'",
-				     format_unformat_error, line_input);
-	  errorFlag = 1;
-	  break;
-	}
-    }
-  unformat_free (line_input);
-
-
-  if (errorFlag == 0)
-    {
-      /* Since no error, save configured values. */
-      if (chroot_filename)
-	{
-	  if (dm->pcap_filename)
-	    vec_free (dm->pcap_filename);
-	  vec_add1 (chroot_filename, 0);
-	  dm->pcap_filename = chroot_filename;
-	}
-
-      if (max)
-	dm->pcap_pkts_to_capture = max;
-
-
-      if (enabled)
-	{
-	  if (dm->pcap_filename == 0)
-	    dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0);
-
-	  memset (&dm->pcap_main, 0, sizeof (dm->pcap_main));
-	  dm->pcap_main.file_name = (char *) dm->pcap_filename;
-	  dm->pcap_main.n_packets_to_capture = PCAP_DEF_PKT_TO_CAPTURE;
-	  if (dm->pcap_pkts_to_capture)
-	    dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture;
-
-	  dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet;
-	  dm->tx_pcap_enable = 1;
-	  vlib_cli_output (vm, "pcap tx capture on...");
-	}
-    }
-  else if (chroot_filename)
-    vec_free (chroot_filename);
-
-
-  return error;
-}
-
-/*?
- * This command is used to start or stop a packet capture, or show
- * the status of packet capture.
- *
- * This command has the following optional parameters:
- *
- * - on|off - Used to start or stop a packet capture.
- *
- * - max <nn> - Depth of local buffer. Once <nn> packets have been
- * received, the buffer is flushed to file. Once another <nn> packets
- * have been received, the buffer is flushed to file again, overwriting
- * the previous write. If not entered, the value defaults to 100.
- * Can only be updated if packet capture is off.
- *
- * - intfc <interface>|any - Used to specify a given interface,
- * or use 'any' to run packet capture on all interfaces.
- * 'any' is the default if not provided. Settings from a previous
- * packet capture are preserved, so 'any' can be used to reset
- * the interface setting.
- *
- * - file <name> - Used to specify the output filename. The file will
- * be placed in the '/tmp' directory, so only the filename is
- * supported; a directory should not be entered. If the file already
- * exists, it will be overwritten. If no filename is provided,
- * '/tmp/vpe.pcap' will be used. Can only be updated if packet capture
- * is off.
- *
- * - status - Displays the current status and configured attributes
- * associated with a packet capture. If packet capture is in progress,
- * 'status' will also return the number of packets currently in
- * the local buffer. All additional attributes entered on the command
- * line with 'status' will be ignored and not applied.
- *
- * @cliexpar
- * Example of how to display the status of a tx packet capture when off:
- * @cliexstart{pcap tx trace status}
- * max is 100, for any interface to file /tmp/vpe.pcap
- * pcap tx capture is off...
- * @cliexend
- * Example of how to start a tx packet capture:
- * @cliexstart{pcap tx trace on max 35 intfc GigabitEthernet0/8/0 file vppTest.pcap}
- * pcap tx capture on...
- * @cliexend
- * Example of how to display the status of a tx packet capture in progress:
- * @cliexstart{pcap tx trace status}
- * max is 35, for interface GigabitEthernet0/8/0 to file /tmp/vppTest.pcap
- * pcap tx capture is on: 20 of 35 pkts...
- * @cliexend
- * Example of how to stop a tx packet capture:
- * @cliexstart{vppctl pcap tx trace off}
- * captured 21 pkts...
- * saved to /tmp/vppTest.pcap...
- * @cliexend
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (pcap_trace_command, static) = {
-    .path = "pcap tx trace",
-    .short_help =
-    "pcap tx trace [on|off] [max <nn>] [intfc <interface>|any] [file <name>] [status]",
-    .function = pcap_trace_command_fn,
-};
-/* *INDENT-ON* */
-
-
-static clib_error_t *
-show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
-		  vlib_cli_command_t * cmd)
-{
-  struct rte_mempool *rmp;
-  int i;
-
-  for (i = 0; i < vec_len (dpdk_main.pktmbuf_pools); i++)
-    {
-      rmp = dpdk_main.pktmbuf_pools[i];
-      if (rmp)
-	{
-	  unsigned n_available = rte_mempool_avail_count (rmp);
-	  unsigned n_in_use = rte_mempool_in_use_count (rmp);
-
-	  vlib_cli_output (vm,
-			   "name=\"%s\" available = %7d allocated = %7d total = %7d\n",
-			   rmp->name, (u32) n_available, (u32) n_in_use,
-			   (u32) (n_available + n_in_use));
-	}
-      else
-	{
-	  vlib_cli_output (vm, "rte_mempool is NULL (!)\n");
-	}
-    }
-  return 0;
-}
-
-/*?
- * This command displays statistics of each DPDK mempool.
- *
- * @cliexpar
- * Example of how to display DPDK buffer data:
- * @cliexstart{show dpdk buffer}
- * name="mbuf_pool_socket0" available = 15104 allocated = 1280 total = 16384
- * @cliexend
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_show_dpdk_buffer, static) = {
-    .path = "show dpdk buffer",
-    .short_help = "show dpdk buffer",
-    .function = show_dpdk_buffer,
-    .is_mp_safe = 1,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
-		  vlib_cli_command_t * cmd)
-{
-  static u32 *allocated_buffers;
-  u32 n_alloc = 0;
-  u32 n_free = 0;
-  u32 first, actual_alloc;
-
-  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
-    {
-      if (unformat (input, "allocate %d", &n_alloc))
-	;
-      else if (unformat (input, "free %d", &n_free))
-	;
-      else
-	break;
-    }
-
-  if (n_free)
-    {
-      if (vec_len (allocated_buffers) < n_free)
-	return clib_error_return (0, "Can't free %d, only %d allocated",
-				  n_free, vec_len (allocated_buffers));
-
-      first = vec_len (allocated_buffers) - n_free;
-      vlib_buffer_free (vm, allocated_buffers + first, n_free);
-      _vec_len (allocated_buffers) = first;
-    }
-  if (n_alloc)
-    {
-      first = vec_len (allocated_buffers);
-      vec_validate (allocated_buffers,
-		    vec_len (allocated_buffers) + n_alloc - 1);
-
-      actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first,
-					n_alloc);
-      _vec_len (allocated_buffers) = first + actual_alloc;
-
-      if (actual_alloc < n_alloc)
-	vlib_cli_output (vm, "WARNING: only allocated %d buffers",
-			 actual_alloc);
-    }
-
-  vlib_cli_output (vm, "Currently %d buffers allocated",
-		   vec_len (allocated_buffers));
-
-  if (allocated_buffers && vec_len (allocated_buffers) == 0)
-    vec_free (allocated_buffers);
-
-  return 0;
-}
-
-/*?
- * This command tests the allocation and freeing of DPDK buffers.
- * If both 'allocate' and 'free' are entered on the
- * same command, the 'free' is executed first. If no
- * parameters are provided, this command displays how many DPDK buffers
- * the test command has allocated.
- *
- * @cliexpar
- * @parblock
- *
- * Example of how to display how many DPDK buffers the test command has allocated:
- * @cliexstart{test dpdk buffer}
- * Currently 0 buffers allocated
- * @cliexend
- *
- * Example of how to allocate DPDK buffers using the test command:
- * @cliexstart{test dpdk buffer allocate 10}
- * Currently 10 buffers allocated
- * @cliexend
- *
- * Example of how to free DPDK buffers allocated by the test command:
- * @cliexstart{test dpdk buffer free 10}
- * Currently 0 buffers allocated
- * @cliexend
- * @endparblock
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_test_dpdk_buffer, static) = {
-    .path = "test dpdk buffer",
-    .short_help = "test dpdk buffer [allocate <nn>] [free <nn>]",
-    .function = test_dpdk_buffer,
-    .is_mp_safe = 1,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
-		  vlib_cli_command_t * cmd)
-{
-  unformat_input_t _line_input, *line_input = &_line_input;
-  dpdk_main_t *dm = &dpdk_main;
-  vnet_hw_interface_t *hw;
-  dpdk_device_t *xd;
-  u32 hw_if_index = (u32) ~ 0;
-  u32 nb_rx_desc = (u32) ~ 0;
-  u32 nb_tx_desc = (u32) ~ 0;
-  clib_error_t *error = NULL;
-
-  if (!unformat_user (input, unformat_line_input, line_input))
-    return 0;
-
-  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
-    {
-      if (unformat
-	  (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
-	   &hw_if_index))
-	;
-      else if (unformat (line_input, "tx %d", &nb_tx_desc))
-	;
-      else if (unformat (line_input, "rx %d", &nb_rx_desc))
-	;
-      else
-	{
-	  error = clib_error_return (0, "parse error: '%U'",
-				     format_unformat_error, line_input);
-	  goto done;
-	}
-    }
-
-  if (hw_if_index == (u32) ~ 0)
-    {
-      error = clib_error_return (0, "please specify valid interface name");
-      goto done;
-    }
-
-  hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
-  xd = vec_elt_at_index (dm->devices, hw->dev_instance);
-
-  if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
-    {
-      error =
-	clib_error_return (0,
-			   "number of descriptors can be set only for "
-			   "physical devices");
-      goto done;
-    }
-
-  if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) &&
-      (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc))
-    {
-      error = clib_error_return (0, "nothing changed");
-      goto done;
-    }
-
-  if (nb_rx_desc != (u32) ~ 0)
-    xd->nb_rx_desc = nb_rx_desc;
-
-  if (nb_tx_desc != (u32) ~ 0)
-    xd->nb_tx_desc = nb_tx_desc;
-
-  error = dpdk_port_setup (dm, xd);
-
-done:
-  unformat_free (line_input);
-
-  return error;
-}
-
-/*?
- * This command sets the number of DPDK 'rx' and
- * 'tx' descriptors for the given physical interface. Use
- * the command 'show hardware-interfaces' to display the
- * current descriptor allocation.
- *
- * @cliexpar
- * Example of how to set the DPDK interface descriptors:
- * @cliexcmd{set dpdk interface descriptors GigabitEthernet0/8/0 rx 512 tx 512}
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc, static) = {
-    .path = "set dpdk interface descriptors",
-    .short_help = "set dpdk interface descriptors <if-name> [rx <n>] [tx <n>]",
-    .function = set_dpdk_if_desc,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input,
-			vlib_cli_command_t * cmd)
-{
-  vlib_thread_main_t *tm = vlib_get_thread_main ();
-  dpdk_main_t *dm = &dpdk_main;
-  dpdk_device_and_queue_t *dq;
-  int cpu;
-
-  if (tm->n_vlib_mains == 1)
-    vlib_cli_output (vm, "All interfaces are handled by main thread");
-
-  for (cpu = 0; cpu < vec_len (dm->devices_by_cpu); cpu++)
-    {
-      if (cpu >= dm->input_cpu_first_index &&
-	  cpu < (dm->input_cpu_first_index + dm->input_cpu_count))
-	vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu,
-			 vlib_worker_threads[cpu].name,
-			 vlib_worker_threads[cpu].lcore_id);
-
-      /* *INDENT-OFF* */
-      vec_foreach(dq, dm->devices_by_cpu[cpu])
-        {
-          u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index;
-          vnet_hw_interface_t * hi = vnet_get_hw_interface(dm->vnet_main, hw_if_index);
-          vlib_cli_output(vm, "  %v queue %u", hi->name, dq->queue_id);
-        }
-      /* *INDENT-ON* */
-    }
-  return 0;
-}
-
-/*?
- * This command is used to display the thread and core each
- * DPDK interface and queue is assigned to.
- *
- * @cliexpar
- * Example of how to display the DPDK interface placement:
- * @cliexstart{show dpdk interface placement}
- * Thread 1 (vpp_wk_0 at lcore 1):
- *   GigabitEthernet0/8/0 queue 0
- *   GigabitEthernet0/9/0 queue 0
- * Thread 2 (vpp_wk_1 at lcore 2):
- *   GigabitEthernet0/8/0 queue 1
- *   GigabitEthernet0/9/0 queue 1
- * @cliexend
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_show_dpdk_if_placement, static) = {
-    .path = "show dpdk interface placement",
-    .short_help = "show dpdk interface placement",
-    .function = show_dpdk_if_placement,
-};
-/* *INDENT-ON* */
-
-static int
-dpdk_device_queue_sort (void *a1, void *a2)
-{
-  dpdk_device_and_queue_t *dq1 = a1;
-  dpdk_device_and_queue_t *dq2 = a2;
-
-  if (dq1->device > dq2->device)
-    return 1;
-  else if (dq1->device < dq2->device)
-    return -1;
-  else if (dq1->queue_id > dq2->queue_id)
-    return 1;
-  else if (dq1->queue_id < dq2->queue_id)
-    return -1;
-  else
-    return 0;
-}
-
-static clib_error_t *
-set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input,
-		       vlib_cli_command_t * cmd)
-{
-  unformat_input_t _line_input, *line_input = &_line_input;
-  dpdk_main_t *dm = &dpdk_main;
-  dpdk_device_and_queue_t *dq;
-  vnet_hw_interface_t *hw;
-  dpdk_device_t *xd;
-  u32 hw_if_index = (u32) ~ 0;
-  u32 queue = (u32) 0;
-  u32 cpu = (u32) ~ 0;
-  int i;
-  clib_error_t *error = NULL;
-
-  if (!unformat_user (input, unformat_line_input, line_input))
-    return 0;
-
-  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
-    {
-      if (unformat
-	  (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
-	   &hw_if_index))
-	;
-      else if (unformat (line_input, "queue %d", &queue))
-	;
-      else if (unformat (line_input, "thread %d", &cpu))
-	;
-      else
-	{
-	  error = clib_error_return (0, "parse error: '%U'",
-				     format_unformat_error, line_input);
-	  goto done;
-	}
-    }
-
-  if (hw_if_index == (u32) ~ 0)
-    {
-      error = clib_error_return (0, "please specify valid interface name");
-      goto done;
-    }
-
-  if (cpu < dm->input_cpu_first_index ||
-      cpu >= (dm->input_cpu_first_index + dm->input_cpu_count))
-    {
error = clib_error_return (0, "please specify valid thread id"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - for (i = 0; i < vec_len (dm->devices_by_cpu); i++) - { - /* *INDENT-OFF* */ - vec_foreach(dq, dm->devices_by_cpu[i]) - { - if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index && - queue == dq->queue_id) - { - if (cpu == i) /* nothing to do */ - goto done; - - vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]); - vec_add2(dm->devices_by_cpu[cpu], dq, 1); - dq->queue_id = queue; - dq->device = xd->device_index; - xd->cpu_socket_id_by_queue[queue] = - rte_lcore_to_socket_id(vlib_worker_threads[cpu].lcore_id); - - vec_sort_with_function(dm->devices_by_cpu[i], - dpdk_device_queue_sort); - - vec_sort_with_function(dm->devices_by_cpu[cpu], - dpdk_device_queue_sort); - - if (vec_len(dm->devices_by_cpu[i]) == 0) - vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, - VLIB_NODE_STATE_DISABLED); - - if (vec_len(dm->devices_by_cpu[cpu]) == 1) - vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - - goto done; - } - } - /* *INDENT-ON* */ - } - - error = clib_error_return (0, "not found"); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to assign a given interface, and optionally a - * given queue, to a different thread. This will not create a thread, - * so the thread must already exist. Use '/etc/vpp/startup.conf' - * for the initial thread creation. If the 'queue' is not provided, - * it defaults to 0. - * - * @cliexpar - * Example of how to display the DPDK interface placement: - * @cliexstart{show dpdk interface placement} - * Thread 1 (vpp_wk_0 at lcore 1): - * GigabitEthernet0/8/0 queue 0 - * GigabitEthernet0/9/0 queue 0 - * Thread 2 (vpp_wk_1 at lcore 2): - * GigabitEthernet0/8/0 queue 1 - * GigabitEthernet0/9/0 queue 1 - * @cliexend - * Example of how to assign a DPDK interface and queue to a thread: - * @cliexcmd{set dpdk interface placement GigabitEthernet0/8/0 queue 1 thread 1} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { - .path = "set dpdk interface placement", - .short_help = "set dpdk interface placement [queue ] thread ", - .function = set_dpdk_if_placement, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_and_queue_t *dq; - int cpu; - - if (tm->n_vlib_mains == 1) - vlib_cli_output (vm, "All interfaces are handled by main thread"); - - for (cpu = 0; cpu < vec_len (dm->devices_by_hqos_cpu); cpu++) - { - if (cpu >= dm->hqos_cpu_first_index && - cpu < (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) - vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, - vlib_worker_threads[cpu].name, - vlib_worker_threads[cpu].lcore_id); - - vec_foreach (dq, dm->devices_by_hqos_cpu[cpu]) - { - u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; - vnet_hw_interface_t *hi = - vnet_get_hw_interface (dm->vnet_main, hw_if_index); - vlib_cli_output (vm, " %v queue %u", hi->name, dq->queue_id); - } - } - return 0; -} - -/*? - * This command is used to display the thread and core each - * DPDK output interface and HQoS queue is assigned too. 
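- * @note
- * Only threads in the HQoS thread range (configured with
- * 'corelist-hqos-threads' in the cpu section of startup.conf) are listed.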
- * - * @cliexpar - * Example of how to display the DPDK output interface and HQoS queue placement: - * @cliexstart{show dpdk interface hqos placement} - * Thread 1 (vpp_hqos-threads_0 at lcore 3): - * GigabitEthernet0/8/0 queue 0 - * Thread 2 (vpp_hqos-threads_1 at lcore 4): - * GigabitEthernet0/9/0 queue 0 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos_placement, static) = { - .path = "show dpdk interface hqos placement", - .short_help = "show dpdk interface hqos placement", - .function = show_dpdk_if_hqos_placement, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_and_queue_t *dq; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 cpu = (u32) ~ 0; - int i; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "thread %d", &cpu)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); - - if (cpu < dm->hqos_cpu_first_index || - cpu >= (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) - { - error = clib_error_return (0, "please specify valid thread id"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - for (i = 0; i < vec_len (dm->devices_by_hqos_cpu); i++) - { - vec_foreach (dq, dm->devices_by_hqos_cpu[i]) - { - if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index) - { - if (cpu == i) /* nothing to do */ - goto done; - - vec_del1 (dm->devices_by_hqos_cpu[i], - dq - dm->devices_by_hqos_cpu[i]); - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->queue_id = 0; - dq->device = xd->device_index; - - vec_sort_with_function (dm->devices_by_hqos_cpu[i], - dpdk_device_queue_sort); - - vec_sort_with_function (dm->devices_by_hqos_cpu[cpu], - dpdk_device_queue_sort); - - goto done; - } - } - } - - error = clib_error_return (0, "not found"); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to assign a given DPDK output interface and - * HQoS queue to a different thread. This will not create a thread, - * so the thread must already exist. Use '/etc/vpp/startup.conf' - * for the initial thread creation. See @ref qos_doc for more details. 
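- * @note
- * Each DPDK device has a single HQoS software queue (queue 0), so unlike
- * RX placement this command takes no queue argument.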
- * - * @cliexpar - * Example of how to display the DPDK output interface and HQoS queue placement: - * @cliexstart{show dpdk interface hqos placement} - * Thread 1 (vpp_hqos-threads_0 at lcore 3): - * GigabitEthernet0/8/0 queue 0 - * Thread 2 (vpp_hqos-threads_1 at lcore 4): - * GigabitEthernet0/9/0 queue 0 - * @cliexend - * Example of how to assign a DPDK output interface and HQoS queue to a thread: - * @cliexcmd{set dpdk interface hqos placement GigabitEthernet0/8/0 thread 2} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_placement, static) = { - .path = "set dpdk interface hqos placement", - .short_help = "set dpdk interface hqos placement thread ", - .function = set_dpdk_if_hqos_placement, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 subport_id = (u32) ~ 0; - u32 pipe_id = (u32) ~ 0; - u32 profile_id = (u32) ~ 0; - int rv; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "subport %d", &subport_id)) - ; - else if (unformat (line_input, "pipe %d", &pipe_id)) - ; - else if (unformat (line_input, "profile %d", &profile_id)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = - rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, - profile_id); - if (rv) - { - error = clib_error_return (0, "pipe configuration failed"); - goto done; - } - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to change the profile associate with a HQoS pipe. The - * '' is zero based. Use the command - * 'show dpdk interface hqos' to display the content of each profile. - * See @ref qos_doc for more details. - * - * @note - * Currently there is not an API to create a new HQoS pipe profile. One is - * created by default in the code (search for 'hqos_pipe_params_default''). - * Additional profiles can be created in code and code recompiled. Then use this - * command to assign it. 
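- * Internally the command is a thin wrapper around rte_sched_pipe_config(),
- * applied directly to the interface's running scheduler, so the new
- * profile takes effect immediately.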
- * - * @cliexpar - * Example of how to assign a new profile to a HQoS pipe: - * @cliexcmd{set dpdk interface hqos pipe GigabitEthernet0/8/0 subport 0 pipe 2 profile 1} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pipe, static) = -{ - .path = "set dpdk interface hqos pipe", - .short_help = "set dpdk interface hqos pipe subport pipe " - "profile ", - .function = set_dpdk_if_hqos_pipe, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = NULL; - u32 hw_if_index = (u32) ~ 0; - u32 subport_id = (u32) ~ 0; - struct rte_sched_subport_params p; - int rv; - clib_error_t *error = NULL; - u32 tb_rate = (u32) ~ 0; - u32 tb_size = (u32) ~ 0; - u32 tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = - { (u32) ~ 0, (u32) ~ 0, (u32) ~ 0, (u32) ~ 0 }; - u32 tc_period = (u32) ~ 0; - dpdk_device_config_t *devconf = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "subport %d", &subport_id)) - ; - else if (unformat (line_input, "rate %d", &tb_rate)) - ; - else if (unformat (line_input, "bktsize %d", &tb_size)) - ; - else if (unformat (line_input, "tc0 %d", &tc_rate[0])) - ; - else if (unformat (line_input, "tc1 %d", &tc_rate[1])) - ; - else if (unformat (line_input, "tc2 %d", &tc_rate[2])) - ; - else if (unformat (line_input, "tc3 %d", &tc_rate[3])) - ; - else if (unformat (line_input, "period %d", &tc_period)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - error = get_hqos (hw_if_index, subport_id, &xd, &devconf); - - if (error == NULL) - { - /* Copy the current values over to local structure. */ - memcpy (&p, &devconf->hqos.subport[subport_id], sizeof (p)); - - /* Update local structure with input values. */ - if (tb_rate != (u32) ~ 0) - { - p.tb_rate = tb_rate; - p.tc_rate[0] = tb_rate; - p.tc_rate[1] = tb_rate; - p.tc_rate[2] = tb_rate; - p.tc_rate[3] = tb_rate; - } - if (tb_size != (u32) ~ 0) - { - p.tb_size = tb_size; - } - if (tc_rate[0] != (u32) ~ 0) - { - p.tc_rate[0] = tc_rate[0]; - } - if (tc_rate[1] != (u32) ~ 0) - { - p.tc_rate[1] = tc_rate[1]; - } - if (tc_rate[2] != (u32) ~ 0) - { - p.tc_rate[2] = tc_rate[2]; - } - if (tc_rate[3] != (u32) ~ 0) - { - p.tc_rate[3] = tc_rate[3]; - } - if (tc_period != (u32) ~ 0) - { - p.tc_period = tc_period; - } - - /* Apply changes. */ - rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p); - if (rv) - { - error = clib_error_return (0, "subport configuration failed"); - goto done; - } - else - { - /* Successfully applied, so save of the input values. */ - memcpy (&devconf->hqos.subport[subport_id], &p, sizeof (p)); - } - } - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to set the subport level parameters such as token - * bucket rate (bytes per seconds), token bucket size (bytes), traffic class - * rates (bytes per seconds) and token update period (Milliseconds). - * - * By default, the 'rate' is set to 1250000000 bytes/second (10GbE - * rate) and each of the four traffic classes is set to 100% of the port rate. 
- * If the 'rate' is updated by this command, all four traffic classes
- * are assigned the same value. Each of the four traffic classes can be
- * updated individually.
- *
- * @cliexpar
- * Example of how to modify the subport attributes for a 1GbE link:
- * @cliexcmd{set dpdk interface hqos subport GigabitEthernet0/8/0 subport 0 rate 125000000}
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_subport, static) = {
-  .path = "set dpdk interface hqos subport",
-  .short_help = "set dpdk interface hqos subport <interface> subport <n> "
-                "[rate <n>] [bktsize <n>] [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] "
-                "[period <n>]",
-  .function = set_dpdk_if_hqos_subport,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input,
-                        vlib_cli_command_t * cmd)
-{
-  unformat_input_t _line_input, *line_input = &_line_input;
-  vlib_thread_main_t *tm = vlib_get_thread_main ();
-  dpdk_main_t *dm = &dpdk_main;
-  vnet_hw_interface_t *hw;
-  dpdk_device_t *xd;
-  u32 hw_if_index = (u32) ~ 0;
-  u32 tc = (u32) ~ 0;
-  u32 queue = (u32) ~ 0;
-  u32 entry = (u32) ~ 0;
-  u32 val, i;
-  clib_error_t *error = NULL;
-
-  if (!unformat_user (input, unformat_line_input, line_input))
-    return 0;
-
-  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
-    {
-      if (unformat
-          (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
-           &hw_if_index))
-        ;
-      else if (unformat (line_input, "entry %d", &entry))
-        ;
-      else if (unformat (line_input, "tc %d", &tc))
-        ;
-      else if (unformat (line_input, "queue %d", &queue))
-        ;
-      else
-        {
-          error = clib_error_return (0, "parse error: '%U'",
-                                     format_unformat_error, line_input);
-          goto done;
-        }
-    }
-
-  if (hw_if_index == (u32) ~ 0)
-    {
-      error = clib_error_return (0, "please specify valid interface name");
-      goto done;
-    }
-  if (entry >= 64)
-    {
-      error = clib_error_return (0, "invalid entry");
-      goto done;
-    }
-  if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE)
-    {
-      error = clib_error_return (0, "invalid traffic class");
-      goto done;
-    }
-  if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
-    {
-      error = clib_error_return (0, "invalid traffic class queue");
-      goto done;
-    }
-
-  hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
-  xd = vec_elt_at_index (dm->devices, hw->dev_instance);
-
-  /* Detect the set of worker threads */
-  uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers");
-  /* Should never happen, shut up Coverity warning */
-  if (p == 0)
-    {
-      error = clib_error_return (0, "no worker registrations?");
-      goto done;
-    }
-
-  vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0];
-  int worker_thread_first = tr->first_index;
-  int worker_thread_count = tr->count;
-
-  val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
-  for (i = 0; i < worker_thread_count; i++)
-    xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val;
-
-done:
-  unformat_free (line_input);
-
-  return error;
-}
-
-/*?
- * This command is used to set the traffic class translation table. The
- * traffic class translation table is used to map 64 values (0-63) to one of
- * four traffic classes and one of four HQoS input queues. Use the 'show
- * dpdk interface hqos' command to display the traffic class translation
- * table. See @ref qos_doc for more details.
- *
- * This command has the following parameters:
- *
- * - <interface> - Used to specify the output interface.
- *
- * - entry <n> - Mapped value (0-63) to assign traffic class and queue to.
- *
- * - tc <n> - Traffic class (0-3) to be used by the provided mapped value.
- * - * - queue - HQoS input queue (0-3) to be used by the provided mapped value. - * - * @cliexpar - * Example of how modify the traffic class translation table: - * @cliexcmd{set dpdk interface hqos tctbl GigabitEthernet0/8/0 entry 16 tc 2 queue 2} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_tctbl, static) = { - .path = "set dpdk interface hqos tctbl", - .short_help = "set dpdk interface hqos tctbl entry tc queue ", - .function = set_dpdk_if_hqos_tctbl, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - clib_error_t *error = NULL; - - /* Device specific data */ - struct rte_eth_dev_info dev_info; - dpdk_device_config_t *devconf = 0; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - /* Should never happen, shut up Coverity warning */ - if (p == 0) - return clib_error_return (0, "no worker registrations?"); - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - /* Packet field configuration */ - u64 mask = (u64) ~ 0; - u32 id = (u32) ~ 0; - u32 offset = (u32) ~ 0; - - /* HQoS params */ - u32 n_subports_per_port, n_pipes_per_subport, tctbl_size; - - u32 i; - - /* Parse input arguments */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "id subport")) - id = 0; - else if (unformat (line_input, "id pipe")) - id = 1; - else if (unformat (line_input, "id tc")) - id = 2; - else if (unformat (line_input, "id %d", &id)) - ; - else if (unformat (line_input, "offset %d", &offset)) - ; - else if (unformat (line_input, "mask %llx", &mask)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - /* Get interface */ - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get (xd->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - if (devconf->hqos_enabled == 0) - { - vlib_cli_output (vm, "HQoS disabled for this interface"); - goto done; - } - - n_subports_per_port = devconf->hqos.port.n_subports_per_port; - n_pipes_per_subport = devconf->hqos.port.n_pipes_per_subport; - tctbl_size = RTE_DIM (devconf->hqos.tc_table); - - /* Validate packet field configuration: id, offset and mask */ - if (id >= 3) - { - error = 
clib_error_return (0, "invalid packet field id");
-      goto done;
-    }
-
-  switch (id)
-    {
-    case 0:
-      if (dpdk_hqos_validate_mask (mask, n_subports_per_port) != 0)
-        {
-          error = clib_error_return (0, "invalid subport ID mask "
-                                     "(n_subports_per_port = %u)",
-                                     n_subports_per_port);
-          goto done;
-        }
-      break;
-    case 1:
-      if (dpdk_hqos_validate_mask (mask, n_pipes_per_subport) != 0)
-        {
-          error = clib_error_return (0, "invalid pipe ID mask "
-                                     "(n_pipes_per_subport = %u)",
-                                     n_pipes_per_subport);
-          goto done;
-        }
-      break;
-    case 2:
-    default:
-      if (dpdk_hqos_validate_mask (mask, tctbl_size) != 0)
-        {
-          error = clib_error_return (0, "invalid TC table index mask "
-                                     "(TC table size = %u)", tctbl_size);
-          goto done;
-        }
-    }
-
-  /* Propagate packet field configuration to all workers */
-  for (i = 0; i < worker_thread_count; i++)
-    switch (id)
-      {
-      case 0:
-        xd->hqos_wt[worker_thread_first + i].hqos_field0_slabpos = offset;
-        xd->hqos_wt[worker_thread_first + i].hqos_field0_slabmask = mask;
-        xd->hqos_wt[worker_thread_first + i].hqos_field0_slabshr =
-          __builtin_ctzll (mask);
-        break;
-      case 1:
-        xd->hqos_wt[worker_thread_first + i].hqos_field1_slabpos = offset;
-        xd->hqos_wt[worker_thread_first + i].hqos_field1_slabmask = mask;
-        xd->hqos_wt[worker_thread_first + i].hqos_field1_slabshr =
-          __builtin_ctzll (mask);
-        break;
-      case 2:
-      default:
-        xd->hqos_wt[worker_thread_first + i].hqos_field2_slabpos = offset;
-        xd->hqos_wt[worker_thread_first + i].hqos_field2_slabmask = mask;
-        xd->hqos_wt[worker_thread_first + i].hqos_field2_slabshr =
-          __builtin_ctzll (mask);
-      }
-
-done:
-  unformat_free (line_input);
-
-  return error;
-}
-
-/*?
- * This command is used to set the packet fields required for classifying the
- * incoming packet. As a result of the classification process, packet field
- * information will be mapped to a 5-tuple (subport, pipe, traffic class,
- * queue, color) and stored in the packet mbuf.
- *
- * This command has the following parameters:
- *
- * - <interface> - Used to specify the output interface.
- *
- * - id subport|pipe|tc - Classification occurs across three fields.
- * This parameter indicates which of the three masks is being configured. Legacy
- * code used 0-2 to represent these three fields, so 0-2 is still accepted.
- *   - subport|0 - Currently only one subport is supported, so only
- * an empty mask is supported for the subport classification.
- *   - pipe|1 - Currently, 4096 pipes per subport are supported, so a
- * 12-bit mask should be configured to map to the 0-4095 pipes.
- *   - tc|2 - The translation table (see 'set dpdk interface hqos
- * tctbl' command) maps each value (0-63) into one of the 4 traffic classes
- * per pipe. A 6-bit mask should be configured to map this field to a traffic class.
- *
- * - offset - Offset in the packet to apply the 64-bit mask for classification.
- * The offset should be on an 8-byte boundary (0,8,16,24..).
- *
- * - mask - 64-bit mask to apply to packet at the given 'offset'.
- * Bits must be contiguous and should not include '0x'.
- *
- * The default values for the 'pktfield' assume Ethernet/IPv4/UDP packets with
- * no VLAN. Adjust based on expected packet format and desired classification field.
- *   - 'subport' is always empty (offset 0 mask 0000000000000000)
- *   - By default, 'pipe' maps to the UDP payload bits 12 ..
23 (offset 40 - * mask 0000000fff000000) - * - By default, 'tc' maps to the DSCP field in IP header (offset 48 mask - * 00000000000000fc) - * - * @cliexpar - * Example of how modify the 'pipe' classification filter to match VLAN: - * @cliexcmd{set dpdk interface hqos pktfield GigabitEthernet0/8/0 id pipe offset 8 mask 0000000000000FFF} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pktfield, static) = { - .path = "set dpdk interface hqos pktfield", - .short_help = "set dpdk interface hqos pktfield id subport|pipe|tc offset " - "mask ", - .function = set_dpdk_if_hqos_pktfield, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - dpdk_device_config_hqos_t *cfg; - dpdk_device_hqos_per_hqos_thread_t *ht; - dpdk_device_hqos_per_worker_thread_t *wk; - u32 *tctbl; - u32 hw_if_index = (u32) ~ 0; - u32 profile_id, subport_id, i; - struct rte_eth_dev_info dev_info; - dpdk_device_config_t *devconf = 0; - vlib_thread_registration_t *tr; - uword *p = 0; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify interface name!!"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get (xd->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - if (devconf->hqos_enabled == 0) - { - vlib_cli_output (vm, "HQoS disabled for this interface"); - goto done; - } - - /* Detect the set of worker threads */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - - /* Should never happen, shut up Coverity warning */ - if (p == 0) - { - error = clib_error_return (0, "no worker registrations?"); - goto done; - } - - tr = (vlib_thread_registration_t *) p[0]; - - cfg = &devconf->hqos; - ht = xd->hqos_ht; - wk = &xd->hqos_wt[tr->first_index]; - tctbl = wk->hqos_tc_table; - - vlib_cli_output (vm, " Thread:"); - vlib_cli_output (vm, " Input SWQ size = %u packets", cfg->swq_size); - vlib_cli_output (vm, " Enqueue burst size = %u packets", - ht->hqos_burst_enq); - vlib_cli_output (vm, " Dequeue burst size = %u packets", - ht->hqos_burst_deq); - - vlib_cli_output (vm, - " Packet field 0: slab position = %4u, slab bitmask = 0x%016llx (subport)", - wk->hqos_field0_slabpos, wk->hqos_field0_slabmask); - vlib_cli_output (vm, - " Packet field 1: slab position = %4u, slab bitmask = 0x%016llx (pipe)", - wk->hqos_field1_slabpos, 
wk->hqos_field1_slabmask); - vlib_cli_output (vm, - " Packet field 2: slab position = %4u, slab bitmask = 0x%016llx (tc)", - wk->hqos_field2_slabpos, wk->hqos_field2_slabmask); - vlib_cli_output (vm, - " Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)"); - vlib_cli_output (vm, - " [ 0 .. 15]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[0] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[0] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[1] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[1] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[2] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[2] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[3] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[3] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[4] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[4] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[5] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[5] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[6] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[6] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[7] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[7] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[8] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[8] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[9] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[9] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[10] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[10] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[11] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[11] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[12] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[12] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[13] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[13] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[14] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[14] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[15] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[15] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, - " [16 .. 
31]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[16] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[16] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[17] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[17] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[18] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[18] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[19] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[19] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[20] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[20] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[21] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[21] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[22] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[22] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[23] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[23] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[24] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[24] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[25] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[25] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[26] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[26] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[27] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[27] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[28] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[28] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[29] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[29] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[30] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[30] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[31] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[31] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, - " [32 .. 47]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[32] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[32] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[33] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[33] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[34] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[34] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[35] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[35] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[36] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[36] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[37] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[37] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[38] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[38] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[39] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[39] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[40] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[40] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[41] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[41] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[42] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[42] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[43] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[43] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[44] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[44] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[45] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[45] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[46] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[46] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[47] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[47] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, - " [48 .. 
63]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[48] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[48] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[49] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[49] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[50] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[50] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[51] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[51] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[52] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[52] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[53] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[53] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[54] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[54] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[55] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[55] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[56] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[56] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[57] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[57] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[58] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[58] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[59] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[59] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[60] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[60] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[61] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[61] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[62] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[62] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[63] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[63] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, " Port:"); - vlib_cli_output (vm, " Rate = %u bytes/second", cfg->port.rate); - vlib_cli_output (vm, " MTU = %u bytes", cfg->port.mtu); - vlib_cli_output (vm, " Frame overhead = %u bytes", - cfg->port.frame_overhead); - vlib_cli_output (vm, " Number of subports = %u", - cfg->port.n_subports_per_port); - vlib_cli_output (vm, " Number of pipes per subport = %u", - cfg->port.n_pipes_per_subport); - vlib_cli_output (vm, - " Packet queue size: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u packets", - cfg->port.qsize[0], cfg->port.qsize[1], cfg->port.qsize[2], - cfg->port.qsize[3]); - vlib_cli_output (vm, " Number of pipe profiles = %u", - cfg->port.n_pipe_profiles); - - for (subport_id = 0; subport_id < vec_len (cfg->subport); subport_id++) - { - vlib_cli_output (vm, " Subport %u:", subport_id); - vlib_cli_output (vm, " Rate = %u bytes/second", - cfg->subport[subport_id].tb_rate); - vlib_cli_output (vm, " Token bucket size = %u bytes", - cfg->subport[subport_id].tb_size); - vlib_cli_output (vm, - " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", - cfg->subport[subport_id].tc_rate[0], - cfg->subport[subport_id].tc_rate[1], - cfg->subport[subport_id].tc_rate[2], - cfg->subport[subport_id].tc_rate[3]); - vlib_cli_output (vm, " TC period = %u milliseconds", - cfg->subport[subport_id].tc_period); - } - - for (profile_id = 0; profile_id < vec_len (cfg->pipe); profile_id++) - { - vlib_cli_output (vm, " Pipe profile %u:", profile_id); - vlib_cli_output (vm, " Rate = %u bytes/second", - cfg->pipe[profile_id].tb_rate); - vlib_cli_output (vm, " Token bucket size = %u bytes", - cfg->pipe[profile_id].tb_size); - vlib_cli_output (vm, - " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", - cfg->pipe[profile_id].tc_rate[0], - cfg->pipe[profile_id].tc_rate[1], - 
cfg->pipe[profile_id].tc_rate[2], - cfg->pipe[profile_id].tc_rate[3]); - vlib_cli_output (vm, " TC period = %u milliseconds", - cfg->pipe[profile_id].tc_period); -#ifdef RTE_SCHED_SUBPORT_TC_OV - vlib_cli_output (vm, " TC3 oversubscription_weight = %u", - cfg->pipe[profile_id].tc_ov_weight); -#endif - - for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) - { - vlib_cli_output (vm, - " TC%u WRR weights: Q0 = %u, Q1 = %u, Q2 = %u, Q3 = %u", - i, cfg->pipe[profile_id].wrr_weights[i * 4], - cfg->pipe[profile_id].wrr_weights[i * 4 + 1], - cfg->pipe[profile_id].wrr_weights[i * 4 + 2], - cfg->pipe[profile_id].wrr_weights[i * 4 + 3]); - } - } - -#ifdef RTE_SCHED_RED - vlib_cli_output (vm, " Weighted Random Early Detection (WRED):"); - for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) - { - vlib_cli_output (vm, " TC%u min: G = %u, Y = %u, R = %u", i, - cfg->port.red_params[i][e_RTE_METER_GREEN].min_th, - cfg->port.red_params[i][e_RTE_METER_YELLOW].min_th, - cfg->port.red_params[i][e_RTE_METER_RED].min_th); - - vlib_cli_output (vm, " TC%u max: G = %u, Y = %u, R = %u", i, - cfg->port.red_params[i][e_RTE_METER_GREEN].max_th, - cfg->port.red_params[i][e_RTE_METER_YELLOW].max_th, - cfg->port.red_params[i][e_RTE_METER_RED].max_th); - - vlib_cli_output (vm, - " TC%u inverted probability: G = %u, Y = %u, R = %u", - i, cfg->port.red_params[i][e_RTE_METER_GREEN].maxp_inv, - cfg->port.red_params[i][e_RTE_METER_YELLOW].maxp_inv, - cfg->port.red_params[i][e_RTE_METER_RED].maxp_inv); - - vlib_cli_output (vm, " TC%u weight: R = %u, Y = %u, R = %u", i, - cfg->port.red_params[i][e_RTE_METER_GREEN].wq_log2, - cfg->port.red_params[i][e_RTE_METER_YELLOW].wq_log2, - cfg->port.red_params[i][e_RTE_METER_RED].wq_log2); - } -#endif - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to display details of an output interface's HQoS - * settings. - * - * @cliexpar - * Example of how to display HQoS settings for an interfaces: - * @cliexstart{show dpdk interface hqos GigabitEthernet0/8/0} - * Thread: - * Input SWQ size = 4096 packets - * Enqueue burst size = 256 packets - * Dequeue burst size = 220 packets - * Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport) - * Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe) - * Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc) - * Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...) - * [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * [48 .. 
63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * Port: - * Rate = 1250000000 bytes/second - * MTU = 1514 bytes - * Frame overhead = 24 bytes - * Number of subports = 1 - * Number of pipes per subport = 4096 - * Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets - * Number of pipe profiles = 2 - * Subport 0: - * Rate = 1250000000 bytes/second - * Token bucket size = 1000000 bytes - * Traffic class rate: TC0 = 1250000000, TC1 = 1250000000, TC2 = 1250000000, TC3 = 1250000000 bytes/second - * TC period = 10 milliseconds - * Pipe profile 0: - * Rate = 305175 bytes/second - * Token bucket size = 1000000 bytes - * Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second - * TC period = 40 milliseconds - * TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos, static) = { - .path = "show dpdk interface hqos", - .short_help = "show dpdk interface hqos ", - .function = show_dpdk_if_hqos, -}; - -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - clib_error_t *error = NULL; -#ifdef RTE_SCHED_COLLECT_STATS - dpdk_main_t *dm = &dpdk_main; - u32 hw_if_index = (u32) ~ 0; - u32 subport = (u32) ~ 0; - u32 pipe = (u32) ~ 0; - u32 tc = (u32) ~ 0; - u32 tc_q = (u32) ~ 0; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - uword *p = 0; - struct rte_eth_dev_info dev_info; - dpdk_device_config_t *devconf = 0; - u32 qindex; - struct rte_sched_queue_stats stats; - u16 qlen; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - - else if (unformat (line_input, "subport %d", &subport)) - ; - - else if (unformat (line_input, "pipe %d", &pipe)) - ; - - else if (unformat (line_input, "tc %d", &tc)) - ; - - else if (unformat (line_input, "tc_q %d", &tc_q)) - ; - - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify interface name!!"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get (xd->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - if (devconf->hqos_enabled == 0) - { - vlib_cli_output (vm, "HQoS disabled for this interface"); - goto done; - } - - /* - * Figure out which queue to query. cf rte_sched_port_qindex. (Not sure why - * that method isn't made public by DPDK - how _should_ we get the queue ID?) 
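- * The flattened form used below is:
- *   qindex = ((subport * n_pipes_per_subport + pipe)
- *             * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc)
- *            * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q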
- */
-  qindex = subport * devconf->hqos.port.n_pipes_per_subport + pipe;
-  qindex = qindex * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc;
-  qindex = qindex * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q;
-
-  if (rte_sched_queue_read_stats (xd->hqos_ht->hqos, qindex, &stats, &qlen) !=
-      0)
-    {
-      error = clib_error_return (0, "failed to read stats");
-      goto done;
-    }
-
-  vlib_cli_output (vm, "%=24s%=16s", "Stats Parameter", "Value");
-  vlib_cli_output (vm, "%=24s%=16d", "Packets", stats.n_pkts);
-  vlib_cli_output (vm, "%=24s%=16d", "Packets dropped", stats.n_pkts_dropped);
-#ifdef RTE_SCHED_RED
-  vlib_cli_output (vm, "%=24s%=16d", "Packets dropped (RED)",
-                   stats.n_pkts_red_dropped);
-#endif
-  vlib_cli_output (vm, "%=24s%=16d", "Bytes", stats.n_bytes);
-  vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped);
-
-#else
-
-  /* Get a line of input */
-  if (!unformat_user (input, unformat_line_input, line_input))
-    return 0;
-
-  vlib_cli_output (vm, "RTE_SCHED_COLLECT_STATS disabled in DPDK");
-  goto done;
-
-#endif
-
-done:
-  unformat_free (line_input);
-
-  return error;
-}
-
-/*?
- * This command is used to display statistics associated with a HQoS traffic class
- * queue.
- *
- * @note
- * Statistic collection by the scheduler is disabled by default in DPDK. In order to
- * turn it on, add the following line to '../vpp/dpdk/Makefile':
- *   - $(call set,RTE_SCHED_COLLECT_STATS,y)
- *
- * @cliexpar
- * Example of how to display statistics for a HQoS traffic class queue:
- * @cliexstart{show dpdk hqos queue GigabitEthernet0/9/0 subport 0 pipe 3181 tc 0 tc_q 0}
- * Stats Parameter        Value
- * Packets                140
- * Packets dropped        0
- * Bytes                  8400
- * Bytes dropped          0
- * @cliexend
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = {
-  .path = "show dpdk hqos queue",
-  .short_help = "show dpdk hqos queue <interface> subport <n> pipe <n> tc <n> tc_q <n>",
-  .function = show_dpdk_hqos_queue_stats,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_dpdk_version_command_fn (vlib_main_t * vm,
-                              unformat_input_t * input,
-                              vlib_cli_command_t * cmd)
-{
-#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c);
-  _("DPDK Version", "%s", rte_version ());
-  _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str);
-#undef _
-  return 0;
-}
-
-/*?
- * This command is used to display the current DPDK version and
- * the list of arguments passed to DPDK when started.
- *
- * @cliexpar
- * Example of how to display the DPDK version and EAL init arguments:
- * @cliexstart{show dpdk version}
- * DPDK Version: DPDK 16.11.0
- * DPDK EAL init args: -c 1 -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -w 0000:00:08.0 -w 0000:00:09.0 --master-lcore 0 --socket-mem 256
- * @cliexend
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_vpe_version_command, static) = {
-  .path = "show dpdk version",
-  .short_help = "show dpdk version",
-  .function = show_dpdk_version_command_fn,
-};
-/* *INDENT-ON* */
-
-clib_error_t *
-dpdk_cli_init (vlib_main_t * vm)
-{
-  return 0;
-}
-
-VLIB_INIT_FUNCTION (dpdk_cli_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/dpdk/device.c b/src/vnet/devices/dpdk/device.c
deleted file mode 100644
index 17397900..00000000
--- a/src/vnet/devices/dpdk/device.c
+++ /dev/null
@@ -1,852 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include - -#include "dpdk_priv.h" -#include - -#define foreach_dpdk_tx_func_error \ - _(BAD_RETVAL, "DPDK tx function returned an error") \ - _(RING_FULL, "Tx packet drops (ring full)") \ - _(PKT_DROP, "Tx packet drops (dpdk tx failure)") \ - _(REPL_FAIL, "Tx packet drops (replication failure)") - -typedef enum -{ -#define _(f,s) DPDK_TX_FUNC_ERROR_##f, - foreach_dpdk_tx_func_error -#undef _ - DPDK_TX_FUNC_N_ERROR, -} dpdk_tx_func_error_t; - -static char *dpdk_tx_func_error_strings[] = { -#define _(n,s) s, - foreach_dpdk_tx_func_error -#undef _ -}; - -clib_error_t * -dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address) -{ - int error; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - - error = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) address); - - if (error) - { - return clib_error_return (0, "mac address set failed: %d", error); - } - else - { - vec_reset_length (xd->default_mac_address); - vec_add (xd->default_mac_address, address, sizeof (address)); - return NULL; - } -} - -clib_error_t * -dpdk_set_mc_filter (vnet_hw_interface_t * hi, - struct ether_addr mc_addr_vec[], int naddr) -{ - int error; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - - error = rte_eth_dev_set_mc_addr_list (xd->device_index, mc_addr_vec, naddr); - - if (error) - { - return clib_error_return (0, "mc addr list failed: %d", error); - } - else - { - return NULL; - } -} - -struct rte_mbuf * -dpdk_replicate_packet_mb (vlib_buffer_t * b) -{ - dpdk_main_t *dm = &dpdk_main; - struct rte_mbuf **mbufs = 0, *s, *d; - u8 nb_segs; - unsigned socket_id = rte_socket_id (); - int i; - - ASSERT (dm->pktmbuf_pools[socket_id]); - s = rte_mbuf_from_vlib_buffer (b); - nb_segs = s->nb_segs; - vec_validate (mbufs, nb_segs - 1); - - if (rte_pktmbuf_alloc_bulk (dm->pktmbuf_pools[socket_id], mbufs, nb_segs)) - { - vec_free (mbufs); - return 0; - } - - d = mbufs[0]; - d->nb_segs = s->nb_segs; - d->data_len = s->data_len; - d->pkt_len = s->pkt_len; - d->data_off = s->data_off; - clib_memcpy (d->buf_addr, s->buf_addr, RTE_PKTMBUF_HEADROOM + s->data_len); - - for (i = 1; i < nb_segs; i++) - { - d->next = mbufs[i]; - d = mbufs[i]; - s = s->next; - d->data_len = s->data_len; - clib_memcpy (d->buf_addr, s->buf_addr, - RTE_PKTMBUF_HEADROOM + s->data_len); - } - - d = mbufs[0]; - vec_free (mbufs); - return d; -} - -static void -dpdk_tx_trace_buffer (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, u32 buffer_index, vlib_buffer_t * buffer) -{ - vlib_main_t *vm = vlib_get_main (); - dpdk_tx_dma_trace_t *t0; - struct rte_mbuf *mb; - - mb = rte_mbuf_from_vlib_buffer (buffer); - - t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0])); - t0->queue_index = queue_id; - t0->device_index = xd->device_index; - t0->buffer_index = buffer_index; - 
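-  /* Snapshot the mbuf header, the vlib buffer metadata and the start of
-     the packet data into the trace record. */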
clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); - clib_memcpy (&t0->buffer, buffer, - sizeof (buffer[0]) - sizeof (buffer->pre_data)); - clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data, - sizeof (t0->buffer.pre_data)); -} - -static_always_inline void -dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b, - int maybe_multiseg) -{ - struct rte_mbuf *mb, *first_mb, *last_mb; - - /* buffer is coming from non-dpdk source so we need to init - rte_mbuf header */ - if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_EXT_HDR_VALID) == 0)) - { - vlib_buffer_t *b2 = b; - last_mb = mb = rte_mbuf_from_vlib_buffer (b2); - rte_pktmbuf_reset (mb); - while (maybe_multiseg && (b2->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - b2 = vlib_get_buffer (vm, b2->next_buffer); - mb = rte_mbuf_from_vlib_buffer (b2); - rte_pktmbuf_reset (mb); - } - } - - last_mb = first_mb = mb = rte_mbuf_from_vlib_buffer (b); - first_mb->nb_segs = 1; - mb->data_len = b->current_length; - mb->pkt_len = maybe_multiseg ? vlib_buffer_length_in_chain (vm, b) : - b->current_length; - mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; - - while (maybe_multiseg && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - b = vlib_get_buffer (vm, b->next_buffer); - mb = rte_mbuf_from_vlib_buffer (b); - last_mb->next = mb; - last_mb = mb; - mb->data_len = b->current_length; - mb->pkt_len = b->current_length; - mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; - first_mb->nb_segs++; - if (PREDICT_FALSE (b->n_add_refs)) - { - rte_mbuf_refcnt_update (mb, b->n_add_refs); - b->n_add_refs = 0; - } - } -} - -/* - * This function calls the dpdk's tx_burst function to transmit the packets - * on the tx_vector. It manages a lock per-device if the device does not - * support multiple queues. It returns the number of packets untransmitted - * on the tx_vector. If all packets are transmitted (the normal case), the - * function returns 0. - * - * The function assumes there is at least one packet on the tx_vector. - */ -static_always_inline - u32 tx_burst_vector_internal (vlib_main_t * vm, - dpdk_device_t * xd, - struct rte_mbuf **tx_vector) -{ - dpdk_main_t *dm = &dpdk_main; - u32 n_packets; - u32 tx_head; - u32 tx_tail; - u32 n_retry; - int rv; - int queue_id; - tx_ring_hdr_t *ring; - - ring = vec_header (tx_vector, sizeof (*ring)); - - n_packets = ring->tx_head - ring->tx_tail; - - tx_head = ring->tx_head % xd->nb_tx_desc; - - /* - * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to - * unpredictable results. - */ - ASSERT (n_packets > 0); - - /* - * Check for tx_vector overflow. If this fails it is a system configuration - * error. The ring should be sized big enough to handle the largest un-flowed - * off burst from a traffic manager. A larger size also helps performance - * a bit because it decreases the probability of having to issue two tx_burst - * calls due to a ring wrap. - */ - ASSERT (n_packets < xd->nb_tx_desc); - ASSERT (ring->tx_tail == 0); - - n_retry = 16; - queue_id = vm->cpu_index; - - do - { - /* start the burst at the tail */ - tx_tail = ring->tx_tail % xd->nb_tx_desc; - - /* - * This device only supports one TX queue, - * and we're running multi-threaded... 
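- * so serialize on the per-queue spinlock, hopping to the next queue
- * whenever the current one is contended.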
- */ - if (PREDICT_FALSE (xd->lockp != 0)) - { - queue_id = queue_id % xd->tx_q_used; - while (__sync_lock_test_and_set (xd->lockp[queue_id], 1)) - /* zzzz */ - queue_id = (queue_id + 1) % xd->tx_q_used; - } - - if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */ - { - /* no wrap, transmit in one burst */ - dpdk_device_hqos_per_worker_thread_t *hqos = - &xd->hqos_wt[vm->cpu_index]; - - ASSERT (hqos->swq != NULL); - - dpdk_hqos_metadata_set (hqos, - &tx_vector[tx_tail], tx_head - tx_tail); - rv = rte_ring_sp_enqueue_burst (hqos->swq, - (void **) &tx_vector[tx_tail], - (uint16_t) (tx_head - tx_tail)); - } - else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) - { - /* no wrap, transmit in one burst */ - rv = rte_eth_tx_burst (xd->device_index, - (uint16_t) queue_id, - &tx_vector[tx_tail], - (uint16_t) (tx_head - tx_tail)); - } - else - { - ASSERT (0); - rv = 0; - } - - if (PREDICT_FALSE (xd->lockp != 0)) - *xd->lockp[queue_id] = 0; - - if (PREDICT_FALSE (rv < 0)) - { - // emit non-fatal message, bump counter - vnet_main_t *vnm = dm->vnet_main; - vnet_interface_main_t *im = &vnm->interface_main; - u32 node_index; - - node_index = vec_elt_at_index (im->hw_interfaces, - xd->vlib_hw_if_index)->tx_node_index; - - vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1); - clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index, - rv); - return n_packets; // untransmitted packets - } - ring->tx_tail += (u16) rv; - n_packets -= (uint16_t) rv; - } - while (rv && n_packets && (n_retry > 0)); - - return n_packets; -} - -static_always_inline void -dpdk_prefetch_buffer_by_index (vlib_main_t * vm, u32 bi) -{ - vlib_buffer_t *b; - struct rte_mbuf *mb; - b = vlib_get_buffer (vm, bi); - mb = rte_mbuf_from_vlib_buffer (b); - CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); -} - -static_always_inline void -dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp) -{ - dpdk_main_t *dm = &dpdk_main; - u32 my_cpu = vm->cpu_index; - struct rte_mbuf *mb_new; - - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0) - return; - - mb_new = dpdk_replicate_packet_mb (b); - if (PREDICT_FALSE (mb_new == 0)) - { - vlib_error_count (vm, node->node_index, - DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); - b->flags |= VLIB_BUFFER_REPL_FAIL; - } - else - *mbp = mb_new; - - vec_add1 (dm->recycle[my_cpu], bi); -} - -/* - * Transmits the packets on the frame to the interface associated with the - * node. It first copies packets on the frame to a tx_vector containing the - * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal - * which calls the dpdk tx_burst function. 
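- * ring->tx_head advances as buffers are copied in and ring->tx_tail as
- * bursts complete, so (tx_head - tx_tail) is the number of packets still
- * queued for transmission.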
- */ -static uword -dpdk_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * f) -{ - dpdk_main_t *dm = &dpdk_main; - vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance); - u32 n_packets = f->n_vectors; - u32 n_left; - u32 *from; - struct rte_mbuf **tx_vector; - u16 i; - u16 nb_tx_desc = xd->nb_tx_desc; - int queue_id; - u32 my_cpu; - u32 tx_pkts = 0; - tx_ring_hdr_t *ring; - u32 n_on_ring; - - my_cpu = vm->cpu_index; - - queue_id = my_cpu; - - tx_vector = xd->tx_vectors[queue_id]; - ring = vec_header (tx_vector, sizeof (*ring)); - - n_on_ring = ring->tx_head - ring->tx_tail; - from = vlib_frame_vector_args (f); - - ASSERT (n_packets <= VLIB_FRAME_SIZE); - - if (PREDICT_FALSE (n_on_ring + n_packets > nb_tx_desc)) - { - /* - * Overflowing the ring should never happen. - * If it does then drop the whole frame. - */ - vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL, - n_packets); - - while (n_packets--) - { - u32 bi0 = from[n_packets]; - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer (b0); - rte_pktmbuf_free (mb0); - } - return n_on_ring; - } - - if (PREDICT_FALSE (dm->tx_pcap_enable)) - { - n_left = n_packets; - while (n_left > 0) - { - u32 bi0 = from[0]; - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - if (dm->pcap_sw_if_index == 0 || - dm->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_TX]) - pcap_add_buffer (&dm->pcap_main, vm, bi0, 512); - from++; - n_left--; - } - } - - from = vlib_frame_vector_args (f); - n_left = n_packets; - i = ring->tx_head % nb_tx_desc; - - while (n_left >= 8) - { - u32 bi0, bi1, bi2, bi3; - struct rte_mbuf *mb0, *mb1, *mb2, *mb3; - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 or_flags; - - dpdk_prefetch_buffer_by_index (vm, from[4]); - dpdk_prefetch_buffer_by_index (vm, from[5]); - dpdk_prefetch_buffer_by_index (vm, from[6]); - dpdk_prefetch_buffer_by_index (vm, from[7]); - - bi0 = from[0]; - bi1 = from[1]; - bi2 = from[2]; - bi3 = from[3]; - from += 4; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - - or_flags = b0->flags | b1->flags | b2->flags | b3->flags; - - if (or_flags & VLIB_BUFFER_NEXT_PRESENT) - { - dpdk_validate_rte_mbuf (vm, b0, 1); - dpdk_validate_rte_mbuf (vm, b1, 1); - dpdk_validate_rte_mbuf (vm, b2, 1); - dpdk_validate_rte_mbuf (vm, b3, 1); - } - else - { - dpdk_validate_rte_mbuf (vm, b0, 0); - dpdk_validate_rte_mbuf (vm, b1, 0); - dpdk_validate_rte_mbuf (vm, b2, 0); - dpdk_validate_rte_mbuf (vm, b3, 0); - } - - mb0 = rte_mbuf_from_vlib_buffer (b0); - mb1 = rte_mbuf_from_vlib_buffer (b1); - mb2 = rte_mbuf_from_vlib_buffer (b2); - mb3 = rte_mbuf_from_vlib_buffer (b3); - - if (PREDICT_FALSE (or_flags & VLIB_BUFFER_RECYCLE)) - { - dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); - dpdk_buffer_recycle (vm, node, b1, bi1, &mb1); - dpdk_buffer_recycle (vm, node, b2, bi2, &mb2); - dpdk_buffer_recycle (vm, node, b3, bi3, &mb3); - - /* dont enqueue packets if replication failed as they must - be sent back to recycle */ - if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - tx_vector[i++ % nb_tx_desc] = mb0; - if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - tx_vector[i++ % nb_tx_desc] = mb1; - if (PREDICT_TRUE ((b2->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - tx_vector[i++ % nb_tx_desc] = mb2; - if (PREDICT_TRUE ((b3->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - 
tx_vector[i++ % nb_tx_desc] = mb3; - } - else - { - if (PREDICT_FALSE (i + 3 >= nb_tx_desc)) - { - tx_vector[i++ % nb_tx_desc] = mb0; - tx_vector[i++ % nb_tx_desc] = mb1; - tx_vector[i++ % nb_tx_desc] = mb2; - tx_vector[i++ % nb_tx_desc] = mb3; - i %= nb_tx_desc; - } - else - { - tx_vector[i++] = mb0; - tx_vector[i++] = mb1; - tx_vector[i++] = mb2; - tx_vector[i++] = mb3; - } - } - - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) - { - if (b0->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); - if (b1->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1); - if (b2->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi2, b2); - if (b3->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi3, b3); - } - - n_left -= 4; - } - while (n_left > 0) - { - u32 bi0; - struct rte_mbuf *mb0; - vlib_buffer_t *b0; - - bi0 = from[0]; - from++; - - b0 = vlib_get_buffer (vm, bi0); - - dpdk_validate_rte_mbuf (vm, b0, 1); - - mb0 = rte_mbuf_from_vlib_buffer (b0); - dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) - if (b0->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); - - if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - { - tx_vector[i % nb_tx_desc] = mb0; - i++; - } - n_left--; - } - - /* account for additional packets in the ring */ - ring->tx_head += n_packets; - n_on_ring = ring->tx_head - ring->tx_tail; - - /* transmit as many packets as possible */ - n_packets = tx_burst_vector_internal (vm, xd, tx_vector); - - /* - * tx_pkts is the number of packets successfully transmitted - * This is the number originally on ring minus the number remaining on ring - */ - tx_pkts = n_on_ring - n_packets; - - { - /* If there is no callback then drop any non-transmitted packets */ - if (PREDICT_FALSE (n_packets)) - { - vlib_simple_counter_main_t *cm; - vnet_main_t *vnm = vnet_get_main (); - - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_TX_ERROR); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - n_packets); - - vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, - n_packets); - - while (n_packets--) - rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); - } - - /* Reset head/tail to avoid unnecessary wrap */ - ring->tx_head = 0; - ring->tx_tail = 0; - } - - /* Recycle replicated buffers */ - if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu]))) - { - vlib_buffer_free (vm, dm->recycle[my_cpu], - vec_len (dm->recycle[my_cpu])); - _vec_len (dm->recycle[my_cpu]) = 0; - } - - ASSERT (ring->tx_head >= ring->tx_tail); - - return tx_pkts; -} - -static void -dpdk_clear_hw_interface_counters (u32 instance) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, instance); - - /* - * Set the "last_cleared_stats" to the current stats, so that - * things appear to clear from a display perspective. 
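 *
 * Editorial note (illustrative, not in the original source): display code
 * later reports the delta against this snapshot rather than resetting
 * anything in hardware, i.e. effectively
 *
 *   u64 shown = xd->stats.ipackets - xd->last_cleared_stats.ipackets;
 *
 * and likewise for every other field of the rte_eth_stats structure.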
- */ - dpdk_update_counters (xd, vlib_time_now (dm->vlib_main)); - - clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof (xd->stats)); - clib_memcpy (xd->last_cleared_xstats, xd->xstats, - vec_len (xd->last_cleared_xstats) * - sizeof (xd->last_cleared_xstats[0])); - -} - -static clib_error_t * -dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) -{ - vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); - uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hif->dev_instance); - int rv = 0; - - if (is_up) - { - f64 now = vlib_time_now (dm->vlib_main); - - if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) - { - rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - } - - if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) - rte_eth_promiscuous_enable (xd->device_index); - else - rte_eth_promiscuous_disable (xd->device_index); - - rte_eth_allmulticast_enable (xd->device_index); - xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP; - dpdk_update_counters (xd, now); - dpdk_update_link_state (xd, now); - } - else - { - xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP; - - rte_eth_allmulticast_disable (xd->device_index); - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); - rte_eth_dev_stop (xd->device_index); - - /* For bonded interface, stop slave links */ - if (xd->pmd == VNET_DPDK_PMD_BOND) - { - u8 slink[16]; - int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16); - while (nlink >= 1) - { - u8 dpdk_port = slink[--nlink]; - rte_eth_dev_stop (dpdk_port); - } - } - } - - if (rv < 0) - clib_warning ("rte_eth_dev_%s error: %d", is_up ? 
"start" : "stop", rv); - - return /* no error */ 0; -} - -/* - * Dynamically redirect all pkts from a specific interface - * to the specified node - */ -static void -dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, - u32 node_index) -{ - dpdk_main_t *xm = &dpdk_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); - - /* Shut off redirection */ - if (node_index == ~0) - { - xd->per_interface_next_index = node_index; - return; - } - - xd->per_interface_next_index = - vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index); -} - - -static clib_error_t * -dpdk_subif_add_del_function (vnet_main_t * vnm, - u32 hw_if_index, - struct vnet_sw_interface_t *st, int is_add) -{ - dpdk_main_t *xm = &dpdk_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); - vnet_sw_interface_t *t = (vnet_sw_interface_t *) st; - int r, vlan_offload; - u32 prev_subifs = xd->num_subifs; - clib_error_t *err = 0; - - if (is_add) - xd->num_subifs++; - else if (xd->num_subifs) - xd->num_subifs--; - - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - goto done; - - /* currently we program VLANS only for IXGBE VF and I40E VF */ - if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF)) - goto done; - - if (t->sub.eth.flags.no_tags == 1) - goto done; - - if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1)) - { - xd->num_subifs = prev_subifs; - err = clib_error_return (0, "unsupported VLAN setup"); - goto done; - } - - vlan_offload = rte_eth_dev_get_vlan_offload (xd->device_index); - vlan_offload |= ETH_VLAN_FILTER_OFFLOAD; - - if ((r = rte_eth_dev_set_vlan_offload (xd->device_index, vlan_offload))) - { - xd->num_subifs = prev_subifs; - err = clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d", - xd->device_index, r); - goto done; - } - - - if ((r = - rte_eth_dev_vlan_filter (xd->device_index, t->sub.eth.outer_vlan_id, - is_add))) - { - xd->num_subifs = prev_subifs; - err = clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d", - xd->device_index, r); - goto done; - } - -done: - if (xd->num_subifs) - xd->flags |= DPDK_DEVICE_FLAG_HAVE_SUBIF; - else - xd->flags &= ~DPDK_DEVICE_FLAG_HAVE_SUBIF; - - return err; -} - -/* *INDENT-OFF* */ -VNET_DEVICE_CLASS (dpdk_device_class) = { - .name = "dpdk", - .tx_function = dpdk_interface_tx, - .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR, - .tx_function_error_strings = dpdk_tx_func_error_strings, - .format_device_name = format_dpdk_device_name, - .format_device = format_dpdk_device, - .format_tx_trace = format_dpdk_tx_dma_trace, - .clear_counters = dpdk_clear_hw_interface_counters, - .admin_up_down_function = dpdk_interface_admin_up_down, - .subif_add_del_function = dpdk_subif_add_del_function, - .rx_redirect_to_node = dpdk_set_interface_next_node, - .mac_addr_change_function = dpdk_set_mac_address, -}; - -VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx) -/* *INDENT-ON* */ - -#define UP_DOWN_FLAG_EVENT 1 - -uword -admin_up_down_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - clib_error_t *error = 0; - uword event_type; - uword *event_data = 0; - u32 sw_if_index; - u32 flags; - - while (1) - { - vlib_process_wait_for_event (vm); - - event_type = vlib_process_get_events (vm, &event_data); - - dpdk_main.admin_up_down_in_progress = 1; - - switch (event_type) - { - case 
UP_DOWN_FLAG_EVENT: - { - if (vec_len (event_data) == 2) - { - sw_if_index = event_data[0]; - flags = event_data[1]; - error = - vnet_sw_interface_set_flags (vnet_get_main (), sw_if_index, - flags); - clib_error_report (error); - } - } - break; - } - - vec_reset_length (event_data); - - dpdk_main.admin_up_down_in_progress = 0; - - } - return 0; /* or not */ -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (admin_up_down_process_node,static) = { - .function = admin_up_down_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "admin-up-down-process", - .process_log2_n_stack_bytes = 17, // 128KB -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/dir.dox b/src/vnet/devices/dpdk/dir.dox deleted file mode 100644 index 43e36753..00000000 --- a/src/vnet/devices/dpdk/dir.dox +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Doxygen directory documentation */ - -/** -@dir -@brief DPDK Abstraction Layer. - -This directory contains the source code for the DPDK abstraction layer. - -*/ -/*? %%clicmd:group_label DPDK and pcap tx %% ?*/ -/*? %%syscfg:group_label DPDK and pcap tx %% ?*/ diff --git a/src/vnet/devices/dpdk/dpdk.api b/src/vnet/devices/dpdk/dpdk.api deleted file mode 100644 index 21215d45..00000000 --- a/src/vnet/devices/dpdk/dpdk.api +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -/** \brief DPDK interface HQoS pipe profile set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param pipe - pipe ID within its subport - @param profile - pipe profile ID -*/ -define sw_interface_set_dpdk_hqos_pipe { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 pipe; - u32 profile; -}; - -/** \brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; -}; - -/** \brief DPDK interface HQoS subport parameters set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param tb_rate - subport token bucket rate (measured in bytes/second) - @param tb_size - subport token bucket size (measured in credits) - @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) - @param tc_period - enforcement period for rates (measured in milliseconds) -*/ -define sw_interface_set_dpdk_hqos_subport { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 tb_rate; - u32 tb_size; - u32 tc_rate[4]; - u32 tc_period; -}; - -/** \brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; -}; - -/** \brief DPDK interface HQoS tctbl entry set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param entry - entry index ID - @param tc - traffic class (0 .. 3) - @param queue - traffic class queue (0 .. 3) -*/ -define sw_interface_set_dpdk_hqos_tctbl { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 entry; - u32 tc; - u32 queue; -}; - -/** \brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; -}; - -/* - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ - \ No newline at end of file diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h deleted file mode 100644 index bf9f2768..00000000 --- a/src/vnet/devices/dpdk/dpdk.h +++ /dev/null @@ -1,487 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __included_dpdk_h__ -#define __included_dpdk_h__ - -/* $$$$ We should rename always_inline -> clib_always_inline */ -#undef always_inline - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#if CLIB_DEBUG > 0 -#define always_inline static inline -#else -#define always_inline static inline __attribute__ ((__always_inline__)) -#endif - -#include - -#define NB_MBUF (16<<10) - -extern vnet_device_class_t dpdk_device_class; -extern vlib_node_registration_t dpdk_input_node; -extern vlib_node_registration_t handoff_dispatch_node; - -#define foreach_dpdk_pmd \ - _ ("net_thunderx", THUNDERX) \ - _ ("net_e1000_em", E1000EM) \ - _ ("net_e1000_igb", IGB) \ - _ ("net_e1000_igb_vf", IGBVF) \ - _ ("net_ixgbe", IXGBE) \ - _ ("net_ixgbe_vf", IXGBEVF) \ - _ ("net_i40e", I40E) \ - _ ("net_i40e_vf", I40EVF) \ - _ ("net_virtio", VIRTIO) \ - _ ("net_enic", ENIC) \ - _ ("net_vmxnet3", VMXNET3) \ - _ ("AF_PACKET PMD", AF_PACKET) \ - _ ("rte_bond_pmd", BOND) \ - _ ("net_fm10k", FM10K) \ - _ ("net_cxgbe", CXGBE) \ - _ ("net_mlx5", MLX5) \ - _ ("net_dpaa2", DPAA2) - -typedef enum -{ - VNET_DPDK_PMD_NONE, -#define _(s,f) VNET_DPDK_PMD_##f, - foreach_dpdk_pmd -#undef _ - VNET_DPDK_PMD_UNKNOWN, /* must be last */ -} dpdk_pmd_t; - -typedef enum -{ - VNET_DPDK_PORT_TYPE_ETH_1G, - VNET_DPDK_PORT_TYPE_ETH_10G, - VNET_DPDK_PORT_TYPE_ETH_40G, - VNET_DPDK_PORT_TYPE_ETH_100G, - VNET_DPDK_PORT_TYPE_ETH_BOND, - VNET_DPDK_PORT_TYPE_ETH_SWITCH, - VNET_DPDK_PORT_TYPE_AF_PACKET, - VNET_DPDK_PORT_TYPE_UNKNOWN, -} dpdk_port_type_t; - -/* - * The header for the tx_vector in dpdk_device_t. - * Head and tail are indexes into the tx_vector and are of type - * u64 so they never overflow. 
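 *
 * Editorial sketch (illustrative, restating the invariant above): because
 * both counters are monotonically increasing u64 values, the physical slot
 * and the ring occupancy are always recoverable as
 *
 *   slot = ring->tx_head % nb_tx_desc;         // index into tx_vector
 *   n_on_ring = ring->tx_head - ring->tx_tail; // queued, not yet sent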
- */ -typedef struct -{ - u64 tx_head; - u64 tx_tail; -} tx_ring_hdr_t; - -typedef struct -{ - struct rte_ring *swq; - - u64 hqos_field0_slabmask; - u32 hqos_field0_slabpos; - u32 hqos_field0_slabshr; - u64 hqos_field1_slabmask; - u32 hqos_field1_slabpos; - u32 hqos_field1_slabshr; - u64 hqos_field2_slabmask; - u32 hqos_field2_slabpos; - u32 hqos_field2_slabshr; - u32 hqos_tc_table[64]; -} dpdk_device_hqos_per_worker_thread_t; - -typedef struct -{ - struct rte_ring **swq; - struct rte_mbuf **pkts_enq; - struct rte_mbuf **pkts_deq; - struct rte_sched_port *hqos; - u32 hqos_burst_enq; - u32 hqos_burst_deq; - u32 pkts_enq_len; - u32 swq_pos; - u32 flush_count; -} dpdk_device_hqos_per_hqos_thread_t; - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - volatile u32 **lockp; - - /* Instance ID */ - u32 device_index; - - u32 vlib_hw_if_index; - u32 vlib_sw_if_index; - - /* next node index if we decide to steal the rx graph arc */ - u32 per_interface_next_index; - - /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */ - struct rte_mbuf ***tx_vectors; /* one per worker thread */ - struct rte_mbuf ***rx_vectors; - - /* vector of traced contexts, per device */ - u32 **d_trace_buffers; - - dpdk_pmd_t pmd:8; - i8 cpu_socket; - - u16 flags; -#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0) -#define DPDK_DEVICE_FLAG_PROMISC (1 << 1) -#define DPDK_DEVICE_FLAG_PMD (1 << 2) -#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3) -#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4) -#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5) -#define DPDK_DEVICE_FLAG_HQOS (1 << 6) - - u16 nb_tx_desc; - CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); - - u8 *interface_name_suffix; - - /* number of sub-interfaces */ - u16 num_subifs; - - /* PMD related */ - u16 tx_q_used; - u16 rx_q_used; - u16 nb_rx_desc; - u16 *cpu_socket_id_by_queue; - struct rte_eth_conf port_conf; - struct rte_eth_txconf tx_conf; - - /* HQoS related */ - dpdk_device_hqos_per_worker_thread_t *hqos_wt; - dpdk_device_hqos_per_hqos_thread_t *hqos_ht; - - /* af_packet */ - u8 af_packet_port_id; - - struct rte_eth_link link; - f64 time_last_link_update; - - struct rte_eth_stats stats; - struct rte_eth_stats last_stats; - struct rte_eth_stats last_cleared_stats; - struct rte_eth_xstat *xstats; - struct rte_eth_xstat *last_cleared_xstats; - f64 time_last_stats_update; - dpdk_port_type_t port_type; - - /* mac address */ - u8 *default_mac_address; -} dpdk_device_t; - -#define DPDK_STATS_POLL_INTERVAL (10.0) -#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */ - -#define DPDK_LINK_POLL_INTERVAL (3.0) -#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */ - -typedef struct -{ - u32 device; - u16 queue_id; -} dpdk_device_and_queue_t; - -#ifndef DPDK_HQOS_DBG_BYPASS -#define DPDK_HQOS_DBG_BYPASS 0 -#endif - -#ifndef HQOS_FLUSH_COUNT_THRESHOLD -#define HQOS_FLUSH_COUNT_THRESHOLD 100000 -#endif - -typedef struct dpdk_device_config_hqos_t -{ - u32 hqos_thread; - u32 hqos_thread_valid; - - u32 swq_size; - u32 burst_enq; - u32 burst_deq; - - u32 pktfield0_slabpos; - u32 pktfield1_slabpos; - u32 pktfield2_slabpos; - u64 pktfield0_slabmask; - u64 pktfield1_slabmask; - u64 pktfield2_slabmask; - u32 tc_table[64]; - - struct rte_sched_port_params port; - struct rte_sched_subport_params *subport; - struct rte_sched_pipe_params *pipe; - uint32_t *pipe_map; -} dpdk_device_config_hqos_t; - -int dpdk_hqos_validate_mask (u64 mask, u32 n); -void dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * - hqos, u32 pipe_profile_id); -void 
dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos); -clib_error_t *dpdk_port_setup_hqos (dpdk_device_t * xd, - dpdk_device_config_hqos_t * hqos); -void dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, - struct rte_mbuf **pkts, u32 n_pkts); - -#define foreach_dpdk_device_config_item \ - _ (num_rx_queues) \ - _ (num_tx_queues) \ - _ (num_rx_desc) \ - _ (num_tx_desc) \ - _ (rss_fn) - -typedef struct -{ - vlib_pci_addr_t pci_addr; - u8 is_blacklisted; - u8 vlan_strip_offload; -#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0 -#define DPDK_DEVICE_VLAN_STRIP_OFF 1 -#define DPDK_DEVICE_VLAN_STRIP_ON 2 - -#define _(x) uword x; - foreach_dpdk_device_config_item -#undef _ - clib_bitmap_t * workers; - u32 hqos_enabled; - dpdk_device_config_hqos_t hqos; -} dpdk_device_config_t; - -typedef struct -{ - - /* Config stuff */ - u8 **eal_init_args; - u8 *eal_init_args_str; - u8 *uio_driver_name; - u8 no_multi_seg; - u8 enable_tcp_udp_checksum; - u8 cryptodev; - - /* Required config parameters */ - u8 coremask_set_manually; - u8 nchannels_set_manually; - u32 coremask; - u32 nchannels; - u32 num_mbufs; - u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */ - - /* - * format interface names ala xxxEthernet%d/%d/%d instead of - * xxxEthernet%x/%x/%x. - */ - u8 interface_name_format_decimal; - - /* per-device config */ - dpdk_device_config_t default_devconf; - dpdk_device_config_t *dev_confs; - uword *device_config_index_by_pci_addr; - -} dpdk_config_main_t; - -dpdk_config_main_t dpdk_config_main; - -typedef struct -{ - - /* Devices */ - dpdk_device_t *devices; - dpdk_device_and_queue_t **devices_by_cpu; - dpdk_device_and_queue_t **devices_by_hqos_cpu; - - /* per-thread recycle lists */ - u32 **recycle; - - /* buffer flags template, configurable to enable/disable tcp / udp cksum */ - u32 buffer_flags_template; - - /* vlib buffer free list, must be same size as an rte_mbuf */ - u32 vlib_buffer_free_list_index; - - /* Ethernet input node index */ - u32 ethernet_input_node_index; - - /* pcap tracing [only works if (CLIB_DEBUG > 0)] */ - int tx_pcap_enable; - pcap_main_t pcap_main; - u8 *pcap_filename; - u32 pcap_sw_if_index; - u32 pcap_pkts_to_capture; - - /* hashes */ - uword *dpdk_device_by_kni_port_id; - uword *vu_sw_if_index_by_listener_fd; - uword *vu_sw_if_index_by_sock_fd; - u32 *vu_inactive_interfaces_device_index; - - /* - * flag indicating that a posted admin up/down - * (via post_sw_interface_set_flags) is in progress - */ - u8 admin_up_down_in_progress; - - u8 use_rss; - - /* which cpus are running dpdk-input */ - int input_cpu_first_index; - int input_cpu_count; - - /* which cpus are running I/O TX */ - int hqos_cpu_first_index; - int hqos_cpu_count; - - /* control interval of dpdk link state and stat polling */ - f64 link_state_poll_interval; - f64 stat_poll_interval; - - /* Sleep for this many MS after each device poll */ - u32 poll_sleep; - - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; - dpdk_config_main_t *conf; - - /* mempool */ - struct rte_mempool **pktmbuf_pools; -} dpdk_main_t; - -dpdk_main_t dpdk_main; - -typedef struct -{ - u32 buffer_index; - u16 device_index; - u8 queue_index; - struct rte_mbuf mb; - /* Copy of VLIB buffer; packet data stored in pre_data. */ - vlib_buffer_t buffer; -} dpdk_tx_dma_trace_t; - -typedef struct -{ - u32 buffer_index; - u16 device_index; - u16 queue_index; - struct rte_mbuf mb; - vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. 
*/ - u8 data[256]; /* First 256 data bytes, used for hexdump */ -} dpdk_rx_dma_trace_t; - -void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b); - -clib_error_t *dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address); - -clib_error_t *dpdk_set_mc_filter (vnet_hw_interface_t * hi, - struct ether_addr mc_addr_vec[], int naddr); - -void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd); - -clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); - -u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance); - -struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b); -struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); - -#define foreach_dpdk_error \ - _(NONE, "no error") \ - _(RX_PACKET_ERROR, "Rx packet errors") \ - _(RX_BAD_FCS, "Rx bad fcs") \ - _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \ - _(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \ - _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \ - _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error") - -typedef enum -{ -#define _(f,s) DPDK_ERROR_##f, - foreach_dpdk_error -#undef _ - DPDK_N_ERROR, -} dpdk_error_t; - -int dpdk_set_stat_poll_interval (f64 interval); -int dpdk_set_link_state_poll_interval (f64 interval); -void dpdk_update_link_state (dpdk_device_t * xd, f64 now); -void dpdk_device_lock_init (dpdk_device_t * xd); -void dpdk_device_lock_free (dpdk_device_t * xd); - -void dpdk_rx_trace (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, u32 * buffers, uword n_buffers); - -#define EFD_OPERATION_LESS_THAN 0 -#define EFD_OPERATION_GREATER_OR_EQUAL 1 - -format_function_t format_dpdk_device_name; -format_function_t format_dpdk_device; -format_function_t format_dpdk_tx_dma_trace; -format_function_t format_dpdk_rx_dma_trace; -format_function_t format_dpdk_rte_mbuf; -format_function_t format_dpdk_rx_rte_mbuf; -unformat_function_t unformat_socket_mem; -clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn); -clib_error_t *unformat_hqos (unformat_input_t * input, - dpdk_device_config_hqos_t * hqos); - -uword -admin_up_down_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f); - -#endif /* __included_dpdk_h__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/dpdk_api.c b/src/vnet/devices/dpdk/dpdk_api.c deleted file mode 100644 index 8faf5c2c..00000000 --- a/src/vnet/devices/dpdk/dpdk_api.c +++ /dev/null @@ -1,246 +0,0 @@ -/* - *------------------------------------------------------------------ - * dpdk_api.c - dpdk interface api - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include -#include - -#if DPDK > 0 -#include -#endif - -#include - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -#define vl_endianfun /* define message structures */ -#include -#undef vl_endianfun - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - -#include - -#define foreach_vpe_api_msg \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) - -static void - vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - u32 pipe = ntohl (mp->pipe); - u32 profile = ntohl (mp->profile); - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); - - BAD_SW_IF_INDEX_LABEL; -#else - clib_warning ("setting HQoS pipe parameters without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); -} - -static void - vl_api_sw_interface_set_dpdk_hqos_subport_t_handler - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - struct rte_sched_subport_params p; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - p.tb_rate = ntohl (mp->tb_rate); - p.tb_size = ntohl (mp->tb_size); - p.tc_rate[0] = ntohl (mp->tc_rate[0]); - p.tc_rate[1] = ntohl (mp->tc_rate[1]); - p.tc_rate[2] = ntohl (mp->tc_rate[2]); - p.tc_rate[3] = ntohl (mp->tc_rate[3]); - p.tc_period = ntohl (mp->tc_period); - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); - - BAD_SW_IF_INDEX_LABEL; -#else - clib_warning - ("setting HQoS subport parameters without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); -} - -static void - vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 entry = ntohl (mp->entry); - u32 tc = ntohl (mp->tc); - u32 queue = ntohl (mp->queue); - u32 val, i; - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - 
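/* Editorial note (added commentary, not in the original source): "entry"
   indexes the 64-entry DSCP-to-(tc, queue) translation table. The handler
   below range-checks tc and queue, flattens them into a single queue index
   val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue, and writes val
   into each worker thread's private hqos_tc_table[entry]. */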
if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) - { - clib_warning ("invalid traffic class !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) - { - clib_warning ("invalid queue !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - - if (p == 0) - { - clib_warning ("worker thread registration AWOL !!"); - rv = VNET_API_ERROR_INVALID_VALUE_2; - goto done; - } - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; - for (i = 0; i < worker_thread_count; i++) - xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; - - BAD_SW_IF_INDEX_LABEL; -done: -#else - clib_warning ("setting HQoS DSCP table entry without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); -} - -/* - * dpdk_api_hookup - * Add vpe's API message handlers to the table. - * vlib has already mapped shared memory and - * added the client registration handlers. - * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() - */ -#define vl_msg_name_crc_list -#include -#undef vl_msg_name_crc_list - -static void -setup_message_id_table (api_main_t * am) -{ -#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); - foreach_vl_msg_name_crc_dpdk; -#undef _ -} - -static clib_error_t * -dpdk_api_hookup (vlib_main_t * vm) -{ - api_main_t *am = &api_main; - -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vpe_api_msg; -#undef _ - - /* - * Set up the (msg_name, crc, message-id) table - */ - setup_message_id_table (am); - - return 0; -} - -VLIB_API_INIT_FUNCTION (dpdk_api_hookup); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/dpdk_priv.h b/src/vnet/devices/dpdk/dpdk_priv.h deleted file mode 100644 index dd40ff48..00000000 --- a/src/vnet/devices/dpdk/dpdk_priv.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) -#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) - -#define DPDK_NB_RX_DESC_DEFAULT 1024 -#define DPDK_NB_TX_DESC_DEFAULT 1024 -#define DPDK_NB_RX_DESC_VIRTIO 256 -#define DPDK_NB_TX_DESC_VIRTIO 256 - -#define I40E_DEV_ID_SFP_XL710 0x1572 -#define I40E_DEV_ID_QSFP_A 0x1583 -#define I40E_DEV_ID_QSFP_B 0x1584 -#define I40E_DEV_ID_QSFP_C 0x1585 -#define I40E_DEV_ID_10G_BASE_T 0x1586 -#define I40E_DEV_ID_VF 0x154C - -/* These args appear by themselves */ -#define foreach_eal_double_hyphen_predicate_arg \ -_(no-shconf) \ -_(no-hpet) \ -_(no-huge) \ -_(vmware-tsc-map) - -#define foreach_eal_single_hyphen_mandatory_arg \ -_(coremask, c) \ -_(nchannels, n) \ - -#define foreach_eal_single_hyphen_arg \ -_(blacklist, b) \ -_(mem-alloc-request, m) \ -_(force-ranks, r) - -/* These args are preceded by "--" and followed by a single string */ -#define foreach_eal_double_hyphen_arg \ -_(huge-dir) \ -_(proc-type) \ -_(file-prefix) \ -_(vdev) - -static inline void -dpdk_get_xstats (dpdk_device_t * xd) -{ - int len; - if ((len = rte_eth_xstats_get (xd->device_index, NULL, 0)) > 0) - { - vec_validate (xd->xstats, len - 1); - vec_validate (xd->last_cleared_xstats, len - 1); - - len = - rte_eth_xstats_get (xd->device_index, xd->xstats, - vec_len (xd->xstats)); - - ASSERT (vec_len (xd->xstats) == len); - ASSERT (vec_len (xd->last_cleared_xstats) == len); - - _vec_len (xd->xstats) = len; - _vec_len (xd->last_cleared_xstats) = len; - - } -} - - -static inline void -dpdk_update_counters (dpdk_device_t * xd, f64 now) -{ - vlib_simple_counter_main_t *cm; - vnet_main_t *vnm = vnet_get_main (); - u32 my_cpu = os_get_cpu_number (); - u64 rxerrors, last_rxerrors; - - /* only update counters for PMD interfaces */ - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - return; - - xd->time_last_stats_update = now ? now : xd->time_last_stats_update; - clib_memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats)); - rte_eth_stats_get (xd->device_index, &xd->stats); - - /* maybe bump interface rx no buffer counter */ - if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_NO_BUF); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - xd->stats.rx_nombuf - - xd->last_stats.rx_nombuf); - } - - /* missed pkt counter */ - if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_MISS); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - xd->stats.imissed - - xd->last_stats.imissed); - } - rxerrors = xd->stats.ierrors; - last_rxerrors = xd->last_stats.ierrors; - - if (PREDICT_FALSE (rxerrors != last_rxerrors)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_ERROR); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - rxerrors - last_rxerrors); - } - - dpdk_get_xstats (xd); -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/format.c b/src/vnet/devices/dpdk/format.c deleted index 1558630c..00000000 --- a/src/vnet/devices/dpdk/format.c +++ /dev/null @@ -1,754 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include - -#include "dpdk_priv.h" -#include - -#define foreach_dpdk_counter \ - _ (tx_frames_ok, opackets) \ - _ (tx_bytes_ok, obytes) \ - _ (tx_errors, oerrors) \ - _ (rx_frames_ok, ipackets) \ - _ (rx_bytes_ok, ibytes) \ - _ (rx_errors, ierrors) \ - _ (rx_missed, imissed) \ - _ (rx_no_bufs, rx_nombuf) - -#define foreach_dpdk_q_counter \ - _ (rx_frames_ok, q_ipackets) \ - _ (tx_frames_ok, q_opackets) \ - _ (rx_bytes_ok, q_ibytes) \ - _ (tx_bytes_ok, q_obytes) \ - _ (rx_errors, q_errors) - -#define foreach_dpdk_rss_hf \ - _(ETH_RSS_FRAG_IPV4, "ipv4-frag") \ - _(ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \ - _(ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \ - _(ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \ - _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \ - _(ETH_RSS_IPV4, "ipv4") \ - _(ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \ - _(ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \ - _(ETH_RSS_FRAG_IPV6, "ipv6-frag") \ - _(ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \ - _(ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \ - _(ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \ - _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \ - _(ETH_RSS_L2_PAYLOAD, "l2-payload") \ - _(ETH_RSS_IPV6_EX, "ipv6-ex") \ - _(ETH_RSS_IPV6, "ipv6") - - -#define foreach_dpdk_rx_offload_caps \ - _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip") \ - _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ - _(DEV_RX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ - _(DEV_RX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ - _(DEV_RX_OFFLOAD_TCP_LRO , "tcp-lro") \ - _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip") - -#define foreach_dpdk_tx_offload_caps \ - _(DEV_TX_OFFLOAD_VLAN_INSERT, "vlan-insert") \ - _(DEV_TX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ - _(DEV_TX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ - _(DEV_TX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ - _(DEV_TX_OFFLOAD_SCTP_CKSUM , "sctp-cksum") \ - _(DEV_TX_OFFLOAD_TCP_TSO , "tcp-tso") \ - _(DEV_TX_OFFLOAD_UDP_TSO , "udp-tso") \ - _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \ - _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert") - -#define foreach_dpdk_pkt_rx_offload_flag \ - _ (PKT_RX_VLAN_PKT, "RX packet is an 802.1q VLAN packet") \ - _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \ - _ (PKT_RX_FDIR, "RX packet with FDIR infos") \ - _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \ - _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \ - _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \ - _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \ - _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt.
is valid") \ - _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \ - _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \ - _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped") - -#define foreach_dpdk_pkt_type \ - _ (L2, ETHER, "Ethernet packet") \ - _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \ - _ (L2, ETHER_ARP, "ARP packet") \ - _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet") \ - _ (L2, ETHER_NSH, "NSH (Network Service Header) packet") \ - _ (L2, ETHER_VLAN, "VLAN packet") \ - _ (L2, ETHER_QINQ, "QinQ packet") \ - _ (L3, IPV4, "IPv4 packet without extension headers") \ - _ (L3, IPV4_EXT, "IPv4 packet with extension headers") \ - _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \ - _ (L3, IPV6, "IPv6 packet without extension headers") \ - _ (L3, IPV6_EXT, "IPv6 packet with extension headers") \ - _ (L3, IPV6_EXT_UNKNOWN, "IPv6 packet with or without extension headers") \ - _ (L4, TCP, "TCP packet") \ - _ (L4, UDP, "UDP packet") \ - _ (L4, FRAG, "Fragmented IP packet") \ - _ (L4, SCTP, "SCTP (Stream Control Transmission Protocol) packet") \ - _ (L4, ICMP, "ICMP packet") \ - _ (L4, NONFRAG, "Non-fragmented IP packet") \ - _ (TUNNEL, GRE, "GRE tunneling packet") \ - _ (TUNNEL, VXLAN, "VXLAN tunneling packet") \ - _ (TUNNEL, NVGRE, "NVGRE Tunneling packet") \ - _ (TUNNEL, GENEVE, "GENEVE Tunneling packet") \ - _ (TUNNEL, GRENAT, "Teredo, VXLAN or GRE Tunneling packet") \ - _ (INNER_L2, ETHER, "Inner Ethernet packet") \ - _ (INNER_L2, ETHER_VLAN, "Inner Ethernet packet with VLAN") \ - _ (INNER_L3, IPV4, "Inner IPv4 packet without extension headers") \ - _ (INNER_L3, IPV4_EXT, "Inner IPv4 packet with extension headers") \ - _ (INNER_L3, IPV4_EXT_UNKNOWN, "Inner IPv4 packet with or without extension headers") \ - _ (INNER_L3, IPV6, "Inner IPv6 packet without extension headers") \ - _ (INNER_L3, IPV6_EXT, "Inner IPv6 packet with extension headers") \ - _ (INNER_L3, IPV6_EXT_UNKNOWN, "Inner IPv6 packet with or without extension headers") \ - _ (INNER_L4, TCP, "Inner TCP packet") \ - _ (INNER_L4, UDP, "Inner UDP packet") \ - _ (INNER_L4, FRAG, "Inner fagmented IP packet") \ - _ (INNER_L4, SCTP, "Inner SCTP (Stream Control Transmission Protocol) packet") \ - _ (INNER_L4, ICMP, "Inner ICMP packet") \ - _ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet") - -#define foreach_dpdk_pkt_tx_offload_flag \ - _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \ - _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \ - _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \ - _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. 
computed by NIC") \ - _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp") - -#define foreach_dpdk_pkt_offload_flag \ - foreach_dpdk_pkt_rx_offload_flag \ - foreach_dpdk_pkt_tx_offload_flag - -u8 * -format_dpdk_device_name (u8 * s, va_list * args) -{ - dpdk_main_t *dm = &dpdk_main; - char *devname_format; - char *device_name; - u32 i = va_arg (*args, u32); - struct rte_eth_dev_info dev_info; - u8 *ret; - - if (dm->conf->interface_name_format_decimal) - devname_format = "%s%d/%d/%d"; - else - devname_format = "%s%x/%x/%x"; - - switch (dm->devices[i].port_type) - { - case VNET_DPDK_PORT_TYPE_ETH_1G: - device_name = "GigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_10G: - device_name = "TenGigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_40G: - device_name = "FortyGigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_100G: - device_name = "HundredGigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_BOND: - return format (s, "BondEthernet%d", dm->devices[i].device_index); - - case VNET_DPDK_PORT_TYPE_ETH_SWITCH: - device_name = "EthernetSwitch"; - break; - - case VNET_DPDK_PORT_TYPE_AF_PACKET: - rte_eth_dev_info_get (i, &dev_info); - return format (s, "af_packet%d", dm->devices[i].af_packet_port_id); - - default: - case VNET_DPDK_PORT_TYPE_UNKNOWN: - device_name = "UnknownEthernet"; - break; - } - - rte_eth_dev_info_get (i, &dev_info); - - if (dev_info.pci_dev) - ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus, - dev_info.pci_dev->addr.devid, - dev_info.pci_dev->addr.function); - else - ret = format (s, "%s%d", device_name, dm->devices[i].device_index); - - if (dm->devices[i].interface_name_suffix) - return format (ret, "/%s", dm->devices[i].interface_name_suffix); - return ret; -} - -static u8 * -format_dpdk_device_type (u8 * s, va_list * args) -{ - dpdk_main_t *dm = &dpdk_main; - char *dev_type; - u32 i = va_arg (*args, u32); - - switch (dm->devices[i].pmd) - { - case VNET_DPDK_PMD_E1000EM: - dev_type = "Intel 82540EM (e1000)"; - break; - - case VNET_DPDK_PMD_IGB: - dev_type = "Intel e1000"; - break; - - case VNET_DPDK_PMD_I40E: - dev_type = "Intel X710/XL710 Family"; - break; - - case VNET_DPDK_PMD_I40EVF: - dev_type = "Intel X710/XL710 Family VF"; - break; - - case VNET_DPDK_PMD_FM10K: - dev_type = "Intel FM10000 Family Ethernet Switch"; - break; - - case VNET_DPDK_PMD_IGBVF: - dev_type = "Intel e1000 VF"; - break; - - case VNET_DPDK_PMD_VIRTIO: - dev_type = "Red Hat Virtio"; - break; - - case VNET_DPDK_PMD_IXGBEVF: - dev_type = "Intel 82599 VF"; - break; - - case VNET_DPDK_PMD_IXGBE: - dev_type = "Intel 82599"; - break; - - case VNET_DPDK_PMD_ENIC: - dev_type = "Cisco VIC"; - break; - - case VNET_DPDK_PMD_CXGBE: - dev_type = "Chelsio T4/T5"; - break; - - case VNET_DPDK_PMD_MLX5: - dev_type = "Mellanox ConnectX-4 Family"; - break; - - case VNET_DPDK_PMD_VMXNET3: - dev_type = "VMware VMXNET3"; - break; - - case VNET_DPDK_PMD_AF_PACKET: - dev_type = "af_packet"; - break; - - case VNET_DPDK_PMD_BOND: - dev_type = "Ethernet Bonding"; - break; - - case VNET_DPDK_PMD_DPAA2: - dev_type = "NXP DPAA2 Mac"; - break; - - default: - case VNET_DPDK_PMD_UNKNOWN: - dev_type = "### UNKNOWN ###"; - break; - } - - return format (s, dev_type); -} - -static u8 * -format_dpdk_link_status (u8 * s, va_list * args) -{ - dpdk_device_t *xd = va_arg (*args, dpdk_device_t *); - struct rte_eth_link *l = &xd->link; - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); - - s = format (s, "%s 
", l->link_status ? "up" : "down"); - if (l->link_status) - { - u32 promisc = rte_eth_promiscuous_get (xd->device_index); - - s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ? - "full" : "half"); - s = format (s, "speed %u mtu %d %s\n", l->link_speed, - hi->max_packet_bytes, promisc ? " promisc" : ""); - } - else - s = format (s, "\n"); - - return s; -} - -#define _line_len 72 -#define _(v, str) \ -if (bitmap & v) { \ - if (format_get_indent (s) > next_split ) { \ - next_split += _line_len; \ - s = format(s,"\n%U", format_white_space, indent); \ - } \ - s = format(s, "%s ", str); \ -} - -static u8 * -format_dpdk_rss_hf_name (u8 * s, va_list * args) -{ - u64 bitmap = va_arg (*args, u64); - int next_split = _line_len; - int indent = format_get_indent (s); - - if (!bitmap) - return format (s, "none"); - - foreach_dpdk_rss_hf return s; -} - -static u8 * -format_dpdk_rx_offload_caps (u8 * s, va_list * args) -{ - u32 bitmap = va_arg (*args, u32); - int next_split = _line_len; - int indent = format_get_indent (s); - - if (!bitmap) - return format (s, "none"); - - foreach_dpdk_rx_offload_caps return s; -} - -static u8 * -format_dpdk_tx_offload_caps (u8 * s, va_list * args) -{ - u32 bitmap = va_arg (*args, u32); - int next_split = _line_len; - int indent = format_get_indent (s); - if (!bitmap) - return format (s, "none"); - - foreach_dpdk_tx_offload_caps return s; -} - -#undef _line_len -#undef _ - -u8 * -format_dpdk_device (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - int verbose = va_arg (*args, int); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance); - uword indent = format_get_indent (s); - f64 now = vlib_time_now (dm->vlib_main); - struct rte_eth_dev_info di; - - dpdk_update_counters (xd, now); - dpdk_update_link_state (xd, now); - - s = format (s, "%U\n%Ucarrier %U", - format_dpdk_device_type, xd->device_index, - format_white_space, indent + 2, format_dpdk_link_status, xd); - - rte_eth_dev_info_get (xd->device_index, &di); - - if (verbose > 1 && xd->flags & DPDK_DEVICE_FLAG_PMD) - { - struct rte_pci_device *pci; - struct rte_eth_rss_conf rss_conf; - int vlan_off; - int retval; - - rss_conf.rss_key = 0; - retval = rte_eth_dev_rss_hash_conf_get (xd->device_index, &rss_conf); - if (retval < 0) - clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval); - pci = di.pci_dev; - - if (pci) - s = - format (s, - "%Upci id: device %04x:%04x subsystem %04x:%04x\n" - "%Upci address: %04x:%02x:%02x.%02x\n", - format_white_space, indent + 2, pci->id.vendor_id, - pci->id.device_id, pci->id.subsystem_vendor_id, - pci->id.subsystem_device_id, format_white_space, indent + 2, - pci->addr.domain, pci->addr.bus, pci->addr.devid, - pci->addr.function); - s = - format (s, "%Umax rx packet len: %d\n", format_white_space, - indent + 2, di.max_rx_pktlen); - s = - format (s, "%Umax num of queues: rx %d tx %d\n", format_white_space, - indent + 2, di.max_rx_queues, di.max_tx_queues); - s = - format (s, "%Upromiscuous: unicast %s all-multicast %s\n", - format_white_space, indent + 2, - rte_eth_promiscuous_get (xd->device_index) ? "on" : "off", - rte_eth_promiscuous_get (xd->device_index) ? "on" : "off"); - vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); - s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n", - format_white_space, indent + 2, - vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off", - vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off", - vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? 
"on" : "off"); - s = format (s, "%Urx offload caps: %U\n", - format_white_space, indent + 2, - format_dpdk_rx_offload_caps, di.rx_offload_capa); - s = format (s, "%Utx offload caps: %U\n", - format_white_space, indent + 2, - format_dpdk_tx_offload_caps, di.tx_offload_capa); - s = format (s, "%Urss active: %U\n" - "%Urss supported: %U\n", - format_white_space, indent + 2, - format_dpdk_rss_hf_name, rss_conf.rss_hf, - format_white_space, indent + 2, - format_dpdk_rss_hf_name, di.flow_type_rss_offloads); - } - - s = format (s, "%Urx queues %d, rx desc %d, tx queues %d, tx desc %d\n", - format_white_space, indent + 2, - xd->rx_q_used, xd->nb_rx_desc, xd->tx_q_used, xd->nb_tx_desc); - - if (xd->cpu_socket > -1) - s = format (s, "%Ucpu socket %d\n", - format_white_space, indent + 2, xd->cpu_socket); - - /* $$$ MIB counters */ - { -#define _(N, V) \ - if ((xd->stats.V - xd->last_cleared_stats.V) != 0) { \ - s = format (s, "\n%U%-40U%16Ld", \ - format_white_space, indent + 2, \ - format_c_identifier, #N, \ - xd->stats.V - xd->last_cleared_stats.V); \ - } \ - - foreach_dpdk_counter -#undef _ - } - - u8 *xs = 0; - u32 i = 0; - struct rte_eth_xstat *xstat, *last_xstat; - struct rte_eth_xstat_name *xstat_names = 0; - int len = rte_eth_xstats_get_names (xd->device_index, NULL, 0); - vec_validate (xstat_names, len - 1); - rte_eth_xstats_get_names (xd->device_index, xstat_names, len); - - ASSERT (vec_len (xd->xstats) == vec_len (xd->last_cleared_xstats)); - - /* *INDENT-OFF* */ - vec_foreach_index(i, xd->xstats) - { - u64 delta = 0; - xstat = vec_elt_at_index(xd->xstats, i); - last_xstat = vec_elt_at_index(xd->last_cleared_xstats, i); - - delta = xstat->value - last_xstat->value; - if (verbose == 2 || (verbose && delta)) - { - /* format_c_identifier doesn't like c strings inside vector */ - u8 * name = format(0,"%s", xstat_names[i].name); - xs = format(xs, "\n%U%-38U%16Ld", - format_white_space, indent + 4, - format_c_identifier, name, delta); - vec_free(name); - } - } - /* *INDENT-ON* */ - - vec_free (xstat_names); - - if (xs) - { - s = format (s, "\n%Uextended stats:%v", - format_white_space, indent + 2, xs); - vec_free (xs); - } - - return s; -} - -u8 * -format_dpdk_tx_dma_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); - dpdk_tx_dma_trace_t *t = va_arg (*va, dpdk_tx_dma_trace_t *); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); - uword indent = format_get_indent (s); - vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); - - s = format (s, "%U tx queue %d", - format_vnet_sw_interface_name, vnm, sw, t->queue_index); - - s = format (s, "\n%Ubuffer 0x%x: %U", - format_white_space, indent, - t->buffer_index, format_vlib_buffer, &t->buffer); - - s = format (s, "\n%U%U", format_white_space, indent, - format_ethernet_header_with_length, t->buffer.pre_data, - sizeof (t->buffer.pre_data)); - - return s; -} - -u8 * -format_dpdk_rx_dma_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); - dpdk_rx_dma_trace_t *t = va_arg (*va, dpdk_rx_dma_trace_t *); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); - format_function_t *f; - uword indent = 
format_get_indent (s); - vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); - - s = format (s, "%U rx queue %d", - format_vnet_sw_interface_name, vnm, sw, t->queue_index); - - s = format (s, "\n%Ubuffer 0x%x: %U", - format_white_space, indent, - t->buffer_index, format_vlib_buffer, &t->buffer); - - s = format (s, "\n%U%U", - format_white_space, indent, - format_dpdk_rte_mbuf, &t->mb, &t->data); - - if (vm->trace_main.verbose) - { - s = format (s, "\n%UPacket Dump%s", format_white_space, indent + 2, - t->mb.data_len > sizeof (t->data) ? " (truncated)" : ""); - s = format (s, "\n%U%U", format_white_space, indent + 4, - format_hexdump, &t->data, - t->mb.data_len > - sizeof (t->data) ? sizeof (t->data) : t->mb.data_len); - } - f = node->format_buffer; - if (!f) - f = format_hex_bytes; - s = format (s, "\n%U%U", format_white_space, indent, - f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); - - return s; -} - - -static inline u8 * -format_dpdk_pkt_types (u8 * s, va_list * va) -{ - u32 *pkt_types = va_arg (*va, u32 *); - uword indent __attribute__ ((unused)) = format_get_indent (s) + 2; - - if (!*pkt_types) - return s; - - s = format (s, "Packet Types"); - -#define _(L, F, S) \ - if ((*pkt_types & RTE_PTYPE_##L##_MASK) == RTE_PTYPE_##L##_##F) \ - { \ - s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \ - "RTE_PTYPE_" #L "_" #F, RTE_PTYPE_##L##_##F, S); \ - } - - foreach_dpdk_pkt_type -#undef _ - return s; -} - -static inline u8 * -format_dpdk_pkt_offload_flags (u8 * s, va_list * va) -{ - u64 *ol_flags = va_arg (*va, u64 *); - uword indent = format_get_indent (s) + 2; - - if (!*ol_flags) - return s; - - s = format (s, "Packet Offload Flags"); - -#define _(F, S) \ - if (*ol_flags & F) \ - { \ - s = format (s, "\n%U%s (0x%04x) %s", \ - format_white_space, indent, #F, F, S); \ - } - - foreach_dpdk_pkt_offload_flag -#undef _ - return s; -} - -u8 * -format_dpdk_rte_mbuf_vlan (u8 * s, va_list * va) -{ - ethernet_vlan_header_tv_t *vlan_hdr = - va_arg (*va, ethernet_vlan_header_tv_t *); - - if (clib_net_to_host_u16 (vlan_hdr->type) == ETHERNET_TYPE_DOT1AD) - { - s = format (s, "%U 802.1q vlan ", - format_ethernet_vlan_tci, - clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); - vlan_hdr++; - } - - s = format (s, "%U", - format_ethernet_vlan_tci, - clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); - - return s; -} - -u8 * -format_dpdk_rte_mbuf (u8 * s, va_list * va) -{ - struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *); - ethernet_header_t *eth_hdr = va_arg (*va, ethernet_header_t *); - uword indent = format_get_indent (s) + 2; - - s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d" - "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x, data_off %d, phys_addr 0x%x" - "\n%Upacket_type 0x%x", - mb->port, mb->nb_segs, mb->pkt_len, - format_white_space, indent, - mb->buf_len, mb->data_len, mb->ol_flags, mb->data_off, - mb->buf_physaddr, format_white_space, indent, mb->packet_type); - - if (mb->ol_flags) - s = format (s, "\n%U%U", format_white_space, indent, - format_dpdk_pkt_offload_flags, &mb->ol_flags); - - if ((mb->ol_flags & PKT_RX_VLAN_PKT) && - ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0)) - { - ethernet_vlan_header_tv_t *vlan_hdr = - ((ethernet_vlan_header_tv_t *) & (eth_hdr->type)); - s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr); - } - - if (mb->packet_type) - s = format (s, "\n%U%U", format_white_space, indent, - format_dpdk_pkt_types, &mb->packet_type); - - return s; -} - -/* FIXME is this function used? 
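   (Editorial note, added commentary: it is compiled out via the #if 0 that
   follows. If enabled, it would parse a per-NUMA-socket memory list such
   as "1024,0,1024" into a hash of socket index to megabytes, with an empty
   slot, e.g. a bare comma, defaulting that socket to 1024.)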
*/ -#if 0 -uword -unformat_socket_mem (unformat_input_t * input, va_list * va) -{ - uword **r = va_arg (*va, uword **); - int i = 0; - u32 mem; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, ",")) - hash_set (*r, i, 1024); - else if (unformat (input, "%u,", &mem)) - hash_set (*r, i, mem); - else if (unformat (input, "%u", &mem)) - hash_set (*r, i, mem); - else - { - unformat_put_input (input); - goto done; - } - i++; - } - -done: - return 1; -} -#endif - -clib_error_t * -unformat_rss_fn (unformat_input_t * input, uword * rss_fn) -{ - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (0) - ; -#undef _ -#define _(f, s) \ - else if (unformat (input, s)) \ - *rss_fn |= f; - - foreach_dpdk_rss_hf -#undef _ - else - { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - } - return 0; -} - -clib_error_t * -unformat_hqos (unformat_input_t * input, dpdk_device_config_hqos_t * hqos) -{ - clib_error_t *error = 0; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "hqos-thread %u", &hqos->hqos_thread)) - hqos->hqos_thread_valid = 1; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - break; - } - } - - return error; -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/hqos.c b/src/vnet/devices/dpdk/hqos.c deleted file mode 100644 index d68bc48f..00000000 --- a/src/vnet/devices/dpdk/hqos.c +++ /dev/null @@ -1,775 +0,0 @@ -/* - * Copyright(c) 2016 Intel Corporation. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include /* enumerate all vlib messages */ - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - -#include "dpdk_priv.h" - -dpdk_main_t dpdk_main; - -/*** - * - * HQoS default configuration values - * - ***/ - -static dpdk_device_config_hqos_t hqos_params_default = { - .hqos_thread_valid = 0, - - .swq_size = 4096, - .burst_enq = 256, - .burst_deq = 220, - - /* - * Packet field to identify the subport. - * - * Default value: Since only one subport is defined by default (see below: - * n_subports_per_port = 1), the subport ID is hardcoded to 0. - */ - .pktfield0_slabpos = 0, - .pktfield0_slabmask = 0, - - /* - * Packet field to identify the pipe. - * - * Default value: Assuming Ethernet/IPv4/UDP packets, UDP payload bits 12 .. 
23 - */ - .pktfield1_slabpos = 40, - .pktfield1_slabmask = 0x0000000FFF000000LLU, - - /* Packet field used as index into TC translation table to identify the traffic - * class and queue. - * - * Default value: Assuming Ethernet/IPv4 packets, IPv4 DSCP field - */ - .pktfield2_slabpos = 8, - .pktfield2_slabmask = 0x00000000000000FCLLU, - .tc_table = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - }, - - /* port */ - .port = { - .name = NULL, /* Set at init */ - .socket = 0, /* Set at init */ - .rate = 1250000000, /* Assuming 10GbE port */ - .mtu = 14 + 1500, /* Assuming Ethernet/IPv4 pkt (Ethernet FCS not included) */ - .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT, - .n_subports_per_port = 1, - .n_pipes_per_subport = 4096, - .qsize = {64, 64, 64, 64}, - .pipe_profiles = NULL, /* Set at config */ - .n_pipe_profiles = 1, - -#ifdef RTE_SCHED_RED - .red_params = { - /* Traffic Class 0 Colors Green / Yellow / Red */ - [0][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [0][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [0][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - - /* Traffic Class 1 - Colors Green / Yellow / Red */ - [1][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [1][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [1][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - - /* Traffic Class 2 - Colors Green / Yellow / Red */ - [2][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [2][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [2][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - - /* Traffic Class 3 - Colors Green / Yellow / Red */ - [3][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [3][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [3][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9} - }, -#endif /* RTE_SCHED_RED */ - }, -}; - -static struct rte_sched_subport_params hqos_subport_params_default = { - .tb_rate = 1250000000, /* 10GbE line rate (measured in bytes/second) */ - .tb_size = 1000000, - .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000}, - .tc_period = 10, -}; - -static struct rte_sched_pipe_params hqos_pipe_params_default = { - .tb_rate = 305175, /* 10GbE line rate divided by 4K pipes */ - .tb_size = 1000000, - .tc_rate = {305175, 305175, 305175, 305175}, - .tc_period = 40, -#ifdef RTE_SCHED_SUBPORT_TC_OV - .tc_ov_weight = 1, -#endif - .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, -}; - -/*** - * - * HQoS configuration - * - ***/ - -int -dpdk_hqos_validate_mask (u64 mask, u32 n) -{ - int count = __builtin_popcountll (mask); - int pos_lead = sizeof (u64) * 8 - __builtin_clzll (mask); - int pos_trail = __builtin_ctzll (mask); - int count_expected = __builtin_popcount (n - 1); - - /* Handle the exceptions */ - if (n == 0) - return -1; /* Error */ - - if ((mask == 0) && (n == 1)) - return 0; /* OK */ - - if (((mask == 0) && (n != 1)) || ((mask != 0) && (n == 1))) - return -2; /* Error */ - - /* Check that mask is contiguous */ - if ((pos_lead - pos_trail) != count) - return -3; /* Error */ - - /* Check that mask contains the expected number of bits set */ - if (count != count_expected) - return -4; /* Error */ - - return 0; /* OK */ -} - -void 
-dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * - hqos, u32 pipe_profile_id) -{ - memcpy (&hqos->pipe[pipe_profile_id], &hqos_pipe_params_default, - sizeof (hqos_pipe_params_default)); -} - -void -dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos) -{ - struct rte_sched_subport_params *subport_params; - struct rte_sched_pipe_params *pipe_params; - u32 *pipe_map; - u32 i; - - memcpy (hqos, &hqos_params_default, sizeof (hqos_params_default)); - - /* pipe */ - vec_add2 (hqos->pipe, pipe_params, hqos->port.n_pipe_profiles); - - for (i = 0; i < vec_len (hqos->pipe); i++) - memcpy (&pipe_params[i], - &hqos_pipe_params_default, sizeof (hqos_pipe_params_default)); - - hqos->port.pipe_profiles = hqos->pipe; - - /* subport */ - vec_add2 (hqos->subport, subport_params, hqos->port.n_subports_per_port); - - for (i = 0; i < vec_len (hqos->subport); i++) - memcpy (&subport_params[i], - &hqos_subport_params_default, - sizeof (hqos_subport_params_default)); - - /* pipe profile */ - vec_add2 (hqos->pipe_map, - pipe_map, - hqos->port.n_subports_per_port * hqos->port.n_pipes_per_subport); - - for (i = 0; i < vec_len (hqos->pipe_map); i++) - pipe_map[i] = 0; -} - -/*** - * - * HQoS init - * - ***/ - -clib_error_t * -dpdk_port_setup_hqos (dpdk_device_t * xd, dpdk_device_config_hqos_t * hqos) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - char name[32]; - u32 subport_id, i; - int rv; - - /* Detect the set of worker threads */ - int worker_thread_first = 0; - int worker_thread_count = 0; - - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - vlib_thread_registration_t *tr = - p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - worker_thread_first = tr->first_index; - worker_thread_count = tr->count; - } - - /* Allocate the per-thread device data array */ - vec_validate_aligned (xd->hqos_wt, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - memset (xd->hqos_wt, 0, tm->n_vlib_mains * sizeof (xd->hqos_wt[0])); - - vec_validate_aligned (xd->hqos_ht, 0, CLIB_CACHE_LINE_BYTES); - memset (xd->hqos_ht, 0, sizeof (xd->hqos_ht[0])); - - /* Allocate space for one SWQ per worker thread in the I/O TX thread data structure */ - vec_validate (xd->hqos_ht->swq, worker_thread_count); - - /* SWQ */ - for (i = 0; i < worker_thread_count + 1; i++) - { - u32 swq_flags = RING_F_SP_ENQ | RING_F_SC_DEQ; - - snprintf (name, sizeof (name), "SWQ-worker%u-to-device%u", i, - xd->device_index); - xd->hqos_ht->swq[i] = - rte_ring_create (name, hqos->swq_size, xd->cpu_socket, swq_flags); - if (xd->hqos_ht->swq[i] == NULL) - return clib_error_return (0, - "SWQ-worker%u-to-device%u: rte_ring_create err", - i, xd->device_index); - } - - /* - * HQoS - */ - - /* HQoS port */ - snprintf (name, sizeof (name), "HQoS%u", xd->device_index); - hqos->port.name = strdup (name); - if (hqos->port.name == NULL) - return clib_error_return (0, "HQoS%u: strdup err", xd->device_index); - - hqos->port.socket = rte_eth_dev_socket_id (xd->device_index); - if (hqos->port.socket == SOCKET_ID_ANY) - hqos->port.socket = 0; - - xd->hqos_ht->hqos = rte_sched_port_config (&hqos->port); - if (xd->hqos_ht->hqos == NULL) - return clib_error_return (0, "HQoS%u: rte_sched_port_config err", - xd->device_index); - - /* HQoS subport */ - for (subport_id = 0; subport_id < hqos->port.n_subports_per_port; - subport_id++) - { - u32 pipe_id; - - rv = - rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, - &hqos->subport[subport_id]); - if (rv) - return clib_error_return (0, - 
"HQoS%u subport %u: rte_sched_subport_config err (%d)", - xd->device_index, subport_id, rv); - - /* HQoS pipe */ - for (pipe_id = 0; pipe_id < hqos->port.n_pipes_per_subport; pipe_id++) - { - u32 pos = subport_id * hqos->port.n_pipes_per_subport + pipe_id; - u32 profile_id = hqos->pipe_map[pos]; - - rv = - rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, - profile_id); - if (rv) - return clib_error_return (0, - "HQoS%u subport %u pipe %u: rte_sched_pipe_config err (%d)", - xd->device_index, subport_id, pipe_id, - rv); - } - } - - /* Set up per-thread device data for the I/O TX thread */ - xd->hqos_ht->hqos_burst_enq = hqos->burst_enq; - xd->hqos_ht->hqos_burst_deq = hqos->burst_deq; - vec_validate (xd->hqos_ht->pkts_enq, 2 * hqos->burst_enq - 1); - vec_validate (xd->hqos_ht->pkts_deq, hqos->burst_deq - 1); - xd->hqos_ht->pkts_enq_len = 0; - xd->hqos_ht->swq_pos = 0; - xd->hqos_ht->flush_count = 0; - - /* Set up per-thread device data for each worker thread */ - for (i = 0; i < worker_thread_count + 1; i++) - { - u32 tid; - if (i) - tid = worker_thread_first + (i - 1); - else - tid = i; - - xd->hqos_wt[tid].swq = xd->hqos_ht->swq[i]; - xd->hqos_wt[tid].hqos_field0_slabpos = hqos->pktfield0_slabpos; - xd->hqos_wt[tid].hqos_field0_slabmask = hqos->pktfield0_slabmask; - xd->hqos_wt[tid].hqos_field0_slabshr = - __builtin_ctzll (hqos->pktfield0_slabmask); - xd->hqos_wt[tid].hqos_field1_slabpos = hqos->pktfield1_slabpos; - xd->hqos_wt[tid].hqos_field1_slabmask = hqos->pktfield1_slabmask; - xd->hqos_wt[tid].hqos_field1_slabshr = - __builtin_ctzll (hqos->pktfield1_slabmask); - xd->hqos_wt[tid].hqos_field2_slabpos = hqos->pktfield2_slabpos; - xd->hqos_wt[tid].hqos_field2_slabmask = hqos->pktfield2_slabmask; - xd->hqos_wt[tid].hqos_field2_slabshr = - __builtin_ctzll (hqos->pktfield2_slabmask); - memcpy (xd->hqos_wt[tid].hqos_tc_table, hqos->tc_table, - sizeof (hqos->tc_table)); - } - - return 0; -} - -/*** - * - * HQoS run-time - * - ***/ -/* - * dpdk_hqos_thread - Contains the main loop of an HQoS thread. 
- * - * w - * Information for the current thread - */ -static_always_inline void -dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm) -{ - dpdk_main_t *dm = &dpdk_main; - u32 cpu_index = vm->cpu_index; - u32 dev_pos; - - dev_pos = 0; - while (1) - { - vlib_worker_thread_barrier_check (); - - u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); - if (dev_pos >= n_devs) - dev_pos = 0; - - dpdk_device_and_queue_t *dq = - vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); - dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); - - dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; - u32 device_index = xd->device_index; - u16 queue_id = dq->queue_id; - - struct rte_mbuf **pkts_enq = hqos->pkts_enq; - u32 pkts_enq_len = hqos->pkts_enq_len; - u32 swq_pos = hqos->swq_pos; - u32 n_swq = vec_len (hqos->swq), i; - u32 flush_count = hqos->flush_count; - - for (i = 0; i < n_swq; i++) - { - /* Get current SWQ for this device */ - struct rte_ring *swq = hqos->swq[swq_pos]; - - /* Read SWQ burst to packet buffer of this device */ - pkts_enq_len += rte_ring_sc_dequeue_burst (swq, - (void **) - &pkts_enq[pkts_enq_len], - hqos->hqos_burst_enq); - - /* Get next SWQ for this device */ - swq_pos++; - if (swq_pos >= n_swq) - swq_pos = 0; - hqos->swq_pos = swq_pos; - - /* HWQ TX enqueue when burst available */ - if (pkts_enq_len >= hqos->hqos_burst_enq) - { - u32 n_pkts = rte_eth_tx_burst (device_index, - (uint16_t) queue_id, - pkts_enq, - (uint16_t) pkts_enq_len); - - for (; n_pkts < pkts_enq_len; n_pkts++) - rte_pktmbuf_free (pkts_enq[n_pkts]); - - pkts_enq_len = 0; - flush_count = 0; - break; - } - } - if (pkts_enq_len) - { - flush_count++; - if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) - { - rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); - - pkts_enq_len = 0; - flush_count = 0; - } - } - hqos->pkts_enq_len = pkts_enq_len; - hqos->flush_count = flush_count; - - /* Advance to next device */ - dev_pos++; - } -} - -static_always_inline void -dpdk_hqos_thread_internal (vlib_main_t * vm) -{ - dpdk_main_t *dm = &dpdk_main; - u32 cpu_index = vm->cpu_index; - u32 dev_pos; - - dev_pos = 0; - while (1) - { - vlib_worker_thread_barrier_check (); - - u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); - if (PREDICT_FALSE (n_devs == 0)) - { - dev_pos = 0; - continue; - } - if (dev_pos >= n_devs) - dev_pos = 0; - - dpdk_device_and_queue_t *dq = - vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); - dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); - - dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; - u32 device_index = xd->device_index; - u16 queue_id = dq->queue_id; - - struct rte_mbuf **pkts_enq = hqos->pkts_enq; - struct rte_mbuf **pkts_deq = hqos->pkts_deq; - u32 pkts_enq_len = hqos->pkts_enq_len; - u32 swq_pos = hqos->swq_pos; - u32 n_swq = vec_len (hqos->swq), i; - u32 flush_count = hqos->flush_count; - - /* - * SWQ dequeue and HQoS enqueue for current device - */ - for (i = 0; i < n_swq; i++) - { - /* Get current SWQ for this device */ - struct rte_ring *swq = hqos->swq[swq_pos]; - - /* Read SWQ burst to packet buffer of this device */ - pkts_enq_len += rte_ring_sc_dequeue_burst (swq, - (void **) - &pkts_enq[pkts_enq_len], - hqos->hqos_burst_enq); - - /* Get next SWQ for this device */ - swq_pos++; - if (swq_pos >= n_swq) - swq_pos = 0; - hqos->swq_pos = swq_pos; - - /* HQoS enqueue when burst available */ - if (pkts_enq_len >= hqos->hqos_burst_enq) - { - rte_sched_port_enqueue (hqos->hqos, pkts_enq, 
pkts_enq_len); - - pkts_enq_len = 0; - flush_count = 0; - break; - } - } - if (pkts_enq_len) - { - flush_count++; - if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) - { - rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); - - pkts_enq_len = 0; - flush_count = 0; - } - } - hqos->pkts_enq_len = pkts_enq_len; - hqos->flush_count = flush_count; - - /* - * HQoS dequeue and HWQ TX enqueue for current device - */ - { - u32 pkts_deq_len, n_pkts; - - pkts_deq_len = rte_sched_port_dequeue (hqos->hqos, - pkts_deq, - hqos->hqos_burst_deq); - - for (n_pkts = 0; n_pkts < pkts_deq_len;) - n_pkts += rte_eth_tx_burst (device_index, - (uint16_t) queue_id, - &pkts_deq[n_pkts], - (uint16_t) (pkts_deq_len - n_pkts)); - } - - /* Advance to next device */ - dev_pos++; - } -} - -void -dpdk_hqos_thread (vlib_worker_thread_t * w) -{ - vlib_main_t *vm; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - - vm = vlib_get_main (); - - ASSERT (vm->cpu_index == os_get_cpu_number ()); - - clib_time_init (&vm->clib_time); - clib_mem_set_heap (w->thread_mheap); - - /* Wait until the dpdk init sequence is complete */ - while (tm->worker_thread_release == 0) - vlib_worker_thread_barrier_check (); - - if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0) - return - clib_error - ("current I/O TX thread does not have any devices assigned to it"); - - if (DPDK_HQOS_DBG_BYPASS) - dpdk_hqos_thread_internal_hqos_dbg_bypass (vm); - else - dpdk_hqos_thread_internal (vm); -} - -void -dpdk_hqos_thread_fn (void *arg) -{ - vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; - vlib_worker_thread_init (w); - dpdk_hqos_thread (w); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_THREAD (hqos_thread_reg, static) = -{ - .name = "hqos-threads", - .short_name = "hqos-threads", - .function = dpdk_hqos_thread_fn, -}; -/* *INDENT-ON* */ - -/* - * HQoS run-time code to be called by the worker threads - */ -#define BITFIELD(byte_array, slab_pos, slab_mask, slab_shr) \ -({ \ - u64 slab = *((u64 *) &byte_array[slab_pos]); \ - u64 val = (rte_be_to_cpu_64(slab) & slab_mask) >> slab_shr; \ - val; \ -}) - -#define RTE_SCHED_PORT_HIERARCHY(subport, pipe, traffic_class, queue, color) \ - ((((u64) (queue)) & 0x3) | \ - ((((u64) (traffic_class)) & 0x3) << 2) | \ - ((((u64) (color)) & 0x3) << 4) | \ - ((((u64) (subport)) & 0xFFFF) << 16) | \ - ((((u64) (pipe)) & 0xFFFFFFFF) << 32)) - -void -dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, - struct rte_mbuf **pkts, u32 n_pkts) -{ - u32 i; - - for (i = 0; i < (n_pkts & (~0x3)); i += 4) - { - struct rte_mbuf *pkt0 = pkts[i]; - struct rte_mbuf *pkt1 = pkts[i + 1]; - struct rte_mbuf *pkt2 = pkts[i + 2]; - struct rte_mbuf *pkt3 = pkts[i + 3]; - - u8 *pkt0_data = rte_pktmbuf_mtod (pkt0, u8 *); - u8 *pkt1_data = rte_pktmbuf_mtod (pkt1, u8 *); - u8 *pkt2_data = rte_pktmbuf_mtod (pkt2, u8 *); - u8 *pkt3_data = rte_pktmbuf_mtod (pkt3, u8 *); - - u64 pkt0_subport = BITFIELD (pkt0_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt0_pipe = BITFIELD (pkt0_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt0_dscp = BITFIELD (pkt0_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt0_tc = hqos->hqos_tc_table[pkt0_dscp & 0x3F] >> 2; - u32 pkt0_tc_q = hqos->hqos_tc_table[pkt0_dscp & 0x3F] & 0x3; - - u64 pkt1_subport = BITFIELD (pkt1_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - 
hqos->hqos_field0_slabshr); - u64 pkt1_pipe = BITFIELD (pkt1_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt1_dscp = BITFIELD (pkt1_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt1_tc = hqos->hqos_tc_table[pkt1_dscp & 0x3F] >> 2; - u32 pkt1_tc_q = hqos->hqos_tc_table[pkt1_dscp & 0x3F] & 0x3; - - u64 pkt2_subport = BITFIELD (pkt2_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt2_pipe = BITFIELD (pkt2_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt2_dscp = BITFIELD (pkt2_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt2_tc = hqos->hqos_tc_table[pkt2_dscp & 0x3F] >> 2; - u32 pkt2_tc_q = hqos->hqos_tc_table[pkt2_dscp & 0x3F] & 0x3; - - u64 pkt3_subport = BITFIELD (pkt3_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt3_pipe = BITFIELD (pkt3_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt3_dscp = BITFIELD (pkt3_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt3_tc = hqos->hqos_tc_table[pkt3_dscp & 0x3F] >> 2; - u32 pkt3_tc_q = hqos->hqos_tc_table[pkt3_dscp & 0x3F] & 0x3; - - u64 pkt0_sched = RTE_SCHED_PORT_HIERARCHY (pkt0_subport, - pkt0_pipe, - pkt0_tc, - pkt0_tc_q, - 0); - u64 pkt1_sched = RTE_SCHED_PORT_HIERARCHY (pkt1_subport, - pkt1_pipe, - pkt1_tc, - pkt1_tc_q, - 0); - u64 pkt2_sched = RTE_SCHED_PORT_HIERARCHY (pkt2_subport, - pkt2_pipe, - pkt2_tc, - pkt2_tc_q, - 0); - u64 pkt3_sched = RTE_SCHED_PORT_HIERARCHY (pkt3_subport, - pkt3_pipe, - pkt3_tc, - pkt3_tc_q, - 0); - - pkt0->hash.sched.lo = pkt0_sched & 0xFFFFFFFF; - pkt0->hash.sched.hi = pkt0_sched >> 32; - pkt1->hash.sched.lo = pkt1_sched & 0xFFFFFFFF; - pkt1->hash.sched.hi = pkt1_sched >> 32; - pkt2->hash.sched.lo = pkt2_sched & 0xFFFFFFFF; - pkt2->hash.sched.hi = pkt2_sched >> 32; - pkt3->hash.sched.lo = pkt3_sched & 0xFFFFFFFF; - pkt3->hash.sched.hi = pkt3_sched >> 32; - } - - for (; i < n_pkts; i++) - { - struct rte_mbuf *pkt = pkts[i]; - - u8 *pkt_data = rte_pktmbuf_mtod (pkt, u8 *); - - u64 pkt_subport = BITFIELD (pkt_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt_pipe = BITFIELD (pkt_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt_dscp = BITFIELD (pkt_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt_tc = hqos->hqos_tc_table[pkt_dscp & 0x3F] >> 2; - u32 pkt_tc_q = hqos->hqos_tc_table[pkt_dscp & 0x3F] & 0x3; - - u64 pkt_sched = RTE_SCHED_PORT_HIERARCHY (pkt_subport, - pkt_pipe, - pkt_tc, - pkt_tc_q, - 0); - - pkt->hash.sched.lo = pkt_sched & 0xFFFFFFFF; - pkt->hash.sched.hi = pkt_sched >> 32; - } -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c deleted file mode 100755 index 29423e15..00000000 --- a/src/vnet/devices/dpdk/init.c +++ /dev/null @@ -1,1801 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dpdk_priv.h" - -dpdk_main_t dpdk_main; - -/* force linker to link functions used by vlib and declared weak */ -void *vlib_weakly_linked_functions[] = { - &rte_pktmbuf_init, - &rte_pktmbuf_pool_init, -}; - -#define LINK_STATE_ELOGS 0 - -#define DEFAULT_HUGE_DIR "/run/vpp/hugepages" -#define VPP_RUN_DIR "/run/vpp" - -/* Port configuration, mildly modified Intel app values */ - -static struct rte_eth_conf port_conf_template = { - .rxmode = { - .split_hdr_size = 0, - .header_split = 0, /**< Header Split disabled */ - .hw_ip_checksum = 0, /**< IP checksum offload disabled */ - .hw_vlan_filter = 0, /**< VLAN filtering disabled */ - .hw_strip_crc = 0, /**< CRC stripped by hardware */ - }, - .txmode = { - .mq_mode = ETH_MQ_TX_NONE, - }, -}; - -clib_error_t * -dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) -{ - int rv; - int j; - - ASSERT (os_get_cpu_number () == 0); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); - rte_eth_dev_stop (xd->device_index); - } - - rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, - xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - /* Set up one TX-queue per worker thread */ - for (j = 0; j < xd->tx_q_used; j++) - { - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - xd->cpu_socket, &xd->tx_conf); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - SOCKET_ID_ANY, &xd->tx_conf); - if (rv < 0) - break; - } - - if (rv < 0) - return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", - xd->device_index, rv); - - for (j = 0; j < xd->rx_q_used; j++) - { - - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - xd->cpu_socket, 0, - dm-> - pktmbuf_pools[xd->cpu_socket_id_by_queue - [j]]); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - SOCKET_ID_ANY, 0, - dm-> - pktmbuf_pools[xd->cpu_socket_id_by_queue - [j]]); - if (rv < 0) - return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", - xd->device_index, rv); - } - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - int rv; - rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } - return 0; -} - -static u32 -dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - u32 old = 0; - - if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags)) - { - old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0; - - if 
(flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) - xd->flags |= DPDK_DEVICE_FLAG_PROMISC; - else - xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC; - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) - rte_eth_promiscuous_enable (xd->device_index); - else - rte_eth_promiscuous_disable (xd->device_index); - } - } - else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) - { - /* - * DAW-FIXME: The Cisco VIC firmware does not provide an api for a - * driver to dynamically change the mtu. If/when the - * VIC firmware gets fixed, then this should be removed. - */ - if (xd->pmd == VNET_DPDK_PMD_ENIC) - { - struct rte_eth_dev_info dev_info; - - /* - * Restore mtu to what has been set by CIMC in the firmware cfg. - */ - rte_eth_dev_info_get (xd->device_index, &dev_info); - hi->max_packet_bytes = dev_info.max_rx_pktlen; - - vlib_cli_output (vlib_get_main (), - "Cisco VIC mtu can only be changed " - "using CIMC then rebooting the server!"); - } - else - { - int rv; - - xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - rte_eth_dev_stop (xd->device_index); - - rv = rte_eth_dev_configure - (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - vlib_cli_output (vlib_get_main (), - "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - int rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } - } - } - return old; -} - -void -dpdk_device_lock_init (dpdk_device_t * xd) -{ - int q; - vec_validate (xd->lockp, xd->tx_q_used - 1); - for (q = 0; q < xd->tx_q_used; q++) - { - xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); - memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); - } -} - -void -dpdk_device_lock_free (dpdk_device_t * xd) -{ - int q; - - for (q = 0; q < vec_len (xd->lockp); q++) - clib_mem_free ((void *) xd->lockp[q]); - vec_free (xd->lockp); - xd->lockp = 0; -} - -static clib_error_t * -dpdk_lib_init (dpdk_main_t * dm) -{ - u32 nports; - u32 nb_desc = 0; - int i; - clib_error_t *error; - vlib_main_t *vm = vlib_get_main (); - vlib_thread_main_t *tm = vlib_get_thread_main (); - vnet_sw_interface_t *sw; - vnet_hw_interface_t *hi; - dpdk_device_t *xd; - vlib_pci_addr_t last_pci_addr; - u32 last_pci_addr_port = 0; - vlib_thread_registration_t *tr, *tr_hqos; - uword *p, *p_hqos; - - u32 next_cpu = 0, next_hqos_cpu = 0; - u8 af_packet_port_id = 0; - last_pci_addr.as_u32 = ~0; - - dm->input_cpu_first_index = 0; - dm->input_cpu_count = 1; - - /* find out which cpus will be used for input */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - dm->input_cpu_first_index = tr->first_index; - dm->input_cpu_count = tr->count; - } - - vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - dm->hqos_cpu_first_index = 0; - dm->hqos_cpu_count = 0; - - /* find out which cpus will be used for I/O TX */ - p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads"); - tr_hqos = p_hqos ? 
(vlib_thread_registration_t *) p_hqos[0] : 0; - - if (tr_hqos && tr_hqos->count > 0) - { - dm->hqos_cpu_first_index = tr_hqos->first_index; - dm->hqos_cpu_count = tr_hqos->count; - } - - vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - nports = rte_eth_dev_count (); - if (nports < 1) - { - clib_warning ("DPDK drivers found no ports..."); - } - - if (CLIB_DEBUG > 0) - clib_warning ("DPDK drivers found %d ports...", nports); - - /* - * All buffers are all allocated from the same rte_mempool. - * Thus they all have the same number of data bytes. - */ - dm->vlib_buffer_free_list_index = - vlib_buffer_get_or_create_free_list (vm, - VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, - "dpdk rx"); - - if (dm->conf->enable_tcp_udp_checksum) - dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT - | IP_BUFFER_L4_CHECKSUM_COMPUTED); - - for (i = 0; i < nports; i++) - { - u8 addr[6]; - u8 vlan_strip = 0; - int j; - struct rte_eth_dev_info dev_info; - clib_error_t *rv; - struct rte_eth_link l; - dpdk_device_config_t *devconf = 0; - vlib_pci_addr_t pci_addr; - uword *p = 0; - - rte_eth_dev_info_get (i, &dev_info); - if (dev_info.pci_dev) /* bonded interface has no pci info */ - { - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - p = - hash_get (dm->conf->device_config_index_by_pci_addr, - pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - /* Create vnet interface */ - vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); - xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; - xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; - xd->cpu_socket = (i8) rte_eth_dev_socket_id (i); - - /* Handle interface naming for devices with multiple ports sharing same PCI ID */ - if (dev_info.pci_dev) - { - struct rte_eth_dev_info di = { 0 }; - rte_eth_dev_info_get (i + 1, &di); - if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 && - memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr, - sizeof (struct rte_pci_addr)) == 0) - { - xd->interface_name_suffix = format (0, "0"); - last_pci_addr.as_u32 = pci_addr.as_u32; - last_pci_addr_port = i; - } - else if (pci_addr.as_u32 == last_pci_addr.as_u32) - { - xd->interface_name_suffix = - format (0, "%u", i - last_pci_addr_port); - } - else - { - last_pci_addr.as_u32 = ~0; - } - } - else - last_pci_addr.as_u32 = ~0; - - clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, - sizeof (struct rte_eth_txconf)); - if (dm->conf->no_multi_seg) - { - xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; - port_conf_template.rxmode.jumbo_frame = 0; - } - else - { - xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; - port_conf_template.rxmode.jumbo_frame = 1; - xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG; - } - - clib_memcpy (&xd->port_conf, &port_conf_template, - sizeof (struct rte_eth_conf)); - - xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains); - - if (devconf->num_tx_queues > 0 - && devconf->num_tx_queues < xd->tx_q_used) - xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues); - - if (devconf->num_rx_queues > 1 && dm->use_rss == 0) - { - dm->use_rss = 1; - } - - if (devconf->num_rx_queues > 1 - && dev_info.max_rx_queues >= devconf->num_rx_queues) - { - xd->rx_q_used = devconf->num_rx_queues; - xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; - if (devconf->rss_fn == 0) - 
xd->port_conf.rx_adv_conf.rss_conf.rss_hf = - ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; - else - xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; - } - else - xd->rx_q_used = 1; - - xd->flags |= DPDK_DEVICE_FLAG_PMD; - - /* workaround for drivers not setting driver_name */ - if ((!dev_info.driver_name) && (dev_info.pci_dev)) - dev_info.driver_name = dev_info.pci_dev->driver->driver.name; - - ASSERT (dev_info.driver_name); - - if (!xd->pmd) - { - - -#define _(s,f) else if (dev_info.driver_name && \ - !strcmp(dev_info.driver_name, s)) \ - xd->pmd = VNET_DPDK_PMD_##f; - if (0) - ; - foreach_dpdk_pmd -#undef _ - else - xd->pmd = VNET_DPDK_PMD_UNKNOWN; - - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; - xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; - - switch (xd->pmd) - { - /* 1G adapters */ - case VNET_DPDK_PMD_E1000EM: - case VNET_DPDK_PMD_IGB: - case VNET_DPDK_PMD_IGBVF: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - break; - - /* 10G adapters */ - case VNET_DPDK_PMD_IXGBE: - case VNET_DPDK_PMD_IXGBEVF: - case VNET_DPDK_PMD_THUNDERX: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - case VNET_DPDK_PMD_DPAA2: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - - /* Cisco VIC */ - case VNET_DPDK_PMD_ENIC: - rte_eth_link_get_nowait (i, &l); - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - if (l.link_speed == 40000) - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - else - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - - /* Intel Fortville */ - case VNET_DPDK_PMD_I40E: - case VNET_DPDK_PMD_I40EVF: - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - - switch (dev_info.pci_dev->id.device_id) - { - case I40E_DEV_ID_10G_BASE_T: - case I40E_DEV_ID_SFP_XL710: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - case I40E_DEV_ID_QSFP_A: - case I40E_DEV_ID_QSFP_B: - case I40E_DEV_ID_QSFP_C: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case I40E_DEV_ID_VF: - rte_eth_link_get_nowait (i, &l); - xd->port_type = l.link_speed == 10000 ? 
- VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; - break; - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - - case VNET_DPDK_PMD_CXGBE: - switch (dev_info.pci_dev->id.device_id) - { - case 0x540d: /* T580-CR */ - case 0x5410: /* T580-LP-cr */ - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case 0x5403: /* T540-CR */ - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - - case VNET_DPDK_PMD_MLX5: - { - char *pn_100g[] = { "MCX415A-CCAT", "MCX416A-CCAT", 0 }; - char *pn_40g[] = { "MCX413A-BCAT", "MCX414A-BCAT", - "MCX415A-BCAT", "MCX416A-BCAT", "MCX4131A-BCAT", 0 - }; - char *pn_10g[] = { "MCX4111A-XCAT", "MCX4121A-XCAT", 0 }; - - vlib_pci_device_t *pd = vlib_get_pci_device (&pci_addr); - u8 *pn = 0; - char **c; - int found = 0; - pn = format (0, "%U%c", - format_vlib_pci_vpd, pd->vpd_r, "PN", 0); - - if (!pn) - break; - - c = pn_100g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_100G; - break; - } - c++; - } - - c = pn_40g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - } - c++; - } - - c = pn_10g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - } - c++; - } - - vec_free (pn); - } - - break; - /* Intel Red Rock Canyon */ - case VNET_DPDK_PMD_FM10K: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; - break; - - /* virtio */ - case VNET_DPDK_PMD_VIRTIO: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO; - xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO; - break; - - /* vmxnet3 */ - case VNET_DPDK_PMD_VMXNET3: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; - break; - - case VNET_DPDK_PMD_AF_PACKET: - xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET; - xd->af_packet_port_id = af_packet_port_id++; - break; - - case VNET_DPDK_PMD_BOND: - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; - break; - - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - - if (devconf->num_rx_desc) - xd->nb_rx_desc = devconf->num_rx_desc; - - if (devconf->num_tx_desc) - xd->nb_tx_desc = devconf->num_tx_desc; - } - - /* - * Ensure default mtu is not > the mtu read from the hardware. - * Otherwise rte_eth_dev_configure() will fail and the port will - * not be available. - */ - if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen) - { - /* - * This device does not support the platforms's max frame - * size. Use it's advertised mru instead. - */ - xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen; - } - else - { - xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES; - - /* - * Some platforms do not account for Ethernet FCS (4 bytes) in - * MTU calculations. To interop with them increase mru but only - * if the device's settings can support it. - */ - if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) && - xd->port_conf.rxmode.hw_strip_crc) - { - /* - * Allow additional 4 bytes (for Ethernet FCS). These bytes are - * stripped by h/w and so will not consume any buffer memory. 
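 * (Illustration, assuming a classic Ethernet sizing rather than VPP's
 * actual ETHERNET_MAX_PACKET_BYTES value: a 1514-byte limit — 14-byte
 * header + 1500-byte payload, FCS excluded — would be raised to 1518
 * here, and since the NIC strips the CRC those 4 extra bytes never
 * occupy vlib buffer memory.)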
- */ - xd->port_conf.rxmode.max_rx_pkt_len += 4; - } - } - - if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) - { - f64 now = vlib_time_now (vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - clib_memcpy (addr + 2, &rnd, sizeof (rnd)); - addr[0] = 2; - addr[1] = 0xfe; - } - else - rte_eth_macaddr_get (i, (struct ether_addr *) addr); - - if (xd->tx_q_used < tm->n_vlib_mains) - dpdk_device_lock_init (xd); - - xd->device_index = xd - dm->devices; - ASSERT (i == xd->device_index); - xd->per_interface_next_index = ~0; - - /* assign interface to input thread */ - dpdk_device_and_queue_t *dq; - int q; - - if (devconf->workers) - { - int i; - q = 0; - /* *INDENT-OFF* */ - clib_bitmap_foreach (i, devconf->workers, ({ - int cpu = dm->input_cpu_first_index + i; - unsigned lcore = vlib_worker_threads[cpu].lcore_id; - vec_validate(xd->cpu_socket_id_by_queue, q); - xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); - vec_add2(dm->devices_by_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = q++; - })); - /* *INDENT-ON* */ - } - else - for (q = 0; q < xd->rx_q_used; q++) - { - int cpu = dm->input_cpu_first_index + next_cpu; - unsigned lcore = vlib_worker_threads[cpu].lcore_id; - - /* - * numa node for worker thread handling this queue - * needed for taking buffers from the right mempool - */ - vec_validate (xd->cpu_socket_id_by_queue, q); - xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore); - - /* - * construct vector of (device,queue) pairs for each worker thread - */ - vec_add2 (dm->devices_by_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = q; - - next_cpu++; - if (next_cpu == dm->input_cpu_count) - next_cpu = 0; - } - - - if (devconf->hqos_enabled) - { - xd->flags |= DPDK_DEVICE_FLAG_HQOS; - - if (devconf->hqos.hqos_thread_valid) - { - int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; - - if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count) - return clib_error_return (0, "invalid HQoS thread index"); - - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; - } - else - { - int cpu = dm->hqos_cpu_first_index + next_hqos_cpu; - - if (dm->hqos_cpu_count == 0) - return clib_error_return (0, "no HQoS threads available"); - - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; - - next_hqos_cpu++; - if (next_hqos_cpu == dm->hqos_cpu_count) - next_hqos_cpu = 0; - - devconf->hqos.hqos_thread_valid = 1; - devconf->hqos.hqos_thread = cpu; - } - } - - vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j < tm->n_vlib_mains; j++) - { - vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc, - sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->tx_vectors[j]); - } - - vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j < xd->rx_q_used; j++) - { - vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, - CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->rx_vectors[j]); - } - - vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); - - rv = dpdk_port_setup (dm, xd); - - if (rv) - return rv; - - if (devconf->hqos_enabled) - { - rv = dpdk_port_setup_hqos (xd, &devconf->hqos); - if (rv) - return rv; - } - - /* count the number of descriptors used for this device */ - nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; - - error = ethernet_register_interface - (dm->vnet_main, 
dpdk_device_class.index, xd->device_index, - /* ethernet address */ addr, - &xd->vlib_hw_if_index, dpdk_flag_change); - if (error) - return error; - - sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); - xd->vlib_sw_if_index = sw->sw_if_index; - hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); - - /* - * DAW-FIXME: The Cisco VIC firmware does not provide an api for a - * driver to dynamically change the mtu. If/when the - * VIC firmware gets fixed, then this should be removed. - */ - if (xd->pmd == VNET_DPDK_PMD_ENIC) - { - /* - * Initialize mtu to what has been set by CIMC in the firmware cfg. - */ - hi->max_packet_bytes = dev_info.max_rx_pktlen; - if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF) - vlan_strip = 1; /* remove vlan tag from VIC port by default */ - else - clib_warning ("VLAN strip disabled for interface\n"); - } - else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON) - vlan_strip = 1; - - if (vlan_strip) - { - int vlan_off; - vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); - vlan_off |= ETH_VLAN_STRIP_OFFLOAD; - xd->port_conf.rxmode.hw_vlan_strip = vlan_off; - if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0) - clib_warning ("VLAN strip enabled for interface\n"); - else - clib_warning ("VLAN strip cannot be supported by interface\n"); - } - - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = - xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); - - rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); - } - - if (nb_desc > dm->conf->num_mbufs) - clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", - dm->conf->num_mbufs, nb_desc); - - return 0; -} - -static void -dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) -{ - vlib_pci_main_t *pm = &pci_main; - clib_error_t *error; - vlib_pci_device_t *d; - u8 *pci_addr = 0; - int num_whitelisted = vec_len (conf->dev_confs); - - /* *INDENT-OFF* */ - pool_foreach (d, pm->pci_devs, ({ - dpdk_device_config_t * devconf = 0; - vec_reset_length (pci_addr); - pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); - - if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO) - continue; - - if (num_whitelisted) - { - uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32); - - if (!p) - continue; - - devconf = pool_elt_at_index (conf->dev_confs, p[0]); - } - - /* virtio */ - if (d->vendor_id == 0x1af4 && d->device_id == 0x1000) - ; - /* vmxnet3 */ - else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0) - ; - /* all Intel devices */ - else if (d->vendor_id == 0x8086) - ; - /* Cisco VIC */ - else if (d->vendor_id == 0x1137 && d->device_id == 0x0043) - ; - /* Chelsio T4/T5 */ - else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000) - ; - else - { - clib_warning ("Unsupported Ethernet PCI device 0x%04x:0x%04x found " - "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id, - pci_addr); - continue; - } - - error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name); - - if (error) - { - if (devconf == 0) - { - pool_get (conf->dev_confs, devconf); - hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32, - devconf - conf->dev_confs); - devconf->pci_addr.as_u32 = d->bus_address.as_u32; - } - devconf->is_blacklisted = 1; - clib_error_report (error); - } - })); - /* *INDENT-ON* */ - vec_free (pci_addr); -} - -static clib_error_t * -dpdk_device_config 
(dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, - unformat_input_t * input, u8 is_default) -{ - clib_error_t *error = 0; - uword *p; - dpdk_device_config_t *devconf; - unformat_input_t sub_input; - - if (is_default) - { - devconf = &conf->default_devconf; - } - else - { - p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32); - - if (!p) - { - pool_get (conf->dev_confs, devconf); - hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32, - devconf - conf->dev_confs); - } - else - return clib_error_return (0, - "duplicate configuration for PCI address %U", - format_vlib_pci_addr, &pci_addr); - } - - devconf->pci_addr.as_u32 = pci_addr.as_u32; - devconf->hqos_enabled = 0; - dpdk_device_config_hqos_default (&devconf->hqos); - - if (!input) - return 0; - - unformat_skip_white_space (input); - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues)) - ; - else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues)) - ; - else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc)) - ; - else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc)) - ; - else if (unformat (input, "workers %U", unformat_bitmap_list, - &devconf->workers)) - ; - else - if (unformat - (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input)) - { - error = unformat_rss_fn (&sub_input, &devconf->rss_fn); - if (error) - break; - } - else if (unformat (input, "vlan-strip-offload off")) - devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF; - else if (unformat (input, "vlan-strip-offload on")) - devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON; - else - if (unformat - (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input)) - { - devconf->hqos_enabled = 1; - error = unformat_hqos (&sub_input, &devconf->hqos); - if (error) - break; - } - else if (unformat (input, "hqos")) - { - devconf->hqos_enabled = 1; - } - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - break; - } - } - - if (error) - return error; - - if (devconf->workers && devconf->num_rx_queues == 0) - devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers); - else if (devconf->workers && - clib_bitmap_count_set_bits (devconf->workers) != - devconf->num_rx_queues) - error = - clib_error_return (0, - "%U: number of worker threadds must be " - "equal to number of rx queues", format_vlib_pci_addr, - &pci_addr); - - return error; -} - -static clib_error_t * -dpdk_config (vlib_main_t * vm, unformat_input_t * input) -{ - clib_error_t *error = 0; - dpdk_main_t *dm = &dpdk_main; - dpdk_config_main_t *conf = &dpdk_config_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_device_config_t *devconf; - vlib_pci_addr_t pci_addr; - unformat_input_t sub_input; - u8 *s, *tmp = 0; - u8 *rte_cmd = 0, *ethname = 0; - u32 log_level; - int ret, i; - int num_whitelisted = 0; - u8 no_pci = 0; - u8 no_huge = 0; - u8 huge_dir = 0; - u8 file_prefix = 0; - u8 *socket_mem = 0; - - conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - /* Prime the pump */ - if (unformat (input, "no-hugetlb")) - { - vec_add1 (conf->eal_init_args, (u8 *) "no-huge"); - no_huge = 1; - } - - else if (unformat (input, "enable-tcp-udp-checksum")) - conf->enable_tcp_udp_checksum = 1; - - else if (unformat (input, "decimal-interface-names")) - conf->interface_name_format_decimal = 1; - - else if 
(unformat (input, "no-multi-seg")) - conf->no_multi_seg = 1; - - else if (unformat (input, "enable-cryptodev")) - conf->cryptodev = 1; - - else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input, - &sub_input)) - { - error = - dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input, - 1); - - if (error) - return error; - } - else - if (unformat - (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, - unformat_vlib_cli_sub_input, &sub_input)) - { - error = dpdk_device_config (conf, pci_addr, &sub_input, 0); - - if (error) - return error; - - num_whitelisted++; - } - else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr)) - { - error = dpdk_device_config (conf, pci_addr, 0, 0); - - if (error) - return error; - - num_whitelisted++; - } - else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) - ; - else if (unformat (input, "kni %d", &conf->num_kni)) - ; - else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) - ; - else if (unformat (input, "socket-mem %s", &socket_mem)) - ; - else if (unformat (input, "no-pci")) - { - no_pci = 1; - tmp = format (0, "--no-pci%c", 0); - vec_add1 (conf->eal_init_args, tmp); - } - else if (unformat (input, "poll-sleep %d", &dm->poll_sleep)) - ; - -#define _(a) \ - else if (unformat(input, #a)) \ - { \ - tmp = format (0, "--%s%c", #a, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - } - foreach_eal_double_hyphen_predicate_arg -#undef _ -#define _(a) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - if (!strncmp(#a, "huge-dir", 8)) \ - huge_dir = 1; \ - else if (!strncmp(#a, "file-prefix", 11)) \ - file_prefix = 1; \ - tmp = format (0, "--%s%c", #a, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - if (!strncmp(#a, "vdev", 4)) \ - if (strstr((char*)s, "af_packet")) \ - clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \ - vec_add1 (conf->eal_init_args, s); \ - } - foreach_eal_double_hyphen_arg -#undef _ -#define _(a,b) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - tmp = format (0, "-%s%c", #b, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - vec_add1 (conf->eal_init_args, s); \ - } - foreach_eal_single_hyphen_arg -#undef _ -#define _(a,b) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - tmp = format (0, "-%s%c", #b, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - vec_add1 (conf->eal_init_args, s); \ - conf->a##_set_manually = 1; \ - } - foreach_eal_single_hyphen_mandatory_arg -#undef _ - else if (unformat (input, "default")) - ; - - else if (unformat_skip_white_space (input)) - ; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - goto done; - } - } - - if (!conf->uio_driver_name) - conf->uio_driver_name = format (0, "uio_pci_generic%c", 0); - - /* - * Use 1G huge pages if available. - */ - if (!no_huge && !huge_dir) - { - u32 x, *mem_by_socket = 0; - uword c = 0; - u8 use_1g = 1; - u8 use_2m = 1; - u8 less_than_1g = 1; - int rv; - - umount (DEFAULT_HUGE_DIR); - - /* Process "socket-mem" parameter value */ - if (vec_len (socket_mem)) - { - unformat_input_t in; - unformat_init_vector (&in, socket_mem); - while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT) - { - if (unformat (&in, "%u,", &x)) - ; - else if (unformat (&in, "%u", &x)) - ; - else if (unformat (&in, ",")) - x = 0; - else - break; - - vec_add1 (mem_by_socket, x); - - if (x > 1023) - less_than_1g = 0; - } - /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... 
*/ - unformat_free (&in); - socket_mem = 0; - } - else - { - /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( - { - vec_validate(mem_by_socket, c); - mem_by_socket[c] = 256; /* default per-socket mem */ - } - )); - /* *INDENT-ON* */ - } - - /* check if available enough 1GB pages for each socket */ - /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( - { - int pages_avail, page_size, mem; - - vec_validate(mem_by_socket, c); - mem = mem_by_socket[c]; - - page_size = 1024; - pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); - - if (pages_avail < 0 || page_size * pages_avail < mem) - use_1g = 0; - - page_size = 2; - pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); - - if (pages_avail < 0 || page_size * pages_avail < mem) - use_2m = 0; - })); - /* *INDENT-ON* */ - - if (mem_by_socket == 0) - { - error = clib_error_return (0, "mem_by_socket NULL"); - goto done; - } - _vec_len (mem_by_socket) = c + 1; - - /* regenerate socket_mem string */ - vec_foreach_index (x, mem_by_socket) - socket_mem = format (socket_mem, "%s%u", - socket_mem ? "," : "", mem_by_socket[x]); - socket_mem = format (socket_mem, "%c", 0); - - vec_free (mem_by_socket); - - rv = mkdir (VPP_RUN_DIR, 0755); - if (rv && errno != EEXIST) - { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - VPP_RUN_DIR, errno); - goto done; - } - - rv = mkdir (DEFAULT_HUGE_DIR, 0755); - if (rv && errno != EEXIST) - { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - DEFAULT_HUGE_DIR, errno); - goto done; - } - - if (use_1g && !(less_than_1g && use_2m)) - { - rv = - mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); - } - else if (use_2m) - { - rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); - } - else - { - return clib_error_return (0, "not enough free huge pages"); - } - - if (rv) - { - error = clib_error_return (0, "mount failed %d", errno); - goto done; - } - - tmp = format (0, "--huge-dir%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); - vec_add1 (conf->eal_init_args, tmp); - if (!file_prefix) - { - tmp = format (0, "--file-prefix%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "vpp%c", 0); - vec_add1 (conf->eal_init_args, tmp); - } - } - - vec_free (rte_cmd); - vec_free (ethname); - - if (error) - return error; - - /* I'll bet that -c and -n must be the first and second args... 
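 * If so, a hypothetical 4-core, 4-channel run with the defaults chosen
 * above would end up with an EAL argv along the lines of:
 *
 *   vpp -c 0xf -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp
 *       --master-lcore 0 --socket-mem 256,256
 *
 * which is why the two vec_insert() calls below splice "-c <coremask>"
 * into argv[1..2] and "-n <nchannels>" into argv[3..4].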
*/ - if (!conf->coremask_set_manually) - { - vlib_thread_registration_t *tr; - uword *coremask = 0; - int i; - - /* main thread core */ - coremask = clib_bitmap_set (coremask, tm->main_lcore, 1); - - for (i = 0; i < vec_len (tm->registrations); i++) - { - tr = tm->registrations[i]; - coremask = clib_bitmap_or (coremask, tr->coremask); - } - - vec_insert (conf->eal_init_args, 2, 1); - conf->eal_init_args[1] = (u8 *) "-c"; - tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); - conf->eal_init_args[2] = tmp; - clib_bitmap_free (coremask); - } - - if (!conf->nchannels_set_manually) - { - vec_insert (conf->eal_init_args, 2, 3); - conf->eal_init_args[3] = (u8 *) "-n"; - tmp = format (0, "%d", conf->nchannels); - conf->eal_init_args[4] = tmp; - } - - if (no_pci == 0 && geteuid () == 0) - dpdk_bind_devices_to_uio (conf); - -#define _(x) \ - if (devconf->x == 0 && conf->default_devconf.x > 0) \ - devconf->x = conf->default_devconf.x ; - - /* *INDENT-OFF* */ - pool_foreach (devconf, conf->dev_confs, ({ - - /* default per-device config items */ - foreach_dpdk_device_config_item - - /* add DPDK EAL whitelist/blacklist entry */ - if (num_whitelisted > 0 && devconf->is_blacklisted == 0) - { - tmp = format (0, "-w%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); - vec_add1 (conf->eal_init_args, tmp); - } - else if (num_whitelisted == 0 && devconf->is_blacklisted != 0) - { - tmp = format (0, "-b%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); - vec_add1 (conf->eal_init_args, tmp); - } - })); - /* *INDENT-ON* */ - -#undef _ - - /* set master-lcore */ - tmp = format (0, "--master-lcore%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%u%c", tm->main_lcore, 0); - vec_add1 (conf->eal_init_args, tmp); - - /* set socket-mem */ - tmp = format (0, "--socket-mem%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%s%c", socket_mem, 0); - vec_add1 (conf->eal_init_args, tmp); - - /* NULL terminate the "argv" vector, in case of stupidity */ - vec_add1 (conf->eal_init_args, 0); - _vec_len (conf->eal_init_args) -= 1; - - /* Set up DPDK eal and packet mbuf pool early. */ - - log_level = (CLIB_DEBUG > 0) ? 
RTE_LOG_DEBUG : RTE_LOG_NOTICE; - - rte_set_log_level (log_level); - - vm = vlib_get_main (); - - /* make copy of args as rte_eal_init tends to mess up with arg array */ - for (i = 1; i < vec_len (conf->eal_init_args); i++) - conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", - conf->eal_init_args[i]); - - ret = - rte_eal_init (vec_len (conf->eal_init_args), - (char **) conf->eal_init_args); - - /* lazy umount hugepages */ - umount2 (DEFAULT_HUGE_DIR, MNT_DETACH); - - if (ret < 0) - return clib_error_return (0, "rte_eal_init returned %d", ret); - - /* Dump the physical memory layout prior to creating the mbuf_pool */ - fprintf (stdout, "DPDK physical memory layout:\n"); - rte_dump_physmem_layout (stdout); - - /* main thread 1st */ - error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); - if (error) - return error; - - for (i = 0; i < RTE_MAX_LCORE; i++) - { - error = vlib_buffer_pool_create (vm, conf->num_mbufs, - rte_lcore_to_socket_id (i)); - if (error) - return error; - } - -done: - return error; -} - -VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk"); - -void -dpdk_update_link_state (dpdk_device_t * xd, f64 now) -{ - vnet_main_t *vnm = vnet_get_main (); - struct rte_eth_link prev_link = xd->link; - u32 hw_flags = 0; - u8 hw_flags_chg = 0; - - /* only update link state for PMD interfaces */ - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - return; - - xd->time_last_link_update = now ? now : xd->time_last_link_update; - memset (&xd->link, 0, sizeof (xd->link)); - rte_eth_link_get_nowait (xd->device_index, &xd->link); - - if (LINK_STATE_ELOGS) - { - vlib_main_t *vm = vlib_get_main (); - ELOG_TYPE_DECLARE (e) = - { - .format = - "update-link-state: sw_if_index %d, admin_up %d," - "old link_state %d new link_state %d",.format_args = "i4i1i1i1",}; - - struct - { - u32 sw_if_index; - u8 admin_up; - u8 old_link_state; - u8 new_link_state; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->sw_if_index = xd->vlib_sw_if_index; - ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0; - ed->old_link_state = (u8) - vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); - ed->new_link_state = (u8) xd->link.link_status; - } - - if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) && - ((xd->link.link_status != 0) ^ - vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) - { - hw_flags_chg = 1; - hw_flags |= (xd->link.link_status ? 
VNET_HW_INTERFACE_FLAG_LINK_UP : 0); - } - - if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) - { - hw_flags_chg = 1; - switch (xd->link.link_duplex) - { - case ETH_LINK_HALF_DUPLEX: - hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; - break; - case ETH_LINK_FULL_DUPLEX: - hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; - break; - default: - break; - } - } - if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) - { - hw_flags_chg = 1; - switch (xd->link.link_speed) - { - case ETH_SPEED_NUM_10M: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; - break; - case ETH_SPEED_NUM_100M: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; - break; - case ETH_SPEED_NUM_1G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; - break; - case ETH_SPEED_NUM_10G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; - break; - case ETH_SPEED_NUM_40G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; - break; - case 0: - break; - default: - clib_warning ("unknown link speed %d", xd->link.link_speed); - break; - } - } - if (hw_flags_chg) - { - if (LINK_STATE_ELOGS) - { - vlib_main_t *vm = vlib_get_main (); - - ELOG_TYPE_DECLARE (e) = - { - .format = - "update-link-state: sw_if_index %d, new flags %d",.format_args - = "i4i4",}; - - struct - { - u32 sw_if_index; - u32 flags; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->sw_if_index = xd->vlib_sw_if_index; - ed->flags = hw_flags; - } - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); - } -} - -static uword -dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - clib_error_t *error; - vnet_main_t *vnm = vnet_get_main (); - dpdk_main_t *dm = &dpdk_main; - ethernet_main_t *em = ðernet_main; - dpdk_device_t *xd; - vlib_thread_main_t *tm = vlib_get_thread_main (); - int i; - - error = dpdk_lib_init (dm); - - /* - * Turn on the input node if we found some devices to drive - * and we're not running worker threads or i/o threads - */ - - if (error == 0 && vec_len (dm->devices) > 0) - { - if (tm->n_vlib_mains == 1) - vlib_node_set_state (vm, dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - else - for (i = 0; i < tm->n_vlib_mains; i++) - if (vec_len (dm->devices_by_cpu[i]) > 0) - vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - } - - if (error) - clib_error_report (error); - - tm->worker_thread_release = 1; - - f64 now = vlib_time_now (vm); - vec_foreach (xd, dm->devices) - { - dpdk_update_link_state (xd, now); - } - - { - /* - * Extra set up for bond interfaces: - * 1. Setup MACs for bond interfaces and their slave links which was set - * in dpdk_port_setup() but needs to be done again here to take effect. - * 2. Set up info for bond interface related CLI support. 
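The duplex and speed switches above fold a freshly polled `rte_eth_link` into a flags word. A standalone sketch of the same fold, using illustrative flag bits rather than VPP's `VNET_HW_INTERFACE_*` values, and assuming the DPDK 16.x-era `rte_eth_link_get_nowait()` that returns void:

```
#include <string.h>
#include <rte_ethdev.h>

/* illustrative flag bits, not VPP's VNET_HW_INTERFACE_* values */
#define LNK_UP  (1 << 0)
#define LNK_FDX (1 << 1)
#define LNK_10G (1 << 2)

static unsigned
port_link_flags (uint8_t port_id)
{
  struct rte_eth_link link;
  unsigned flags = 0;

  memset (&link, 0, sizeof (link));
  rte_eth_link_get_nowait (port_id, &link);	/* non-blocking poll */

  if (link.link_status)
    flags |= LNK_UP;
  if (link.link_duplex == ETH_LINK_FULL_DUPLEX)
    flags |= LNK_FDX;
  if (link.link_speed == ETH_SPEED_NUM_10G)
    flags |= LNK_10G;
  return flags;
}
```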
- */ - int nports = rte_eth_dev_count (); - if (nports > 0) - { - for (i = 0; i < nports; i++) - { - struct rte_eth_dev_info dev_info; - rte_eth_dev_info_get (i, &dev_info); - if (!dev_info.driver_name) - dev_info.driver_name = dev_info.pci_dev->driver->driver.name; - - ASSERT (dev_info.driver_name); - if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0) - { - u8 addr[6]; - u8 slink[16]; - int nlink = rte_eth_bond_slaves_get (i, slink, 16); - if (nlink > 0) - { - vnet_hw_interface_t *bhi; - ethernet_interface_t *bei; - int rv; - - /* Get MAC of 1st slave link */ - rte_eth_macaddr_get (slink[0], - (struct ether_addr *) addr); - /* Set MAC of bounded interface to that of 1st slave link */ - rv = - rte_eth_bond_mac_address_set (i, - (struct ether_addr *) - addr); - if (rv < 0) - clib_warning ("Failed to set MAC address"); - - /* Populate MAC of bonded interface in VPP hw tables */ - bhi = - vnet_get_hw_interface (vnm, - dm->devices[i].vlib_hw_if_index); - bei = - pool_elt_at_index (em->interfaces, bhi->hw_instance); - clib_memcpy (bhi->hw_address, addr, 6); - clib_memcpy (bei->address, addr, 6); - /* Init l3 packet size allowed on bonded interface */ - bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES; - bhi->max_l3_packet_bytes[VLIB_RX] = - bhi->max_l3_packet_bytes[VLIB_TX] = - ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t); - while (nlink >= 1) - { /* for all slave links */ - int slave = slink[--nlink]; - dpdk_device_t *sdev = &dm->devices[slave]; - vnet_hw_interface_t *shi; - vnet_sw_interface_t *ssi; - /* Add MAC to all slave links except the first one */ - if (nlink) - rte_eth_dev_mac_addr_add (slave, - (struct ether_addr *) - addr, 0); - /* Set slaves bitmap for bonded interface */ - bhi->bond_info = - clib_bitmap_set (bhi->bond_info, - sdev->vlib_hw_if_index, 1); - /* Set slave link flags on slave interface */ - shi = - vnet_get_hw_interface (vnm, sdev->vlib_hw_if_index); - ssi = - vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index); - shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; - ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; - - /* Set l3 packet size allowed as the lowest of slave */ - if (bhi->max_l3_packet_bytes[VLIB_RX] > - shi->max_l3_packet_bytes[VLIB_RX]) - bhi->max_l3_packet_bytes[VLIB_RX] = - bhi->max_l3_packet_bytes[VLIB_TX] = - shi->max_l3_packet_bytes[VLIB_RX]; - - /* Set max packet size allowed as the lowest of slave */ - if (bhi->max_packet_bytes > shi->max_packet_bytes) - bhi->max_packet_bytes = shi->max_packet_bytes; - } - } - } - } - } - } - - while (1) - { - /* - * check each time through the loop in case intervals are changed - */ - f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ? 
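The bond fix-up above enumerates slave ports and pushes the first slave's MAC onto the bonded port. A compact sketch of just that step, with assumed DPDK 16.x-era bonding signatures:

```
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_eth_bond.h>

/* Copy the first slave's MAC onto the bonded port, as done above
 * (assumed DPDK 16.x-era bonding signatures). */
static int
bond_mac_fixup (uint8_t bond_port)
{
  uint8_t slaves[16];
  struct ether_addr mac;
  int n = rte_eth_bond_slaves_get (bond_port, slaves, 16);

  if (n <= 0)
    return -1;			/* no slaves attached */

  rte_eth_macaddr_get (slaves[0], &mac);	/* MAC of slave 0 */
  return rte_eth_bond_mac_address_set (bond_port, &mac);
}
```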
- dm->link_state_poll_interval : dm->stat_poll_interval; - - vlib_process_wait_for_event_or_clock (vm, min_wait); - - if (dm->admin_up_down_in_progress) - /* skip the poll if an admin up down is in progress (on any interface) */ - continue; - - vec_foreach (xd, dm->devices) - { - f64 now = vlib_time_now (vm); - if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) - dpdk_update_counters (xd, now); - if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) - dpdk_update_link_state (xd, now); - - } - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_process_node,static) = { - .function = dpdk_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "dpdk-process", - .process_log2_n_stack_bytes = 17, -}; -/* *INDENT-ON* */ - -int -dpdk_set_stat_poll_interval (f64 interval) -{ - if (interval < DPDK_MIN_STATS_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); - - dpdk_main.stat_poll_interval = interval; - - return 0; -} - -int -dpdk_set_link_state_poll_interval (f64 interval) -{ - if (interval < DPDK_MIN_LINK_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); - - dpdk_main.link_state_poll_interval = interval; - - return 0; -} - -clib_error_t * -dpdk_init (vlib_main_t * vm) -{ - dpdk_main_t *dm = &dpdk_main; - vlib_node_t *ei; - clib_error_t *error = 0; - vlib_thread_main_t *tm = vlib_get_thread_main (); - - /* verify that structs are cacheline aligned */ - STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0, - "Cache line marker must be 1st element in dpdk_device_t"); - STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) == - CLIB_CACHE_LINE_BYTES, - "Data in cache line 0 is bigger than cache line size"); - STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, - "Cache line marker must be 1st element in frame_queue_trace_t"); - - dm->vlib_main = vm; - dm->vnet_main = vnet_get_main (); - dm->conf = &dpdk_config_main; - - ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); - if (ei == 0) - return clib_error_return (0, "ethernet-input node AWOL"); - - dm->ethernet_input_node_index = ei->index; - - dm->conf->nchannels = 4; - dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; - vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); - - dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); - dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); - dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword)); - - /* $$$ use n_thread_stacks since it's known-good at this point */ - vec_validate (dm->recycle, tm->n_thread_stacks - 1); - - /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ - dm->buffer_flags_template = - (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID - | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); - - dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; - dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; - - /* init CLI */ - if ((error = vlib_call_init_function (vm, dpdk_cli_init))) - return error; - - return error; -} - -VLIB_INIT_FUNCTION (dpdk_init); - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/cli.c b/src/vnet/devices/dpdk/ipsec/cli.c deleted file mode 100644 index f9d3a5d0..00000000 --- a/src/vnet/devices/dpdk/ipsec/cli.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. 
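The `STATIC_ASSERT` guards in `dpdk_init()` above pin the hot and cold halves of `dpdk_device_t` to cache-line boundaries. The same compile-time check can be written in plain C11, as in this sketch (illustrative struct; the zero-length aligned markers are the GCC extension VPP itself uses, and the 64-byte constant stands in for `CLIB_CACHE_LINE_BYTES`):

```
#include <stddef.h>

#define CACHE_LINE_BYTES 64	/* assumed; VPP uses CLIB_CACHE_LINE_BYTES */

typedef struct
{
  char cacheline0[0] __attribute__ ((aligned (CACHE_LINE_BYTES)));
  unsigned long hot_counter;	/* per-packet fields ... */
  char cacheline1[0] __attribute__ ((aligned (CACHE_LINE_BYTES)));
  unsigned long cold_config;	/* slow-path fields ... */
} demo_device_t;

/* the same guards dpdk_init() applies to dpdk_device_t */
_Static_assert (offsetof (demo_device_t, cacheline0) == 0,
		"cacheline marker must be first");
_Static_assert (offsetof (demo_device_t, cacheline1) == CACHE_LINE_BYTES,
		"cacheline 0 overflowed");
```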
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -static void -dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) -{ - dpdk_config_main_t *conf = &dpdk_config_main; - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 i, skip_master; - - if (!conf->cryptodev) - { - vlib_cli_output (vm, "DPDK Cryptodev support is disabled\n"); - return; - } - - if (detail_display) - vlib_cli_output (vm, "worker\t%10s\t%15s\tdir\tdev\tqp\n", - "cipher", "auth"); - else - vlib_cli_output (vm, "worker\tcrypto device id(type)\n"); - - skip_master = vlib_num_workers () > 0; - - for (i = 0; i < tm->n_vlib_mains; i++) - { - uword key, data; - u32 cpu_index = vlib_mains[i]->cpu_index; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - u8 *s = 0; - - if (skip_master) - { - skip_master = 0; - continue; - } - - if (!detail_display) - { - i32 last_cdev = -1; - crypto_qp_data_t *qpd; - - s = format (s, "%u\t", cpu_index); - - /* *INDENT-OFF* */ - vec_foreach (qpd, cwm->qp_data) - { - u32 dev_id = qpd->dev_id; - - if ((u16) last_cdev != dev_id) - { - struct rte_cryptodev_info cdev_info; - - rte_cryptodev_info_get (dev_id, &cdev_info); - - s = format(s, "%u(%s)\t", dev_id, cdev_info.feature_flags & - RTE_CRYPTODEV_FF_HW_ACCELERATED ? "HW" : "SW"); - } - last_cdev = dev_id; - } - /* *INDENT-ON* */ - vlib_cli_output (vm, "%s", s); - } - else - { - char cipher_str[15], auth_str[15]; - struct rte_cryptodev_capabilities cap; - crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; - /* *INDENT-OFF* */ - hash_foreach (key, data, cwm->algo_qp_map, - ({ - cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; - cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER; - cap.sym.cipher.algo = p_key->cipher_algo; - check_algo_is_supported (&cap, cipher_str); - cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; - cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AUTH; - cap.sym.auth.algo = p_key->auth_algo; - check_algo_is_supported (&cap, auth_str); - vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n", - vlib_mains[i]->cpu_index, cipher_str, auth_str, - p_key->is_outbound ? 
"out" : "in", - cwm->qp_data[data].dev_id, - cwm->qp_data[data].qp_id); - })); - /* *INDENT-ON* */ - } - } -} - -static clib_error_t * -lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u16 detail = 0; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "verbose")) - detail = 1; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - dpdk_ipsec_show_mapping (vm, detail); - -done: - unformat_free (line_input); - - return error; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (lcore_cryptodev_map, static) = { - .path = "show crypto device mapping", - .short_help = - "show cryptodev device mapping ", - .function = lcore_cryptodev_map_fn, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/crypto_node.c b/src/vnet/devices/dpdk/ipsec/crypto_node.c deleted file mode 100644 index e8fef235..00000000 --- a/src/vnet/devices/dpdk/ipsec/crypto_node.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - *------------------------------------------------------------------ - * crypto_node.c - DPDK Cryptodev input node - * - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include -#include -#include -#include - -#include -#include -#include - -#define foreach_dpdk_crypto_input_next \ - _(DROP, "error-drop") \ - _(ENCRYPT_POST, "dpdk-esp-encrypt-post") \ - _(DECRYPT_POST, "dpdk-esp-decrypt-post") - -typedef enum -{ -#define _(f,s) DPDK_CRYPTO_INPUT_NEXT_##f, - foreach_dpdk_crypto_input_next -#undef _ - DPDK_CRYPTO_INPUT_N_NEXT, -} dpdk_crypto_input_next_t; - -#define foreach_dpdk_crypto_input_error \ - _(DQ_COPS, "Crypto ops dequeued") \ - _(COP_FAILED, "Crypto op failed") - -typedef enum -{ -#define _(f,s) DPDK_CRYPTO_INPUT_ERROR_##f, - foreach_dpdk_crypto_input_error -#undef _ - DPDK_CRYPTO_INPUT_N_ERROR, -} dpdk_crypto_input_error_t; - -static char *dpdk_crypto_input_error_strings[] = { -#define _(n, s) s, - foreach_dpdk_crypto_input_error -#undef _ -}; - -vlib_node_registration_t dpdk_crypto_input_node; - -typedef struct -{ - u32 cdev; - u32 qp; - u32 status; - u32 sa_idx; - u32 next_index; -} dpdk_crypto_input_trace_t; - -static u8 * -format_dpdk_crypto_input_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - dpdk_crypto_input_trace_t *t = va_arg (*args, dpdk_crypto_input_trace_t *); - - s = format (s, "dpdk_crypto: cryptodev-id %u queue-pair %u next-index %d", - t->cdev, t->qp, t->next_index); - - s = format (s, " status %u sa-idx %u\n", t->status, t->sa_idx); - - return s; -} - -static_always_inline u32 -dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, - crypto_qp_data_t * qpd) -{ - u32 n_deq, *to_next = 0, next_index, n_cops, def_next_index; - struct rte_crypto_op **cops = qpd->cops; - - if (qpd->inflights == 0) - return 0; - - if (qpd->is_outbound) - def_next_index = DPDK_CRYPTO_INPUT_NEXT_ENCRYPT_POST; - else - def_next_index = DPDK_CRYPTO_INPUT_NEXT_DECRYPT_POST; - - n_cops = rte_cryptodev_dequeue_burst (qpd->dev_id, qpd->qp_id, - cops, VLIB_FRAME_SIZE); - n_deq = n_cops; - next_index = def_next_index; - - qpd->inflights -= n_cops; - ASSERT (qpd->inflights >= 0); - - while (n_cops > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_cops > 0 && n_left_to_next > 0) - { - u32 bi0, next0; - vlib_buffer_t *b0 = 0; - struct rte_crypto_op *cop; - struct rte_crypto_sym_op *sym_cop; - - cop = cops[0]; - cops += 1; - n_cops -= 1; - n_left_to_next -= 1; - - next0 = def_next_index; - - if (PREDICT_FALSE (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) - { - next0 = DPDK_CRYPTO_INPUT_NEXT_DROP; - vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, - DPDK_CRYPTO_INPUT_ERROR_COP_FAILED, - 1); - } - cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; - - sym_cop = (struct rte_crypto_sym_op *) (cop + 1); - b0 = vlib_buffer_from_rte_mbuf (sym_cop->m_src); - bi0 = vlib_get_buffer_index (vm, b0); - - to_next[0] = bi0; - to_next += 1; - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - vlib_trace_next_frame (vm, node, next0); - dpdk_crypto_input_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->cdev = qpd->dev_id; - tr->qp = qpd->qp_id; - tr->status = cop->status; - tr->next_index = next0; - tr->sa_idx = vnet_buffer (b0)->ipsec.sad_index; - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - crypto_free_cop (qpd, qpd->cops, n_deq); - - 
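Stripped of the vlib frame plumbing, `dpdk_crypto_dequeue()` here reduces to a dequeue burst plus a per-op status check. A minimal sketch of that core loop; the burst size and the recycling policy are illustrative:

```
#include <rte_cryptodev.h>

#define BURST 32		/* illustrative; the node uses VLIB_FRAME_SIZE */

/* Drain one queue pair and count failed ops -- the skeleton of
 * dpdk_crypto_dequeue() without the vlib frame plumbing. */
static unsigned
drain_qp (uint8_t dev_id, uint16_t qp_id)
{
  struct rte_crypto_op *ops[BURST];
  unsigned failed = 0;
  uint16_t i, n = rte_cryptodev_dequeue_burst (dev_id, qp_id, ops, BURST);

  for (i = 0; i < n; i++)
    {
      if (ops[i]->status != RTE_CRYPTO_OP_STATUS_SUCCESS)
	failed++;		/* the node routes these to error-drop */
      ops[i]->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;	/* recycle */
    }
  return n - failed;
}
```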
vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, - DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq); - return n_deq; -} - -static uword -dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - u32 cpu_index = os_get_cpu_number (); - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - crypto_qp_data_t *qpd; - u32 n_deq = 0; - - /* *INDENT-OFF* */ - vec_foreach (qpd, cwm->qp_data) - n_deq += dpdk_crypto_dequeue(vm, node, qpd); - /* *INDENT-ON* */ - - return n_deq; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_crypto_input_node) = -{ - .function = dpdk_crypto_input_fn, - .name = "dpdk-crypto-input", - .format_trace = format_dpdk_crypto_input_trace, - .type = VLIB_NODE_TYPE_INPUT, - .state = VLIB_NODE_STATE_DISABLED, - .n_errors = DPDK_CRYPTO_INPUT_N_ERROR, - .error_strings = dpdk_crypto_input_error_strings, - .n_next_nodes = DPDK_CRYPTO_INPUT_N_NEXT, - .next_nodes = - { -#define _(s,n) [DPDK_CRYPTO_INPUT_NEXT_##s] = n, - foreach_dpdk_crypto_input_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_crypto_input_node, dpdk_crypto_input_fn) -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/dir.dox b/src/vnet/devices/dpdk/ipsec/dir.dox deleted file mode 100644 index ffebfc4d..00000000 --- a/src/vnet/devices/dpdk/ipsec/dir.dox +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - @dir vnet/vnet/devices/dpdk/ipsec - @brief IPSec ESP encrypt/decrypt using DPDK Cryptodev API -*/ diff --git a/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md b/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md deleted file mode 100644 index fed2fe0e..00000000 --- a/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md +++ /dev/null @@ -1,86 +0,0 @@ -# VPP IPSec implementation using DPDK Cryptodev API {#dpdk_crypto_ipsec_doc} - -This document is meant to contain all related information about implementation and usability. - - -## VPP IPsec with DPDK Cryptodev - -DPDK Cryptodev is an asynchronous crypto API that supports both Hardware and Software implementations (for more details refer to [DPDK Cryptography Device Library documentation](http://dpdk.org/doc/guides/prog_guide/cryptodev_lib.html)). - -When DPDK support is enabled and there are enough Cryptodev resources for all workers, the node graph is reconfigured by adding and changing default next nodes. - -The following nodes are added: -* dpdk-crypto-input : polling input node, basically dequeuing from crypto devices. -* dpdk-esp-encrypt : internal node. -* dpdk-esp-decrypt : internal node. -* dpdk-esp-encrypt-post : internal node. -* dpdk-esp-decrypt-post : internal node. 
-
-Set new default next nodes:
-* for esp encryption: esp-encrypt -> dpdk-esp-encrypt
-* for esp decryption: esp-decrypt -> dpdk-esp-decrypt
-
-
-### How to enable VPP IPSec with DPDK Cryptodev support
-
-DPDK Cryptodev is supported in DPDK-enabled VPP.
-By default, only HW Cryptodev is supported, but it needs to be explicitly enabled with the following config option:
-
-```
-dpdk {
-  enable-cryptodev
-}
-```
-
-To enable SW Cryptodev support (AESNI-MB-PMD and GCM-PMD), we need the following env option:
-
-    vpp_uses_dpdk_cryptodev_sw=yes
-
-A couple of ways to achieve this:
-* uncomment/add it in the platforms config (i.e. build-data/platforms/vpp.mk)
-* set the option when building vpp (i.e. make vpp_uses_dpdk_cryptodev_sw=yes build-release)
-
-Enabling SW Cryptodev support means that you need to pre-build the crypto libraries required by those SW Cryptodev PMDs.
-
-
-### Crypto Resources allocation
-
-VPP allocates crypto resources based on a best-effort approach:
-* first allocate Hardware crypto resources, then Software.
-* if there are not enough crypto resources for all workers, the graph node is not modified, therefore the default VPP IPsec implementation based on OpenSSL is used. The following message is displayed:
-
-    0: dpdk_ipsec_init: not enough cryptodevs for ipsec
-
-
-### Configuration example
-
-To enable DPDK Cryptodev the user just needs to provide the startup.conf option
-as mentioned previously.
-
-Example startup.conf:
-
-```
-dpdk {
-  socket-mem 1024,1024
-  num-mbufs 131072
-  dev 0000:81:00.0
-  dev 0000:81:00.1
-  enable-cryptodev
-  dev 0000:85:01.0
-  dev 0000:85:01.1
-  vdev cryptodev_aesni_mb_pmd,socket_id=1
-  vdev cryptodev_aesni_mb_pmd,socket_id=1
-}
-```
-
-In the above configuration:
-* 0000:85:01.0 and 0000:85:01.1 are crypto BDFs; they require the same driver binding as DPDK Ethernet devices, but they do not support any extra configuration options.
-* Two AESNI-MB Software Cryptodev PMDs are created in NUMA node 1.
-
-For further details refer to the [DPDK Crypto Device Driver documentation](http://dpdk.org/doc/guides/cryptodevs/index.html).
-
-### Operational data
-
-The following CLI command displays the Cryptodev/Worker mapping:
-
-    show crypto device mapping [verbose]
diff --git a/src/vnet/devices/dpdk/ipsec/esp.h b/src/vnet/devices/dpdk/ipsec/esp.h
deleted file mode 100644
index d0b27618..00000000
--- a/src/vnet/devices/dpdk/ipsec/esp.h
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2016 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ -#ifndef __DPDK_ESP_H__ -#define __DPDK_ESP_H__ - -#include -#include -#include - -typedef struct -{ - enum rte_crypto_cipher_algorithm algo; - u8 key_len; - u8 iv_len; -} dpdk_esp_crypto_alg_t; - -typedef struct -{ - enum rte_crypto_auth_algorithm algo; - u8 trunc_size; -} dpdk_esp_integ_alg_t; - -typedef struct -{ - dpdk_esp_crypto_alg_t *esp_crypto_algs; - dpdk_esp_integ_alg_t *esp_integ_algs; -} dpdk_esp_main_t; - -dpdk_esp_main_t dpdk_esp_main; - -static_always_inline void -dpdk_esp_init () -{ - dpdk_esp_main_t *em = &dpdk_esp_main; - dpdk_esp_integ_alg_t *i; - dpdk_esp_crypto_alg_t *c; - - vec_validate (em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1); - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128]; - c->algo = RTE_CRYPTO_CIPHER_AES_CBC; - c->key_len = 16; - c->iv_len = 16; - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192]; - c->algo = RTE_CRYPTO_CIPHER_AES_CBC; - c->key_len = 24; - c->iv_len = 16; - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256]; - c->algo = RTE_CRYPTO_CIPHER_AES_CBC; - c->key_len = 32; - c->iv_len = 16; - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_GCM_128]; - c->algo = RTE_CRYPTO_CIPHER_AES_GCM; - c->key_len = 16; - c->iv_len = 8; - - vec_validate (em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1); - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96]; - i->algo = RTE_CRYPTO_AUTH_SHA1_HMAC; - i->trunc_size = 12; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96]; - i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - i->trunc_size = 12; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128]; - i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - i->trunc_size = 16; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192]; - i->algo = RTE_CRYPTO_AUTH_SHA384_HMAC; - i->trunc_size = 24; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256]; - i->algo = RTE_CRYPTO_AUTH_SHA512_HMAC; - i->trunc_size = 32; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_AES_GCM_128]; - i->algo = RTE_CRYPTO_AUTH_AES_GCM; - i->trunc_size = 16; -} - -static_always_inline int -translate_crypto_algo (ipsec_crypto_alg_t crypto_algo, - struct rte_crypto_sym_xform *cipher_xform) -{ - switch (crypto_algo) - { - case IPSEC_CRYPTO_ALG_NONE: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL; - break; - case IPSEC_CRYPTO_ALG_AES_CBC_128: - case IPSEC_CRYPTO_ALG_AES_CBC_192: - case IPSEC_CRYPTO_ALG_AES_CBC_256: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; - break; - case IPSEC_CRYPTO_ALG_AES_GCM_128: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM; - break; - default: - return -1; - } - - cipher_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; - - return 0; -} - -static_always_inline int -translate_integ_algo (ipsec_integ_alg_t integ_alg, - struct rte_crypto_sym_xform *auth_xform, int use_esn) -{ - switch (integ_alg) - { - case IPSEC_INTEG_ALG_NONE: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_NULL; - auth_xform->auth.digest_length = 0; - break; - case IPSEC_INTEG_ALG_SHA1_96: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC; - auth_xform->auth.digest_length = 12; - break; - case IPSEC_INTEG_ALG_SHA_256_96: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - auth_xform->auth.digest_length = 12; - break; - case IPSEC_INTEG_ALG_SHA_256_128: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - auth_xform->auth.digest_length = 16; - break; - case IPSEC_INTEG_ALG_SHA_384_192: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA384_HMAC; - auth_xform->auth.digest_length = 24; - break; - case IPSEC_INTEG_ALG_SHA_512_256: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA512_HMAC; - 
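`translate_crypto_algo()` and `translate_integ_algo()` above fill two `rte_crypto_sym_xform` structs that `create_sym_sess()` then chains in direction-dependent order. A sketch of the outbound (encrypt-then-authenticate) chaining, assuming the 16.x-era symmetric crypto API; the inbound path links the two the other way round:

```
#include <stddef.h>
#include <rte_crypto.h>

/* Chain the two xforms for an outbound SA: encrypt first, then
 * authenticate (assumed 16.x-era symmetric crypto API). */
static struct rte_crypto_sym_xform *
chain_outbound (struct rte_crypto_sym_xform *cipher,
		struct rte_crypto_sym_xform *auth)
{
  cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
  cipher->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
  auth->type = RTE_CRYPTO_SYM_XFORM_AUTH;
  auth->auth.op = RTE_CRYPTO_AUTH_OP_GENERATE;
  cipher->next = auth;
  auth->next = NULL;
  return cipher;		/* head of the chain passed to session create */
}
```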
auth_xform->auth.digest_length = 32; - break; - case IPSEC_INTEG_ALG_AES_GCM_128: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM; - auth_xform->auth.digest_length = 16; - auth_xform->auth.add_auth_data_length = use_esn ? 12 : 8; - break; - default: - return -1; - } - - auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; - - return 0; -} - -static_always_inline int -create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, - u8 is_outbound) -{ - u32 cpu_index = os_get_cpu_number (); - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - struct rte_crypto_sym_xform cipher_xform = { 0 }; - struct rte_crypto_sym_xform auth_xform = { 0 }; - struct rte_crypto_sym_xform *xfs; - uword key = 0, *data; - crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; - - if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - sa->crypto_key_len -= 4; - clib_memcpy (&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4); - } - else - { - u32 seed = (u32) clib_cpu_time_now (); - sa->salt = random_u32 (&seed); - } - - cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; - cipher_xform.cipher.key.data = sa->crypto_key; - cipher_xform.cipher.key.length = sa->crypto_key_len; - - auth_xform.type = RTE_CRYPTO_SYM_XFORM_AUTH; - auth_xform.auth.key.data = sa->integ_key; - auth_xform.auth.key.length = sa->integ_key_len; - - if (translate_crypto_algo (sa->crypto_alg, &cipher_xform) < 0) - return -1; - p_key->cipher_algo = cipher_xform.cipher.algo; - - if (translate_integ_algo (sa->integ_alg, &auth_xform, sa->use_esn) < 0) - return -1; - p_key->auth_algo = auth_xform.auth.algo; - - if (is_outbound) - { - cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; - auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; - cipher_xform.next = &auth_xform; - xfs = &cipher_xform; - } - else - { - cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; - auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; - auth_xform.next = &cipher_xform; - xfs = &auth_xform; - } - - p_key->is_outbound = is_outbound; - - data = hash_get (cwm->algo_qp_map, key); - if (!data) - return -1; - - sa_sess->sess = - rte_cryptodev_sym_session_create (cwm->qp_data[*data].dev_id, xfs); - - if (!sa_sess->sess) - return -1; - - sa_sess->qp_index = (u8) * data; - - return 0; -} - -#endif /* __DPDK_ESP_H__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/esp_decrypt.c b/src/vnet/devices/dpdk/ipsec/esp_decrypt.c deleted file mode 100644 index 76007609..00000000 --- a/src/vnet/devices/dpdk/ipsec/esp_decrypt.c +++ /dev/null @@ -1,594 +0,0 @@ -/* - * esp_decrypt.c : IPSec ESP Decrypt node using DPDK Cryptodev - * - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include - -#define foreach_esp_decrypt_next \ -_(DROP, "error-drop") \ -_(IP4_INPUT, "ip4-input") \ -_(IP6_INPUT, "ip6-input") - -#define _(v, s) ESP_DECRYPT_NEXT_##v, -typedef enum { - foreach_esp_decrypt_next -#undef _ - ESP_DECRYPT_N_NEXT, -} esp_decrypt_next_t; - -#define foreach_esp_decrypt_error \ - _(RX_PKTS, "ESP pkts received") \ - _(DECRYPTION_FAILED, "ESP decryption failed") \ - _(REPLAY, "SA replayed packet") \ - _(NOT_IP, "Not IP packet (dropped)") \ - _(ENQ_FAIL, "Enqueue failed (buffer full)") \ - _(NO_CRYPTODEV, "Cryptodev not configured") \ - _(BAD_LEN, "Invalid ciphertext length") \ - _(UNSUPPORTED, "Cipher/Auth not supported") - - -typedef enum { -#define _(sym,str) ESP_DECRYPT_ERROR_##sym, - foreach_esp_decrypt_error -#undef _ - ESP_DECRYPT_N_ERROR, -} esp_decrypt_error_t; - -static char * esp_decrypt_error_strings[] = { -#define _(sym,string) string, - foreach_esp_decrypt_error -#undef _ -}; - -vlib_node_registration_t dpdk_esp_decrypt_node; - -typedef struct { - ipsec_crypto_alg_t crypto_alg; - ipsec_integ_alg_t integ_alg; -} esp_decrypt_trace_t; - -/* packet trace format function */ -static u8 * format_esp_decrypt_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *); - - s = format (s, "esp: crypto %U integrity %U", - format_ipsec_crypto_alg, t->crypto_alg, - format_ipsec_integ_alg, t->integ_alg); - return s; -} - -static uword -dpdk_esp_decrypt_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next, next_index; - ipsec_main_t *im = &ipsec_main; - u32 cpu_index = os_get_cpu_number(); - dpdk_crypto_main_t * dcm = &dpdk_crypto_main; - dpdk_esp_main_t * em = &dpdk_esp_main; - u32 i; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - if (PREDICT_FALSE(!dcm->workers_main)) - { - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_NO_CRYPTODEV, n_left_from); - vlib_buffer_free(vm, from, n_left_from); - return n_left_from; - } - - crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index); - u32 n_qps = vec_len(cwm->qp_data); - struct rte_crypto_op ** cops_to_enq[n_qps]; - u32 n_cop_qp[n_qps], * bi_to_enq[n_qps]; - - for (i = 0; i < n_qps; i++) - { - bi_to_enq[i] = cwm->qp_data[i].bi; - cops_to_enq[i] = cwm->qp_data[i].cops; - } - - memset(n_cop_qp, 0, n_qps * sizeof(u32)); - - crypto_alloc_cops(); - - next_index = ESP_DECRYPT_NEXT_DROP; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, sa_index0 = ~0, seq, icv_size, iv_size; - vlib_buffer_t * b0; - esp_header_t * esp0; - ipsec_sa_t * sa0; - struct rte_mbuf * mb0 = 0; - const int BLOCK_SIZE = 16; - crypto_sa_session_t * sa_sess; - void * sess; - u16 qp_index; - struct rte_crypto_op * cop = 0; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - - b0 = vlib_get_buffer (vm, bi0); - esp0 = vlib_buffer_get_current (b0); - - sa_index0 = vnet_buffer(b0)->ipsec.sad_index; - sa0 = pool_elt_at_index (im->sad, sa_index0); - - seq = clib_host_to_net_u32(esp0->seq); - - /* anti-replay check */ - if (sa0->use_anti_replay) - { - int rv = 0; - - if (PREDICT_TRUE(sa0->use_esn)) - rv = 
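The anti-replay check at this point in the decrypt node defers to `esp_replay_check()`/`esp_replay_check_esn()`. As a simplified, non-ESN model of what such a check does, here is a self-contained 64-bit sliding-window version:

```
#include <stdint.h>

#define REPLAY_WINDOW 64

typedef struct
{
  uint32_t last_seq;		/* highest sequence number accepted */
  uint64_t window;		/* bit i set => last_seq - i was seen */
} replay_state_t;

/* Nonzero if `seq' is a replay; simplified non-ESN model of
 * esp_replay_check(). */
static int
replay_check (const replay_state_t * r, uint32_t seq)
{
  uint32_t diff;

  if (seq > r->last_seq)
    return 0;			/* new high-water mark: fresh */
  diff = r->last_seq - seq;
  if (diff >= REPLAY_WINDOW)
    return 1;			/* older than the window: reject */
  return (r->window >> diff) & 1;	/* already seen inside window? */
}
```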
esp_replay_check_esn(sa0, seq); - else - rv = esp_replay_check(sa0, seq); - - if (PREDICT_FALSE(rv)) - { - clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_REPLAY, 1); - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - } - - sa0->total_data_size += b0->current_length; - - if (PREDICT_FALSE(sa0->integ_alg == IPSEC_INTEG_ALG_NONE) || - PREDICT_FALSE(sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE)) - { - clib_warning ("SPI %u : only cipher + auth supported", sa0->spi); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_UNSUPPORTED, 1); - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - - sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0); - - if (PREDICT_FALSE(!sa_sess->sess)) - { - int ret = create_sym_sess(sa0, sa_sess, 0); - - if (PREDICT_FALSE (ret)) - { - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - } - - sess = sa_sess->sess; - qp_index = sa_sess->qp_index; - - ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); - cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); - ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); - - cops_to_enq[qp_index][0] = cop; - cops_to_enq[qp_index] += 1; - n_cop_qp[qp_index] += 1; - bi_to_enq[qp_index][0] = bi0; - bi_to_enq[qp_index] += 1; - - rte_crypto_op_attach_sym_session(cop, sess); - - icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; - iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; - - /* Convert vlib buffer to mbuf */ - mb0 = rte_mbuf_from_vlib_buffer(b0); - mb0->data_len = b0->current_length; - mb0->pkt_len = b0->current_length; - mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; - - /* Outer IP header has already been stripped */ - u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) - - iv_size - icv_size; - - if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0)) - { - clib_warning ("payload %u not multiple of %d\n", - payload_len, BLOCK_SIZE); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_BAD_LEN, 1); - vec_add (vec_elt (cwm->qp_data, qp_index).free_cops, &cop, 1); - bi_to_enq[qp_index] -= 1; - cops_to_enq[qp_index] -= 1; - n_cop_qp[qp_index] -= 1; - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - - struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1); - - sym_cop->m_src = mb0; - sym_cop->cipher.data.offset = sizeof (esp_header_t) + iv_size; - sym_cop->cipher.data.length = payload_len; - - u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t)); - dpdk_cop_priv_t * priv = (dpdk_cop_priv_t *)(sym_cop + 1); - - if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - dpdk_gcm_cnt_blk *icb = &priv->cb; - icb->salt = sa0->salt; - clib_memcpy(icb->iv, iv, 8); - icb->cnt = clib_host_to_net_u32(1); - sym_cop->cipher.iv.data = (u8 *)icb; - sym_cop->cipher.iv.phys_addr = cop->phys_addr + - (uintptr_t)icb - (uintptr_t)cop; - sym_cop->cipher.iv.length = 16; - - u8 *aad = priv->aad; - clib_memcpy(aad, iv - sizeof(esp_header_t), 8); - sym_cop->auth.aad.data = aad; - sym_cop->auth.aad.phys_addr = cop->phys_addr + - (uintptr_t)aad - (uintptr_t)cop; - if (sa0->use_esn) - { - *((u32*)&aad[8]) = sa0->seq_hi; - sym_cop->auth.aad.length = 12; - } - else - { - sym_cop->auth.aad.length = 8; - } - - sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, - 
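For AES-GCM the code above assembles a 16-byte counter block from the SA salt, the 8-byte wire IV and a big-endian block counter of 1. A standalone sketch of that assembly; the struct layout mirrors what `dpdk_cop_priv_t`'s `dpdk_gcm_cnt_blk` is assumed to look like:

```
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* assumed layout of the per-op dpdk_gcm_cnt_blk: 4 + 8 + 4 = 16 bytes */
typedef struct
{
  uint32_t salt;		/* last 4 bytes of the configured key */
  uint8_t iv[8];		/* 8-byte IV carried in the ESP payload */
  uint32_t cnt;			/* block counter, big-endian 1 */
} __attribute__ ((packed)) gcm_cnt_blk_t;

static void
fill_gcm_counter_block (gcm_cnt_blk_t * icb, uint32_t salt,
			const uint8_t * wire_iv)
{
  icb->salt = salt;
  memcpy (icb->iv, wire_iv, 8);
  icb->cnt = htonl (1);
}
```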
rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.length = icv_size; - - } - else - { - sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(mb0, void*, - sizeof (esp_header_t)); - sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - sizeof (esp_header_t)); - sym_cop->cipher.iv.length = iv_size; - - if (sa0->use_esn) - { - dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1); - u8* payload_end = rte_pktmbuf_mtod_offset( - mb0, u8*, sizeof(esp_header_t) + iv_size + payload_len); - - clib_memcpy (priv->icv, payload_end, icv_size); - *((u32*) payload_end) = sa0->seq_hi; - sym_cop->auth.data.offset = 0; - sym_cop->auth.data.length = sizeof(esp_header_t) + iv_size - + payload_len + sizeof(sa0->seq_hi); - sym_cop->auth.digest.data = priv->icv; - sym_cop->auth.digest.phys_addr = cop->phys_addr - + (uintptr_t) priv->icv - (uintptr_t) cop; - sym_cop->auth.digest.length = icv_size; - } - else - { - sym_cop->auth.data.offset = 0; - sym_cop->auth.data.length = sizeof(esp_header_t) + - iv_size + payload_len; - - sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.length = icv_size; - } - } - -trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; - } - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_RX_PKTS, - from_frame->n_vectors); - crypto_qp_data_t *qpd; - /* *INDENT-OFF* */ - vec_foreach_index (i, cwm->qp_data) - { - u32 enq; - - qpd = vec_elt_at_index(cwm->qp_data, i); - enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, - qpd->cops, n_cop_qp[i]); - qpd->inflights += enq; - - if (PREDICT_FALSE(enq < n_cop_qp[i])) - { - crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); - vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); - - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_ENQ_FAIL, - n_cop_qp[i] - enq); - } - } - /* *INDENT-ON* */ - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_decrypt_node) = { - .function = dpdk_esp_decrypt_node_fn, - .name = "dpdk-esp-decrypt", - .vector_size = sizeof (u32), - .format_trace = format_esp_decrypt_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(esp_decrypt_error_strings), - .error_strings = esp_decrypt_error_strings, - - .n_next_nodes = ESP_DECRYPT_N_NEXT, - .next_nodes = { -#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, - foreach_esp_decrypt_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_node, dpdk_esp_decrypt_node_fn) - -/* - * Decrypt Post Node - */ - -#define foreach_esp_decrypt_post_error \ - _(PKTS, "ESP post pkts") - -typedef enum { -#define _(sym,str) ESP_DECRYPT_POST_ERROR_##sym, - foreach_esp_decrypt_post_error -#undef _ - ESP_DECRYPT_POST_N_ERROR, -} esp_decrypt_post_error_t; - -static char * esp_decrypt_post_error_strings[] = { -#define _(sym,string) string, - foreach_esp_decrypt_post_error -#undef _ -}; - -vlib_node_registration_t dpdk_esp_decrypt_post_node; - -static u8 * format_esp_decrypt_post_trace (u8 * s, va_list * args) -{ - return s; -} - -static uword 
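Both ESP nodes end the same way: enqueue each queue pair's staged burst and reclaim whatever the device refused, which is what feeds the `ENQ_FAIL` counter. The essential pattern, minus the vlib bookkeeping:

```
#include <rte_cryptodev.h>

/* Enqueue a staged burst; the remainder (n - enq) is what the node
 * frees and counts as ENQ_FAIL. */
static uint16_t
enqueue_with_cleanup (uint8_t dev_id, uint16_t qp_id,
		      struct rte_crypto_op **ops, uint16_t n)
{
  uint16_t enq = rte_cryptodev_enqueue_burst (dev_id, qp_id, ops, n);

  /* ops[enq .. n-1] were refused: recycle them and drop their buffers */
  return (uint16_t) (n - enq);
}
```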
-dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next = 0, next_index; - ipsec_sa_t * sa0; - u32 sa_index0 = ~0; - ipsec_main_t *im = &ipsec_main; - dpdk_esp_main_t *em = &dpdk_esp_main; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - esp_footer_t * f0; - u32 bi0, next0, icv_size, iv_size; - vlib_buffer_t * b0 = 0; - ip4_header_t *ih4 = 0, *oh4 = 0; - ip6_header_t *ih6 = 0, *oh6 = 0; - u8 tunnel_mode = 1; - u8 transport_ip6 = 0; - - next0 = ESP_DECRYPT_NEXT_DROP; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - sa_index0 = vnet_buffer(b0)->ipsec.sad_index; - sa0 = pool_elt_at_index (im->sad, sa_index0); - - to_next[0] = bi0; - to_next += 1; - - icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; - iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; - - if (sa0->use_anti_replay) - { - esp_header_t * esp0 = vlib_buffer_get_current (b0); - u32 seq; - seq = clib_host_to_net_u32(esp0->seq); - if (PREDICT_TRUE(sa0->use_esn)) - esp_replay_advance_esn(sa0, seq); - else - esp_replay_advance(sa0, seq); - } - - ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t)); - vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size); - - b0->current_length -= (icv_size + 2); - b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; - f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) + - b0->current_length); - b0->current_length -= f0->pad_length; - - /* transport mode */ - if (PREDICT_FALSE(!sa0->is_tunnel && !sa0->is_tunnel_ip6)) - { - tunnel_mode = 0; - - if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) != 0x40)) - { - if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) == 0x60)) - transport_ip6 = 1; - else - { - clib_warning("next header: 0x%x", f0->next_header); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_NOT_IP, 1); - goto trace; - } - } - } - - if (PREDICT_TRUE (tunnel_mode)) - { - if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP)) - next0 = ESP_DECRYPT_NEXT_IP4_INPUT; - else if (f0->next_header == IP_PROTOCOL_IPV6) - next0 = ESP_DECRYPT_NEXT_IP6_INPUT; - else - { - clib_warning("next header: 0x%x", f0->next_header); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_DECRYPTION_FAILED, - 1); - goto trace; - } - } - /* transport mode */ - else - { - if (PREDICT_FALSE(transport_ip6)) - { - ih6 = (ip6_header_t *) (b0->data + sizeof(ethernet_header_t)); - vlib_buffer_advance (b0, -sizeof(ip6_header_t)); - oh6 = vlib_buffer_get_current (b0); - memmove(oh6, ih6, sizeof(ip6_header_t)); - - next0 = ESP_DECRYPT_NEXT_IP6_INPUT; - oh6->protocol = f0->next_header; - oh6->payload_length = - clib_host_to_net_u16 ( - vlib_buffer_length_in_chain(vm, b0) - - sizeof (ip6_header_t)); - } - else - { - vlib_buffer_advance (b0, -sizeof(ip4_header_t)); - oh4 = vlib_buffer_get_current (b0); - memmove(oh4, ih4, sizeof(ip4_header_t)); - - next0 = ESP_DECRYPT_NEXT_IP4_INPUT; - oh4->ip_version_and_header_length = 0x45; - oh4->fragment_id = 0; - oh4->flags_and_fragment_offset = 0; - oh4->protocol = f0->next_header; - oh4->length = clib_host_to_net_u16 ( - vlib_buffer_length_in_chain (vm, b0)); - 
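The decrypt-post logic here peels the ESP trailer: drop the ICV and the two footer bytes, then drop `pad_length` bytes of padding and read `next_header`. A self-contained sketch of that arithmetic with bounds checks added:

```
#include <stdint.h>

typedef struct
{
  uint8_t pad_length;
  uint8_t next_header;
} esp_footer_t;

/* `len' covers decrypted payload + padding + 2-byte footer + ICV.
 * Returns the inner packet length, or -1 on malformed input. */
static int
strip_esp_trailer (const uint8_t * payload, uint32_t len,
		   uint32_t icv_size, uint8_t * next_header)
{
  const esp_footer_t *f;

  if (len < icv_size + 2)
    return -1;
  len -= icv_size + 2;		/* drop ICV and footer */
  f = (const esp_footer_t *) (payload + len);
  if (f->pad_length > len)
    return -1;
  *next_header = f->next_header;
  return (int) (len - f->pad_length);
}
```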
oh4->checksum = ip4_header_checksum (oh4); - } - } - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0; - -trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, dpdk_esp_decrypt_post_node.index, - ESP_DECRYPT_POST_ERROR_PKTS, - from_frame->n_vectors); - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_decrypt_post_node) = { - .function = dpdk_esp_decrypt_post_node_fn, - .name = "dpdk-esp-decrypt-post", - .vector_size = sizeof (u32), - .format_trace = format_esp_decrypt_post_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(esp_decrypt_post_error_strings), - .error_strings = esp_decrypt_post_error_strings, - - .n_next_nodes = ESP_DECRYPT_N_NEXT, - .next_nodes = { -#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, - foreach_esp_decrypt_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_post_node, dpdk_esp_decrypt_post_node_fn) diff --git a/src/vnet/devices/dpdk/ipsec/esp_encrypt.c b/src/vnet/devices/dpdk/ipsec/esp_encrypt.c deleted file mode 100644 index 6eb1afc9..00000000 --- a/src/vnet/devices/dpdk/ipsec/esp_encrypt.c +++ /dev/null @@ -1,609 +0,0 @@ -/* - * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev - * - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include - -#define foreach_esp_encrypt_next \ -_(DROP, "error-drop") \ -_(IP4_LOOKUP, "ip4-lookup") \ -_(IP6_LOOKUP, "ip6-lookup") \ -_(INTERFACE_OUTPUT, "interface-output") - -#define _(v, s) ESP_ENCRYPT_NEXT_##v, -typedef enum -{ - foreach_esp_encrypt_next -#undef _ - ESP_ENCRYPT_N_NEXT, -} esp_encrypt_next_t; - -#define foreach_esp_encrypt_error \ - _(RX_PKTS, "ESP pkts received") \ - _(SEQ_CYCLED, "sequence number cycled") \ - _(ENQ_FAIL, "Enqueue failed (buffer full)") \ - _(NO_CRYPTODEV, "Cryptodev not configured") \ - _(UNSUPPORTED, "Cipher/Auth not supported") - - -typedef enum -{ -#define _(sym,str) ESP_ENCRYPT_ERROR_##sym, - foreach_esp_encrypt_error -#undef _ - ESP_ENCRYPT_N_ERROR, -} esp_encrypt_error_t; - -static char *esp_encrypt_error_strings[] = { -#define _(sym,string) string, - foreach_esp_encrypt_error -#undef _ -}; - -vlib_node_registration_t dpdk_esp_encrypt_node; - -typedef struct -{ - u32 spi; - u32 seq; - ipsec_crypto_alg_t crypto_alg; - ipsec_integ_alg_t integ_alg; -} esp_encrypt_trace_t; - -/* packet trace format function */ -static u8 * -format_esp_encrypt_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *); - - s = format (s, "esp: spi %u seq %u crypto %U integrity %U", - t->spi, t->seq, - format_ipsec_crypto_alg, t->crypto_alg, - format_ipsec_integ_alg, t->integ_alg); - return s; -} - -static uword -dpdk_esp_encrypt_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next, next_index; - ipsec_main_t *im = &ipsec_main; - u32 cpu_index = os_get_cpu_number (); - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - dpdk_esp_main_t *em = &dpdk_esp_main; - u32 i; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - if (PREDICT_FALSE (!dcm->workers_main)) - { - /* Likely there are not enough cryptodevs, so drop frame */ - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_NO_CRYPTODEV, - n_left_from); - vlib_buffer_free (vm, from, n_left_from); - return n_left_from; - } - - crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index); - u32 n_qps = vec_len (cwm->qp_data); - struct rte_crypto_op **cops_to_enq[n_qps]; - u32 n_cop_qp[n_qps], *bi_to_enq[n_qps]; - - for (i = 0; i < n_qps; i++) - { - bi_to_enq[i] = cwm->qp_data[i].bi; - cops_to_enq[i] = cwm->qp_data[i].cops; - } - - memset (n_cop_qp, 0, n_qps * sizeof (u32)); - - crypto_alloc_cops (); - - next_index = ESP_ENCRYPT_NEXT_DROP; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, next0; - vlib_buffer_t *b0 = 0; - u32 sa_index0; - ipsec_sa_t *sa0; - ip4_and_esp_header_t *ih0, *oh0 = 0; - ip6_and_esp_header_t *ih6_0, *oh6_0 = 0; - struct rte_mbuf *mb0 = 0; - esp_footer_t *f0; - u8 is_ipv6; - u8 ip_hdr_size; - u8 next_hdr_type; - u8 transport_mode = 0; - const int BLOCK_SIZE = 16; - u32 iv_size; - u16 orig_sz; - crypto_sa_session_t *sa_sess; - void *sess; - struct rte_crypto_op *cop = 0; - u16 qp_index; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - - b0 = vlib_get_buffer (vm, bi0); - sa_index0 = vnet_buffer (b0)->ipsec.sad_index; - sa0 = pool_elt_at_index (im->sad, 
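The `foreach_.../#define _` lists above are VPP's X-macro idiom: a single list expands into the next-node enum, the error enum and the matching string table, so the three can never drift apart. A tiny runnable illustration of the idiom:

```
#include <stdio.h>

/* one list drives both the enum and the string table */
#define foreach_demo_error \
  _(RX_PKTS, "packets received") \
  _(ENQ_FAIL, "enqueue failed")

typedef enum
{
#define _(sym, str) DEMO_ERROR_##sym,
  foreach_demo_error
#undef _
  DEMO_N_ERROR,
} demo_error_t;

static char *demo_error_strings[] = {
#define _(sym, str) str,
  foreach_demo_error
#undef _
};

int
main (void)
{
  printf ("%d errors, first: %s\n", DEMO_N_ERROR, demo_error_strings[0]);
  return 0;
}
```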
sa_index0); - - if (PREDICT_FALSE (esp_seq_advance (sa0))) - { - clib_warning ("sequence number counter has cycled SPI %u", - sa0->spi); - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1); - //TODO: rekey SA - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - - sa0->total_data_size += b0->current_length; - - sa_sess = pool_elt_at_index (cwm->sa_sess_d[1], sa_index0); - if (PREDICT_FALSE (!sa_sess->sess)) - { - int ret = create_sym_sess (sa0, sa_sess, 1); - - if (PREDICT_FALSE (ret)) - { - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - } - - qp_index = sa_sess->qp_index; - sess = sa_sess->sess; - - ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); - cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); - ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); - - cops_to_enq[qp_index][0] = cop; - cops_to_enq[qp_index] += 1; - n_cop_qp[qp_index] += 1; - bi_to_enq[qp_index][0] = bi0; - bi_to_enq[qp_index] += 1; - - ssize_t adv; - iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; - ih0 = vlib_buffer_get_current (b0); - orig_sz = b0->current_length; - is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60; - /* is ipv6 */ - if (PREDICT_TRUE (sa0->is_tunnel)) - { - if (PREDICT_TRUE (!is_ipv6)) - adv = -sizeof (ip4_and_esp_header_t); - else - adv = -sizeof (ip6_and_esp_header_t); - } - else - { - adv = -sizeof (esp_header_t); - if (PREDICT_TRUE (!is_ipv6)) - orig_sz -= sizeof (ip4_header_t); - else - orig_sz -= sizeof (ip6_header_t); - } - - /*transport mode save the eth header before it is overwritten */ - if (PREDICT_FALSE (!sa0->is_tunnel)) - { - ethernet_header_t *ieh0 = (ethernet_header_t *) - ((u8 *) vlib_buffer_get_current (b0) - - sizeof (ethernet_header_t)); - ethernet_header_t *oeh0 = - (ethernet_header_t *) ((u8 *) ieh0 + (adv - iv_size)); - clib_memcpy (oeh0, ieh0, sizeof (ethernet_header_t)); - } - - vlib_buffer_advance (b0, adv - iv_size); - - /* XXX IP6/ip4 and IP4/IP6 not supported, only IP4/IP4 and IP6/IP6 */ - - /* is ipv6 */ - if (PREDICT_FALSE (is_ipv6)) - { - ih6_0 = (ip6_and_esp_header_t *) ih0; - ip_hdr_size = sizeof (ip6_header_t); - oh6_0 = vlib_buffer_get_current (b0); - - if (PREDICT_TRUE (sa0->is_tunnel)) - { - next_hdr_type = IP_PROTOCOL_IPV6; - oh6_0->ip6.ip_version_traffic_class_and_flow_label = - ih6_0->ip6.ip_version_traffic_class_and_flow_label; - } - else - { - next_hdr_type = ih6_0->ip6.protocol; - memmove (oh6_0, ih6_0, sizeof (ip6_header_t)); - } - - oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP; - oh6_0->ip6.hop_limit = 254; - oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi); - oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq); - } - else - { - ip_hdr_size = sizeof (ip4_header_t); - oh0 = vlib_buffer_get_current (b0); - - if (PREDICT_TRUE (sa0->is_tunnel)) - { - next_hdr_type = IP_PROTOCOL_IP_IN_IP; - oh0->ip4.tos = ih0->ip4.tos; - } - else - { - next_hdr_type = ih0->ip4.protocol; - memmove (oh0, ih0, sizeof (ip4_header_t)); - } - - oh0->ip4.ip_version_and_header_length = 0x45; - oh0->ip4.fragment_id = 0; - oh0->ip4.flags_and_fragment_offset = 0; - oh0->ip4.ttl = 254; - oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP; - oh0->esp.spi = clib_net_to_host_u32 (sa0->spi); - oh0->esp.seq = clib_net_to_host_u32 (sa0->seq); - } - - if (PREDICT_TRUE (sa0->is_tunnel && !sa0->is_tunnel_ip6)) - { - oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32; - oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32; - - /* in 
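`esp_seq_advance()` above signals the `SEQ_CYCLED` error when the sequence space is exhausted. A simplified standalone model of that advance, covering both the plain 32-bit case and the ESN case:

```
#include <stdint.h>

typedef struct
{
  uint32_t seq, seq_hi;
  int use_esn;
} sa_seq_t;

/* Nonzero means the sequence space cycled and the SA must be re-keyed,
 * the condition flagged as SEQ_CYCLED above. */
static int
seq_advance (sa_seq_t * sa)
{
  sa->seq++;
  if (sa->seq == 0)		/* low 32 bits wrapped */
    {
      if (!sa->use_esn)
	return 1;		/* plain ESP: out of numbers */
      sa->seq_hi++;
      if (sa->seq_hi == 0)
	return 1;		/* full 64-bit ESN space exhausted */
    }
  return 0;
}
```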
tunnel mode send it back to FIB */ - next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - } - else if (sa0->is_tunnel && sa0->is_tunnel_ip6) - { - oh6_0->ip6.src_address.as_u64[0] = - sa0->tunnel_src_addr.ip6.as_u64[0]; - oh6_0->ip6.src_address.as_u64[1] = - sa0->tunnel_src_addr.ip6.as_u64[1]; - oh6_0->ip6.dst_address.as_u64[0] = - sa0->tunnel_dst_addr.ip6.as_u64[0]; - oh6_0->ip6.dst_address.as_u64[1] = - sa0->tunnel_dst_addr.ip6.as_u64[1]; - - /* in tunnel mode send it back to FIB */ - next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - } - else - { - next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT; - transport_mode = 1; - } - - ASSERT (sa0->crypto_alg < IPSEC_CRYPTO_N_ALG); - ASSERT (sa0->crypto_alg != IPSEC_CRYPTO_ALG_NONE); - - int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE; - - /* pad packet in input buffer */ - u8 pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz; - u8 i; - u8 *padding = vlib_buffer_get_current (b0) + b0->current_length; - - for (i = 0; i < pad_bytes; ++i) - padding[i] = i + 1; - - f0 = vlib_buffer_get_current (b0) + b0->current_length + pad_bytes; - f0->pad_length = pad_bytes; - f0->next_header = next_hdr_type; - b0->current_length += pad_bytes + 2 + - em->esp_integ_algs[sa0->integ_alg].trunc_size; - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = - vnet_buffer (b0)->sw_if_index[VLIB_RX]; - b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; - - struct rte_crypto_sym_op *sym_cop; - sym_cop = (struct rte_crypto_sym_op *) (cop + 1); - - dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *) (sym_cop + 1); - - vnet_buffer (b0)->unused[0] = next0; - - mb0 = rte_mbuf_from_vlib_buffer (b0); - mb0->data_len = b0->current_length; - mb0->pkt_len = b0->current_length; - mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; - - rte_crypto_op_attach_sym_session (cop, sess); - - sym_cop->m_src = mb0; - - dpdk_gcm_cnt_blk *icb = &priv->cb; - icb->salt = sa0->salt; - icb->iv[0] = sa0->seq; - icb->iv[1] = sa0->seq_hi; - - if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - icb->cnt = clib_host_to_net_u32 (1); - clib_memcpy (vlib_buffer_get_current (b0) + ip_hdr_size + - sizeof (esp_header_t), icb->iv, 8); - sym_cop->cipher.data.offset = - ip_hdr_size + sizeof (esp_header_t) + iv_size; - sym_cop->cipher.data.length = BLOCK_SIZE * blocks; - sym_cop->cipher.iv.length = 16; - } - else - { - sym_cop->cipher.data.offset = - ip_hdr_size + sizeof (esp_header_t); - sym_cop->cipher.data.length = BLOCK_SIZE * blocks + iv_size; - sym_cop->cipher.iv.length = iv_size; - } - - sym_cop->cipher.iv.data = (u8 *) icb; - sym_cop->cipher.iv.phys_addr = cop->phys_addr + (uintptr_t) icb - - (uintptr_t) cop; - - - ASSERT (sa0->integ_alg < IPSEC_INTEG_N_ALG); - ASSERT (sa0->integ_alg != IPSEC_INTEG_ALG_NONE); - - if (PREDICT_FALSE (sa0->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)) - { - u8 *aad = priv->aad; - clib_memcpy (aad, vlib_buffer_get_current (b0) + ip_hdr_size, - 8); - sym_cop->auth.aad.data = aad; - sym_cop->auth.aad.phys_addr = cop->phys_addr + - (uintptr_t) aad - (uintptr_t) cop; - - if (PREDICT_FALSE (sa0->use_esn)) - { - *((u32 *) & aad[8]) = sa0->seq_hi; - sym_cop->auth.aad.length = 12; - } - else - { - sym_cop->auth.aad.length = 8; - } - } - else - { - sym_cop->auth.data.offset = ip_hdr_size; - sym_cop->auth.data.length = b0->current_length - ip_hdr_size - - em->esp_integ_algs[sa0->integ_alg].trunc_size; - - if (PREDICT_FALSE (sa0->use_esn)) - { - u8 *payload_end = - vlib_buffer_get_current (b0) + b0->current_length; - *((u32 *) 
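The padding block above sizes the payload (plus the 2-byte footer) up to the cipher block size and fills with the 1,2,3,... pattern RFC 4303 requires. The same computation in isolation:

```
#include <stdint.h>

#define BLOCK_SIZE 16

/* Pad payload + 2 trailer bytes up to the cipher block size, filling
 * with the monotonic 1,2,3,... pattern RFC 4303 mandates. */
static uint8_t
esp_pad (uint8_t * end_of_payload, uint16_t orig_sz)
{
  int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE;
  uint8_t pad_bytes = (uint8_t) (BLOCK_SIZE * blocks - 2 - orig_sz);
  uint8_t i;

  for (i = 0; i < pad_bytes; i++)
    end_of_payload[i] = i + 1;	/* self-describing padding */
  return pad_bytes;		/* stored in the footer's pad_length */
}
```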
payload_end) = sa0->seq_hi; - sym_cop->auth.data.length += sizeof (sa0->seq_hi); - } - } - sym_cop->auth.digest.data = vlib_buffer_get_current (b0) + - b0->current_length - - em->esp_integ_algs[sa0->integ_alg].trunc_size; - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset (mb0, - b0->current_length - - - em->esp_integ_algs - [sa0->integ_alg].trunc_size); - sym_cop->auth.digest.length = - em->esp_integ_algs[sa0->integ_alg].trunc_size; - - - if (PREDICT_FALSE (is_ipv6)) - { - oh6_0->ip6.payload_length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (ip6_header_t)); - } - else - { - oh0->ip4.length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4); - } - - if (transport_mode) - vlib_buffer_advance (b0, -sizeof (ethernet_header_t)); - - trace: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - esp_encrypt_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->spi = sa0->spi; - tr->seq = sa0->seq - 1; - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; - } - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_RX_PKTS, - from_frame->n_vectors); - crypto_qp_data_t *qpd; - /* *INDENT-OFF* */ - vec_foreach_index (i, cwm->qp_data) - { - u32 enq; - - qpd = vec_elt_at_index(cwm->qp_data, i); - enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, - qpd->cops, n_cop_qp[i]); - qpd->inflights += enq; - - if (PREDICT_FALSE(enq < n_cop_qp[i])) - { - crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); - vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); - - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_ENQ_FAIL, - n_cop_qp[i] - enq); - } - } - /* *INDENT-ON* */ - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_encrypt_node) = -{ - .function = dpdk_esp_encrypt_node_fn,.name = "dpdk-esp-encrypt",.flags = - VLIB_NODE_FLAG_IS_OUTPUT,.vector_size = sizeof (u32),.format_trace = - format_esp_encrypt_trace,.n_errors = - ARRAY_LEN (esp_encrypt_error_strings),.error_strings = - esp_encrypt_error_strings,.n_next_nodes = 1,.next_nodes = - { - [ESP_ENCRYPT_NEXT_DROP] = "error-drop",} -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_node, dpdk_esp_encrypt_node_fn) -/* - * ESP Encrypt Post Node - */ -#define foreach_esp_encrypt_post_error \ - _(PKTS, "ESP post pkts") - typedef enum - { -#define _(sym,str) ESP_ENCRYPT_POST_ERROR_##sym, - foreach_esp_encrypt_post_error -#undef _ - ESP_ENCRYPT_POST_N_ERROR, - } esp_encrypt_post_error_t; - - static char *esp_encrypt_post_error_strings[] = { -#define _(sym,string) string, - foreach_esp_encrypt_post_error -#undef _ - }; - -vlib_node_registration_t dpdk_esp_encrypt_post_node; - -static u8 * -format_esp_encrypt_post_trace (u8 * s, va_list * args) -{ - return s; -} - -static uword -dpdk_esp_encrypt_post_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next = 0, next_index; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, next0; - vlib_buffer_t *b0 = 0; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - n_left_to_next -= 1; 
- - b0 = vlib_get_buffer (vm, bi0); - - to_next[0] = bi0; - to_next += 1; - - next0 = vnet_buffer (b0)->unused[0]; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, bi0, - next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, dpdk_esp_encrypt_post_node.index, - ESP_ENCRYPT_POST_ERROR_PKTS, - from_frame->n_vectors); - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_encrypt_post_node) = -{ - .function = dpdk_esp_encrypt_post_node_fn,.name = - "dpdk-esp-encrypt-post",.vector_size = sizeof (u32),.format_trace = - format_esp_encrypt_post_trace,.type = VLIB_NODE_TYPE_INTERNAL,.n_errors = - ARRAY_LEN (esp_encrypt_post_error_strings),.error_strings = - esp_encrypt_post_error_strings,.n_next_nodes = - ESP_ENCRYPT_N_NEXT,.next_nodes = - { -#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n, - foreach_esp_encrypt_next -#undef _ - } -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_post_node, - dpdk_esp_encrypt_post_node_fn) -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/ipsec.c b/src/vnet/devices/dpdk/ipsec/ipsec.c deleted file mode 100644 index 05c17c99..00000000 --- a/src/vnet/devices/dpdk/ipsec/ipsec.c +++ /dev/null @@ -1,430 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include -#include - -#include -#include -#include - -#define DPDK_CRYPTO_NB_SESS_OBJS 20000 -#define DPDK_CRYPTO_CACHE_SIZE 512 -#define DPDK_CRYPTO_PRIV_SIZE 128 -#define DPDK_CRYPTO_N_QUEUE_DESC 1024 -#define DPDK_CRYPTO_NB_COPS (1024 * 4) - -static int -add_del_sa_sess (u32 sa_index, u8 is_add) -{ - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm; - u8 skip_master = vlib_num_workers () > 0; - - /* *INDENT-OFF* */ - vec_foreach (cwm, dcm->workers_main) - { - crypto_sa_session_t *sa_sess; - u8 is_outbound; - - if (skip_master) - { - skip_master = 0; - continue; - } - - for (is_outbound = 0; is_outbound < 2; is_outbound++) - { - if (is_add) - { - pool_get (cwm->sa_sess_d[is_outbound], sa_sess); - } - else - { - u8 dev_id; - - sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index); - dev_id = cwm->qp_data[sa_sess->qp_index].dev_id; - - if (!sa_sess->sess) - continue; - - if (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess)) - { - clib_warning("failed to free session"); - return -1; - } - memset(sa_sess, 0, sizeof(sa_sess[0])); - } - } - } - /* *INDENT-OFF* */ - - return 0; -} - -static void -update_qp_data (crypto_worker_main_t * cwm, - u8 cdev_id, u16 qp_id, u8 is_outbound, u16 * idx) -{ - crypto_qp_data_t *qpd; - - /* *INDENT-OFF* */ - vec_foreach_index (*idx, cwm->qp_data) - { - qpd = vec_elt_at_index(cwm->qp_data, *idx); - - if (qpd->dev_id == cdev_id && qpd->qp_id == qp_id && - qpd->is_outbound == is_outbound) - return; - } - /* *INDENT-ON* */ - - vec_add2 (cwm->qp_data, qpd, 1); - - qpd->dev_id = cdev_id; - qpd->qp_id = qp_id; - qpd->is_outbound = is_outbound; -} - -/* - * return: - * 0: already exist - * 1: mapped - */ -static int -add_mapping (crypto_worker_main_t * cwm, - u8 cdev_id, u16 qp, u8 is_outbound, - const struct rte_cryptodev_capabilities *cipher_cap, - const struct rte_cryptodev_capabilities *auth_cap) -{ - u16 qp_index; - uword key = 0, data, *ret; - crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; - - p_key->cipher_algo = (u8) cipher_cap->sym.cipher.algo; - p_key->auth_algo = (u8) auth_cap->sym.auth.algo; - p_key->is_outbound = is_outbound; - - ret = hash_get (cwm->algo_qp_map, key); - if (ret) - return 0; - - update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index); - - data = (uword) qp_index; - hash_set (cwm->algo_qp_map, key, data); - - return 1; -} - -/* - * return: - * 0: already exist - * 1: mapped - */ -static int -add_cdev_mapping (crypto_worker_main_t * cwm, - struct rte_cryptodev_info *dev_info, u8 cdev_id, - u16 qp, u8 is_outbound) -{ - const struct rte_cryptodev_capabilities *i, *j; - u32 mapped = 0; - - for (i = dev_info->capabilities; i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++) - { - if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER) - continue; - - if (check_algo_is_supported (i, NULL) != 0) - continue; - - for (j = dev_info->capabilities; j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; - j++) - { - if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH) - continue; - - if (check_algo_is_supported (j, NULL) != 0) - continue; - - mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, j); - } - } - - return mapped; -} - -static int -check_cryptodev_queues () -{ - u32 n_qs = 0; - u8 cdev_id; - u32 n_req_qs = 2; - - if (vlib_num_workers () > 0) - n_req_qs = vlib_num_workers () * 2; - - for (cdev_id = 0; cdev_id < rte_cryptodev_count (); cdev_id++) - { - struct rte_cryptodev_info cdev_info; - - rte_cryptodev_info_get (cdev_id, &cdev_info); - - if (! 
- (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) - continue; - - n_qs += cdev_info.max_nb_queue_pairs; - } - - if (n_qs >= n_req_qs) - return 0; - else - return -1; -} - -static clib_error_t * -dpdk_ipsec_check_support (ipsec_sa_t * sa) -{ - if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) - return clib_error_return (0, "unsupported integ-alg %U with " - "crypto-algo aes-gcm-128", - format_ipsec_integ_alg, sa->integ_alg); - sa->integ_alg = IPSEC_INTEG_ALG_AES_GCM_128; - } - else - { - if (sa->integ_alg == IPSEC_INTEG_ALG_NONE || - sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) - return clib_error_return (0, "unsupported integ-alg %U", - format_ipsec_integ_alg, sa->integ_alg); - } - - return 0; -} - -static uword -dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, - vlib_frame_t * f) -{ - dpdk_config_main_t *conf = &dpdk_config_main; - ipsec_main_t *im = &ipsec_main; - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - struct rte_cryptodev_config dev_conf; - struct rte_cryptodev_qp_conf qp_conf; - struct rte_cryptodev_info cdev_info; - struct rte_mempool *rmp; - i32 dev_id, ret; - u32 i, skip_master; - - if (!conf->cryptodev) - { - clib_warning ("DPDK Cryptodev support is disabled, " - "default to OpenSSL IPsec"); - return 0; - } - - if (check_cryptodev_queues () < 0) - { - conf->cryptodev = 0; - clib_warning ("not enough Cryptodevs, default to OpenSSL IPsec"); - return 0; - } - - vec_alloc (dcm->workers_main, tm->n_vlib_mains); - _vec_len (dcm->workers_main) = tm->n_vlib_mains; - - fprintf (stdout, "DPDK Cryptodevs info:\n"); - fprintf (stdout, "dev_id\tn_qp\tnb_obj\tcache_size\n"); - /* HW cryptodevs have higher dev_id, use HW first */ - for (dev_id = rte_cryptodev_count () - 1; dev_id >= 0; dev_id--) - { - u16 max_nb_qp, qp = 0; - skip_master = vlib_num_workers () > 0; - - rte_cryptodev_info_get (dev_id, &cdev_info); - - if (! 
- (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) - continue; - - max_nb_qp = cdev_info.max_nb_queue_pairs; - - for (i = 0; i < tm->n_vlib_mains; i++) - { - u8 is_outbound; - crypto_worker_main_t *cwm; - uword *map; - - if (skip_master) - { - skip_master = 0; - continue; - } - - cwm = vec_elt_at_index (dcm->workers_main, i); - map = cwm->algo_qp_map; - - if (!map) - { - map = hash_create (0, sizeof (crypto_worker_qp_key_t)); - if (!map) - { - clib_warning ("unable to create hash table for worker %u", - vlib_mains[i]->cpu_index); - goto error; - } - cwm->algo_qp_map = map; - } - - for (is_outbound = 0; is_outbound < 2 && qp < max_nb_qp; - is_outbound++) - qp += add_cdev_mapping (cwm, &cdev_info, dev_id, qp, is_outbound); - } - - if (qp == 0) - continue; - - dev_conf.socket_id = rte_cryptodev_socket_id (dev_id); - dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs; - dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_SESS_OBJS; - dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE; - - ret = rte_cryptodev_configure (dev_id, &dev_conf); - if (ret < 0) - { - clib_warning ("cryptodev %u config error", dev_id); - goto error; - } - - qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC; - for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++) - { - ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf, - dev_conf.socket_id); - if (ret < 0) - { - clib_warning ("cryptodev %u qp %u setup error", dev_id, qp); - goto error; - } - } - vec_validate_aligned (dcm->cop_pools, dev_conf.socket_id, - CLIB_CACHE_LINE_BYTES); - - if (!vec_elt (dcm->cop_pools, dev_conf.socket_id)) - { - u8 *pool_name = format (0, "crypto_op_pool_socket%u%c", - dev_conf.socket_id, 0); - - rmp = rte_crypto_op_pool_create ((char *) pool_name, - RTE_CRYPTO_OP_TYPE_SYMMETRIC, - DPDK_CRYPTO_NB_COPS * - (1 + vlib_num_workers ()), - DPDK_CRYPTO_CACHE_SIZE, - DPDK_CRYPTO_PRIV_SIZE, - dev_conf.socket_id); - vec_free (pool_name); - - if (!rmp) - { - clib_warning ("failed to allocate mempool on socket %u", - dev_conf.socket_id); - goto error; - } - vec_elt (dcm->cop_pools, dev_conf.socket_id) = rmp; - } - - fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs, - DPDK_CRYPTO_NB_SESS_OBJS, DPDK_CRYPTO_CACHE_SIZE); - } - - dpdk_esp_init (); - - /* Add new next node and set as default */ - vlib_node_t *node, *next_node; - - next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt"); - ASSERT (next_node); - node = vlib_get_node_by_name (vm, (u8 *) "ipsec-output-ip4"); - ASSERT (node); - im->esp_encrypt_node_index = next_node->index; - im->esp_encrypt_next_index = - vlib_node_add_next (vm, node->index, next_node->index); - - next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-decrypt"); - ASSERT (next_node); - node = vlib_get_node_by_name (vm, (u8 *) "ipsec-input-ip4"); - ASSERT (node); - im->esp_decrypt_node_index = next_node->index; - im->esp_decrypt_next_index = - vlib_node_add_next (vm, node->index, next_node->index); - - im->cb.check_support_cb = dpdk_ipsec_check_support; - im->cb.add_del_sa_sess_cb = add_del_sa_sess; - - if (vec_len (vlib_mains) == 0) - vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index, - VLIB_NODE_STATE_POLLING); - else - for (i = 1; i < tm->n_vlib_mains; i++) - vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, - VLIB_NODE_STATE_POLLING); - - /* TODO cryptodev counters */ - - return 0; - -error: - ; - crypto_worker_main_t *cwm; - struct rte_mempool **mp; - /* *INDENT-OFF* */ - vec_foreach (cwm, dcm->workers_main) - hash_free 
(cwm->algo_qp_map); - - vec_foreach (mp, dcm->cop_pools) - { - if (mp) - rte_mempool_free (mp[0]); - } - /* *INDENT-ON* */ - vec_free (dcm->workers_main); - vec_free (dcm->cop_pools); - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_ipsec_process_node,static) = { - .function = dpdk_ipsec_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "dpdk-ipsec-process", - .process_log2_n_stack_bytes = 17, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/ipsec.h b/src/vnet/devices/dpdk/ipsec/ipsec.h deleted file mode 100644 index 3465b361..00000000 --- a/src/vnet/devices/dpdk/ipsec/ipsec.h +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __DPDK_IPSEC_H__ -#define __DPDK_IPSEC_H__ - -#include - -#undef always_inline -#include -#include - -#if CLIB_DEBUG > 0 -#define always_inline static inline -#else -#define always_inline static inline __attribute__ ((__always_inline__)) -#endif - - -#define MAX_QP_PER_LCORE 16 - -typedef struct -{ - u32 salt; - u32 iv[2]; - u32 cnt; -} dpdk_gcm_cnt_blk; - -typedef struct -{ - dpdk_gcm_cnt_blk cb; - union - { - u8 aad[12]; - u8 icv[64]; - }; -} dpdk_cop_priv_t; - -typedef struct -{ - u8 cipher_algo; - u8 auth_algo; - u8 is_outbound; -} crypto_worker_qp_key_t; - -typedef struct -{ - u16 dev_id; - u16 qp_id; - u16 is_outbound; - i16 inflights; - u32 bi[VLIB_FRAME_SIZE]; - struct rte_crypto_op *cops[VLIB_FRAME_SIZE]; - struct rte_crypto_op **free_cops; -} crypto_qp_data_t; - -typedef struct -{ - u8 qp_index; - void *sess; -} crypto_sa_session_t; - -typedef struct -{ - crypto_sa_session_t *sa_sess_d[2]; - crypto_qp_data_t *qp_data; - uword *algo_qp_map; -} crypto_worker_main_t; - -typedef struct -{ - struct rte_mempool **cop_pools; - crypto_worker_main_t *workers_main; -} dpdk_crypto_main_t; - -dpdk_crypto_main_t dpdk_crypto_main; - -extern vlib_node_registration_t dpdk_crypto_input_node; - -#define CRYPTO_N_FREE_COPS (VLIB_FRAME_SIZE * 3) - -static_always_inline void -crypto_alloc_cops () -{ - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - u32 cpu_index = os_get_cpu_number (); - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - unsigned socket_id = rte_socket_id (); - crypto_qp_data_t *qpd; - - /* *INDENT-OFF* */ - vec_foreach (qpd, cwm->qp_data) - { - u32 l = vec_len (qpd->free_cops); - - if (PREDICT_FALSE (l < VLIB_FRAME_SIZE)) - { - u32 n_alloc; - - if (PREDICT_FALSE (!qpd->free_cops)) - vec_alloc (qpd->free_cops, CRYPTO_N_FREE_COPS); - - n_alloc = rte_crypto_op_bulk_alloc (dcm->cop_pools[socket_id], - RTE_CRYPTO_OP_TYPE_SYMMETRIC, - &qpd->free_cops[l], - CRYPTO_N_FREE_COPS - l - 1); - - _vec_len (qpd->free_cops) = l + n_alloc; - } - } - /* *INDENT-ON* */ -} - -static_always_inline void -crypto_free_cop (crypto_qp_data_t * qpd, struct rte_crypto_op **cops, u32 n) -{ - u32 l = vec_len (qpd->free_cops); 
- - if (l + n >= CRYPTO_N_FREE_COPS) - { - l -= VLIB_FRAME_SIZE; - rte_mempool_put_bulk (cops[0]->mempool, - (void **) &qpd->free_cops[l], VLIB_FRAME_SIZE); - } - clib_memcpy (&qpd->free_cops[l], cops, sizeof (*cops) * n); - - _vec_len (qpd->free_cops) = l + n; -} - -static_always_inline int -check_algo_is_supported (const struct rte_cryptodev_capabilities *cap, - char *name) -{ - struct - { - uint8_t cipher_algo; - enum rte_crypto_sym_xform_type type; - union - { - enum rte_crypto_auth_algorithm auth; - enum rte_crypto_cipher_algorithm cipher; - }; - char *name; - } supported_algo[] = - { - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_NULL,.name = "NULL"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_AES_CBC,.name = "AES_CBC"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_AES_CTR,.name = "AES_CTR"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_3DES_CBC,.name = "3DES-CBC"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_AES_GCM,.name = "AES-GCM"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA1_HMAC,.name = "HMAC-SHA1"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA256_HMAC,.name = "HMAC-SHA256"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA384_HMAC,.name = "HMAC-SHA384"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA512_HMAC,.name = "HMAC-SHA512"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_AES_XCBC_MAC,.name = "AES-XCBC-MAC"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_AES_GCM,.name = "AES-GCM"}, - { - /* tail */ - .type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED},}; - uint32_t i = 0; - - if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC) - return -1; - - while (supported_algo[i].type != RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED) - { - if (cap->sym.xform_type == supported_algo[i].type) - { - if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER && - cap->sym.cipher.algo == supported_algo[i].cipher) || - (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH && - cap->sym.auth.algo == supported_algo[i].auth)) - { - if (name) - strcpy (name, supported_algo[i].name); - return 0; - } - } - - i++; - } - - return -1; -} - -#endif /* __DPDK_IPSEC_H__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/main.c b/src/vnet/devices/dpdk/main.c deleted file mode 100644 index 9ea3aa04..00000000 --- a/src/vnet/devices/dpdk/main.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - - -/* - * Called by the dpdk driver's rte_delay_us() function. - * Return 0 to have the dpdk do a regular delay loop. - * Return 1 if to skip the delay loop because we are suspending - * the calling vlib process instead. 
- */ -int -rte_delay_us_override (unsigned us) -{ - vlib_main_t *vm; - - /* Don't bother intercepting for short delays */ - if (us < 10) - return 0; - - /* - * Only intercept if we are in a vlib process. - * If we are called from a vlib worker thread or the vlib main - * thread then do not intercept. (Must not be called from an - * independent pthread). - */ - if (os_get_cpu_number () == 0) - { - /* - * We're in the vlib main thread or a vlib process. Make sure - * the process is running and we're not still initializing. - */ - vm = vlib_get_main (); - if (vlib_in_process_context (vm)) - { - /* Only suspend for the admin_down_process */ - vlib_process_t *proc = vlib_get_current_process (vm); - if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) || - (proc->node_runtime.function != admin_up_down_process)) - return 0; - - f64 delay = 1e-6 * us; - vlib_process_suspend (vm, delay); - return 1; - } - } - return 0; // no override -} - -static void -rte_delay_us_override_cb (unsigned us) -{ - if (rte_delay_us_override (us) == 0) - rte_delay_us_block (us); -} - -static clib_error_t * dpdk_main_init (vlib_main_t * vm) -{ - clib_error_t * error = 0; - - if ((error = vlib_call_init_function (vm, dpdk_init))) - return error; - - /* register custom delay function */ - rte_delay_us_callback_register (rte_delay_us_override_cb); - - return error; -} - -VLIB_INIT_FUNCTION (dpdk_main_init); - diff --git a/src/vnet/devices/dpdk/node.c b/src/vnet/devices/dpdk/node.c deleted file mode 100644 index 0d64ae08..00000000 --- a/src/vnet/devices/dpdk/node.c +++ /dev/null @@ -1,674 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dpdk_priv.h" - -static char *dpdk_error_strings[] = { -#define _(n,s) s, - foreach_dpdk_error -#undef _ -}; - -always_inline int -vlib_buffer_is_ip4 (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) b->data; - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); -} - -always_inline int -vlib_buffer_is_ip6 (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) b->data; - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); -} - -always_inline int -vlib_buffer_is_mpls (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) b->data; - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); -} - -always_inline u32 -dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0) -{ - if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0))) - if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0)) - return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; - else - return VNET_DEVICE_INPUT_NEXT_IP4_INPUT; - else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0))) - return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; - else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) - return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; - else - return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; -} - -always_inline int -dpdk_mbuf_is_vlan (struct rte_mbuf *mb) -{ - return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) == - RTE_PTYPE_L2_ETHER_VLAN; -} - -always_inline int -dpdk_mbuf_is_ip4 (struct rte_mbuf *mb) -{ - return RTE_ETH_IS_IPV4_HDR (mb->packet_type) != 0; -} - -always_inline int -dpdk_mbuf_is_ip6 (struct rte_mbuf *mb) -{ - return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0; -} - -always_inline u32 -dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0) -{ - if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb))) - return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb))) - return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; - else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb))) - return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; - else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) - return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; - else - return dpdk_rx_next_from_etype (mb, b0); -} - -always_inline void -dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error) -{ - if (mb->ol_flags & PKT_RX_IP_CKSUM_BAD) - { - *error = DPDK_ERROR_IP_CHECKSUM_ERROR; - *next = VNET_DEVICE_INPUT_NEXT_DROP; - } - else - *error = DPDK_ERROR_NONE; -} - -void -dpdk_rx_trace (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, u32 * buffers, uword n_buffers) -{ - vlib_main_t *vm = vlib_get_main (); - u32 *b, n_left; - u32 next0; - - n_left = n_buffers; - b = buffers; - - while (n_left >= 1) - { - u32 bi0; - vlib_buffer_t *b0; - dpdk_rx_dma_trace_t *t0; - struct rte_mbuf *mb; - u8 error0; - - bi0 = b[0]; - n_left -= 1; - - b0 = vlib_get_buffer (vm, bi0); - mb = rte_mbuf_from_vlib_buffer (b0); - - if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) - next0 = xd->per_interface_next_index; - else if (PREDICT_TRUE - ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) - next0 = dpdk_rx_next_from_mb (mb, b0); - else - next0 = dpdk_rx_next_from_etype (mb, b0); - - dpdk_rx_error_from_mb (mb, &next0, &error0); - - vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); - t0->queue_index = queue_id; - t0->device_index = xd->device_index; - 
t0->buffer_index = bi0; - - clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); - clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - clib_memcpy (t0->buffer.pre_data, b0->data, - sizeof (t0->buffer.pre_data)); - clib_memcpy (&t0->data, mb->buf_addr + mb->data_off, sizeof (t0->data)); - - b += 1; - } -} - -static inline u32 -dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id) -{ - u32 n_buffers; - u32 n_left; - u32 n_this_chunk; - - n_left = VLIB_FRAME_SIZE; - n_buffers = 0; - - if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) - { - while (n_left) - { - n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id, - xd->rx_vectors[queue_id] + - n_buffers, n_left); - n_buffers += n_this_chunk; - n_left -= n_this_chunk; - - /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */ - if (n_this_chunk < 32) - break; - } - } - else - { - ASSERT (0); - } - - return n_buffers; -} - - -static_always_inline void -dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, - struct rte_mbuf *mb, vlib_buffer_free_list_t * fl) -{ - u8 nb_seg = 1; - struct rte_mbuf *mb_seg = 0; - vlib_buffer_t *b_seg, *b_chain = 0; - mb_seg = mb->next; - b_chain = b; - - while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) - { - ASSERT (mb_seg != 0); - - b_seg = vlib_buffer_from_rte_mbuf (mb_seg); - vlib_buffer_init_for_free_list (b_seg, fl); - - ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - ASSERT (b_seg->current_data == 0); - - /* - * The driver (e.g. virtio) may not put the packet data at the start - * of the segment, so don't assume b_seg->current_data == 0 is correct. - */ - b_seg->current_data = - (mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data; - - b_seg->current_length = mb_seg->data_len; - b->total_length_not_including_first_buffer += mb_seg->data_len; - - b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT; - b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg); - - b_chain = b_seg; - mb_seg = mb_seg->next; - nb_seg++; - } -} - -static_always_inline void -dpdk_prefetch_buffer (struct rte_mbuf *mb) -{ - vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE); -} - -/* - * This function is used when there are no worker threads. - * The main thread performs IO and forwards the packets. 
- */ -static_always_inline u32 -dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, - vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id) -{ - u32 n_buffers; - u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - u32 n_left_to_next, *to_next; - u32 mb_index; - vlib_main_t *vm = vlib_get_main (); - uword n_rx_bytes = 0; - u32 n_trace, trace_cnt __attribute__ ((unused)); - vlib_buffer_free_list_t *fl; - u32 buffer_flags_template; - - if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) - return 0; - - n_buffers = dpdk_rx_burst (dm, xd, queue_id); - - if (n_buffers == 0) - { - return 0; - } - - buffer_flags_template = dm->buffer_flags_template; - - vec_reset_length (xd->d_trace_buffers[cpu_index]); - trace_cnt = n_trace = vlib_get_trace_count (vm, node); - - if (n_trace > 0) - { - u32 n = clib_min (n_trace, n_buffers); - mb_index = 0; - - while (n--) - { - struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++]; - vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - vec_add1 (xd->d_trace_buffers[cpu_index], - vlib_get_buffer_index (vm, b)); - } - } - - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - - mb_index = 0; - - while (n_buffers > 0) - { - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 bi0, next0, l3_offset0; - u32 bi1, next1, l3_offset1; - u32 bi2, next2, l3_offset2; - u32 bi3, next3, l3_offset3; - u8 error0, error1, error2, error3; - u64 or_ol_flags; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_buffers > 8 && n_left_to_next > 4) - { - struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; - struct rte_mbuf *mb1 = xd->rx_vectors[queue_id][mb_index + 1]; - struct rte_mbuf *mb2 = xd->rx_vectors[queue_id][mb_index + 2]; - struct rte_mbuf *mb3 = xd->rx_vectors[queue_id][mb_index + 3]; - - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 4]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 5]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 6]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 7]); - - if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) - { - if (PREDICT_FALSE (mb0->nb_segs > 1)) - dpdk_prefetch_buffer (mb0->next); - if (PREDICT_FALSE (mb1->nb_segs > 1)) - dpdk_prefetch_buffer (mb1->next); - if (PREDICT_FALSE (mb2->nb_segs > 1)) - dpdk_prefetch_buffer (mb2->next); - if (PREDICT_FALSE (mb3->nb_segs > 1)) - dpdk_prefetch_buffer (mb3->next); - } - - ASSERT (mb0); - ASSERT (mb1); - ASSERT (mb2); - ASSERT (mb3); - - or_ol_flags = (mb0->ol_flags | mb1->ol_flags | - mb2->ol_flags | mb3->ol_flags); - b0 = vlib_buffer_from_rte_mbuf (mb0); - b1 = vlib_buffer_from_rte_mbuf (mb1); - b2 = vlib_buffer_from_rte_mbuf (mb2); - b3 = vlib_buffer_from_rte_mbuf (mb3); - - vlib_buffer_init_for_free_list (b0, fl); - vlib_buffer_init_for_free_list (b1, fl); - vlib_buffer_init_for_free_list (b2, fl); - vlib_buffer_init_for_free_list (b3, fl); - - bi0 = vlib_get_buffer_index (vm, b0); - bi1 = vlib_get_buffer_index (vm, b1); - bi2 = vlib_get_buffer_index (vm, b2); - bi3 = vlib_get_buffer_index (vm, b3); - - to_next[0] = bi0; - to_next[1] = bi1; - to_next[2] = bi2; - to_next[3] = bi3; - to_next += 4; - n_left_to_next -= 4; - - if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) - { - next0 = next1 = next2 = next3 = xd->per_interface_next_index; - } - else if (PREDICT_TRUE - ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) - { - next0 = dpdk_rx_next_from_mb (mb0, b0); - next1 = dpdk_rx_next_from_mb (mb1, b1); - next2 = dpdk_rx_next_from_mb (mb2, b2); - next3 = 
dpdk_rx_next_from_mb (mb3, b3); - } - else - { - next0 = dpdk_rx_next_from_etype (mb0, b0); - next1 = dpdk_rx_next_from_etype (mb1, b1); - next2 = dpdk_rx_next_from_etype (mb2, b2); - next3 = dpdk_rx_next_from_etype (mb3, b3); - } - - if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD)) - { - dpdk_rx_error_from_mb (mb0, &next0, &error0); - dpdk_rx_error_from_mb (mb1, &next1, &error1); - dpdk_rx_error_from_mb (mb2, &next2, &error2); - dpdk_rx_error_from_mb (mb3, &next3, &error3); - b0->error = node->errors[error0]; - b1->error = node->errors[error1]; - b2->error = node->errors[error2]; - b3->error = node->errors[error3]; - } - else - { - b0->error = b1->error = node->errors[DPDK_ERROR_NONE]; - b2->error = b3->error = node->errors[DPDK_ERROR_NONE]; - } - - l3_offset0 = device_input_next_node_advance[next0]; - l3_offset1 = device_input_next_node_advance[next1]; - l3_offset2 = device_input_next_node_advance[next2]; - l3_offset3 = device_input_next_node_advance[next3]; - - b0->current_data = l3_offset0 + mb0->data_off; - b1->current_data = l3_offset1 + mb1->data_off; - b2->current_data = l3_offset2 + mb2->data_off; - b3->current_data = l3_offset3 + mb3->data_off; - - b0->current_data -= RTE_PKTMBUF_HEADROOM; - b1->current_data -= RTE_PKTMBUF_HEADROOM; - b2->current_data -= RTE_PKTMBUF_HEADROOM; - b3->current_data -= RTE_PKTMBUF_HEADROOM; - - b0->current_length = mb0->data_len - l3_offset0; - b1->current_length = mb1->data_len - l3_offset1; - b2->current_length = mb2->data_len - l3_offset2; - b3->current_length = mb3->data_len - l3_offset3; - - b0->flags = buffer_flags_template; - b1->flags = buffer_flags_template; - b2->flags = buffer_flags_template; - b3->flags = buffer_flags_template; - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b2)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b3)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b2)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b3)->sw_if_index[VLIB_TX] = (u32) ~ 0; - - n_rx_bytes += mb0->pkt_len; - n_rx_bytes += mb1->pkt_len; - n_rx_bytes += mb2->pkt_len; - n_rx_bytes += mb3->pkt_len; - - /* Process subsequent segments of multi-segment packets */ - if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) - { - dpdk_process_subseq_segs (vm, b0, mb0, fl); - dpdk_process_subseq_segs (vm, b1, mb1, fl); - dpdk_process_subseq_segs (vm, b2, mb2, fl); - dpdk_process_subseq_segs (vm, b3, mb3, fl); - } - - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... See main.c... - */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); - - /* Do we have any driver RX features configured on the interface? 
*/ - vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index, - &next0, &next1, &next2, &next3, - b0, b1, b2, b3, - l3_offset0, l3_offset1, - l3_offset2, l3_offset3); - - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - n_buffers -= 4; - mb_index += 4; - } - while (n_buffers > 0 && n_left_to_next > 0) - { - struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; - - ASSERT (mb0); - - b0 = vlib_buffer_from_rte_mbuf (mb0); - - /* Prefetch one next segment if it exists. */ - if (PREDICT_FALSE (mb0->nb_segs > 1)) - dpdk_prefetch_buffer (mb0->next); - - vlib_buffer_init_for_free_list (b0, fl); - - bi0 = vlib_get_buffer_index (vm, b0); - - to_next[0] = bi0; - to_next++; - n_left_to_next--; - - if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) - next0 = xd->per_interface_next_index; - else if (PREDICT_TRUE - ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) - next0 = dpdk_rx_next_from_mb (mb0, b0); - else - next0 = dpdk_rx_next_from_etype (mb0, b0); - - dpdk_rx_error_from_mb (mb0, &next0, &error0); - b0->error = node->errors[error0]; - - l3_offset0 = device_input_next_node_advance[next0]; - - b0->current_data = l3_offset0; - b0->current_data += mb0->data_off - RTE_PKTMBUF_HEADROOM; - b0->current_length = mb0->data_len - l3_offset0; - - b0->flags = buffer_flags_template; - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - n_rx_bytes += mb0->pkt_len; - - /* Process subsequent segments of multi-segment packets */ - dpdk_process_subseq_segs (vm, b0, mb0, fl); - - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... See main.c... - */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - - /* Do we have any driver RX features configured on the interface? */ - vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0, - b0, l3_offset0); - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - n_buffers--; - mb_index++; - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0)) - { - dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index], - vec_len (xd->d_trace_buffers[cpu_index])); - vlib_set_trace_count (vm, node, n_trace - - vec_len (xd->d_trace_buffers[cpu_index])); - } - - vlib_increment_combined_counter - (vnet_get_main ()->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, - cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); - - vnet_device_increment_rx_packets (cpu_index, mb_index); - - return mb_index; -} - -static inline void -poll_rate_limit (dpdk_main_t * dm) -{ - /* Limit the poll rate by sleeping for N msec between polls */ - if (PREDICT_FALSE (dm->poll_sleep != 0)) - { - struct timespec ts, tsrem; - - ts.tv_sec = 0; - ts.tv_nsec = 1000 * 1000 * dm->poll_sleep; /* 1ms */ - - while (nanosleep (&ts, &tsrem) < 0) - { - ts = tsrem; - } - } -} - -/** \brief Main DPDK input node - @node dpdk-input - - This is the main DPDK input node: across each assigned interface, - call rte_eth_rx_burst(...) or similar to obtain a vector of - packets to process. Handle early packet discard. Derive @c - vlib_buffer_t metadata from struct rte_mbuf metadata, - Depending on the resulting metadata: adjust b->current_data, - b->current_length and dispatch directly to - ip4-input-no-checksum, or ip6-input. 
Trace the packet if required. - - @param vm vlib_main_t corresponding to the current thread - @param node vlib_node_runtime_t - @param f vlib_frame_t input-node, not used. - - @par Graph mechanics: buffer metadata, next index usage - - @em Uses: - - struct rte_mbuf mb->ol_flags - - PKT_RX_IP_CKSUM_BAD - - RTE_ETH_IS_xxx_HDR(mb->packet_type) - - packet classification result - - @em Sets: - - b->error if the packet is to be dropped immediately - - b->current_data, b->current_length - - adjusted as needed to skip the L2 header in direct-dispatch cases - - vnet_buffer(b)->sw_if_index[VLIB_RX] - - rx interface sw_if_index - - vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0 - - required by ipX-lookup - - b->flags - - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc. - - Next Nodes: - - Static arcs to: error-drop, ethernet-input, - ip4-input-no-checksum, ip6-input, mpls-input - - per-interface redirection, controlled by - xd->per_interface_next_index -*/ - -static uword -dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - uword n_rx_packets = 0; - dpdk_device_and_queue_t *dq; - u32 cpu_index = os_get_cpu_number (); - - /* - * Poll all devices on this cpu for input/interrupts. - */ - /* *INDENT-OFF* */ - vec_foreach (dq, dm->devices_by_cpu[cpu_index]) - { - xd = vec_elt_at_index(dm->devices, dq->device); - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id); - } - /* *INDENT-ON* */ - - poll_rate_limit (dm); - - return n_rx_packets; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_input_node) = { - .function = dpdk_input, - .type = VLIB_NODE_TYPE_INPUT, - .name = "dpdk-input", - .sibling_of = "device-input", - - /* Will be enabled if/when hardware is detected. */ - .state = VLIB_NODE_STATE_DISABLED, - - .format_buffer = format_ethernet_header_with_length, - .format_trace = format_dpdk_rx_dma_trace, - - .n_errors = DPDK_N_ERROR, - .error_strings = dpdk_error_strings, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input); -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/qos_doc.md b/src/vnet/devices/dpdk/qos_doc.md deleted file mode 100644 index 7c064246..00000000 --- a/src/vnet/devices/dpdk/qos_doc.md +++ /dev/null @@ -1,411 +0,0 @@ -# QoS Hierarchical Scheduler {#qos_doc} - -The Quality-of-Service (QoS) scheduler performs egress-traffic management by -prioritizing the transmission of the packets of different service types and -subscribers based on the Service Level Agreements (SLAs). The QoS scheduler can -be enabled on one or more NIC output interfaces depending upon the -requirement. - - -## Overview - -The QoS scheduler supports a number of scheduling and shaping levels which -construct a hierarchical tree. The first level in the hierarchy is the port (i.e. -the physical interface), which constitutes the root node of the tree. The -subsequent level is the subport, which represents a group of -users/subscribers. The individual user/subscriber is represented by the pipe -at the next level. Each user can have different traffic types based on -criteria such as loss rate, jitter, and latency. These traffic types are -represented at the traffic-class level in the form of different traffic -classes. The last level contains a number of queues which are grouped together -to host the packets of the specific class type traffic.
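To make the depth of this hierarchy concrete, the sketch below flattens the (subport, pipe, traffic class, queue) coordinates into a single per-port queue index, using the default dimensions from the sample configuration later in this section (1 subport, 4096 pipes, 4 traffic classes, 4 queues per class). The helper name and the exact index layout are illustrative assumptions for this document, not something taken from a DPDK header:

```
#include <stdio.h>

/* Default HQoS dimensions, taken from the sample configuration below;
 * the macro and function names are illustrative only. */
#define HQOS_N_PIPES         4096
#define HQOS_N_TCS           4
#define HQOS_N_QUEUES_PER_TC 4

/* Flatten (subport, pipe, tc, queue) into one per-port queue index,
 * with the innermost level of the hierarchy varying fastest. */
static unsigned
hqos_flat_queue_index (unsigned subport, unsigned pipe, unsigned tc,
                       unsigned queue)
{
  return ((subport * HQOS_N_PIPES + pipe) * HQOS_N_TCS + tc)
    * HQOS_N_QUEUES_PER_TC + queue;
}

int
main (void)
{
  /* 1 subport x 4096 pipes x 4 TCs x 4 queues = 65536 queues per port */
  printf ("%u queues per port\n",
          hqos_flat_queue_index (0, HQOS_N_PIPES - 1, HQOS_N_TCS - 1,
                                 HQOS_N_QUEUES_PER_TC - 1) + 1);
  return 0;
}
```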
- -The QoS scheduler implementation requires flow classification, enqueue and -dequeue operations. The flow classification is a mandatory stage for HQoS, where -incoming packets are classified by mapping the packet field information to a -5-tuple (HQoS subport, pipe, traffic class, queue within traffic class, and -color) and storing that information in the mbuf sched field. The enqueue operation -uses this information to determine the queue for storing the packet, and at -this stage, if the specific queue is full, QoS drops the packet. The dequeue -operation consists of scheduling the packet based on its length and available -credits, and handing over the scheduled packet to the output interface. - -For more information on the QoS Scheduler, please refer to the DPDK Programmer's Guide: -http://dpdk.org/doc/guides/prog_guide/qos_framework.html - - -### QoS Scheduler Parameters - -The following illustrates the default HQoS configuration for each 10GbE output -port: - -Single subport (subport 0): - - Subport rate set to 100% of port rate - - Each of the 4 traffic classes has rate set to 100% of port rate - -4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration: - - Pipe rate set to 1/4K of port rate - - Each of the 4 traffic classes has rate set to 100% of pipe rate - - Within each traffic class, the byte-level WRR weights for the 4 queues are set to 1:1:1:1 - - -#### Port configuration - -``` -port { - rate 1250000000 /* Assuming 10GbE port */ - frame_overhead 24 /* Overhead fields per Ethernet frame: - * 7B (Preamble) + - * 1B (Start of Frame Delimiter (SFD)) + - * 4B (Frame Check Sequence (FCS)) + - * 12B (Inter Frame Gap (IFG)) - */ - mtu 1522 /* Assuming Ethernet/IPv4 pkt (FCS not included) */ - n_subports_per_port 1 /* Number of subports per output interface */ - n_pipes_per_subport 4096 /* Number of pipes (users/subscribers) */ - queue_sizes 64 64 64 64 /* Packet queue size for each traffic class. - * All queues within the same pipe traffic class - * have the same size. Queues from different - * pipes serving the same traffic class have - * the same size. 
*/ -} -``` - - -#### Subport configuration - -``` -subport 0 { - tb_rate 1250000000 /* Subport level token bucket rate (bytes per second) */ - tb_size 1000000 /* Subport level token bucket size (bytes) */ - tc0_rate 1250000000 /* Subport level token bucket rate for traffic class 0 (bytes per second) */ - tc1_rate 1250000000 /* Subport level token bucket rate for traffic class 1 (bytes per second) */ - tc2_rate 1250000000 /* Subport level token bucket rate for traffic class 2 (bytes per second) */ - tc3_rate 1250000000 /* Subport level token bucket rate for traffic class 3 (bytes per second) */ - tc_period 10 /* Time interval for refilling the token bucket associated with traffic class (Milliseconds) */ - pipe 0 4095 profile 0 /* pipes (users/subscribers) configured with pipe profile 0 */ -} -``` - - -#### Pipe configuration - -``` -pipe_profile 0 { - tb_rate 305175 /* Pipe level token bucket rate (bytes per second) */ - tb_size 1000000 /* Pipe level token bucket size (bytes) */ - tc0_rate 305175 /* Pipe level token bucket rate for traffic class 0 (bytes per second) */ - tc1_rate 305175 /* Pipe level token bucket rate for traffic class 1 (bytes per second) */ - tc2_rate 305175 /* Pipe level token bucket rate for traffic class 2 (bytes per second) */ - tc3_rate 305175 /* Pipe level token bucket rate for traffic class 3 (bytes per second) */ - tc_period 40 /* Time interval for refilling the token bucket associated with traffic class at pipe level (Milliseconds) */ - tc3_oversubscription_weight 1 /* Weight traffic class 3 oversubscription */ - tc0_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 0 */ - tc1_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 1 */ - tc2_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 2 */ - tc3_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 3 */ -} -``` - - -#### Random Early Detection (RED) parameters per traffic class and color (Green / Yellow / Red) - -``` -red { - tc0_wred_min 48 40 32 /* Minimum threshold for traffic class 0 queue (min_th) in number of packets */ - tc0_wred_max 64 64 64 /* Maximum threshold for traffic class 0 queue (max_th) in number of packets */ - tc0_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 0 queue (maxp = 1 / maxp_inv) */ - tc0_wred_weight 9 9 9 /* Traffic Class 0 queue weight */ - tc1_wred_min 48 40 32 /* Minimum threshold for traffic class 1 queue (min_th) in number of packets */ - tc1_wred_max 64 64 64 /* Maximum threshold for traffic class 1 queue (max_th) in number of packets */ - tc1_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 1 queue (maxp = 1 / maxp_inv) */ - tc1_wred_weight 9 9 9 /* Traffic Class 1 queue weight */ - tc2_wred_min 48 40 32 /* Minimum threshold for traffic class 2 queue (min_th) in number of packets */ - tc2_wred_max 64 64 64 /* Maximum threshold for traffic class 2 queue (max_th) in number of packets */ - tc2_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 2 queue (maxp = 1 / maxp_inv) */ - tc2_wred_weight 9 9 9 /* Traffic Class 2 queue weight */ - tc3_wred_min 48 40 32 /* Minimum threshold for traffic class 3 queue (min_th) in number of packets */ - tc3_wred_max 64 64 64 /* Maximum threshold for traffic class 3 queue (max_th) in number of packets */ - tc3_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 3 queue (maxp = 1 / maxp_inv) */ - tc3_wred_weight 9 9 9 /* Traffic Class 3 queue weight */ -} -``` - - 
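Read together, each (min_th, max_th, maxp_inv) triple above drives the textbook RED drop decision: below min_th every packet is enqueued, at or above max_th every packet is dropped, and in between the drop probability ramps linearly up to maxp = 1/maxp_inv. The sketch below is an illustrative reading of those parameters only, not the librte_red implementation, which additionally smooths the instantaneous queue size into a weighted moving average using the configured wred_weight:

```
#include <stdbool.h>
#include <stdlib.h>

/* Illustrative RED drop decision for one traffic class and color,
 * driven by the (min_th, max_th, maxp_inv) values configured above;
 * avg_qlen is assumed to be the averaged queue depth in packets. */
static bool
red_would_drop (unsigned avg_qlen, unsigned min_th, unsigned max_th,
                unsigned maxp_inv)
{
  if (avg_qlen < min_th)        /* below min_th: always enqueue */
    return false;
  if (avg_qlen >= max_th)       /* at or above max_th: always drop */
    return true;
  /* between the thresholds: probability ramps linearly from 0 up to
   * maxp = 1 / maxp_inv as avg_qlen approaches max_th */
  double p = (1.0 / (double) maxp_inv)
    * (double) (avg_qlen - min_th) / (double) (max_th - min_th);
  return ((double) rand () / (double) RAND_MAX) < p;
}
```

For example, with the green-packet values for traffic class 0 above (min_th 48, max_th 64, maxp_inv 10), a packet arriving at an average queue depth of 56 packets is dropped with probability 0.1 * 8/16 = 0.05.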
-### DPDK QoS Scheduler Integration in VPP - -The Hierarchical Quality-of-Service (HQoS) scheduler object could be seen as -part of the logical NIC output interface. To enable HQoS on a specific output -interface, the vpp startup.conf file has to be configured accordingly. The output -interface that requires HQoS should have the "hqos" parameter specified in the dpdk -section. Another optional parameter "hqos-thread" has been defined which can -be used to associate the output interface with a specific hqos thread. In the cpu -section of the config file, "corelist-hqos-threads" is introduced to assign -logical cpu cores to run the HQoS threads. An HQoS thread can run multiple HQoS -objects, each associated with different output interfaces. Instead of writing -packets to the NIC TX queue directly, all worker threads write the packets to -software queues. The hqos threads read the software queues, and enqueue the -packets to HQoS objects, as well as dequeue packets from HQoS objects and -write them to NIC output interfaces. The worker threads need to be able to -send the packets to any output interface; therefore, each HQoS object -associated with a NIC output interface should have as many software queues as -there are worker threads. - -The following illustrates a sample startup configuration file with 4x worker -threads feeding 2x hqos threads, each handling one QoS scheduler for 1x output -interface. - -``` -dpdk { - socket-mem 16384,16384 - - dev 0000:02:00.0 { - num-rx-queues 2 - hqos - } - dev 0000:06:00.0 { - num-rx-queues 2 - hqos - } - - num-mbufs 1000000 -} - -cpu { - main-core 0 - corelist-workers 1, 2, 3, 4 - corelist-hqos-threads 5, 6 -} -``` - - -### QoS scheduler CLI Commands - -Each QoS scheduler instance is initialised with default parameters required to -configure the hqos port, subport, pipe and queues. Some of the parameters can be -reconfigured at run time through CLI commands. - - -#### Configuration - -The following commands can be used to configure QoS scheduler parameters. - -The command below can be used to set the subport level parameters such as -token bucket rate (bytes per second), token bucket size (bytes), traffic -class rates (bytes per second) and token update period (milliseconds). - -``` -set dpdk interface hqos subport subport [rate ] - [bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ] -``` - -For setting the pipe profile, the following command can be used. - -``` -set dpdk interface hqos pipe subport pipe - profile -``` - -To assign a QoS scheduler instance to a specific thread, the following command can -be used. - -``` -set dpdk interface hqos placement thread -``` - -The command below is used to set the packet fields required for classifying -the incoming packet. As a result of the classification process, packet field -information will be mapped to a 5-tuple (subport, pipe, traffic class, queue, -color) and stored in the packet mbuf. - -``` -set dpdk interface hqos pktfield id subport|pipe|tc offset - mask -``` - -The DSCP table entries used for identifying the traffic class and queue can be set using the command below: - -``` -set dpdk interface hqos tctbl entry tc queue -``` - - -#### Show Command - -The QoS Scheduler configuration can be displayed using the command below. 
- -``` - vpp# show dpdk interface hqos TenGigabitEthernet2/0/0 - Thread: - Input SWQ size = 4096 packets - Enqueue burst size = 256 packets - Dequeue burst size = 220 packets - Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport) - Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe) - Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc) - Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...) - [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - [48 .. 63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - Port: - Rate = 1250000000 bytes/second - MTU = 1514 bytes - Frame overhead = 24 bytes - Number of subports = 1 - Number of pipes per subport = 4096 - Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets - Number of pipe profiles = 1 - Subport 0: - Rate = 120000000 bytes/second - Token bucket size = 1000000 bytes - Traffic class rate: TC0 = 120000000, TC1 = 120000000, TC2 = 120000000, TC3 = 120000000 bytes/second - TC period = 10 milliseconds - Pipe profile 0: - Rate = 305175 bytes/second - Token bucket size = 1000000 bytes - Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second - TC period = 40 milliseconds - TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 -``` - -The QoS Scheduler placement over the logical cpu cores can be displayed using -the command below. - -``` - vpp# show dpdk interface hqos placement - Thread 5 (vpp_hqos-threads_0 at lcore 5): - TenGigabitEthernet2/0/0 queue 0 - Thread 6 (vpp_hqos-threads_1 at lcore 6): - TenGigabitEthernet4/0/1 queue 0 -``` - - -### QoS Scheduler Binary APIs - -This section explains the available binary APIs for configuring QoS scheduler -parameters at run time. - -The following API can be used to set the pipe profile of a pipe that belongs -to a given subport: - -``` -sw_interface_set_dpdk_hqos_pipe rx | sw_if_index - subport pipe profile -``` - -The data structures used to set the pipe profile parameters are as follows: - -``` - /** \\brief DPDK interface HQoS pipe profile set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param pipe - pipe ID within its subport - @param profile - pipe profile ID - */ - define sw_interface_set_dpdk_hqos_pipe { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 pipe; - u32 profile; - }; - - /** \\brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code - */ - define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; - }; -``` - -The following API can be used to set the subport level parameters, for -example, token bucket rate (bytes per second), token bucket size (bytes), -traffic class rate (bytes per second) and token update period. 
- -``` -sw_interface_set_dpdk_hqos_subport rx | sw_if_index - subport [rate ] [bktsize ] - [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ] -``` - -The data structures used to set the subport level parameters are as follows: - -``` - /** \\brief DPDK interface HQoS subport parameters set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param tb_rate - subport token bucket rate (measured in bytes/second) - @param tb_size - subport token bucket size (measured in credits) - @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) - @param tc_period - enforcement period for rates (measured in milliseconds) - */ - define sw_interface_set_dpdk_hqos_subport { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 tb_rate; - u32 tb_size; - u32 tc_rate[4]; - u32 tc_period; - }; - - /** \\brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code - */ - define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; - }; -``` - -The following API can be used to set the DSCP table entry. The DSCP table has -64 entries to map the packet DSCP field onto traffic class and hqos input -queue. - -``` -sw_interface_set_dpdk_hqos_tctbl rx | sw_if_index - entry tc queue -``` - -The data structures used for setting DSCP table entries are given below. - -``` - /** \\brief DPDK interface HQoS tctbl entry set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param entry - entry index ID - @param tc - traffic class (0 .. 3) - @param queue - traffic class queue (0 .. 3) - */ - define sw_interface_set_dpdk_hqos_tctbl { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 entry; - u32 tc; - u32 queue; - }; - - /** \\brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code - */ - define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; - }; -``` diff --git a/src/vnet/devices/dpdk/thread.c b/src/vnet/devices/dpdk/thread.c deleted file mode 100644 index 475dd142..00000000 --- a/src/vnet/devices/dpdk/thread.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static clib_error_t * -dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) -{ - int r; - r = rte_eal_remote_launch (fp, (void *) w, lcore_id); - if (r) - return clib_error_return (0, "Failed to launch thread %u", lcore_id); - return 0; -} - -static clib_error_t * -dpdk_thread_set_lcore (u32 thread, u16 lcore) -{ - return 0; -} - -static vlib_thread_callbacks_t callbacks = { - .vlib_launch_thread_cb = &dpdk_launch_thread, - .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore, -}; - -static clib_error_t * -dpdk_thread_init (vlib_main_t * vm) -{ - vlib_thread_cb_register (vm, &callbacks); - return 0; -} - -VLIB_INIT_FUNCTION (dpdk_thread_init); - -/** @endcond */ -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index 3083b614..dd23a909 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -328,17 +328,6 @@ typedef struct int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_intf_details_t ** out_vuids); -// CLI commands to be used from dpdk -clib_error_t *vhost_user_connect_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd); -clib_error_t *vhost_user_delete_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd); -clib_error_t *show_vhost_user_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd); - #endif /* diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index 49b475cf..e37bccee 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -79,11 +79,7 @@ static void vl_api_ipsec_spd_add_del_t_handler vl_api_ipsec_spd_add_del_reply_t *rmp; int rv; -#if DPDK > 0 rv = ipsec_add_del_spd (vm, ntohl (mp->spd_id), mp->is_add); -#else - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif REPLY_MACRO (VL_API_IPSEC_SPD_ADD_DEL_REPLY); #endif diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c index e15faeb8..4a65b024 100644 --- a/src/vnet/pg/input.c +++ b/src/vnet/pg/input.c @@ -1212,10 +1212,10 @@ pg_stream_fill_helper (pg_main_t * pg, /* * Historically, the pg maintained its own free lists and - * device drivers tx paths would return pkts. With the DPDK, - * that doesn't happen. + * device drivers tx paths would return pkts. 
*/ - if (DPDK == 0 && !(s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE)) + if (vm->buffer_main->extern_buffer_mgmt == 0 && + !(s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE)) f->buffer_init_function = pg_buffer_init; f->buffer_init_function_opaque = (s - pg->streams) | ((bi - s->buffer_indices) << 24); @@ -1238,7 +1238,7 @@ pg_stream_fill_helper (pg_main_t * pg, n_alloc = n_allocated; /* Reinitialize buffers */ - if (DPDK == 0 || CLIB_DEBUG > 0 + if (vm->buffer_main->extern_buffer_mgmt == 0 || CLIB_DEBUG > 0 || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE)) init_buffers_inline (vm, s, @@ -1246,7 +1246,8 @@ pg_stream_fill_helper (pg_main_t * pg, n_alloc, (bi - s->buffer_indices) * s->buffer_bytes /* data offset */ , s->buffer_bytes, /* set_data */ - DPDK == 1 || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE) != 0); + vm->buffer_main->extern_buffer_mgmt != 0 + || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE) != 0); if (next_buffers) pg_set_next_buffer_pointers (pg, s, buffers, next_buffers, n_alloc); diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c index c46875e1..560c4b07 100644 --- a/src/vnet/pg/stream.c +++ b/src/vnet/pg/stream.c @@ -442,9 +442,8 @@ pg_stream_add (pg_main_t * pg, pg_stream_t * s_init) pg_buffer_index_t *bi; int n; -#if DPDK > 0 - s->buffer_bytes = VLIB_BUFFER_DATA_SIZE; -#endif + if (vm->buffer_main->extern_buffer_mgmt) + s->buffer_bytes = VLIB_BUFFER_DATA_SIZE; if (!s->buffer_bytes) s->buffer_bytes = s->max_packet_bytes; diff --git a/src/vnet/replication.c b/src/vnet/replication.c index 02755195..86d922b5 100644 --- a/src/vnet/replication.c +++ b/src/vnet/replication.c @@ -214,9 +214,9 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl) b0->flags |= VLIB_BUFFER_IS_RECYCLED; #if (CLIB_DEBUG > 0) -#if DPDK == 0 - vlib_buffer_set_known_state (vm, bi0, VLIB_BUFFER_KNOWN_ALLOCATED); -#endif + if (vm->buffer_main->extern_buffer_mgmt == 0) + vlib_buffer_set_known_state (vm, bi0, + VLIB_BUFFER_KNOWN_ALLOCATED); #endif /* If buffer is traced, mark frame as traced */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index c4075db6..9d3abae5 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -30,9 +30,6 @@ #endif /* included_from_layer_3 */ #include -#if DPDK > 0 -#include -#endif #include #include #include diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 4cc6aa73..3871601b 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -237,58 +237,6 @@ static void *vl_api_sw_interface_set_l2_bridge_t_print FINISH; } -#if DPDK > 0 -static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = format (s, "subport %u pipe %u profile %u ", - ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile)); - - FINISH; -} - -static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = - format (s, - "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u", - ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size), - ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]), - ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]), - 
ntohl (mp->tc_period)); - - FINISH; -} - -static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = format (s, "entry %u tc %u queue %u", - ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue)); - - FINISH; -} -#endif - static void *vl_api_bridge_domain_add_del_t_print (vl_api_bridge_domain_add_del_t * mp, void *handle) { @@ -3036,18 +2984,6 @@ vl_msg_api_custom_dump_configure (api_main_t * am) = (void *) vl_api_##f##_t_print; foreach_custom_print_function; #undef _ - -#if DPDK > 0 - /* - * manually add DPDK hqos print handlers - */ - am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE] = - (void *) vl_api_sw_interface_set_dpdk_hqos_pipe_t_print; - am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT] = - (void *) vl_api_sw_interface_set_dpdk_hqos_subport_t_print; - am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL] = - (void *) vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print; -#endif } /* diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c index b28608f0..610f40ed 100644 --- a/src/vpp/api/gmon.c +++ b/src/vpp/api/gmon.c @@ -137,7 +137,8 @@ gmon_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) *gm->vector_rate_ptr = vector_rate; now = vlib_time_now (vm); dt = now - last_runtime; - input_packets = vnet_get_aggregate_rx_packets (); + // TODO + //input_packets = vnet_get_aggregate_rx_packets (); *gm->input_rate_ptr = (f64) (input_packets - last_input_packets) / dt; last_runtime = now; last_input_packets = input_packets; diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 2d6e4f37..7f9c2038 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -41,7 +41,6 @@ * SESSION APIs: .../vnet/session/{session.api session_api.c} * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c} * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c} - * DPDK APIs: see ... /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} * CLASSIFY APIs: see ... /src/vnet/classify/{classify.api, classify_api.c} * FLOW APIs: see ... /src/vnet/flow/{flow.api, flow_api.c} * DHCP APIs: see ... /src/vnet/dhcp/{dhcpk.api, dhcp_api.c} diff --git a/src/vpp/app/l2t.c b/src/vpp/app/l2t.c deleted file mode 100644 index e1eda155..00000000 --- a/src/vpp/app/l2t.c +++ /dev/null @@ -1,562 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include - -#if DPDK == 0 -#include -#else -#include -#endif - -#include -#include -#include - -l2t_main_t l2t_main; - -/* $$$$ unused? 
- * get_interface_ethernet_address - * paints the ethernet address for a given interface - * into the supplied destination - */ -void -get_interface_ethernet_address (l2t_main_t * lm, u8 * dst, u32 sw_if_index) -{ - ethernet_main_t *em = ethernet_get_main (lm->vlib_main); - ethernet_interface_t *ei; - vnet_hw_interface_t *hi; - - hi = vnet_get_sup_hw_interface (lm->vnet_main, sw_if_index); - ei = pool_elt_at_index (em->interfaces, hi->hw_instance); - clib_memcpy (dst, ei->address, sizeof (ei->address)); -} - -/* packet trace format function */ -u8 * -format_l2t_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - l2t_trace_t *t = va_arg (*args, l2t_trace_t *); - - if (t->is_user_to_network) - s = format (s, "L2T: %U (client) -> %U (our) session %d", - format_ip6_address, &t->client_address, - format_ip6_address, &t->our_address, t->session_index); - else - s = format (s, "L2T: %U (our) -> %U (client) session %d)", - format_ip6_address, &t->our_address, - format_ip6_address, &t->client_address, t->session_index); - return s; -} - -u8 * -format_l2t_session (u8 * s, va_list * args) -{ - l2t_session_t *session = va_arg (*args, l2t_session_t *); - l2t_main_t *lm = &l2t_main; - u32 counter_index; - vlib_counter_t v; - - s = format (s, "[%d] %U (our) %U (client) vlan-id %d rx_sw_if_index %d\n", - session - lm->sessions, - format_ip6_address, &session->our_address, - format_ip6_address, &session->client_address, - clib_net_to_host_u16 (session->vlan_id), session->sw_if_index); - - s = format (s, " local cookie %llx remote cookie %llx\n", - clib_net_to_host_u64 (session->local_cookie), - clib_net_to_host_u64 (session->remote_cookie)); - - if (session->cookie_flags & L2TP_COOKIE_ROLLOVER_LOCAL) - { - s = format (s, " local rollover cookie %llx\n", - clib_net_to_host_u64 (session->lcl_ro_cookie)); - } - - s = format (s, " local session-id %d remote session-id %d\n", - clib_net_to_host_u32 (session->local_session_id), - clib_net_to_host_u32 (session->remote_session_id)); - - s = format (s, " l2 specific sublayer %s\n", - session->l2_sublayer_present ? 
"preset" : "absent"); - - counter_index = - session_index_to_counter_index (session - lm->sessions, - SESSION_COUNTER_USER_TO_NETWORK); - - vlib_get_combined_counter (&lm->counter_main, counter_index, &v); - if (v.packets != 0) - s = format (s, " user-to-net: %llu pkts %llu bytes\n", - v.packets, v.bytes); - - vlib_get_combined_counter (&lm->counter_main, counter_index + 1, &v); - - if (v.packets != 0) - s = format (s, " net-to-user: %llu pkts %llu bytes\n", - v.packets, v.bytes); - return s; -} - -static clib_error_t * -show_session_summary_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_main_t *lm = &l2t_main; - - vlib_cli_output (vm, "%d active sessions\n", pool_elts (lm->sessions)); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (show_session_summary_command) = { - .path = "show session", - .short_help = "show session summary", - .function = show_session_summary_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_session_detail_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_session_t *session; - l2t_main_t *lm = &l2t_main; - - /* *INDENT-OFF* */ - pool_foreach (session, lm->sessions, - ({ - vlib_cli_output (vm, "%U", format_l2t_session, session); - })); - /* *INDENT-ON* */ - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (show_session_detail_command) = { - .path = "show session detail", - .short_help = "show session table detail", - .function = show_session_detail_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -test_counters_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - l2t_session_t *session; - l2t_main_t *lm = &l2t_main; - u32 session_index; - u32 counter_index; - u32 nincr = 0; - - /* *INDENT-OFF* */ - pool_foreach (session, lm->sessions, - ({ - session_index = session - lm->sessions; - counter_index = - session_index_to_counter_index (session_index, - SESSION_COUNTER_USER_TO_NETWORK); - vlib_increment_combined_counter (&lm->counter_main, - counter_index, - 1/*pkt*/, 1111 /*bytes*/); - vlib_increment_combined_counter (&lm->counter_main, - counter_index+1, - 1/*pkt*/, 2222 /*bytes*/); - nincr++; - })); - /* *INDENT-ON* */ - vlib_cli_output (vm, "Incremented %d active counters\n", nincr); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (test_counters_command) = { - .path = "test counters", - .short_help = "increment all active counters", - .function = test_counters_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -clear_counters_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - l2t_session_t *session; - l2t_main_t *lm = &l2t_main; - u32 session_index; - u32 counter_index; - u32 nincr = 0; - - /* *INDENT-OFF* */ - pool_foreach (session, lm->sessions, - ({ - session_index = session - lm->sessions; - counter_index = - session_index_to_counter_index (session_index, - SESSION_COUNTER_USER_TO_NETWORK); - vlib_zero_combined_counter (&lm->counter_main, counter_index); - vlib_zero_combined_counter (&lm->counter_main, counter_index+1); - nincr++; - })); - /* *INDENT-ON* */ - vlib_cli_output (vm, "Cleared %d active counters\n", nincr); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (clear_counters_command) = { - .path = "clear counters", - .short_help = "clear all active counters", - .function = clear_counters_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -l2tp_session_add_command_fn (vlib_main_t * vm, - 
unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - ip6_address_t client_address, our_address; - ip6_address_t *dst_address_copy, *src_address_copy; - unformat_input_t _line_input, *line_input = &_line_input; - u32 vlan_id; - u32 sw_if_index = (u32) ~ 0; - l2t_main_t *lm = &l2t_main; - l2t_session_t *s; - uword *p; - vnet_hw_interface_t *hi; - vnet_sw_interface_t *si; - u32 next_index; - uword vlan_and_sw_if_index_key; - u32 counter_index; - u64 local_cookie = (u64) ~ 0, remote_cookie = (u64) ~ 0; - u32 local_session_id = 1, remote_session_id = 1; - int our_address_set = 0, client_address_set = 0; - int l2_sublayer_present = 0; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "client %U", - unformat_ip6_address, &client_address)) - client_address_set = 1; - else if (unformat (line_input, "our %U", - unformat_ip6_address, &our_address)) - our_address_set = 1; - else if (unformat (line_input, "vlan %d", &vlan_id)) - ; - else if (unformat (line_input, "l2-interface %U", - unformat_vnet_sw_interface, - vnet_get_main (), &sw_if_index)) - ; - else if (unformat (line_input, "interface %U", - unformat_vnet_sw_interface, - vnet_get_main (), &sw_if_index)) - ; - else if (unformat (line_input, "local-cookie %llx", &local_cookie)) - ; - else if (unformat (line_input, "remote-cookie %llx", &remote_cookie)) - ; - else if (unformat (line_input, "local-session-id %d", - &local_session_id)) - ; - else if (unformat (line_input, "remote-session-id %d", - &remote_session_id)) - ; - else if (unformat (line_input, "l2-sublayer-present")) - l2_sublayer_present = 1; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - unformat_free (line_input); - return error; - } - } - - unformat_free (line_input); - - if (sw_if_index == (u32) ~ 0) - return clib_error_return (0, "l2-interface not specified"); - if (our_address_set == 0) - return clib_error_return (0, "our address not specified"); - if (client_address_set == 0) - return clib_error_return (0, "client address not specified"); - - remote_session_id = clib_host_to_net_u32 (remote_session_id); - local_session_id = clib_host_to_net_u32 (local_session_id); - - switch (lm->lookup_type) - { - case L2T_LOOKUP_SRC_ADDRESS: - p = hash_get_mem (lm->session_by_src_address, &client_address); - if (p) - return clib_error_return - (0, "Session w/ client address %U already exists", - format_ip6_address, &client_address); - break; - - case L2T_LOOKUP_DST_ADDRESS: - p = hash_get_mem (lm->session_by_dst_address, &our_address); - if (p) - return clib_error_return - (0, "Session w/ our address %U already exists", - format_ip6_address, &our_address); - break; - - case L2T_LOOKUP_SESSION_ID: - p = hash_get (lm->session_by_session_id, local_session_id); - if (p) - return clib_error_return - (0, - "Session w/ local session id %d already exists", - clib_net_to_host_u32 (local_session_id)); - break; - - default: - ASSERT (0); - } - - pool_get (lm->sessions, s); - memset (s, 0, sizeof (*s)); - clib_memcpy (&s->our_address, &our_address, sizeof (s->our_address)); - clib_memcpy (&s->client_address, &client_address, - sizeof (s->client_address)); - s->sw_if_index = sw_if_index; - s->vlan_id = clib_host_to_net_u16 (vlan_id); - s->local_cookie = clib_host_to_net_u64 (local_cookie); - l2tp_session_set_remote_cookie (s, remote_cookie); - 
s->local_session_id = local_session_id; - s->remote_session_id = remote_session_id; - s->l2_sublayer_present = l2_sublayer_present; - - hi = vnet_get_sup_hw_interface (lm->vnet_main, sw_if_index); - si = vnet_get_sup_sw_interface (lm->vnet_main, sw_if_index); - - next_index = vlib_node_add_next (vm, l2t_ip6_node.index, - hi->output_node_index); - s->l2_output_next_index = next_index; - s->l2_output_sw_if_index = si->sw_if_index; - - /* Setup hash table entries */ - switch (lm->lookup_type) - { - case L2T_LOOKUP_SRC_ADDRESS: - src_address_copy = clib_mem_alloc (sizeof (*src_address_copy)); - clib_memcpy (src_address_copy, &client_address, - sizeof (*src_address_copy)); - hash_set_mem (lm->session_by_src_address, src_address_copy, - s - lm->sessions); - break; - case L2T_LOOKUP_DST_ADDRESS: - dst_address_copy = clib_mem_alloc (sizeof (*dst_address_copy)); - clib_memcpy (dst_address_copy, &our_address, - sizeof (*dst_address_copy)); - hash_set_mem (lm->session_by_dst_address, dst_address_copy, - s - lm->sessions); - break; - case L2T_LOOKUP_SESSION_ID: - hash_set (lm->session_by_session_id, local_session_id, - s - lm->sessions); - break; - - default: - ASSERT (0); - } - - vlan_and_sw_if_index_key = ((uword) (s->vlan_id) << 32) | sw_if_index; - hash_set (lm->session_by_vlan_and_rx_sw_if_index, - vlan_and_sw_if_index_key, s - lm->sessions); - - /* validate counters */ - counter_index = - session_index_to_counter_index (s - lm->sessions, - SESSION_COUNTER_USER_TO_NETWORK); - vlib_validate_counter (&lm->counter_main, counter_index); - vlib_validate_counter (&lm->counter_main, counter_index + 1); - - /* Set promiscuous mode on the l2 interface */ - ethernet_set_flags (lm->vnet_main, hi->hw_if_index, - ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); - vnet_hw_interface_rx_redirect_to_node (lm->vnet_main, hi->hw_if_index, - l2t_l2_node.index); - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (l2tp_session_add_command) = { - .path = "l2tp session add", - .short_help = - "l2tp session add client our vlan local-cookie remote-cookie local-session remote-session l2-interface ", - .function = l2tp_session_add_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -l2tp_session_del_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_main_t *lm = &l2t_main; - u32 session_index; - l2t_session_t *s; - hash_pair_t *hp; - void *key; - uword vlan_and_sw_if_index_key; - - if (!unformat (input, "%d", &session_index)) - return clib_error_return (0, "missing session index: '%U'", - format_unformat_error, input); - - if (pool_is_free_index (lm->sessions, session_index)) - return clib_error_return (0, "session %d not in use", session_index); - - s = pool_elt_at_index (lm->sessions, session_index); - - switch (lm->lookup_type) - { - case L2T_LOOKUP_SRC_ADDRESS: - hp = hash_get_pair_mem (lm->session_by_src_address, &s->client_address); - if (hp) - { - key = (void *) (hp->key); - hash_unset_mem (lm->session_by_src_address, &s->client_address); - clib_mem_free (key); - } - else - clib_warning ("session %d src address key %U AWOL", - s - lm->sessions, - format_ip6_address, &s->client_address); - break; - - case L2T_LOOKUP_DST_ADDRESS: - hp = hash_get_pair_mem (lm->session_by_dst_address, &s->our_address); - if (hp) - { - key = (void *) (hp->key); - hash_unset_mem (lm->session_by_dst_address, &s->our_address); - clib_mem_free (key); - } - else - clib_warning ("session %d dst address key %U AWOL", - s - lm->sessions, format_ip6_address, &s->our_address); - break; - - case 
L2T_LOOKUP_SESSION_ID: - hash_unset (lm->session_by_session_id, s->local_session_id); - break; - - default: - ASSERT (0); - } - - vlan_and_sw_if_index_key = ((uword) (s->vlan_id) << 32) | s->sw_if_index; - - hash_unset (lm->session_by_vlan_and_rx_sw_if_index, - vlan_and_sw_if_index_key); - - pool_put (lm->sessions, s); - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (l2tp_session_del_command) = { - .path = "l2tp session delete", - .short_help = - "l2tp session delete ", - .function = l2tp_session_del_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -l2tp_session_cookie_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_main_t *lm = &l2t_main; - u32 session_index; - l2t_session_t *s; - u64 lcl_ro_cookie = (u64) ~ 0, rem_ro_cookie = (u64) ~ 0; - u8 cookie_flags = 0; - - if (!unformat (input, "%d", &session_index)) - return clib_error_return (0, "missing session index: '%U'", - format_unformat_error, input); - - if (pool_is_free_index (lm->sessions, session_index)) - return clib_error_return (0, "session %d not in use", session_index); - - s = pool_elt_at_index (lm->sessions, session_index); - - if (unformat (input, "commit")) - { - if (!s->cookie_flags) - { - return clib_error_return (0, "no rollover cookie ready to commit"); - } - else - { - l2tp_session_cookie_commit (s); - return 0; - } - } - if (!unformat (input, "rollover")) - return clib_error_return (0, "missing 'commit|rollover': '%U'", - format_unformat_error, input); - if (unformat (input, "local %llx", &lcl_ro_cookie)) - { - cookie_flags |= L2TP_COOKIE_ROLLOVER_LOCAL; - l2tp_session_set_local_rollover_cookie (s, lcl_ro_cookie); - } - if (unformat (input, "remote %llx", &rem_ro_cookie)) - { - cookie_flags |= L2TP_COOKIE_ROLLOVER_REMOTE; - l2tp_session_set_remote_cookie (s, rem_ro_cookie); - } - if (!cookie_flags) - return clib_error_return (0, "no rollover cookie specified"); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (l2tp_session_cookie_command) = { - .path = "l2tp session cookie", - .short_help = - "l2tp session cookie commit|rollover [local ] [remote ]", - .function = l2tp_session_cookie_command_fn, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vpp/app/l2t_l2.c b/src/vpp/app/l2t_l2.c deleted file mode 100644 index 07d30d9a..00000000 --- a/src/vpp/app/l2t_l2.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include - -#if DPDK == 0 -#include -#include -#include -#else -#include -#endif - -#include -#include -#include - -l2t_main_t l2t_main; - -/* Statistics (not really errors) */ -#define foreach_l2t_l2_error \ -_(NETWORK_TO_USER, "L2 network to user (ip6) pkts") - -static char *l2t_l2_error_strings[] = { -#define _(sym,string) string, - foreach_l2t_l2_error -#undef _ -}; - -typedef enum -{ -#define _(sym,str) L2T_L2_ERROR_##sym, - foreach_l2t_l2_error -#undef _ - L2T_L2_N_ERROR, -} l2t_l2_error_t; - -/* - * Packets go to ethernet-input when they don't match a mapping - */ -typedef enum -{ - L2T_L2_NEXT_DROP, - L2T_L2_NEXT_ETHERNET_INPUT, - L2T_L2_NEXT_IP6_LOOKUP, - L2T_L2_N_NEXT, -} l2t_l2_next_t; - -vlib_node_registration_t l2t_l2_node; - -#define NSTAGES 3 - -static inline void -stage0 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index); - vlib_prefetch_buffer_header (b, STORE); - CLIB_PREFETCH (b->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); -} - -static inline void -stage1 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - l2t_main_t *lm = &l2t_main; - ethernet_header_t *eh; - ethernet_vlan_header_t *vh; - u32 session_index; - uword *p; - uword vlan_and_sw_if_index_key; - - /* just in case, needed to test with the tun/tap device */ - vlib_buffer_reset (b); - - eh = vlib_buffer_get_current (b); - - /* Not a VLAN pkt? send to ethernet-input... */ - if (PREDICT_FALSE (eh->type != clib_host_to_net_u16 (0x8100))) - { - vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_ETHERNET_INPUT; - return; - } - vh = (ethernet_vlan_header_t *) (eh + 1); - - /* look up session */ - vlan_and_sw_if_index_key = ((uword) (vh->priority_cfi_and_id) << 32) - | vnet_buffer (b)->sw_if_index[VLIB_RX]; - - p = hash_get (lm->session_by_vlan_and_rx_sw_if_index, - vlan_and_sw_if_index_key); - - if (PREDICT_FALSE (p == 0)) - { - /* $$$ drop here if not for our MAC? */ - vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_ETHERNET_INPUT; - return; - } - else - { - session_index = p[0]; - } - - /* Remember mapping index, prefetch the mini counter */ - vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_IP6_LOOKUP; - vnet_buffer (b)->l2t.session_index = session_index; - - /* Each mapping has 2 x (pkt, byte) counters, hence the shift */ - CLIB_PREFETCH (lm->counter_main.mini + (p[0] << 1), CLIB_CACHE_LINE_BYTES, - STORE); -} - -static inline u32 -last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - l2t_main_t *lm = &l2t_main; - ethernet_header_t *eh = vlib_buffer_get_current (b); - vlib_node_t *n = vlib_get_node (vm, l2t_l2_node.index); - u32 node_counter_base_index = n->error_heap_index; - vlib_error_main_t *em = &vm->error_main; - l2tpv3_header_t *l2t; /* l2 header */ - ethernet_vlan_header_t *vh; /* 802.1q vlan header */ - u32 counter_index; - l2t_session_t *s; - ip6_header_t *ip6; - u16 payload_ethertype; - u8 dst_mac_address[6]; - u8 src_mac_address[6]; - u16 payload_length; - i32 backup; - - /* Other-than-output pkt? We're done... 
*/ - if (vnet_buffer (b)->l2t.next_index != L2T_L2_NEXT_IP6_LOOKUP) - return vnet_buffer (b)->l2t.next_index; - - vh = (ethernet_vlan_header_t *) (eh + 1); - - em->counters[node_counter_base_index + L2T_L2_ERROR_NETWORK_TO_USER] += 1; - - counter_index = - session_index_to_counter_index (vnet_buffer (b)->l2t.session_index, - SESSION_COUNTER_NETWORK_TO_USER); - - /* per-mapping byte stats include the ethernet header */ - vlib_increment_combined_counter (&lm->counter_main, counter_index, - 1 /* packet_increment */ , - vlib_buffer_length_in_chain (vm, b) + - sizeof (ethernet_header_t)); - - s = pool_elt_at_index (lm->sessions, vnet_buffer (b)->l2t.session_index); - - /* Save src/dst MAC addresses */ -#define _(i) dst_mac_address[i] = eh->dst_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ -#define _(i) src_mac_address[i] = eh->src_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ - - payload_ethertype = vh->type; - - /* Splice out the 802.1q vlan tag */ - vlib_buffer_advance (b, 4); - eh = vlib_buffer_get_current (b); - - /* restore src/dst MAC addresses */ -#define _(i) eh->dst_address[i] = dst_mac_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ -#define _(i) eh->src_address[i] = src_mac_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ - eh->type = payload_ethertype; - - /* Paint on an l2tpv3 hdr */ - backup = sizeof (*l2t); -#if 0 - /* back up 4 bytes less if no l2 sublayer */ - backup -= s->l2_sublayer_present ? 0 : 4; -#endif - - vlib_buffer_advance (b, -backup); - l2t = vlib_buffer_get_current (b); - - l2t->session_id = s->remote_session_id; - l2t->cookie = s->remote_cookie; - -#if 0 - if (s->l2_sublayer_present) - l2t->l2_specific_sublayer = 0; -#endif - - /* Paint on an ip6 header */ - vlib_buffer_advance (b, -(sizeof (*ip6))); - ip6 = vlib_buffer_get_current (b); - - ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0x6 << 28); - - /* calculate ip6 payload length */ - payload_length = vlib_buffer_length_in_chain (vm, b); - payload_length -= sizeof (*ip6); - - ip6->payload_length = clib_host_to_net_u16 (payload_length); - ip6->protocol = 0x73; /* l2tpv3 */ - ip6->hop_limit = 0xff; - ip6->src_address.as_u64[0] = s->our_address.as_u64[0]; - ip6->src_address.as_u64[1] = s->our_address.as_u64[1]; - ip6->dst_address.as_u64[0] = s->client_address.as_u64[0]; - ip6->dst_address.as_u64[1] = s->client_address.as_u64[1]; - - return L2T_L2_NEXT_IP6_LOOKUP; -} - -#include - -static uword -l2t_l2_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return dispatch_pipeline (vm, node, frame); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (l2t_l2_node) = { - .function = l2t_l2_node_fn, - .name = "l2t-l2-input", - .vector_size = sizeof (u32), - .format_trace = format_l2t_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(l2t_l2_error_strings), - .error_strings = l2t_l2_error_strings, - - .n_next_nodes = L2T_L2_N_NEXT, - - /* edit / add dispositions here */ - .next_nodes = { - [L2T_L2_NEXT_IP6_LOOKUP] = "ip6-lookup", - [L2T_L2_NEXT_ETHERNET_INPUT] = "ethernet-input", - [L2T_L2_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (l2t_l2_node, l2t_l2_node_fn); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ -- cgit 1.2.3-korg From a1a093d4e46e38503332a97ad216f80053a15f2b Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Thu, 2 Mar 2017 13:13:23 -0500 Subject: Clean up binary api message handler registration 
issues Removed a fair number of "BUG" message handlers, due to conflicts with actual message handlers in api_format.c. Vpp itself had no business receiving certain messages, up to the point where we started building in relevant code from vpp_api_test. Eliminated all but one duplicate registration complaint. That one needs attention from the vxlan team since the duplicated handlers have diverged. Change-Id: Iafce5429d2f906270643b4ea5f0130e20beb4d1d Signed-off-by: Dave Barach --- src/vat/api_format.c | 43 ++++++- src/vlib/unix/input.c | 31 ++++- src/vlibapi/api.h | 15 +++ src/vlibapi/api_shared.c | 4 + src/vnet/classify/classify_api.c | 8 -- src/vnet/devices/virtio/vhost_user_api.c | 10 +- src/vnet/dhcp/dhcp_api.c | 8 -- src/vnet/interface_api.c | 7 - src/vnet/ip/ip_api.c | 83 ------------ src/vnet/l2/l2_api.c | 22 ---- src/vnet/mpls/mpls_api.c | 28 +--- src/vpp/api/api.c | 211 ------------------------------- src/vpp/api/api_main.c | 1 - src/vpp/stats/stats.c | 7 - 14 files changed, 81 insertions(+), 397 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 1321bade..52436917 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -944,6 +944,7 @@ static void vl_api_sw_interface_details_t_handler_json } } +#if VPP_API_TEST_BUILTIN == 0 static void vl_api_sw_interface_set_flags_t_handler (vl_api_sw_interface_set_flags_t * mp) { @@ -954,6 +955,7 @@ static void vl_api_sw_interface_set_flags_t_handler mp->admin_up_down ? "admin-up" : "admin-down", mp->link_up_down ? "link-up" : "link-down"); } +#endif static void vl_api_sw_interface_set_flags_t_handler_json (vl_api_sw_interface_set_flags_t * mp) @@ -4009,7 +4011,6 @@ foreach_standard_reply_retval_handler; #define foreach_vpe_api_reply_msg \ _(CREATE_LOOPBACK_REPLY, create_loopback_reply) \ _(SW_INTERFACE_DETAILS, sw_interface_details) \ -_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ _(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply) \ _(CONTROL_PING_REPLY, control_ping_reply) \ _(CLI_REPLY, cli_reply) \ @@ -4126,11 +4127,6 @@ _(IKEV2_INITIATE_REKEY_CHILD_SA_REPLY, ikev2_initiate_rekey_child_sa_reply) \ _(DELETE_LOOPBACK_REPLY, delete_loopback_reply) \ _(BD_IP_MAC_ADD_DEL_REPLY, bd_ip_mac_add_del_reply) \ _(DHCP_COMPL_EVENT, dhcp_compl_event) \ -_(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ -_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ -_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ -_(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \ -_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) \ _(MAP_ADD_DOMAIN_REPLY, map_add_domain_reply) \ _(MAP_DEL_DOMAIN_REPLY, map_del_domain_reply) \ _(MAP_ADD_DEL_RULE_REPLY, map_add_del_rule_reply) \ @@ -4232,6 +4228,14 @@ _(SW_INTERFACE_SET_MTU_REPLY, sw_interface_set_mtu_reply) \ _(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) +#define foreach_standalone_reply_msg \ +_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ +_(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ +_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ +_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ +_(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \ +_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) + typedef struct { u8 *name; @@ -15425,7 +15429,15 @@ api_af_packet_create (vat_main_t * vam) vec_free (host_if_name); S (mp); - W2 (ret, fprintf (vam->ofp, " new sw_if_index = %d ", vam->sw_if_index)); + + /* *INDENT-OFF* */ + W2 (ret, + ({ + if (ret == 0) + fprintf (vam->ofp ? 
vam->ofp : stderr, + " new sw_if_index = %d\n", vam->sw_if_index); + })); + /* *INDENT-ON* */ return ret; } @@ -18417,6 +18429,9 @@ _(unset, "usage: unset ") } \ } foreach_vpe_api_reply_msg; +#if VPP_API_TEST_BUILTIN == 0 +foreach_standalone_reply_msg; +#endif #undef _ void @@ -18430,6 +18445,9 @@ vat_api_hookup (vat_main_t * vam) vl_api_##n##_t_print, \ sizeof(vl_api_##n##_t), 1); foreach_vpe_api_reply_msg; +#if VPP_API_TEST_BUILTIN == 0 + foreach_standalone_reply_msg; +#endif #undef _ #if (VPP_API_TEST_BUILTIN==0) @@ -18463,6 +18481,17 @@ vat_api_hookup (vat_main_t * vam) #undef _ } +#if VPP_API_TEST_BUILTIN +static clib_error_t * +vat_api_hookup_shim (vlib_main_t * vm) +{ + vat_api_hookup (&vat_main); + return 0; +} + +VLIB_API_INIT_FUNCTION (vat_api_hookup_shim); +#endif + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index 07096ed2..7b4183a4 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -66,6 +66,7 @@ linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) unix_main_t *um = &unix_main; linux_epoll_main_t *em = &linux_epoll_main; struct epoll_event e; + int op; memset (&e, 0, sizeof (e)); @@ -76,13 +77,29 @@ linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) e.events |= EPOLLET; e.data.u32 = f - um->file_pool; - if (epoll_ctl (em->epoll_fd, - (update_type == UNIX_FILE_UPDATE_ADD - ? EPOLL_CTL_ADD - : (update_type == UNIX_FILE_UPDATE_MODIFY - ? EPOLL_CTL_MOD - : EPOLL_CTL_DEL)), f->file_descriptor, &e) < 0) - clib_warning ("epoll_ctl"); + op = -1; + + switch (update_type) + { + case UNIX_FILE_UPDATE_ADD: + op = EPOLL_CTL_ADD; + break; + + case UNIX_FILE_UPDATE_MODIFY: + op = EPOLL_CTL_MOD; + break; + + case UNIX_FILE_UPDATE_DELETE: + op = EPOLL_CTL_DEL; + break; + + default: + clib_warning ("unknown update_type %d", update_type); + return; + } + + if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0) + clib_unix_warning ("epoll_ctl"); } static uword diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index fcb101d7..b40ece15 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -271,6 +271,21 @@ vlib_node_t **vlib_node_unserialize (u8 * vector); #define VLIB_API_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,api_init) +/* Call given init function: used for init function dependencies. */ +#define vlib_call_api_init_function(vm, x) \ + ({ \ + extern vlib_init_function_t * _VLIB_INIT_FUNCTION_SYMBOL (x,api_init); \ + vlib_init_function_t * _f = _VLIB_INIT_FUNCTION_SYMBOL (x,api_init); \ + clib_error_t * _error = 0; \ + if (! 
hash_get (vm->init_functions_called, _f)) \ + { \ + hash_set1 (vm->init_functions_called, _f); \ + _error = _f (vm); \ + } \ + _error; \ + }) + + #endif /* included_api_h */ /* diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 1a2740e2..79921afe 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -667,6 +667,10 @@ vl_msg_api_config (vl_msg_api_msg_config_t * c) foreach_msg_api_vector; #undef _ + if (am->msg_names[c->id]) + clib_warning ("BUG: multiple registrations of 'vl_api_%s_t_handler'", + c->name); + am->msg_names[c->id] = c->name; am->msg_handlers[c->id] = c->handler; am->msg_cleanup_handlers[c->id] = c->cleanup; diff --git a/src/vnet/classify/classify_api.c b/src/vnet/classify/classify_api.c index 77a8b434..24c7a2b9 100644 --- a/src/vnet/classify/classify_api.c +++ b/src/vnet/classify/classify_api.c @@ -53,7 +53,6 @@ _(CLASSIFY_TABLE_IDS,classify_table_ids) \ _(CLASSIFY_TABLE_BY_INTERFACE, classify_table_by_interface) \ _(CLASSIFY_TABLE_INFO,classify_table_info) \ _(CLASSIFY_SESSION_DUMP,classify_session_dump) \ -_(CLASSIFY_SESSION_DETAILS,classify_session_details) \ _(POLICER_CLASSIFY_SET_INTERFACE, policer_classify_set_interface) \ _(POLICER_CLASSIFY_DUMP, policer_classify_dump) \ _(FLOW_CLASSIFY_SET_INTERFACE, flow_classify_set_interface) \ @@ -356,13 +355,6 @@ vl_api_classify_table_info_t_handler (vl_api_classify_table_info_t * mp) vl_msg_api_send_shmem (q, (u8 *) & rmp); } -static void -vl_api_classify_session_details_t_handler (vl_api_classify_session_details_t * - mp) -{ - clib_warning ("BUG"); -} - static void send_classify_session_details (unix_shared_memory_queue_t * q, u32 table_id, diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c index dd517c26..8dbd032b 100644 --- a/src/vnet/devices/virtio/vhost_user_api.c +++ b/src/vnet/devices/virtio/vhost_user_api.c @@ -46,8 +46,7 @@ _(CREATE_VHOST_USER_IF, create_vhost_user_if) \ _(MODIFY_VHOST_USER_IF, modify_vhost_user_if) \ _(DELETE_VHOST_USER_IF, delete_vhost_user_if) \ -_(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) \ -_(SW_INTERFACE_VHOST_USER_DETAILS, sw_interface_vhost_user_details) +_(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) /* * WARNING: replicated pending api refactor completion @@ -148,13 +147,6 @@ vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp) } } -static void - vl_api_sw_interface_vhost_user_details_t_handler - (vl_api_sw_interface_vhost_user_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_sw_interface_vhost_user_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c index bdf02cae..ce34f6a4 100644 --- a/src/vnet/dhcp/dhcp_api.c +++ b/src/vnet/dhcp/dhcp_api.c @@ -46,7 +46,6 @@ #define foreach_vpe_api_msg \ _(DHCP_PROXY_CONFIG,dhcp_proxy_config) \ _(DHCP_PROXY_DUMP,dhcp_proxy_dump) \ -_(DHCP_PROXY_DETAILS,dhcp_proxy_details) \ _(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \ _(DHCP_CLIENT_CONFIG, dhcp_client_config) @@ -158,13 +157,6 @@ dhcp_send_details (fib_protocol_t proto, vl_msg_api_send_shmem (q, (u8 *) & mp); } - -static void -vl_api_dhcp_proxy_details_t_handler (vl_api_dhcp_proxy_details_t * mp) -{ - clib_warning ("BUG"); -} - void dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, u8 is_ipv6, u8 * host_address, u8 * router_address, diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 63f7cad4..60cd6d40 100644 --- a/src/vnet/interface_api.c +++ 
b/src/vnet/interface_api.c @@ -50,7 +50,6 @@ _(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ _(SW_INTERFACE_SET_MTU, sw_interface_set_mtu) \ _(WANT_INTERFACE_EVENTS, want_interface_events) \ _(SW_INTERFACE_DUMP, sw_interface_dump) \ -_(SW_INTERFACE_DETAILS, sw_interface_details) \ _(SW_INTERFACE_ADD_DEL_ADDRESS, sw_interface_add_del_address) \ _(SW_INTERFACE_SET_TABLE, sw_interface_set_table) \ _(SW_INTERFACE_GET_TABLE, sw_interface_get_table) \ @@ -684,12 +683,6 @@ out: REPLY_MACRO (VL_API_SW_INTERFACE_TAG_ADD_DEL_REPLY); } -static void -vl_api_sw_interface_details_t_handler (vl_api_sw_interface_details_t * mp) -{ - clib_warning ("BUG"); -} - /* * vpe_api_hookup * Add vpe's API message handlers to the table. diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index 49d941c2..ab164a5f 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -59,17 +59,12 @@ #define foreach_ip_api_msg \ _(IP_FIB_DUMP, ip_fib_dump) \ -_(IP_FIB_DETAILS, ip_fib_details) \ _(IP6_FIB_DUMP, ip6_fib_dump) \ -_(IP6_FIB_DETAILS, ip6_fib_details) \ _(IP_MFIB_DUMP, ip_mfib_dump) \ -_(IP_MFIB_DETAILS, ip_mfib_details) \ _(IP6_MFIB_DUMP, ip6_mfib_dump) \ -_(IP6_MFIB_DETAILS, ip6_mfib_details) \ _(IP_NEIGHBOR_DUMP, ip_neighbor_dump) \ _(IP_MROUTE_ADD_DEL, ip_mroute_add_del) \ _(MFIB_SIGNAL_DUMP, mfib_signal_dump) \ -_(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(IP_ADDRESS_DUMP, ip_address_dump) \ _(IP_DUMP, ip_dump) \ _(IP_NEIGHBOR_ADD_DEL, ip_neighbor_add_del) \ @@ -105,12 +100,6 @@ send_ip_neighbor_details (u8 is_ipv6, vl_msg_api_send_shmem (q, (u8 *) & mp); } -static void -vl_api_ip_neighbor_details_t_handler (vl_api_ip_neighbor_details_t * mp) -{ - clib_warning ("BUG"); -} - static void vl_api_ip_neighbor_dump_t_handler (vl_api_ip_neighbor_dump_t * mp) { @@ -185,24 +174,6 @@ copy_fib_next_hop (fib_route_path_encode_t * api_rpath, void *fp_arg) sizeof (api_rpath->rpath.frp_addr.ip6)); } -static void -vl_api_ip_fib_details_t_handler (vl_api_ip_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_fib_details_t_endian (vl_api_ip_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_fib_details_t_print (vl_api_ip_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip_fib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, @@ -316,24 +287,6 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) vec_free (lfeis); } -static void -vl_api_ip6_fib_details_t_handler (vl_api_ip6_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip6_fib_details_t_endian (vl_api_ip6_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip6_fib_details_t_print (vl_api_ip6_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip6_fib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, @@ -469,24 +422,6 @@ vl_api_ip6_fib_dump_t_handler (vl_api_ip6_fib_dump_t * mp) /* *INDENT-ON* */ } -static void -vl_api_ip_mfib_details_t_handler (vl_api_ip_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_mfib_details_t_endian (vl_api_ip_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_mfib_details_t_print (vl_api_ip_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip_mfib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, @@ -591,24 +526,6 @@ vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp) vec_free (api_rpaths); } -static void -vl_api_ip6_mfib_details_t_handler (vl_api_ip6_mfib_details_t * mp) -{ - clib_warning 
("BUG"); -} - -static void -vl_api_ip6_mfib_details_t_endian (vl_api_ip6_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip6_mfib_details_t_print (vl_api_ip6_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip6_mfib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index a3cc49bf..a985852c 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -48,13 +48,10 @@ _(L2_XCONNECT_DUMP, l2_xconnect_dump) \ _(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \ _(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ -_(L2_FIB_TABLE_ENTRY, l2_fib_table_entry) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ _(L2_FLAGS, l2_flags) \ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ -_(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ -_(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ _(BRIDGE_FLAGS, bridge_flags) \ _(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \ _(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) @@ -140,12 +137,6 @@ send_l2fib_table_entry (vpe_api_main_t * am, vl_msg_api_send_shmem (q, (u8 *) & mp); } -static void -vl_api_l2_fib_table_entry_t_handler (vl_api_l2_fib_table_entry_t * mp) -{ - clib_warning ("BUG"); -} - static void vl_api_l2_fib_table_dump_t_handler (vl_api_l2_fib_table_dump_t * mp) { @@ -329,19 +320,6 @@ vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) REPLY_MACRO (VL_API_BRIDGE_DOMAIN_ADD_DEL_REPLY); } -static void -vl_api_bridge_domain_details_t_handler (vl_api_bridge_domain_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void - vl_api_bridge_domain_sw_if_details_t_handler - (vl_api_bridge_domain_sw_if_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_bridge_domain_details (unix_shared_memory_queue_t * q, l2_bridge_domain_t * bd_config, diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index ebbeba69..a36a5046 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -50,9 +50,7 @@ _(MPLS_IP_BIND_UNBIND, mpls_ip_bind_unbind) \ _(MPLS_ROUTE_ADD_DEL, mpls_route_add_del) \ _(MPLS_TUNNEL_ADD_DEL, mpls_tunnel_add_del) \ _(MPLS_TUNNEL_DUMP, mpls_tunnel_dump) \ -_(MPLS_TUNNEL_DETAILS, mpls_tunnel_details) \ -_(MPLS_FIB_DUMP, mpls_fib_dump) \ -_(MPLS_FIB_DETAILS, mpls_fib_details) +_(MPLS_FIB_DUMP, mpls_fib_dump) extern void stats_dslock_with_hint (int hint, int tag); extern void stats_dsunlock (void); @@ -280,12 +278,6 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) /* *INDENT-ON* */ } -static void -vl_api_mpls_tunnel_details_t_handler (vl_api_mpls_tunnel_details_t * mp) -{ - clib_warning ("BUG"); -} - typedef struct mpls_tunnel_send_walk_ctx_t_ { unix_shared_memory_queue_t *q; @@ -340,24 +332,6 @@ vl_api_mpls_tunnel_dump_t_handler (vl_api_mpls_tunnel_dump_t * mp) mpls_tunnel_walk (send_mpls_tunnel_entry, &ctx); } -static void -vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_mpls_fib_details_t_endian (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_mpls_fib_details_t_print (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_mpls_fib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 60fd0199..a8f471e8 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -128,12 +128,8 @@ 
_(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \ _(CLASSIFY_SET_INTERFACE_L2_TABLES, classify_set_interface_l2_tables) \ _(GET_NODE_INDEX, get_node_index) \ _(ADD_NODE_NEXT, add_node_next) \ -_(VXLAN_ADD_DEL_TUNNEL, vxlan_add_del_tunnel) \ -_(VXLAN_TUNNEL_DUMP, vxlan_tunnel_dump) \ _(L2_INTERFACE_EFP_FILTER, l2_interface_efp_filter) \ _(SHOW_VERSION, show_version) \ -_(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \ -_(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ _(INTERFACE_NAME_RENUMBER, interface_name_renumber) \ _(WANT_IP4_ARP_EVENTS, want_ip4_arp_events) \ _(WANT_IP6_ND_EVENTS, want_ip6_nd_events) \ @@ -1436,62 +1432,6 @@ out: /* *INDENT-ON* */ } -static void vl_api_vxlan_add_del_tunnel_t_handler - (vl_api_vxlan_add_del_tunnel_t * mp) -{ - vl_api_vxlan_add_del_tunnel_reply_t *rmp; - int rv = 0; - vnet_vxlan_add_del_tunnel_args_t _a, *a = &_a; - u32 encap_fib_index; - uword *p; - ip4_main_t *im = &ip4_main; - vnet_main_t *vnm = vnet_get_main (); - u32 sw_if_index = ~0; - - p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id)); - if (!p) - { - rv = VNET_API_ERROR_NO_SUCH_FIB; - goto out; - } - encap_fib_index = p[0]; - memset (a, 0, sizeof (*a)); - - a->is_add = mp->is_add; - a->is_ip6 = mp->is_ipv6; - - /* ip addresses sent in network byte order */ - ip46_from_addr_buf (mp->is_ipv6, mp->dst_address, &a->dst); - ip46_from_addr_buf (mp->is_ipv6, mp->src_address, &a->src); - - /* Check src & dst are different */ - if (ip46_address_cmp (&a->dst, &a->src) == 0) - { - rv = VNET_API_ERROR_SAME_SRC_DST; - goto out; - } - a->mcast_sw_if_index = ntohl (mp->mcast_sw_if_index); - if (ip46_address_is_multicast (&a->dst) && - pool_is_free_index (vnm->interface_main.sw_interfaces, - a->mcast_sw_if_index)) - { - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; - goto out; - } - a->encap_fib_index = encap_fib_index; - a->decap_next_index = ntohl (mp->decap_next_index); - a->vni = ntohl (mp->vni); - rv = vnet_vxlan_add_del_tunnel (a, &sw_if_index); - -out: - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_VXLAN_ADD_DEL_TUNNEL_REPLY, - ({ - rmp->sw_if_index = ntohl (sw_if_index); - })); - /* *INDENT-ON* */ -} - static void send_vxlan_tunnel_details (vxlan_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context) { @@ -1525,43 +1465,6 @@ static void send_vxlan_tunnel_details vl_msg_api_send_shmem (q, (u8 *) & rmp); } -static void vl_api_vxlan_tunnel_dump_t_handler - (vl_api_vxlan_tunnel_dump_t * mp) -{ - unix_shared_memory_queue_t *q; - vxlan_main_t *vxm = &vxlan_main; - vxlan_tunnel_t *t; - u32 sw_if_index; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - { - return; - } - - sw_if_index = ntohl (mp->sw_if_index); - - if (~0 == sw_if_index) - { - /* *INDENT-OFF* */ - pool_foreach (t, vxm->tunnels, - ({ - send_vxlan_tunnel_details(t, q, mp->context); - })); - /* *INDENT-ON* */ - } - else - { - if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) || - (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index])) - { - return; - } - t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]]; - send_vxlan_tunnel_details (t, q, mp->context); - } -} - static void vl_api_l2_patch_add_del_t_handler (vl_api_l2_patch_add_del_t * mp) { @@ -1585,83 +1488,6 @@ vl_api_l2_patch_add_del_t_handler (vl_api_l2_patch_add_del_t * mp) REPLY_MACRO (VL_API_L2_PATCH_ADD_DEL_REPLY); } -static void - vl_api_vxlan_gpe_add_del_tunnel_t_handler - (vl_api_vxlan_gpe_add_del_tunnel_t * mp) -{ - vl_api_vxlan_gpe_add_del_tunnel_reply_t *rmp; - int rv = 0; - 
vnet_vxlan_gpe_add_del_tunnel_args_t _a, *a = &_a; - u32 encap_fib_index, decap_fib_index; - u8 protocol; - uword *p; - ip4_main_t *im = &ip4_main; - u32 sw_if_index = ~0; - - - p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id)); - if (!p) - { - rv = VNET_API_ERROR_NO_SUCH_FIB; - goto out; - } - encap_fib_index = p[0]; - - protocol = mp->protocol; - - /* Interpret decap_vrf_id as an opaque if sending to other-than-ip4-input */ - if (protocol == VXLAN_GPE_INPUT_NEXT_IP4_INPUT) - { - p = hash_get (im->fib_index_by_table_id, ntohl (mp->decap_vrf_id)); - if (!p) - { - rv = VNET_API_ERROR_NO_SUCH_INNER_FIB; - goto out; - } - decap_fib_index = p[0]; - } - else - { - decap_fib_index = ntohl (mp->decap_vrf_id); - } - - /* Check src & dst are different */ - if ((mp->is_ipv6 && memcmp (mp->local, mp->remote, 16) == 0) || - (!mp->is_ipv6 && memcmp (mp->local, mp->remote, 4) == 0)) - { - rv = VNET_API_ERROR_SAME_SRC_DST; - goto out; - } - memset (a, 0, sizeof (*a)); - - a->is_add = mp->is_add; - a->is_ip6 = mp->is_ipv6; - /* ip addresses sent in network byte order */ - if (a->is_ip6) - { - clib_memcpy (&(a->local.ip6), mp->local, 16); - clib_memcpy (&(a->remote.ip6), mp->remote, 16); - } - else - { - clib_memcpy (&(a->local.ip4), mp->local, 4); - clib_memcpy (&(a->remote.ip4), mp->remote, 4); - } - a->encap_fib_index = encap_fib_index; - a->decap_fib_index = decap_fib_index; - a->protocol = protocol; - a->vni = ntohl (mp->vni); - rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index); - -out: - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_REPLY, - ({ - rmp->sw_if_index = ntohl (sw_if_index); - })); - /* *INDENT-ON* */ -} - static void send_vxlan_gpe_tunnel_details (vxlan_gpe_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context) { @@ -1696,43 +1522,6 @@ static void send_vxlan_gpe_tunnel_details vl_msg_api_send_shmem (q, (u8 *) & rmp); } -static void vl_api_vxlan_gpe_tunnel_dump_t_handler - (vl_api_vxlan_gpe_tunnel_dump_t * mp) -{ - unix_shared_memory_queue_t *q; - vxlan_gpe_main_t *vgm = &vxlan_gpe_main; - vxlan_gpe_tunnel_t *t; - u32 sw_if_index; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - { - return; - } - - sw_if_index = ntohl (mp->sw_if_index); - - if (~0 == sw_if_index) - { - /* *INDENT-OFF* */ - pool_foreach (t, vgm->tunnels, - ({ - send_vxlan_gpe_tunnel_details(t, q, mp->context); - })); - /* *INDENT-ON* */ - } - else - { - if ((sw_if_index >= vec_len (vgm->tunnel_index_by_sw_if_index)) || - (~0 == vgm->tunnel_index_by_sw_if_index[sw_if_index])) - { - return; - } - t = &vgm->tunnels[vgm->tunnel_index_by_sw_if_index[sw_if_index]]; - send_vxlan_gpe_tunnel_details (t, q, mp->context); - } -} - static void vl_api_interface_name_renumber_t_handler (vl_api_interface_name_renumber_t * mp) diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index 97b501e0..6ae510b1 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -48,7 +48,6 @@ api_main_init (vlib_main_t * vm) vam->vlib_main = vm; vam->my_client_index = (u32) ~ 0; init_error_string_table (vam); - vat_api_hookup (vam); rv = vat_plugin_init (vam); if (rv) clib_warning ("vat_plugin_init returned %d", rv); diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index 5e9b0d69..c46d441a 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -46,7 +46,6 @@ stats_main_t stats_main; #define foreach_stats_msg \ _(WANT_STATS, want_stats) \ -_(WANT_STATS_REPLY, want_stats_reply) \ _(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ 
_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ _(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ @@ -1226,12 +1225,6 @@ vl_api_vnet_ip6_nbr_counters_t_handler (vl_api_vnet_ip6_nbr_counters_t * mp) } } -static void -vl_api_want_stats_reply_t_handler (vl_api_want_stats_reply_t * mp) -{ - clib_warning ("BUG"); -} - static void vl_api_want_stats_t_handler (vl_api_want_stats_t * mp) { -- cgit 1.2.3-korg From 35af9e50cdbfc73dab963557f4ffbd56b21e2abc Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 6 Mar 2017 12:02:50 +0100 Subject: features: take device-input buffer advance value directly Change-Id: Ifac7d9134d03d79164ce6f06ae9413279bbaadb3 Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/node.c | 6 ++-- src/vnet/devices/af_packet/node.c | 3 +- src/vnet/devices/netmap/node.c | 2 +- src/vnet/devices/virtio/vhost-user.c | 2 +- src/vnet/feature/feature.h | 62 ++++++++++++++++++++---------------- src/vnet/unix/tapcli.c | 3 +- src/vnet/unix/tuntap.c | 2 +- 7 files changed, 42 insertions(+), 38 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index 04c41655..ccbfd2f2 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -439,9 +439,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, /* Do we have any driver RX features configured on the interface? */ vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index, &next0, &next1, &next2, &next3, - b0, b1, b2, b3, - l3_offset0, l3_offset1, - l3_offset2, l3_offset3); + b0, b1, b2, b3); vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, n_left_to_next, @@ -502,7 +500,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, /* Do we have any driver RX features configured on the interface? 
*/ vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0, - b0, l3_offset0); + b0); vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index 69fc11c9..ab7fd800 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -216,8 +216,7 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, } /* redirect if feature path enabled */ - vnet_feature_start_device_input_x1 (apif->sw_if_index, &next0, b0, - 0); + vnet_feature_start_device_input_x1 (apif->sw_if_index, &next0, b0); /* enque and take next packet */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, diff --git a/src/vnet/devices/netmap/node.c b/src/vnet/devices/netmap/node.c index 835209a3..68ea7832 100644 --- a/src/vnet/devices/netmap/node.c +++ b/src/vnet/devices/netmap/node.c @@ -218,7 +218,7 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, /* redirect if feature path enabled */ vnet_feature_start_device_input_x1 (nif->sw_if_index, &next0, - first_b0, 0); + first_b0); /* enque and take next packet */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index f490f0c1..c16e9822 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -1747,7 +1747,7 @@ vhost_user_if_input (vlib_main_t * vm, /* redirect if feature path enabled */ vnet_feature_start_device_input_x1 (vui->sw_if_index, &next0, - b_head, 0); + b_head); u32 bi = to_next[-1]; //Cannot use to_next[-1] in the macro vlib_validate_buffer_enqueue_x1 (vm, node, next_index, diff --git a/src/vnet/feature/feature.h b/src/vnet/feature/feature.h index b27aaf17..77b1499d 100644 --- a/src/vnet/feature/feature.h +++ b/src/vnet/feature/feature.h @@ -18,6 +18,7 @@ #include #include +#include /** feature registration object */ typedef struct _vnet_feature_arc_registration @@ -227,7 +228,7 @@ vnet_feature_next (u32 sw_if_index, u32 * next0, vlib_buffer_t * b0) static_always_inline void vnet_feature_start_device_input_x1 (u32 sw_if_index, u32 * next0, - vlib_buffer_t * b0, u16 buffer_advanced0) + vlib_buffer_t * b0) { vnet_feature_main_t *fm = &feature_main; vnet_feature_config_main_t *cm; @@ -242,9 +243,12 @@ vnet_feature_start_device_input_x1 (u32 sw_if_index, u32 * next0, * Save next0 so that the last feature in the chain * can skip ethernet-input if indicated... */ + u16 adv; + vnet_buffer (b0)->device_input_feat.saved_next_index = *next0; - vnet_buffer (b0)->device_input_feat.buffer_advance = buffer_advanced0; - vlib_buffer_advance (b0, -buffer_advanced0); + adv = device_input_next_node_advance[*next0]; + vnet_buffer (b0)->device_input_feat.buffer_advance = adv; + vlib_buffer_advance (b0, -adv); b0->feature_arc_index = feature_arc_index; b0->current_config_index = @@ -258,10 +262,7 @@ static_always_inline void vnet_feature_start_device_input_x2 (u32 sw_if_index, u32 * next0, u32 * next1, - vlib_buffer_t * b0, - vlib_buffer_t * b1, - u16 buffer_advanced0, - u16 buffer_advanced1) + vlib_buffer_t * b0, vlib_buffer_t * b1) { vnet_feature_main_t *fm = &feature_main; vnet_feature_config_main_t *cm; @@ -276,12 +277,17 @@ vnet_feature_start_device_input_x2 (u32 sw_if_index, * Save next0 so that the last feature in the chain * can skip ethernet-input if indicated... 
*/ + u16 adv; + vnet_buffer (b0)->device_input_feat.saved_next_index = *next0; + adv = device_input_next_node_advance[*next0]; + vnet_buffer (b0)->device_input_feat.buffer_advance = adv; + vlib_buffer_advance (b0, -adv); + vnet_buffer (b1)->device_input_feat.saved_next_index = *next1; - vnet_buffer (b0)->device_input_feat.buffer_advance = buffer_advanced0; - vnet_buffer (b1)->device_input_feat.buffer_advance = buffer_advanced1; - vlib_buffer_advance (b0, -buffer_advanced0); - vlib_buffer_advance (b1, -buffer_advanced1); + adv = device_input_next_node_advance[*next1]; + vnet_buffer (b1)->device_input_feat.buffer_advance = adv; + vlib_buffer_advance (b1, -adv); b0->feature_arc_index = feature_arc_index; b1->feature_arc_index = feature_arc_index; @@ -303,12 +309,7 @@ vnet_feature_start_device_input_x4 (u32 sw_if_index, u32 * next3, vlib_buffer_t * b0, vlib_buffer_t * b1, - vlib_buffer_t * b2, - vlib_buffer_t * b3, - u16 buffer_advanced0, - u16 buffer_advanced1, - u16 buffer_advanced2, - u16 buffer_advanced3) + vlib_buffer_t * b2, vlib_buffer_t * b3) { vnet_feature_main_t *fm = &feature_main; vnet_feature_config_main_t *cm; @@ -323,20 +324,27 @@ vnet_feature_start_device_input_x4 (u32 sw_if_index, * Save next0 so that the last feature in the chain * can skip ethernet-input if indicated... */ + u16 adv; + vnet_buffer (b0)->device_input_feat.saved_next_index = *next0; + adv = device_input_next_node_advance[*next0]; + vnet_buffer (b0)->device_input_feat.buffer_advance = adv; + vlib_buffer_advance (b0, -adv); + vnet_buffer (b1)->device_input_feat.saved_next_index = *next1; - vnet_buffer (b2)->device_input_feat.saved_next_index = *next2; - vnet_buffer (b3)->device_input_feat.saved_next_index = *next3; + adv = device_input_next_node_advance[*next1]; + vnet_buffer (b1)->device_input_feat.buffer_advance = adv; + vlib_buffer_advance (b1, -adv); - vnet_buffer (b0)->device_input_feat.buffer_advance = buffer_advanced0; - vnet_buffer (b1)->device_input_feat.buffer_advance = buffer_advanced1; - vnet_buffer (b2)->device_input_feat.buffer_advance = buffer_advanced2; - vnet_buffer (b3)->device_input_feat.buffer_advance = buffer_advanced3; + vnet_buffer (b2)->device_input_feat.saved_next_index = *next2; + adv = device_input_next_node_advance[*next2]; + vnet_buffer (b2)->device_input_feat.buffer_advance = adv; + vlib_buffer_advance (b2, -adv); - vlib_buffer_advance (b0, -buffer_advanced0); - vlib_buffer_advance (b1, -buffer_advanced1); - vlib_buffer_advance (b2, -buffer_advanced2); - vlib_buffer_advance (b3, -buffer_advanced3); + vnet_buffer (b3)->device_input_feat.saved_next_index = *next3; + adv = device_input_next_node_advance[*next3]; + vnet_buffer (b3)->device_input_feat.buffer_advance = adv; + vlib_buffer_advance (b3, -adv); b0->feature_arc_index = feature_arc_index; b1->feature_arc_index = feature_arc_index; diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index 25c930c6..496f3885 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -355,8 +355,7 @@ static uword tapcli_rx_iface(vlib_main_t * vm, to_next++; n_left_to_next--; - vnet_feature_start_device_input_x1 (ti->sw_if_index, &next_index, - b_first, 0); + vnet_feature_start_device_input_x1 (ti->sw_if_index, &next_index, b_first); vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next, diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c index 4a5dd676..2cfcc92f 100644 --- a/src/vnet/unix/tuntap.c +++ b/src/vnet/unix/tuntap.c @@ -351,7 +351,7 @@ tuntap_rx (vlib_main_t * vm, next_index = 
VNET_DEVICE_INPUT_NEXT_DROP; } - vnet_feature_start_device_input_x1 (tm->sw_if_index, &next_index, b, 0); + vnet_feature_start_device_input_x1 (tm->sw_if_index, &next_index, b); vlib_set_next_frame_buffer (vm, node, next_index, bi); -- cgit 1.2.3-korg From a1a0901daa6f56039caecbe58e7d7062a145fc98 Mon Sep 17 00:00:00 2001 From: Steven Date: Wed, 8 Mar 2017 00:23:13 -0800 Subject: vhost: binary API changes for interrupt mode Add operation_mode for create_vhost_user_if, modify_vhost_user_if, and sw_interface_vhost_user_details. Only polling mode is supported for these APIs; any other mode is rejected with a VNET_API_ERROR_UNIMPLEMENTED error. Change-Id: I0596f4e2c087aa2b6f78eb3e0b63910b1859641e Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 24 ++++++++++++++++++------ src/vnet/devices/virtio/vhost-user.h | 14 ++++++++++++-- src/vnet/devices/virtio/vhost_user.api | 6 ++++++ src/vnet/devices/virtio/vhost_user_api.c | 7 +++++-- 4 files changed, 41 insertions(+), 10 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index c16e9822..b6b4c04a 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2540,7 +2540,7 @@ vhost_user_vui_init (vnet_main_t * vnm, vhost_user_intf_t * vui, int server_sock_fd, const char *sock_filename, - u64 feature_mask, u32 * sw_if_index) + u64 feature_mask, u32 * sw_if_index, u8 operation_mode) { vnet_sw_interface_t *sw; sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); @@ -2567,6 +2567,7 @@ vhost_user_vui_init (vnet_main_t * vnm, vui->feature_mask = feature_mask; vui->unix_file_index = ~0; vui->log_base_addr = 0; + vui->operation_mode = operation_mode; for (q = 0; q < VHOST_VRING_MAX_N; q++) vhost_user_vring_init (vui, q); @@ -2594,13 +2595,17 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, u8 is_server, u32 * sw_if_index, u64 feature_mask, - u8 renumber, u32 custom_dev_instance, u8 * hwaddr) + u8 renumber, u32 custom_dev_instance, u8 * hwaddr, + u8 operation_mode) { vhost_user_intf_t *vui = NULL; u32 sw_if_idx = ~0; int rv = 0; int server_sock_fd = -1; + if (operation_mode != VHOST_USER_POLLING_MODE) + return VNET_API_ERROR_UNIMPLEMENTED; + if (sock_filename == NULL || !(strlen (sock_filename) > 0)) { return VNET_API_ERROR_INVALID_ARGUMENT; } @@ -2619,7 +2624,7 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_create_ethernet (vnm, vm, vui, hwaddr); vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename, - feature_mask, &sw_if_idx); + feature_mask, &sw_if_idx, operation_mode); if (renumber) vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); @@ -2637,7 +2642,8 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 sw_if_index, - u64 feature_mask, u8 renumber, u32 custom_dev_instance) + u64 feature_mask, u8 renumber, u32 custom_dev_instance, + u8 operation_mode) { vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui = NULL; @@ -2646,6 +2652,8 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, int rv = 0; vnet_hw_interface_t *hwif; + if (operation_mode != VHOST_USER_POLLING_MODE) + return VNET_API_ERROR_UNIMPLEMENTED; if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) || hwif->dev_class_index != vhost_user_dev_class.index) return VNET_API_ERROR_INVALID_SW_IF_INDEX; @@ -2660,7 +2668,8 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_term_if (vui); vhost_user_vui_init (vnm,
vui, server_sock_fd, - sock_filename, feature_mask, &sw_if_idx); + sock_filename, feature_mask, &sw_if_idx, + operation_mode); if (renumber) vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); @@ -2685,6 +2694,7 @@ vhost_user_connect_command_fn (vlib_main_t * vm, u8 hwaddr[6]; u8 *hw = NULL; clib_error_t *error = NULL; + u8 operation_mode = VHOST_USER_POLLING_MODE; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -2719,7 +2729,8 @@ vhost_user_connect_command_fn (vlib_main_t * vm, int rv; if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename, is_server, &sw_if_index, feature_mask, - renumber, custom_dev_instance, hw))) + renumber, custom_dev_instance, hw, + operation_mode))) { error = clib_error_return (0, "vhost_user_create_if returned %d", rv); goto done; @@ -2809,6 +2820,7 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance); vec_add2 (r_vuids, vuid, 1); + vuid->operation_mode = vui->operation_mode; vuid->sw_if_index = vui->sw_if_index; vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz; vuid->features = vui->features; diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index dd23a909..6b928f05 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -42,6 +42,7 @@ #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 #define VRING_USED_F_NO_NOTIFY 1 +#define VRING_AVAIL_F_NO_INTERRUPT 1 #define foreach_virtio_net_feature \ _ (VIRTIO_NET_F_MRG_RXBUF, 15) \ @@ -65,11 +66,13 @@ typedef enum int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 * sw_if_index, u64 feature_mask, - u8 renumber, u32 custom_dev_instance, u8 * hwaddr); + u8 renumber, u32 custom_dev_instance, u8 * hwaddr, + u8 operation_mode); int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 sw_if_index, u64 feature_mask, - u8 renumber, u32 custom_dev_instance); + u8 renumber, u32 custom_dev_instance, + u8 operation_mode); int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index); @@ -209,6 +212,10 @@ typedef struct u64 log_guest_addr; } vhost_user_vring_t; +#define VHOST_USER_POLLING_MODE 0 +#define VHOST_USER_INTERRUPT_MODE 1 +#define VHOST_USER_ADAPTIVE_MODE 2 + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -249,6 +256,8 @@ typedef struct /* Vector of workers for this interface */ u32 *workers; + + u8 operation_mode; } vhost_user_intf_t; typedef struct @@ -323,6 +332,7 @@ typedef struct u8 sock_filename[256]; u32 num_regions; int sock_errno; + u8 operation_mode; } vhost_user_intf_details_t; int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api index 21e42298..4f604e45 100644 --- a/src/vnet/devices/virtio/vhost_user.api +++ b/src/vnet/devices/virtio/vhost_user.api @@ -19,6 +19,7 @@ @param sock_filename - unix socket filename, used to speak with frontend @param use_custom_mac - enable or disable the use of the provided hardware address @param mac_address - hardware address to use if 'use_custom_mac' is set + @param operation_mode - polling=0, interrupt=1, or adaptive=2 */ define create_vhost_user_if { @@ -31,6 +32,7 @@ define create_vhost_user_if u8 use_custom_mac; u8 mac_address[6]; u8 tag[64]; + u8 operation_mode; }; /** \brief vhost-user interface create response @@ -49,6 
+51,7 @@ define create_vhost_user_if_reply @param client_index - opaque cookie to identify the sender @param is_server - our side is socket server @param sock_filename - unix socket filename, used to speak with frontend + @param operation_mode - polling=0, interrupt=1, or adaptive=2 */ define modify_vhost_user_if { @@ -59,6 +62,7 @@ define modify_vhost_user_if u8 sock_filename[256]; u8 renumber; u32 custom_dev_instance; + u8 operation_mode; }; /** \brief vhost-user interface modify response @@ -99,6 +103,7 @@ define delete_vhost_user_if_reply @param is_server - vhost-user server socket @param sock_filename - socket filename @param num_regions - number of used memory regions + @param operation_mode - polling=0, interrupt=1, or adaptive=2 */ define sw_interface_vhost_user_details { @@ -111,6 +116,7 @@ define sw_interface_vhost_user_details u8 sock_filename[256]; u32 num_regions; i32 sock_errno; + u8 operation_mode; }; define sw_interface_vhost_user_dump diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c index 8dbd032b..ac7afa61 100644 --- a/src/vnet/devices/virtio/vhost_user_api.c +++ b/src/vnet/devices/virtio/vhost_user_api.c @@ -81,7 +81,8 @@ vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp) rv = vhost_user_create_if (vnm, vm, (char *) mp->sock_filename, mp->is_server, &sw_if_index, (u64) ~ 0, mp->renumber, ntohl (mp->custom_dev_instance), - (mp->use_custom_mac) ? mp->mac_address : NULL); + (mp->use_custom_mac) ? mp->mac_address : NULL, + mp->operation_mode); /* Remember an interface tag for the new interface */ if (rv == 0) @@ -116,7 +117,8 @@ vl_api_modify_vhost_user_if_t_handler (vl_api_modify_vhost_user_if_t * mp) rv = vhost_user_modify_if (vnm, vm, (char *) mp->sock_filename, mp->is_server, sw_if_index, (u64) ~ 0, - mp->renumber, ntohl (mp->custom_dev_instance)); + mp->renumber, ntohl (mp->custom_dev_instance), + mp->operation_mode); REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY); } @@ -162,6 +164,7 @@ send_sw_interface_vhost_user_details (vpe_api_main_t * am, mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz); mp->features = clib_net_to_host_u64 (vui->features); mp->is_server = vui->is_server; + mp->operation_mode = vui->operation_mode; mp->num_regions = ntohl (vui->num_regions); mp->sock_errno = ntohl (vui->sock_errno); mp->context = context; -- cgit 1.2.3-korg From 80f54e20270ed0628ee725e3e3c515731a0188f2 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 8 Mar 2017 19:08:56 -0500 Subject: vlib_mains == 0 special cases be gone Clean up spurious binary API client link dependency on libvlib.so, which managed to hide behind vlib_mains == 0 checks reached by VLIB_xxx_FUNCTION macros. 
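To make the mechanism concrete, here is a minimal stand-alone sketch of the bootstrap-vector trick, not the VPP sources: vec_header_t, vlib_main_t and the vec_len() macro below are simplified stand-ins for the real vppinfra/vlib definitions. A clib-style vector stores its header immediately before the element data, so a static, packed header-plus-element pair behaves as a length-1 vlib_mains vector from the moment the process starts:

#include <stdio.h>

/* Simplified stand-ins for the real vppinfra/vlib definitions. */
typedef struct { unsigned len; } vec_header_t;
typedef struct { int placeholder; } vlib_main_t;

static vlib_main_t vlib_global_main;

/* A clib-style vector keeps its header just before the data, so
 * vec_len (v) reads the header at ((vec_header_t *) v)[-1]. */
#define vec_len(v) (((vec_header_t *) (v))[-1].len)

/* Hand-crafted static vector: header and single element packed
 * together so the header really does sit right before element 0. */
static struct
{
  vec_header_t h;
  vlib_main_t *vm;
} __attribute__ ((packed)) bootstrap_vlib_mains = {
  .h.len = 1,
  .vm = &vlib_global_main,
};

static vlib_main_t **vlib_mains = &bootstrap_vlib_mains.vm;

int
main (void)
{
  /* Valid from time zero: length 1, element 0 is the global main. */
  printf ("len=%u main=%p\n", vec_len (vlib_mains),
          (void *) vlib_mains[0]);
  return 0;
}

Because vec_len (vlib_mains) is 1 and vlib_mains[0] points at the global main before any worker threads exist, callers can always iterate the vector, and start_workers() simply replaces the bootstrap vector with a real one when worker threads are configured.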
Change-Id: I5df1f8ab07dca1944250e643ccf06e60a8462325 Signed-off-by: Dave Barach --- src/plugins/dpdk/ipsec/ipsec.c | 8 +- src/vlib-api.am | 4 +- src/vlib/buffer.c | 27 +- src/vlib/global_funcs.h | 2 +- src/vlib/node_cli.c | 28 +- src/vlib/node_funcs.h | 4 +- src/vlib/threads.c | 16 +- src/vlib/threads.h | 43 ++- src/vlibapi/api.h | 4 +- src/vlibapi/api_shared.c | 530 ++--------------------------------- src/vlibapi/node_serialize.c | 15 +- src/vlibmemory/memory_vlib.c | 471 +++++++++++++++++++++++++++++++ src/vnet/devices/virtio/vhost-user.c | 9 +- src/vpp-api-test.am | 2 - src/vpp/api/api.c | 1 - src/vpp/api/gmon.c | 9 +- 16 files changed, 575 insertions(+), 598 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c index 16bec20a..b0aaaaec 100644 --- a/src/plugins/dpdk/ipsec/ipsec.c +++ b/src/plugins/dpdk/ipsec/ipsec.c @@ -380,13 +380,9 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, im->cb.check_support_cb = dpdk_ipsec_check_support; im->cb.add_del_sa_sess_cb = add_del_sa_sess; - if (vec_len (vlib_mains) == 0) - vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index, + for (i = 1; i < tm->n_vlib_mains; i++) + vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, VLIB_NODE_STATE_POLLING); - else - for (i = 1; i < tm->n_vlib_mains; i++) - vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, - VLIB_NODE_STATE_POLLING); /* TODO cryptodev counters */ diff --git a/src/vlib-api.am b/src/vlib-api.am index c05929b1..4e1dae99 100644 --- a/src/vlib-api.am +++ b/src/vlib-api.am @@ -14,7 +14,7 @@ lib_LTLIBRARIES += libvlibmemory.la libvlibapi.la libvlibmemoryclient.la \ libvlibsocket.la -libvlibmemory_la_DEPENDENCIES = libvppinfra.la libsvm.la libvlib.la +libvlibmemory_la_DEPENDENCIES = libvppinfra.la libsvm.la libvlibmemory_la_LIBADD = $(libvlibmemory_la_DEPENDENCIES) -lpthread libvlibmemory_la_SOURCES = \ vlibmemory/api.h \ @@ -26,7 +26,7 @@ libvlibmemory_la_SOURCES = \ vlibmemory/unix_shared_memory_queue.c \ vlibmemory/unix_shared_memory_queue.h -libvlibapi_la_DEPENDENCIES = libvppinfra.la libvlib.la libvlibmemory.la +libvlibapi_la_DEPENDENCIES = libvppinfra.la libvlibapi_la_LIBADD = $(libvlibapi_la_DEPENDENCIES) libvlibapi_la_SOURCES = \ vlibapi/api.h \ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 9f26bec7..6ba82584 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -261,7 +261,28 @@ done: return result; } -vlib_main_t **vlib_mains; +/* + * Hand-craft a static vector w/ length 1, so vec_len(vlib_mains) =1 + * and vlib_mains[0] = &vlib_global_main from the beginning of time. + * + * The only place which should ever expand vlib_mains is start_workers() + * in threads.c. It knows about the bootstrap vector. + */ +/* *INDENT-OFF* */ +static struct +{ + vec_header_t h; + vlib_main_t *vm; +} __attribute__ ((packed)) __bootstrap_vlib_main_vector + __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES))) = +{ + .h.len = 1, + .vm = &vlib_global_main, +}; +/* *INDENT-ON* */ + +vlib_main_t **vlib_mains = &__bootstrap_vlib_main_vector.vm; + /* When dubugging validate that given buffers are either known allocated or known free. 
*/ @@ -280,7 +301,7 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, ASSERT (os_get_cpu_number () == 0); /* smp disaster check */ - if (vlib_mains) + if (vec_len (vlib_mains) > 1) ASSERT (vm == vlib_mains[0]); is_free = expected_state == VLIB_BUFFER_KNOWN_ALLOCATED; @@ -956,7 +977,7 @@ show_buffers (vlib_main_t * vm, do { - curr_vm = vec_len (vlib_mains) ? vlib_mains[vm_index] : vm; + curr_vm = vlib_mains[vm_index]; bm = curr_vm->buffer_main; /* *INDENT-OFF* */ diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h index bbdbdef5..f51ec381 100644 --- a/src/vlib/global_funcs.h +++ b/src/vlib/global_funcs.h @@ -23,7 +23,7 @@ always_inline vlib_main_t * vlib_get_main (void) { vlib_main_t *vm; - vm = vlib_mains ? vlib_mains[os_get_cpu_number ()] : &vlib_global_main; + vm = vlib_mains[os_get_cpu_number ()]; ASSERT (vm); return vm; } diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c index 05d0f0b5..62ab2e64 100644 --- a/src/vlib/node_cli.c +++ b/src/vlib/node_cli.c @@ -248,16 +248,11 @@ show_node_runtime (vlib_main_t * vm, if (unformat (input, "max") || unformat (input, "m")) max = 1; - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } /* @@ -331,7 +326,7 @@ show_node_runtime (vlib_main_t * vm, } } - if (vec_len (vlib_mains)) + if (vec_len (vlib_mains) > 1) { vlib_worker_thread_t *w = vlib_worker_threads + j; if (j > 0) @@ -404,16 +399,11 @@ clear_node_runtime (vlib_main_t * vm, vlib_main_t **stat_vms = 0, *stat_vm; vlib_node_runtime_t *r; - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } vlib_worker_thread_barrier_sync (vm); diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index f49a8d6f..8ccfc438 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -201,7 +201,7 @@ vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index) vlib_frame_t *f; u32 cpu_index = frame_index & VLIB_CPU_MASK; u32 offset = frame_index & VLIB_OFFSET_MASK; - vm = vlib_mains ? vlib_mains[cpu_index] : vm; + vm = vlib_mains[cpu_index]; f = vm->heap_base + offset; return f; } @@ -213,7 +213,7 @@ vlib_frame_index_no_check (vlib_main_t * vm, vlib_frame_t * f) ASSERT (((uword) f & VLIB_CPU_MASK) == 0); - vm = vlib_mains ? 
vlib_mains[f->cpu_index] : vm; + vm = vlib_mains[f->cpu_index]; i = ((u8 *) f - (u8 *) vm->heap_base); return i | f->cpu_index; diff --git a/src/vlib/threads.c b/src/vlib/threads.c index e3ea3c9c..4676be97 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -570,9 +570,13 @@ start_workers (vlib_main_t * vm) if (n_vlib_mains > 1) { - vec_validate (vlib_mains, tm->n_vlib_mains - 1); + /* Replace hand-crafted length-1 vector with a real vector */ + vlib_mains = 0; + + vec_validate_aligned (vlib_mains, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); _vec_len (vlib_mains) = 0; - vec_add1 (vlib_mains, vm); + vec_add1_aligned (vlib_mains, vm, CLIB_CACHE_LINE_BYTES); vlib_worker_threads->wait_at_barrier = clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES); @@ -685,7 +689,7 @@ start_workers (vlib_main_t * vm) /* Packet trace buffers are guaranteed to be empty, nothing to do here */ clib_mem_set_heap (oldheap); - vec_add1 (vlib_mains, vm_clone); + vec_add1_aligned (vlib_mains, vm_clone, CLIB_CACHE_LINE_BYTES); vm_clone->error_main.counters = vec_dup (vlib_mains[0]->error_main.counters); @@ -805,7 +809,7 @@ vlib_worker_thread_node_runtime_update (void) ASSERT (os_get_cpu_number () == 0); - if (vec_len (vlib_mains) == 0) + if (vec_len (vlib_mains) == 1) return; vm = vlib_mains[0]; @@ -1148,7 +1152,7 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm) f64 deadline; u32 count; - if (!vlib_mains) + if (vec_len (vlib_mains) < 2) return; count = vec_len (vlib_mains) - 1; @@ -1179,7 +1183,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) { f64 deadline; - if (!vlib_mains) + if (vec_len (vlib_mains) < 2) return; if (--vlib_worker_threads[0].recursion_level > 0) diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 75a5a281..a032311c 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -222,30 +222,25 @@ vlib_worker_thread_barrier_check (void) } } -#define foreach_vlib_main(body) \ -do { \ - vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ - int ii; \ - \ - if (vec_len (vlib_mains) == 0) \ - vec_add1 (__vlib_mains, &vlib_global_main); \ - else \ - { \ - for (ii = 0; ii < vec_len (vlib_mains); ii++) \ - { \ - this_vlib_main = vlib_mains[ii]; \ - if (this_vlib_main) \ - vec_add1 (__vlib_mains, this_vlib_main); \ - } \ - } \ - \ - for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ - { \ - this_vlib_main = __vlib_mains[ii]; \ - /* body uses this_vlib_main... */ \ - (body); \ - } \ - vec_free (__vlib_mains); \ +#define foreach_vlib_main(body) \ +do { \ + vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ + int ii; \ + \ + for (ii = 0; ii < vec_len (vlib_mains); ii++) \ + { \ + this_vlib_main = vlib_mains[ii]; \ + if (this_vlib_main) \ + vec_add1 (__vlib_mains, this_vlib_main); \ + } \ + \ + for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ + { \ + this_vlib_main = __vlib_mains[ii]; \ + /* body uses this_vlib_main... 
*/ \ + (body); \ + } \ + vec_free (__vlib_mains); \ } while (0); #define foreach_sched_policy \ diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index 2cbeb63c..87a56121 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -252,11 +252,13 @@ void vl_msg_api_queue_handler (unix_shared_memory_queue_t * q); vl_api_trace_t *vl_msg_api_trace_get (api_main_t * am, vl_api_trace_which_t which); +void vl_msg_api_barrier_sync (void) __attribute__ ((weak)); +void vl_msg_api_barrier_release (void) __attribute__ ((weak)); void vl_msg_api_free (void *); void vl_noop_handler (void *mp); -clib_error_t *vl_api_init (vlib_main_t * vm); void vl_msg_api_increment_missing_client_counter (void); void vl_msg_api_post_mortem_dump (void); +void vl_msg_api_post_mortem_dump_enable_disable (int enable); void vl_msg_api_register_pd_handler (void *handler, u16 msg_id_host_byte_order); int vl_msg_api_pd_handler (void *mp, int rv); diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 69ba10c1..6774e3dd 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -23,11 +23,6 @@ #include #include #include -#include -#include -#include -#include -#include #include #include #include @@ -36,19 +31,14 @@ #include #include -api_main_t api_main; - -void vl_msg_api_barrier_sync (void) __attribute__ ((weak)); -void -vl_msg_api_barrier_sync (void) -{ -} - -void vl_msg_api_barrier_release (void) __attribute__ ((weak)); -void -vl_msg_api_barrier_release (void) -{ -} +/* *INDENT-OFF* */ +api_main_t api_main = + { + .region_name = "/unset", + .api_uid = -1, + .api_gid = -1, + }; +/* *INDENT-ON* */ void vl_msg_api_increment_missing_client_counter (void) @@ -57,14 +47,6 @@ vl_msg_api_increment_missing_client_counter (void) am->missing_clients++; } -typedef enum -{ - DUMP, - CUSTOM_DUMP, - REPLAY, - INITIALIZERS, -} vl_api_replay_t; - int vl_msg_api_rx_trace_enabled (api_main_t * am) { @@ -397,6 +379,16 @@ vl_msg_api_trace_configure (api_main_t * am, vl_api_trace_which_t which, return 0; } +void +vl_msg_api_barrier_sync (void) +{ +} + +void +vl_msg_api_barrier_release (void) +{ +} + always_inline void msg_handler_internal (api_main_t * am, void *the_msg, int trace_it, int do_it, int free_it) @@ -748,495 +740,15 @@ vl_noop_handler (void *mp) { } -clib_error_t * -vl_api_init (vlib_main_t * vm) -{ - static u8 once; - api_main_t *am = &api_main; - - if (once) - return 0; - - once = 1; - - am->region_name = "/unset"; - /* - * Eventually passed to fchown, -1 => "current user" - * instead of 0 => "root". A very fine disctinction at best. 
- */ - if (am->api_uid == 0) - am->api_uid = -1; - if (am->api_gid == 0) - am->api_gid = -1; - - return (0); -} - -void vl_msg_api_custom_dump_configure (api_main_t * am) - __attribute__ ((weak)); -void -vl_msg_api_custom_dump_configure (api_main_t * am) -{ -} - -VLIB_INIT_FUNCTION (vl_api_init); - -static void -vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, - u32 first_index, u32 last_index, - vl_api_replay_t which) -{ - vl_api_trace_file_header_t *hp; - int i, fd; - struct stat statb; - size_t file_size; - u8 *msg; - u8 endian_swap_needed = 0; - api_main_t *am = &api_main; - u8 *tmpbuf = 0; - u32 nitems; - void **saved_print_handlers = 0; - - fd = open ((char *) filename, O_RDONLY); - - if (fd < 0) - { - vlib_cli_output (vm, "Couldn't open %s\n", filename); - return; - } - - if (fstat (fd, &statb) < 0) - { - vlib_cli_output (vm, "Couldn't stat %s\n", filename); - close (fd); - return; - } - - if (!(statb.st_mode & S_IFREG) || (statb.st_size < sizeof (*hp))) - { - vlib_cli_output (vm, "File not plausible: %s\n", filename); - close (fd); - return; - } - - file_size = statb.st_size; - file_size = (file_size + 4095) & ~(4096); - - hp = mmap (0, file_size, PROT_READ, MAP_PRIVATE, fd, 0); - - if (hp == (vl_api_trace_file_header_t *) MAP_FAILED) - { - vlib_cli_output (vm, "mmap failed: %s\n", filename); - close (fd); - return; - } - close (fd); - - if ((clib_arch_is_little_endian && hp->endian == VL_API_BIG_ENDIAN) - || (clib_arch_is_big_endian && hp->endian == VL_API_LITTLE_ENDIAN)) - endian_swap_needed = 1; - - if (endian_swap_needed) - nitems = ntohl (hp->nitems); - else - nitems = hp->nitems; - - if (last_index == (u32) ~ 0) - { - last_index = nitems - 1; - } - - if (first_index >= nitems || last_index >= nitems) - { - vlib_cli_output (vm, "Range (%d, %d) outside file range (0, %d)\n", - first_index, last_index, nitems - 1); - munmap (hp, file_size); - return; - } - if (hp->wrapped) - vlib_cli_output (vm, - "Note: wrapped/incomplete trace, results may vary\n"); - - if (which == CUSTOM_DUMP) - { - saved_print_handlers = (void **) vec_dup (am->msg_print_handlers); - vl_msg_api_custom_dump_configure (am); - } - - - msg = (u8 *) (hp + 1); - - for (i = 0; i < first_index; i++) - { - trace_cfg_t *cfgp; - int size; - u16 msg_id; - - size = clib_host_to_net_u32 (*(u32 *) msg); - msg += sizeof (u32); - - if (clib_arch_is_little_endian) - msg_id = ntohs (*((u16 *) msg)); - else - msg_id = *((u16 *) msg); - - cfgp = am->api_trace_cfg + msg_id; - if (!cfgp) - { - vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); - munmap (hp, file_size); - return; - } - msg += size; - } - - if (which == REPLAY) - am->replay_in_progress = 1; - - for (; i <= last_index; i++) - { - trace_cfg_t *cfgp; - u16 *msg_idp; - u16 msg_id; - int size; - - if (which == DUMP) - vlib_cli_output (vm, "---------- trace %d -----------\n", i); - - size = clib_host_to_net_u32 (*(u32 *) msg); - msg += sizeof (u32); - - if (clib_arch_is_little_endian) - msg_id = ntohs (*((u16 *) msg)); - else - msg_id = *((u16 *) msg); - - cfgp = am->api_trace_cfg + msg_id; - if (!cfgp) - { - vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; - return; - } - - /* Copy the buffer (from the read-only mmap'ed file) */ - vec_validate (tmpbuf, size - 1 + sizeof (uword)); - clib_memcpy (tmpbuf + sizeof (uword), msg, size); - memset (tmpbuf, 0xf, sizeof (uword)); - - /* - * Endian swap if needed. 
All msg data is supposed to be - * in network byte order. All msg handlers are supposed to - * know that. The generic message dumpers don't know that. - * One could fix apigen, I suppose. - */ - if ((which == DUMP && clib_arch_is_little_endian) || endian_swap_needed) - { - void (*endian_fp) (void *); - if (msg_id >= vec_len (am->msg_endian_handlers) - || (am->msg_endian_handlers[msg_id] == 0)) - { - vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id); - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; - return; - } - endian_fp = am->msg_endian_handlers[msg_id]; - (*endian_fp) (tmpbuf + sizeof (uword)); - } - - /* msg_id always in network byte order */ - if (clib_arch_is_little_endian) - { - msg_idp = (u16 *) (tmpbuf + sizeof (uword)); - *msg_idp = msg_id; - } - - switch (which) - { - case CUSTOM_DUMP: - case DUMP: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id]) - { - u8 *(*print_fp) (void *, void *); - - print_fp = (void *) am->msg_print_handlers[msg_id]; - (*print_fp) (tmpbuf + sizeof (uword), vm); - } - else - { - vlib_cli_output (vm, "Skipping msg id %d: no print fcn\n", - msg_id); - break; - } - break; - - case INITIALIZERS: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id]) - { - u8 *s; - int j; - u8 *(*print_fp) (void *, void *); - - print_fp = (void *) am->msg_print_handlers[msg_id]; - - vlib_cli_output (vm, "/*"); - - (*print_fp) (tmpbuf + sizeof (uword), vm); - vlib_cli_output (vm, "*/\n"); - - s = format (0, "static u8 * vl_api_%s_%d[%d] = {", - am->msg_names[msg_id], i, - am->api_trace_cfg[msg_id].size); - - for (j = 0; j < am->api_trace_cfg[msg_id].size; j++) - { - if ((j & 7) == 0) - s = format (s, "\n "); - s = format (s, "0x%02x,", tmpbuf[sizeof (uword) + j]); - } - s = format (s, "\n};\n%c", 0); - vlib_cli_output (vm, (char *) s); - vec_free (s); - } - break; - - case REPLAY: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id] && cfgp->replay_enable) - { - void (*handler) (void *); - - handler = (void *) am->msg_handlers[msg_id]; - - if (!am->is_mp_safe[msg_id]) - vl_msg_api_barrier_sync (); - (*handler) (tmpbuf + sizeof (uword)); - if (!am->is_mp_safe[msg_id]) - vl_msg_api_barrier_release (); - } - else - { - if (cfgp->replay_enable) - vlib_cli_output (vm, "Skipping msg id %d: no handler\n", - msg_id); - break; - } - break; - } - - _vec_len (tmpbuf) = 0; - msg += size; - } - - if (saved_print_handlers) - { - clib_memcpy (am->msg_print_handlers, saved_print_handlers, - vec_len (am->msg_print_handlers) * sizeof (void *)); - vec_free (saved_print_handlers); - } - - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; -} - -u8 * -format_vl_msg_api_trace_status (u8 * s, va_list * args) -{ - api_main_t *am = va_arg (*args, api_main_t *); - vl_api_trace_which_t which = va_arg (*args, vl_api_trace_which_t); - vl_api_trace_t *tp; - char *trace_name; - - switch (which) - { - case VL_API_TRACE_TX: - tp = am->tx_trace; - trace_name = "TX trace"; - break; - - case VL_API_TRACE_RX: - tp = am->rx_trace; - trace_name = "RX trace"; - break; - - default: - abort (); - } - - if (tp == 0) - { - s = format (s, "%s: not yet configured.\n", trace_name); - return s; - } - - s = format (s, "%s: used %d of %d items, %s enabled, %s wrapped\n", - trace_name, vec_len (tp->traces), tp->nitems, - tp->enabled ? "is" : "is not", tp->wrapped ? 
"has" : "has not"); - return s; -} static u8 post_mortem_dump_enabled; -static clib_error_t * -api_trace_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - u32 nitems = 256 << 10; - api_main_t *am = &api_main; - vl_api_trace_which_t which = VL_API_TRACE_RX; - u8 *filename; - u32 first = 0; - u32 last = (u32) ~ 0; - FILE *fp; - int rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "on") || unformat (input, "enable")) - { - if (unformat (input, "nitems %d", &nitems)) - ; - vl_msg_api_trace_configure (am, which, nitems); - vl_msg_api_trace_onoff (am, which, 1 /* on */ ); - } - else if (unformat (input, "off")) - { - vl_msg_api_trace_onoff (am, which, 0); - } - else if (unformat (input, "save %s", &filename)) - { - u8 *chroot_filename; - if (strstr ((char *) filename, "..") - || index ((char *) filename, '/')) - { - vlib_cli_output (vm, "illegal characters in filename '%s'", - filename); - return 0; - } - - chroot_filename = format (0, "/tmp/%s%c", filename, 0); - - vec_free (filename); - - fp = fopen ((char *) chroot_filename, "w"); - if (fp == NULL) - { - vlib_cli_output (vm, "Couldn't create %s\n", chroot_filename); - return 0; - } - rv = vl_msg_api_trace_save (am, which, fp); - fclose (fp); - if (rv == -1) - vlib_cli_output (vm, "API Trace data not present\n"); - else if (rv == -2) - vlib_cli_output (vm, "File for writing is closed\n"); - else if (rv == -10) - vlib_cli_output (vm, "Error while writing header to file\n"); - else if (rv == -11) - vlib_cli_output (vm, "Error while writing trace to file\n"); - else if (rv == -12) - vlib_cli_output (vm, - "Error while writing end of buffer trace to file\n"); - else if (rv == -13) - vlib_cli_output (vm, - "Error while writing start of buffer trace to file\n"); - else if (rv < 0) - vlib_cli_output (vm, "Unkown error while saving: %d", rv); - else - vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename); - vec_free (chroot_filename); - } - else if (unformat (input, "dump %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, DUMP); - } - else if (unformat (input, "custom-dump %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, CUSTOM_DUMP); - } - else if (unformat (input, "replay %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, REPLAY); - } - else if (unformat (input, "initializers %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, INITIALIZERS); - } - else if (unformat (input, "tx")) - { - which = VL_API_TRACE_TX; - } - else if (unformat (input, "first %d", &first)) - { - ; - } - else if (unformat (input, "last %d", &last)) - { - ; - } - else if (unformat (input, "status")) - { - vlib_cli_output (vm, "%U", format_vl_msg_api_trace_status, - am, which); - } - else if (unformat (input, "free")) - { - vl_msg_api_trace_onoff (am, which, 0); - vl_msg_api_trace_free (am, which); - } - else if (unformat (input, "post-mortem-on")) - post_mortem_dump_enabled = 1; - else if (unformat (input, "post-mortem-off")) - post_mortem_dump_enabled = 0; - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (api_trace_command, static) = { - .path = "api trace", - .short_help = - "api trace [on|off][dump|save|replay ][status][free][post-mortem-on]", - .function = api_trace_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -api_config_fn (vlib_main_t * vm, unformat_input_t * 
input) +void +vl_msg_api_post_mortem_dump_enable_disable (int enable) { - u32 nitems = 256 << 10; - vl_api_trace_which_t which = VL_API_TRACE_RX; - api_main_t *am = &api_main; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "on") || unformat (input, "enable")) - { - if (unformat (input, "nitems %d", &nitems)) - ; - vl_msg_api_trace_configure (am, which, nitems); - vl_msg_api_trace_onoff (am, which, 1 /* on */ ); - post_mortem_dump_enabled = 1; - } - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - return 0; + post_mortem_dump_enabled = enable; } -VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace"); - void vl_msg_api_post_mortem_dump (void) { diff --git a/src/vlibapi/node_serialize.c b/src/vlibapi/node_serialize.c index 4dc1a7d2..50e5c41c 100644 --- a/src/vlibapi/node_serialize.c +++ b/src/vlibapi/node_serialize.c @@ -73,16 +73,11 @@ vlib_node_serialize (vlib_node_main_t * nm, u8 * vector, if (vec_len (stat_vms) == 0) { - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } } @@ -286,7 +281,7 @@ vlib_node_unserialize (u8 * vector) return nodes_by_thread; } -#if CLIB_DEBUG > 0 +#if TEST_CODE static clib_error_t * test_node_serialize_command_fn (vlib_main_t * vm, diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 3a7415c0..d2e05968 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -1437,6 +1439,475 @@ rpc_api_hookup (vlib_main_t * vm) VLIB_API_INIT_FUNCTION (rpc_api_hookup); +typedef enum +{ + DUMP, + CUSTOM_DUMP, + REPLAY, + INITIALIZERS, +} vl_api_replay_t; + +u8 * +format_vl_msg_api_trace_status (u8 * s, va_list * args) +{ + api_main_t *am = va_arg (*args, api_main_t *); + vl_api_trace_which_t which = va_arg (*args, vl_api_trace_which_t); + vl_api_trace_t *tp; + char *trace_name; + + switch (which) + { + case VL_API_TRACE_TX: + tp = am->tx_trace; + trace_name = "TX trace"; + break; + + case VL_API_TRACE_RX: + tp = am->rx_trace; + trace_name = "RX trace"; + break; + + default: + abort (); + } + + if (tp == 0) + { + s = format (s, "%s: not yet configured.\n", trace_name); + return s; + } + + s = format (s, "%s: used %d of %d items, %s enabled, %s wrapped\n", + trace_name, vec_len (tp->traces), tp->nitems, + tp->enabled ? "is" : "is not", tp->wrapped ? 
"has" : "has not"); + return s; +} + +void vl_msg_api_custom_dump_configure (api_main_t * am) + __attribute__ ((weak)); +void +vl_msg_api_custom_dump_configure (api_main_t * am) +{ +} + +static void +vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, + u32 first_index, u32 last_index, + vl_api_replay_t which) +{ + vl_api_trace_file_header_t *hp; + int i, fd; + struct stat statb; + size_t file_size; + u8 *msg; + u8 endian_swap_needed = 0; + api_main_t *am = &api_main; + u8 *tmpbuf = 0; + u32 nitems; + void **saved_print_handlers = 0; + + fd = open ((char *) filename, O_RDONLY); + + if (fd < 0) + { + vlib_cli_output (vm, "Couldn't open %s\n", filename); + return; + } + + if (fstat (fd, &statb) < 0) + { + vlib_cli_output (vm, "Couldn't stat %s\n", filename); + close (fd); + return; + } + + if (!(statb.st_mode & S_IFREG) || (statb.st_size < sizeof (*hp))) + { + vlib_cli_output (vm, "File not plausible: %s\n", filename); + close (fd); + return; + } + + file_size = statb.st_size; + file_size = (file_size + 4095) & ~(4096); + + hp = mmap (0, file_size, PROT_READ, MAP_PRIVATE, fd, 0); + + if (hp == (vl_api_trace_file_header_t *) MAP_FAILED) + { + vlib_cli_output (vm, "mmap failed: %s\n", filename); + close (fd); + return; + } + close (fd); + + if ((clib_arch_is_little_endian && hp->endian == VL_API_BIG_ENDIAN) + || (clib_arch_is_big_endian && hp->endian == VL_API_LITTLE_ENDIAN)) + endian_swap_needed = 1; + + if (endian_swap_needed) + nitems = ntohl (hp->nitems); + else + nitems = hp->nitems; + + if (last_index == (u32) ~ 0) + { + last_index = nitems - 1; + } + + if (first_index >= nitems || last_index >= nitems) + { + vlib_cli_output (vm, "Range (%d, %d) outside file range (0, %d)\n", + first_index, last_index, nitems - 1); + munmap (hp, file_size); + return; + } + if (hp->wrapped) + vlib_cli_output (vm, + "Note: wrapped/incomplete trace, results may vary\n"); + + if (which == CUSTOM_DUMP) + { + saved_print_handlers = (void **) vec_dup (am->msg_print_handlers); + vl_msg_api_custom_dump_configure (am); + } + + + msg = (u8 *) (hp + 1); + + for (i = 0; i < first_index; i++) + { + trace_cfg_t *cfgp; + int size; + u16 msg_id; + + size = clib_host_to_net_u32 (*(u32 *) msg); + msg += sizeof (u32); + + if (clib_arch_is_little_endian) + msg_id = ntohs (*((u16 *) msg)); + else + msg_id = *((u16 *) msg); + + cfgp = am->api_trace_cfg + msg_id; + if (!cfgp) + { + vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); + munmap (hp, file_size); + return; + } + msg += size; + } + + if (which == REPLAY) + am->replay_in_progress = 1; + + for (; i <= last_index; i++) + { + trace_cfg_t *cfgp; + u16 *msg_idp; + u16 msg_id; + int size; + + if (which == DUMP) + vlib_cli_output (vm, "---------- trace %d -----------\n", i); + + size = clib_host_to_net_u32 (*(u32 *) msg); + msg += sizeof (u32); + + if (clib_arch_is_little_endian) + msg_id = ntohs (*((u16 *) msg)); + else + msg_id = *((u16 *) msg); + + cfgp = am->api_trace_cfg + msg_id; + if (!cfgp) + { + vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; + return; + } + + /* Copy the buffer (from the read-only mmap'ed file) */ + vec_validate (tmpbuf, size - 1 + sizeof (uword)); + clib_memcpy (tmpbuf + sizeof (uword), msg, size); + memset (tmpbuf, 0xf, sizeof (uword)); + + /* + * Endian swap if needed. All msg data is supposed to be + * in network byte order. All msg handlers are supposed to + * know that. The generic message dumpers don't know that. 
+ * One could fix apigen, I suppose. + */ + if ((which == DUMP && clib_arch_is_little_endian) || endian_swap_needed) + { + void (*endian_fp) (void *); + if (msg_id >= vec_len (am->msg_endian_handlers) + || (am->msg_endian_handlers[msg_id] == 0)) + { + vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id); + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; + return; + } + endian_fp = am->msg_endian_handlers[msg_id]; + (*endian_fp) (tmpbuf + sizeof (uword)); + } + + /* msg_id always in network byte order */ + if (clib_arch_is_little_endian) + { + msg_idp = (u16 *) (tmpbuf + sizeof (uword)); + *msg_idp = msg_id; + } + + switch (which) + { + case CUSTOM_DUMP: + case DUMP: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id]) + { + u8 *(*print_fp) (void *, void *); + + print_fp = (void *) am->msg_print_handlers[msg_id]; + (*print_fp) (tmpbuf + sizeof (uword), vm); + } + else + { + vlib_cli_output (vm, "Skipping msg id %d: no print fcn\n", + msg_id); + break; + } + break; + + case INITIALIZERS: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id]) + { + u8 *s; + int j; + u8 *(*print_fp) (void *, void *); + + print_fp = (void *) am->msg_print_handlers[msg_id]; + + vlib_cli_output (vm, "/*"); + + (*print_fp) (tmpbuf + sizeof (uword), vm); + vlib_cli_output (vm, "*/\n"); + + s = format (0, "static u8 * vl_api_%s_%d[%d] = {", + am->msg_names[msg_id], i, + am->api_trace_cfg[msg_id].size); + + for (j = 0; j < am->api_trace_cfg[msg_id].size; j++) + { + if ((j & 7) == 0) + s = format (s, "\n "); + s = format (s, "0x%02x,", tmpbuf[sizeof (uword) + j]); + } + s = format (s, "\n};\n%c", 0); + vlib_cli_output (vm, (char *) s); + vec_free (s); + } + break; + + case REPLAY: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id] && cfgp->replay_enable) + { + void (*handler) (void *); + + handler = (void *) am->msg_handlers[msg_id]; + + if (!am->is_mp_safe[msg_id]) + vl_msg_api_barrier_sync (); + (*handler) (tmpbuf + sizeof (uword)); + if (!am->is_mp_safe[msg_id]) + vl_msg_api_barrier_release (); + } + else + { + if (cfgp->replay_enable) + vlib_cli_output (vm, "Skipping msg id %d: no handler\n", + msg_id); + break; + } + break; + } + + _vec_len (tmpbuf) = 0; + msg += size; + } + + if (saved_print_handlers) + { + clib_memcpy (am->msg_print_handlers, saved_print_handlers, + vec_len (am->msg_print_handlers) * sizeof (void *)); + vec_free (saved_print_handlers); + } + + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; +} + +static clib_error_t * +api_trace_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + u32 nitems = 256 << 10; + api_main_t *am = &api_main; + vl_api_trace_which_t which = VL_API_TRACE_RX; + u8 *filename; + u32 first = 0; + u32 last = (u32) ~ 0; + FILE *fp; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on") || unformat (input, "enable")) + { + if (unformat (input, "nitems %d", &nitems)) + ; + vl_msg_api_trace_configure (am, which, nitems); + vl_msg_api_trace_onoff (am, which, 1 /* on */ ); + } + else if (unformat (input, "off")) + { + vl_msg_api_trace_onoff (am, which, 0); + } + else if (unformat (input, "save %s", &filename)) + { + u8 *chroot_filename; + if (strstr ((char *) filename, "..") + || index ((char *) filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + return 0; + } + + chroot_filename = format (0, 
"/tmp/%s%c", filename, 0); + + vec_free (filename); + + fp = fopen ((char *) chroot_filename, "w"); + if (fp == NULL) + { + vlib_cli_output (vm, "Couldn't create %s\n", chroot_filename); + return 0; + } + rv = vl_msg_api_trace_save (am, which, fp); + fclose (fp); + if (rv == -1) + vlib_cli_output (vm, "API Trace data not present\n"); + else if (rv == -2) + vlib_cli_output (vm, "File for writing is closed\n"); + else if (rv == -10) + vlib_cli_output (vm, "Error while writing header to file\n"); + else if (rv == -11) + vlib_cli_output (vm, "Error while writing trace to file\n"); + else if (rv == -12) + vlib_cli_output (vm, + "Error while writing end of buffer trace to file\n"); + else if (rv == -13) + vlib_cli_output (vm, + "Error while writing start of buffer trace to file\n"); + else if (rv < 0) + vlib_cli_output (vm, "Unkown error while saving: %d", rv); + else + vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename); + vec_free (chroot_filename); + } + else if (unformat (input, "dump %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, DUMP); + } + else if (unformat (input, "custom-dump %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, CUSTOM_DUMP); + } + else if (unformat (input, "replay %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, REPLAY); + } + else if (unformat (input, "initializers %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, INITIALIZERS); + } + else if (unformat (input, "tx")) + { + which = VL_API_TRACE_TX; + } + else if (unformat (input, "first %d", &first)) + { + ; + } + else if (unformat (input, "last %d", &last)) + { + ; + } + else if (unformat (input, "status")) + { + vlib_cli_output (vm, "%U", format_vl_msg_api_trace_status, + am, which); + } + else if (unformat (input, "free")) + { + vl_msg_api_trace_onoff (am, which, 0); + vl_msg_api_trace_free (am, which); + } + else if (unformat (input, "post-mortem-on")) + vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ ); + else if (unformat (input, "post-mortem-off")) + vl_msg_api_post_mortem_dump_enable_disable (0 /* enable */ ); + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (api_trace_command, static) = { + .path = "api trace", + .short_help = + "api trace [on|off][dump|save|replay ][status][free][post-mortem-on]", + .function = api_trace_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +api_config_fn (vlib_main_t * vm, unformat_input_t * input) +{ + u32 nitems = 256 << 10; + vl_api_trace_which_t which = VL_API_TRACE_RX; + api_main_t *am = &api_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on") || unformat (input, "enable")) + { + if (unformat (input, "nitems %d", &nitems)) + ; + vl_msg_api_trace_configure (am, which, nitems); + vl_msg_api_trace_onoff (am, which, 1 /* on */ ); + vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ ); + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace"); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index b6b4c04a..100ec613 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -374,8 +374,7 @@ vhost_user_rx_thread_placement () for (i = 
vum->input_cpu_first_index; i < vum->input_cpu_first_index + vum->input_cpu_count; i++) { - vlib_node_set_state (vlib_mains ? vlib_mains[i] : &vlib_global_main, - vhost_user_input_node.index, + vlib_node_set_state (vlib_mains[i], vhost_user_input_node.index, VLIB_NODE_STATE_DISABLED); vec_add1 (workers, i); } @@ -406,9 +405,9 @@ vhost_user_rx_thread_placement () iaq.qid = qid; iaq.vhost_iface_index = vui - vum->vhost_user_interfaces; vec_add1 (vhc->rx_queues, iaq); - vlib_node_set_state (vlib_mains ? vlib_mains[cpu_index] : - &vlib_global_main, vhost_user_input_node.index, - VLIB_NODE_STATE_POLLING); + vlib_node_set_state (vlib_mains[cpu_index], + vhost_user_input_node.index, + VLIB_NODE_STATE_POLLING); } }); /* *INDENT-ON* */ diff --git a/src/vpp-api-test.am b/src/vpp-api-test.am index f0d5df62..ceab687c 100644 --- a/src/vpp-api-test.am +++ b/src/vpp-api-test.am @@ -34,14 +34,12 @@ vpp_json_test_SOURCES = \ vat/json_test.c vpp_api_test_LDADD = \ - libvlib.la \ libvlibmemoryclient.la \ libsvm.la \ libvatplugin.la \ libvppinfra.la \ libvlibapi.la \ libvlibmemory.la \ - libvnet.la \ -lpthread -lm -lrt -ldl -lcrypto vpp_api_test_LDFLAGS = -Wl,--export-dynamic diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 828394ed..c85dc680 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -2143,7 +2143,6 @@ vpe_api_init (vlib_main_t * vm) am->oam_events_registration_hash = hash_create (0, sizeof (uword)); am->bfd_events_registration_hash = hash_create (0, sizeof (uword)); - vl_api_init (vm); vl_set_memory_region_name ("/vpe-api"); vl_enable_disable_memory_api (vm, 1 /* enable it */ ); diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c index 610f40ed..277be8c0 100644 --- a/src/vpp/api/gmon.c +++ b/src/vpp/api/gmon.c @@ -122,13 +122,8 @@ gmon_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* Initial wait for the world to settle down */ vlib_process_suspend (vm, 5.0); - if (vec_len (vlib_mains) == 0) - vec_add1 (gm->my_vlib_mains, &vlib_global_main); - else - { - for (i = 0; i < vec_len (vlib_mains); i++) - vec_add1 (gm->my_vlib_mains, vlib_mains[i]); - } + for (i = 0; i < vec_len (vlib_mains); i++) + vec_add1 (gm->my_vlib_mains, vlib_mains[i]); while (1) { -- cgit 1.2.3-korg From 374e2c5fc30a5bfabfd2eb6c2d3ca5797402af16 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 9 Mar 2017 20:38:15 +0100 Subject: Retire vpp_lite The vpp_lite platform is not needed anymore, as the same effect can be achieved with the following startup.conf config (shown expanded below): plugins { plugin dpdk_plugin.so { disable } } Change-Id: I690ea8ceb1c6e1fe32e01e7da54e9958019a93bf Signed-off-by: Damjan Marion --- Makefile | 16 +- build-data/platforms/vpp.mk | 5 - build-data/platforms/vpp_lite.mk | 50 - src/configure.ac | 4 +- src/plugins/Makefile.am | 6 +- src/plugins/ixge.am | 20 + src/plugins/ixge/ixge.c | 2947 +++++++++++++++++++++++ src/plugins/ixge/ixge.h | 1293 ++++++++++ src/vlib/buffer.c | 6 + src/vnet.am | 14 +- src/vnet/devices/nic/ixge.c | 2938 ---------------------- src/vnet/devices/nic/ixge.h | 1293 ---------- src/vnet/devices/nic/sfp.c | 117 - src/vnet/devices/nic/sfp.h | 117 - src/vnet/ethernet/sfp.c | 117 + src/vnet/ethernet/sfp.h | 117 + src/vpp-api/lua/bench.lua | 4 +- src/vpp-api/lua/examples/cli/lua-cli.lua | 4 +- src/vpp-api/lua/examples/example-classifier.lua | 4 +- src/vpp-api/lua/examples/example-cli.lua | 4 +- src/vpp/vnet/main.c | 3 - test/framework.py | 4 +- 22 files changed, 4529 insertions(+), 4554 deletions(-) delete mode 100644 build-data/platforms/vpp_lite.mk create mode 100644
src/plugins/ixge.am create mode 100644 src/plugins/ixge/ixge.c create mode 100644 src/plugins/ixge/ixge.h delete mode 100644 src/vnet/devices/nic/ixge.c delete mode 100644 src/vnet/devices/nic/ixge.h delete mode 100644 src/vnet/devices/nic/sfp.c delete mode 100644 src/vnet/devices/nic/sfp.h create mode 100644 src/vnet/ethernet/sfp.c create mode 100644 src/vnet/ethernet/sfp.h (limited to 'src/vnet/devices') diff --git a/Makefile b/Makefile index f0173cc1..1527d60a 100644 --- a/Makefile +++ b/Makefile @@ -230,18 +230,18 @@ define test endef test: bootstrap - $(call test,vpp_lite,vpp_lite,test) + $(call test,vpp,vpp,test) test-debug: bootstrap - $(call test,vpp_lite,vpp_lite_debug,test) + $(call test,vpp,vpp_debug,test) test-all: bootstrap $(eval EXTENDED_TESTS=yes) - $(call test,vpp_lite,vpp_lite,test) + $(call test,vpp,vpp,test) test-all-debug: bootstrap $(eval EXTENDED_TESTS=yes) - $(call test,vpp_lite,vpp_lite_debug,test) + $(call test,vpp,vpp_debug,test) test-help: @make -C test help @@ -262,7 +262,7 @@ test-wipe-doc: @make -C test wipe-doc test-cov: bootstrap - $(call test,vpp_lite,vpp_lite_gcov,cov) + $(call test,vpp,vpp_gcov,cov) test-wipe-cov: @make -C test wipe-cov @@ -271,10 +271,10 @@ test-checkstyle: @make -C test checkstyle retest: - $(call test,vpp_lite,vpp_lite,retest) + $(call test,vpp,vpp,retest) retest-debug: - $(call test,vpp_lite,vpp_lite_debug,retest) + $(call test,vpp,vpp_debug,retest) STARTUP_DIR ?= $(PWD) ifeq ("$(wildcard $(STARTUP_CONF))","") @@ -376,8 +376,6 @@ endef verify: install-dep $(BR)/.bootstrap.ok dpdk-install-dev $(call banner,"Building for PLATFORM=vpp using gcc") @make -C build-root PLATFORM=vpp TAG=vpp wipe-all install-packages - $(call banner,"Building for PLATFORM=vpp_lite using gcc") - @make -C build-root PLATFORM=vpp_lite TAG=vpp_lite wipe-all install-packages ifeq ($(OS_ID)-$(OS_VERSION_ID),ubuntu-16.04) $(call banner,"Installing dependencies") @sudo -E apt-get update diff --git a/build-data/platforms/vpp.mk b/build-data/platforms/vpp.mk index 401a383a..c61375d8 100644 --- a/build-data/platforms/vpp.mk +++ b/build-data/platforms/vpp.mk @@ -35,11 +35,6 @@ vpp_uses_dpdk = yes vpp_root_packages = vpp gmod -vpp_configure_args_vpp = --with-dpdk - -# Set these parameters carefully. The vlib_buffer_t is 128 bytes, i.e. -vlib_configure_args_vpp = --with-pre-data=128 - # DPDK configuration parameters # vpp_uses_dpdk_cryptodev_sw = yes # vpp_uses_dpdk_mlx5_pmd = yes diff --git a/build-data/platforms/vpp_lite.mk b/build-data/platforms/vpp_lite.mk deleted file mode 100644 index a556b487..00000000 --- a/build-data/platforms/vpp_lite.mk +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2016 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
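The commit message above compresses the replacement configuration onto one line; expanded into the usual startup.conf layout it reads:

plugins {
  plugin dpdk_plugin.so {
    disable
  }
}

With dpdk_plugin.so disabled, a stock vpp build behaves like the former vpp_lite image, which is why the separate platform can be retired.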
- -# vector packet processor -vpp_lite_arch = native -ifeq ($(shell uname -m),x86_64) -vpp_lite_march = corei7 # Nehalem Instruction set -vpp_lite_mtune = corei7-avx # Optimize for Sandy Bridge -else -vpp_lite_march = native -vpp_lite_mtune = generic -endif -vpp_lite_native_tools = vppapigen - -vpp_lite_uses_dpdk = no - -# Uncoment to enable building unit tests -#vpp_lite_enable_tests = yes - -vpp_lite_root_packages = vpp gmod - -vlib_configure_args_vpp_lite = --with-pre-data=128 - -vnet_configure_args_vpp_lite = -vpp_configure_args_vpp_lite = - -vpp_lite_debug_TAG_CFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \ - -fstack-protector-all -fPIC -Werror -vpp_lite_debug_TAG_LDFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \ - -fstack-protector-all -fPIC -Werror - -vpp_lite_TAG_CFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) -mtune=$(MTUNE) \ - -fstack-protector -fPIC -Werror -vpp_lite_TAG_LDFLAGS = -g -O2 -DFORTIFY_SOURCE=2 -march=$(MARCH) -mtune=$(MTUNE) \ - -fstack-protector -fPIC -Werror - -vpp_lite_gcov_TAG_CFLAGS = -g -O0 -DCLIB_DEBUG -march=$(MARCH) \ - -fPIC -Werror -fprofile-arcs -ftest-coverage -vpp_lite_gcov_TAG_LDFLAGS = -g -O0 -DCLIB_DEBUG -march=$(MARCH) \ - -fPIC -Werror -coverage diff --git a/src/configure.ac b/src/configure.ac index c22d152e..d90740d9 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -97,7 +97,6 @@ DISABLE_ARG(papi, [Disable Python API bindings]) DISABLE_ARG(japi, [Disable Java API bindings]) # --with-X -WITH_ARG(dpdk, [Use use DPDK]) WITH_ARG(dpdk_crypto_sw,[Use DPDK cryptodev SW PMDs]) WITH_ARG(dpdk_mlx5_pmd, [Use DPDK with mlx5 PMD]) @@ -130,7 +129,6 @@ AC_ARG_WITH(pre-data, AC_SUBST(PRE_DATA_SIZE, [$with_pre_data]) AC_SUBST(APICLI, [-DVPP_API_TEST_BUILTIN=${n_with_apicli}]) -AC_DEFINE_UNQUOTED(DPDK, [${n_with_dpdk}]) AC_DEFINE_UNQUOTED(DPDK_SHARED_LIB, [${n_enable_dpdk_shared}]) AC_DEFINE_UNQUOTED(DPDK_CRYPTO_SW, [${n_with_dpdk_crypto_sw}]) AC_DEFINE_UNQUOTED(WITH_LIBSSL, [${n_with_libssl}]) @@ -147,9 +145,11 @@ AC_SUBST(AR_FLAGS) # Please keep alphabetical order PLUGIN_ENABLED(acl) +PLUGIN_ENABLED(dpdk) PLUGIN_ENABLED(flowperpkt) PLUGIN_ENABLED(ila) PLUGIN_ENABLED(ioam) +PLUGIN_ENABLED(ixge) PLUGIN_ENABLED(lb) PLUGIN_ENABLED(sixrd) PLUGIN_ENABLED(snat) diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 7b36049e..255e644f 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -33,7 +33,7 @@ if ENABLE_ACL_PLUGIN include acl.am endif -if WITH_DPDK +if ENABLE_DPDK_PLUGIN include dpdk.am endif @@ -49,6 +49,10 @@ if ENABLE_IOAM_PLUGIN include ioam.am endif +if ENABLE_IXGE_PLUGIN +include ixge.am +endif + if ENABLE_LB_PLUGIN include lb.am endif diff --git a/src/plugins/ixge.am b/src/plugins/ixge.am new file mode 100644 index 00000000..7e61344b --- /dev/null +++ b/src/plugins/ixge.am @@ -0,0 +1,20 @@ +# Copyright (c) 2016 Cisco Systems, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +vppplugins_LTLIBRARIES += ixge_plugin.la + +ixge_plugin_la_SOURCES = ixge/ixge.c + +noinst_HEADERS += ixge/ixge.h + +# vi:syntax=automake diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c new file mode 100644 index 00000000..4eebc457 --- /dev/null +++ b/src/plugins/ixge/ixge.c @@ -0,0 +1,2947 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * WARNING! + * This driver is not intended for production use and it is unsupported. + * It is provided for educational use only. + * Please use supported DPDK driver instead. + */ + +#if __x86_64__ +#include + +#ifndef CLIB_HAVE_VEC128 +#warning HACK: ixge driver wont really work, missing u32x4 +typedef unsigned long long u32x4; +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#define IXGE_ALWAYS_POLL 0 + +#define EVENT_SET_FLAGS 0 +#define IXGE_HWBP_RACE_ELOG 0 + +#define PCI_VENDOR_ID_INTEL 0x8086 + +/* 10 GIG E (XGE) PHY IEEE 802.3 clause 45 definitions. */ +#define XGE_PHY_DEV_TYPE_PMA_PMD 1 +#define XGE_PHY_DEV_TYPE_PHY_XS 4 +#define XGE_PHY_ID1 0x2 +#define XGE_PHY_ID2 0x3 +#define XGE_PHY_CONTROL 0x0 +#define XGE_PHY_CONTROL_RESET (1 << 15) + +ixge_main_t ixge_main; +static vlib_node_registration_t ixge_input_node; +static vlib_node_registration_t ixge_process_node; + +static void +ixge_semaphore_get (ixge_device_t * xd) +{ + ixge_main_t *xm = &ixge_main; + vlib_main_t *vm = xm->vlib_main; + ixge_regs_t *r = xd->regs; + u32 i; + + i = 0; + while (!(r->software_semaphore & (1 << 0))) + { + if (i > 0) + vlib_process_suspend (vm, 100e-6); + i++; + } + do + { + r->software_semaphore |= 1 << 1; + } + while (!(r->software_semaphore & (1 << 1))); +} + +static void +ixge_semaphore_release (ixge_device_t * xd) +{ + ixge_regs_t *r = xd->regs; + r->software_semaphore &= ~3; +} + +static void +ixge_software_firmware_sync (ixge_device_t * xd, u32 sw_mask) +{ + ixge_main_t *xm = &ixge_main; + vlib_main_t *vm = xm->vlib_main; + ixge_regs_t *r = xd->regs; + u32 fw_mask = sw_mask << 5; + u32 m, done = 0; + + while (!done) + { + ixge_semaphore_get (xd); + m = r->software_firmware_sync; + done = (m & fw_mask) == 0; + if (done) + r->software_firmware_sync = m | sw_mask; + ixge_semaphore_release (xd); + if (!done) + vlib_process_suspend (vm, 10e-3); + } +} + +static void +ixge_software_firmware_sync_release (ixge_device_t * xd, u32 sw_mask) +{ + ixge_regs_t *r = xd->regs; + ixge_semaphore_get (xd); + r->software_firmware_sync &= ~sw_mask; + ixge_semaphore_release (xd); +} + +u32 +ixge_read_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, + u32 v, u32 is_read) +{ + ixge_regs_t *r = xd->regs; + const u32 busy_bit = 1 << 30; + u32 x; + + ASSERT (xd->phy_index < 2); + ixge_software_firmware_sync (xd, 1 << (1 + xd->phy_index)); + + ASSERT (reg_index < (1 << 16)); + ASSERT (dev_type < (1 << 5)); + if (!is_read) + r->xge_mac.phy_data = v; + + /* Address cycle. 
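(Clause-45 MDIO splits each access into an address frame that latches reg_index, followed by a separate op-code frame; the second phy_command write below carries op-code 2 for a read or 1 for a write, shifted to bit 26.)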
*/ + x = + reg_index | (dev_type << 16) | (xd-> + phys[xd->phy_index].mdio_address << 21); + r->xge_mac.phy_command = x | busy_bit; + /* Busy wait timed to take 28e-6 secs. No suspend. */ + while (r->xge_mac.phy_command & busy_bit) + ; + + r->xge_mac.phy_command = x | ((is_read ? 2 : 1) << 26) | busy_bit; + while (r->xge_mac.phy_command & busy_bit) + ; + + if (is_read) + v = r->xge_mac.phy_data >> 16; + + ixge_software_firmware_sync_release (xd, 1 << (1 + xd->phy_index)); + + return v; +} + +static u32 +ixge_read_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index) +{ + return ixge_read_write_phy_reg (xd, dev_type, reg_index, 0, /* is_read */ + 1); +} + +static void +ixge_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, u32 v) +{ + (void) ixge_read_write_phy_reg (xd, dev_type, reg_index, v, /* is_read */ + 0); +} + +static void +ixge_i2c_put_bits (i2c_bus_t * b, int scl, int sda) +{ + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data); + u32 v; + + v = 0; + v |= (sda != 0) << 3; + v |= (scl != 0) << 1; + xd->regs->i2c_control = v; +} + +static void +ixge_i2c_get_bits (i2c_bus_t * b, int *scl, int *sda) +{ + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data); + u32 v; + + v = xd->regs->i2c_control; + *sda = (v & (1 << 2)) != 0; + *scl = (v & (1 << 0)) != 0; +} + +static u16 +ixge_read_eeprom (ixge_device_t * xd, u32 address) +{ + ixge_regs_t *r = xd->regs; + u32 v; + r->eeprom_read = (( /* start bit */ (1 << 0)) | (address << 2)); + /* Wait for done bit. */ + while (!((v = r->eeprom_read) & (1 << 1))) + ; + return v >> 16; +} + +static void +ixge_sfp_enable_disable_laser (ixge_device_t * xd, uword enable) +{ + u32 tx_disable_bit = 1 << 3; + if (enable) + xd->regs->sdp_control &= ~tx_disable_bit; + else + xd->regs->sdp_control |= tx_disable_bit; +} + +static void +ixge_sfp_enable_disable_10g (ixge_device_t * xd, uword enable) +{ + u32 is_10g_bit = 1 << 5; + if (enable) + xd->regs->sdp_control |= is_10g_bit; + else + xd->regs->sdp_control &= ~is_10g_bit; +} + +static clib_error_t * +ixge_sfp_phy_init_from_eeprom (ixge_device_t * xd, u16 sfp_type) +{ + u16 a, id, reg_values_addr = 0; + + a = ixge_read_eeprom (xd, 0x2b); + if (a == 0 || a == 0xffff) + return clib_error_create ("no init sequence in eeprom"); + + while (1) + { + id = ixge_read_eeprom (xd, ++a); + if (id == 0xffff) + break; + reg_values_addr = ixge_read_eeprom (xd, ++a); + if (id == sfp_type) + break; + } + if (id != sfp_type) + return clib_error_create ("failed to find id 0x%x", sfp_type); + + ixge_software_firmware_sync (xd, 1 << 3); + while (1) + { + u16 v = ixge_read_eeprom (xd, ++reg_values_addr); + if (v == 0xffff) + break; + xd->regs->core_analog_config = v; + } + ixge_software_firmware_sync_release (xd, 1 << 3); + + /* Make sure laser is off. We'll turn on the laser when + the interface is brought up. */ + ixge_sfp_enable_disable_laser (xd, /* enable */ 0); + ixge_sfp_enable_disable_10g (xd, /* is_10g */ 1); + + return 0; +} + +static void +ixge_sfp_device_up_down (ixge_device_t * xd, uword is_up) +{ + u32 v; + + if (is_up) + { + /* pma/pmd 10g serial SFI. */ + xd->regs->xge_mac.auto_negotiation_control2 &= ~(3 << 16); + xd->regs->xge_mac.auto_negotiation_control2 |= 2 << 16; + + v = xd->regs->xge_mac.auto_negotiation_control; + v &= ~(7 << 13); + v |= (0 << 13); + /* Restart autoneg. 
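(Bit 12 is self-clearing; once it is set, the code below busy-waits for the link partner ability word, then switches the link-mode field to 10G SFI serdes and restarts negotiation a second time.)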
*/ + v |= (1 << 12); + xd->regs->xge_mac.auto_negotiation_control = v; + + while (!(xd->regs->xge_mac.link_partner_ability[0] & 0xf0000)) + ; + + v = xd->regs->xge_mac.auto_negotiation_control; + + /* link mode 10g sfi serdes */ + v &= ~(7 << 13); + v |= (3 << 13); + + /* Restart autoneg. */ + v |= (1 << 12); + xd->regs->xge_mac.auto_negotiation_control = v; + + xd->regs->xge_mac.link_status; + } + + ixge_sfp_enable_disable_laser (xd, /* enable */ is_up); + + /* Give time for link partner to notice that we're up. */ + if (is_up && vlib_in_process_context (vlib_get_main ())) + { + vlib_process_suspend (vlib_get_main (), 300e-3); + } +} + +always_inline ixge_dma_regs_t * +get_dma_regs (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 qi) +{ + ixge_regs_t *r = xd->regs; + ASSERT (qi < 128); + if (rt == VLIB_RX) + return qi < 64 ? &r->rx_dma0[qi] : &r->rx_dma1[qi - 64]; + else + return &r->tx_dma[qi]; +} + +static clib_error_t * +ixge_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, hif->dev_instance); + ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0); + + if (is_up) + { + xd->regs->rx_enable |= 1; + xd->regs->tx_dma_control |= 1; + dr->control |= 1 << 25; + while (!(dr->control & (1 << 25))) + ; + } + else + { + xd->regs->rx_enable &= ~1; + xd->regs->tx_dma_control &= ~1; + } + + ixge_sfp_device_up_down (xd, is_up); + + return /* no error */ 0; +} + +static void +ixge_sfp_phy_init (ixge_device_t * xd) +{ + ixge_phy_t *phy = xd->phys + xd->phy_index; + i2c_bus_t *ib = &xd->i2c_bus; + + ib->private_data = xd->device_index; + ib->put_bits = ixge_i2c_put_bits; + ib->get_bits = ixge_i2c_get_bits; + vlib_i2c_init (ib); + + vlib_i2c_read_eeprom (ib, 0x50, 0, 128, (u8 *) & xd->sfp_eeprom); + + if (vlib_i2c_bus_timed_out (ib) || !sfp_eeprom_is_valid (&xd->sfp_eeprom)) + xd->sfp_eeprom.id = SFP_ID_unknown; + else + { + /* FIXME 5 => SR/LR eeprom ID. */ + clib_error_t *e = + ixge_sfp_phy_init_from_eeprom (xd, 5 + xd->pci_function); + if (e) + clib_error_report (e); + } + + phy->mdio_address = ~0; +} + +static void +ixge_phy_init (ixge_device_t * xd) +{ + ixge_main_t *xm = &ixge_main; + vlib_main_t *vm = xm->vlib_main; + ixge_phy_t *phy = xd->phys + xd->phy_index; + + switch (xd->device_id) + { + case IXGE_82599_sfp: + case IXGE_82599_sfp_em: + case IXGE_82599_sfp_fcoe: + /* others? */ + return ixge_sfp_phy_init (xd); + + default: + break; + } + + /* Probe address of phy. */ + { + u32 i, v; + + phy->mdio_address = ~0; + for (i = 0; i < 32; i++) + { + phy->mdio_address = i; + v = ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1); + if (v != 0xffff && v != 0) + break; + } + + /* No PHY found? */ + if (i >= 32) + return; + } + + phy->id = + ((ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1) << 16) | + ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID2)); + + { + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d, phy id 0x%d mdio address %d",.format_args = "i4i4i4",}; + struct + { + u32 instance, id, address; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->id = phy->id; + ed->address = phy->mdio_address; + } + + /* Reset phy. 
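(Writes XGE_PHY_CONTROL_RESET, bit 15 of the control register; the do/while loop below suspends for 1 ms per iteration until the self-clearing bit reads back as zero.)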
*/ + ixge_write_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL, + XGE_PHY_CONTROL_RESET); + + /* Wait for self-clearning reset bit to clear. */ + do + { + vlib_process_suspend (vm, 1e-3); + } + while (ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL) & + XGE_PHY_CONTROL_RESET); +} + +static u8 * +format_ixge_rx_from_hw_descriptor (u8 * s, va_list * va) +{ + ixge_rx_from_hw_descriptor_t *d = + va_arg (*va, ixge_rx_from_hw_descriptor_t *); + u32 s0 = d->status[0], s2 = d->status[2]; + u32 is_ip4, is_ip6, is_ip, is_tcp, is_udp; + uword indent = format_get_indent (s); + + s = format (s, "%s-owned", + (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE) ? "sw" : + "hw"); + s = + format (s, ", length this descriptor %d, l3 offset %d", + d->n_packet_bytes_this_descriptor, + IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s0)); + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) + s = format (s, ", end-of-packet"); + + s = format (s, "\n%U", format_white_space, indent); + + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR) + s = format (s, "layer2 error"); + + if (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2) + { + s = format (s, "layer 2 type %d", (s0 & 0x1f)); + return s; + } + + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN) + s = format (s, "vlan header 0x%x\n%U", d->vlan_tag, + format_white_space, indent); + + if ((is_ip4 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4))) + { + s = format (s, "ip4%s", + (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT) ? " options" : + ""); + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED) + s = format (s, " checksum %s", + (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) ? + "bad" : "ok"); + } + if ((is_ip6 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6))) + s = format (s, "ip6%s", + (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT) ? " extended" : + ""); + is_tcp = is_udp = 0; + if ((is_ip = (is_ip4 | is_ip6))) + { + is_tcp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP) != 0; + is_udp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP) != 0; + if (is_tcp) + s = format (s, ", tcp"); + if (is_udp) + s = format (s, ", udp"); + } + + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED) + s = format (s, ", tcp checksum %s", + (s2 & IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR) ? "bad" : + "ok"); + if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED) + s = format (s, ", udp checksum %s", + (s2 & IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR) ? "bad" : + "ok"); + + return s; +} + +static u8 * +format_ixge_tx_descriptor (u8 * s, va_list * va) +{ + ixge_tx_descriptor_t *d = va_arg (*va, ixge_tx_descriptor_t *); + u32 s0 = d->status0, s1 = d->status1; + uword indent = format_get_indent (s); + u32 v; + + s = format (s, "buffer 0x%Lx, %d packet bytes, %d bytes this buffer", + d->buffer_address, s1 >> 14, d->n_bytes_this_buffer); + + s = format (s, "\n%U", format_white_space, indent); + + if ((v = (s0 >> 0) & 3)) + s = format (s, "reserved 0x%x, ", v); + + if ((v = (s0 >> 2) & 3)) + s = format (s, "mac 0x%x, ", v); + + if ((v = (s0 >> 4) & 0xf) != 3) + s = format (s, "type 0x%x, ", v); + + s = format (s, "%s%s%s%s%s%s%s%s", + (s0 & (1 << 8)) ? "eop, " : "", + (s0 & (1 << 9)) ? "insert-fcs, " : "", + (s0 & (1 << 10)) ? "reserved26, " : "", + (s0 & (1 << 11)) ? "report-status, " : "", + (s0 & (1 << 12)) ? "reserved28, " : "", + (s0 & (1 << 13)) ? "is-advanced, " : "", + (s0 & (1 << 14)) ? "vlan-enable, " : "", + (s0 & (1 << 15)) ? 
"tx-segmentation, " : ""); + + if ((v = s1 & 0xf) != 0) + s = format (s, "status 0x%x, ", v); + + if ((v = (s1 >> 4) & 0xf)) + s = format (s, "context 0x%x, ", v); + + if ((v = (s1 >> 8) & 0x3f)) + s = format (s, "options 0x%x, ", v); + + return s; +} + +typedef struct +{ + ixge_descriptor_t before, after; + + u32 buffer_index; + + u16 device_index; + + u8 queue_index; + + u8 is_start_of_packet; + + /* Copy of VLIB buffer; packet data stored in pre_data. */ + vlib_buffer_t buffer; +} ixge_rx_dma_trace_t; + +static u8 * +format_ixge_rx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + vlib_node_t *node = va_arg (*va, vlib_node_t *); + vnet_main_t *vnm = vnet_get_main (); + ixge_rx_dma_trace_t *t = va_arg (*va, ixge_rx_dma_trace_t *); + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index); + format_function_t *f; + uword indent = format_get_indent (s); + + { + vnet_sw_interface_t *sw = + vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + s = + format (s, "%U rx queue %d", format_vnet_sw_interface_name, vnm, sw, + t->queue_index); + } + + s = format (s, "\n%Ubefore: %U", + format_white_space, indent, + format_ixge_rx_from_hw_descriptor, &t->before); + s = format (s, "\n%Uafter : head/tail address 0x%Lx/0x%Lx", + format_white_space, indent, + t->after.rx_to_hw.head_address, t->after.rx_to_hw.tail_address); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U", format_white_space, indent); + + f = node->format_buffer; + if (!f || !t->is_start_of_packet) + f = format_hex_bytes; + s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); + + return s; +} + +#define foreach_ixge_error \ + _ (none, "no error") \ + _ (tx_full_drops, "tx ring full drops") \ + _ (ip4_checksum_error, "ip4 checksum errors") \ + _ (rx_alloc_fail, "rx buf alloc from free list failed") \ + _ (rx_alloc_no_physmem, "rx buf alloc failed no physmem") + +typedef enum +{ +#define _(f,s) IXGE_ERROR_##f, + foreach_ixge_error +#undef _ + IXGE_N_ERROR, +} ixge_error_t; + +always_inline void +ixge_rx_next_and_error_from_status_x1 (ixge_device_t * xd, + u32 s00, u32 s02, + u8 * next0, u8 * error0, u32 * flags0) +{ + u8 is0_ip4, is0_ip6, n0, e0; + u32 f0; + + e0 = IXGE_ERROR_none; + n0 = IXGE_RX_NEXT_ETHERNET_INPUT; + + is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; + n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0; + + e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) + ? IXGE_ERROR_ip4_checksum_error : e0); + + is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; + n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0; + + n0 = (xd->per_interface_next_index != ~0) ? + xd->per_interface_next_index : n0; + + /* Check for error. */ + n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0; + + f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED + | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) + ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); + + f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR + | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) + ? 
0 : IP_BUFFER_L4_CHECKSUM_CORRECT); + + *error0 = e0; + *next0 = n0; + *flags0 = f0; +} + +always_inline void +ixge_rx_next_and_error_from_status_x2 (ixge_device_t * xd, + u32 s00, u32 s02, + u32 s10, u32 s12, + u8 * next0, u8 * error0, u32 * flags0, + u8 * next1, u8 * error1, u32 * flags1) +{ + u8 is0_ip4, is0_ip6, n0, e0; + u8 is1_ip4, is1_ip6, n1, e1; + u32 f0, f1; + + e0 = e1 = IXGE_ERROR_none; + n0 = n1 = IXGE_RX_NEXT_IP4_INPUT; + + is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; + is1_ip4 = s12 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; + + n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0; + n1 = is1_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n1; + + e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) + ? IXGE_ERROR_ip4_checksum_error : e0); + e1 = (is1_ip4 && (s12 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) + ? IXGE_ERROR_ip4_checksum_error : e1); + + is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; + is1_ip6 = s10 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; + + n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0; + n1 = is1_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n1; + + n0 = (xd->per_interface_next_index != ~0) ? + xd->per_interface_next_index : n0; + n1 = (xd->per_interface_next_index != ~0) ? + xd->per_interface_next_index : n1; + + /* Check for error. */ + n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0; + n1 = e1 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n1; + + *error0 = e0; + *error1 = e1; + + *next0 = n0; + *next1 = n1; + + f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED + | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) + ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); + f1 = ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED + | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) + ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); + + f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR + | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) + ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); + f1 |= ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR + | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) + ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); + + *flags0 = f0; + *flags1 = f1; +} + +static void +ixge_rx_trace (ixge_main_t * xm, + ixge_device_t * xd, + ixge_dma_queue_t * dq, + ixge_descriptor_t * before_descriptors, + u32 * before_buffers, + ixge_descriptor_t * after_descriptors, uword n_descriptors) +{ + vlib_main_t *vm = xm->vlib_main; + vlib_node_runtime_t *node = dq->rx.node; + ixge_rx_from_hw_descriptor_t *bd; + ixge_rx_to_hw_descriptor_t *ad; + u32 *b, n_left, is_sop, next_index_sop; + + n_left = n_descriptors; + b = before_buffers; + bd = &before_descriptors->rx_from_hw; + ad = &after_descriptors->rx_to_hw; + is_sop = dq->rx.is_start_of_packet; + next_index_sop = dq->rx.saved_start_of_packet_next_index; + + while (n_left >= 2) + { + u32 bi0, bi1, flags0, flags1; + vlib_buffer_t *b0, *b1; + ixge_rx_dma_trace_t *t0, *t1; + u8 next0, error0, next1, error1; + + bi0 = b[0]; + bi1 = b[1]; + n_left -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + ixge_rx_next_and_error_from_status_x2 (xd, + bd[0].status[0], bd[0].status[2], + bd[1].status[0], bd[1].status[2], + &next0, &error0, &flags0, + &next1, &error1, &flags1); + + next_index_sop = is_sop ? next0 : next_index_sop; + vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0); + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->is_start_of_packet = is_sop; + is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + next_index_sop = is_sop ? 
next1 : next_index_sop; + vlib_trace_buffer (vm, node, next_index_sop, b1, /* follow_chain */ 0); + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + t1->is_start_of_packet = is_sop; + is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + t0->queue_index = dq->queue_index; + t1->queue_index = dq->queue_index; + t0->device_index = xd->device_index; + t1->device_index = xd->device_index; + t0->before.rx_from_hw = bd[0]; + t1->before.rx_from_hw = bd[1]; + t0->after.rx_to_hw = ad[0]; + t1->after.rx_to_hw = ad[1]; + t0->buffer_index = bi0; + t1->buffer_index = bi1; + memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data)); + memcpy (t0->buffer.pre_data, b0->data + b0->current_data, + sizeof (t0->buffer.pre_data)); + memcpy (t1->buffer.pre_data, b1->data + b1->current_data, + sizeof (t1->buffer.pre_data)); + + b += 2; + bd += 2; + ad += 2; + } + + while (n_left >= 1) + { + u32 bi0, flags0; + vlib_buffer_t *b0; + ixge_rx_dma_trace_t *t0; + u8 next0, error0; + + bi0 = b[0]; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ixge_rx_next_and_error_from_status_x1 (xd, + bd[0].status[0], bd[0].status[2], + &next0, &error0, &flags0); + + next_index_sop = is_sop ? next0 : next_index_sop; + vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0); + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->is_start_of_packet = is_sop; + is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + t0->queue_index = dq->queue_index; + t0->device_index = xd->device_index; + t0->before.rx_from_hw = bd[0]; + t0->after.rx_to_hw = ad[0]; + t0->buffer_index = bi0; + memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + memcpy (t0->buffer.pre_data, b0->data + b0->current_data, + sizeof (t0->buffer.pre_data)); + + b += 1; + bd += 1; + ad += 1; + } +} + +typedef struct +{ + ixge_tx_descriptor_t descriptor; + + u32 buffer_index; + + u16 device_index; + + u8 queue_index; + + u8 is_start_of_packet; + + /* Copy of VLIB buffer; packet data stored in pre_data. 
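(Keeping the first packet bytes in pre_data lets format_ixge_tx_dma_trace print an ethernet header, or fall back to hex bytes, without dereferencing the original buffer later.)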
*/ + vlib_buffer_t buffer; +} ixge_tx_dma_trace_t; + +static u8 * +format_ixge_tx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ixge_tx_dma_trace_t *t = va_arg (*va, ixge_tx_dma_trace_t *); + vnet_main_t *vnm = vnet_get_main (); + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index); + format_function_t *f; + uword indent = format_get_indent (s); + + { + vnet_sw_interface_t *sw = + vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + s = + format (s, "%U tx queue %d", format_vnet_sw_interface_name, vnm, sw, + t->queue_index); + } + + s = format (s, "\n%Udescriptor: %U", + format_white_space, indent, + format_ixge_tx_descriptor, &t->descriptor); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U", format_white_space, indent); + + f = format_ethernet_header_with_length; + if (!f || !t->is_start_of_packet) + f = format_hex_bytes; + s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); + + return s; +} + +typedef struct +{ + vlib_node_runtime_t *node; + + u32 is_start_of_packet; + + u32 n_bytes_in_packet; + + ixge_tx_descriptor_t *start_of_packet_descriptor; +} ixge_tx_state_t; + +static void +ixge_tx_trace (ixge_main_t * xm, + ixge_device_t * xd, + ixge_dma_queue_t * dq, + ixge_tx_state_t * tx_state, + ixge_tx_descriptor_t * descriptors, + u32 * buffers, uword n_descriptors) +{ + vlib_main_t *vm = xm->vlib_main; + vlib_node_runtime_t *node = tx_state->node; + ixge_tx_descriptor_t *d; + u32 *b, n_left, is_sop; + + n_left = n_descriptors; + b = buffers; + d = descriptors; + is_sop = tx_state->is_start_of_packet; + + while (n_left >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + ixge_tx_dma_trace_t *t0, *t1; + + bi0 = b[0]; + bi1 = b[1]; + n_left -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->is_start_of_packet = is_sop; + is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + t1->is_start_of_packet = is_sop; + is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + t0->queue_index = dq->queue_index; + t1->queue_index = dq->queue_index; + t0->device_index = xd->device_index; + t1->device_index = xd->device_index; + t0->descriptor = d[0]; + t1->descriptor = d[1]; + t0->buffer_index = bi0; + t1->buffer_index = bi1; + memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data)); + memcpy (t0->buffer.pre_data, b0->data + b0->current_data, + sizeof (t0->buffer.pre_data)); + memcpy (t1->buffer.pre_data, b1->data + b1->current_data, + sizeof (t1->buffer.pre_data)); + + b += 2; + d += 2; + } + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t *b0; + ixge_tx_dma_trace_t *t0; + + bi0 = b[0]; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->is_start_of_packet = is_sop; + is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + t0->queue_index = dq->queue_index; + t0->device_index = xd->device_index; + t0->descriptor = d[0]; + t0->buffer_index = bi0; + memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + memcpy (t0->buffer.pre_data, b0->data + b0->current_data, + sizeof (t0->buffer.pre_data)); + + b += 1; + d += 1; + } +} 
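The two always_inline helpers that follow, ixge_ring_sub and ixge_ring_add, implement the wrap-around index arithmetic used by every rx and tx path in this driver. A minimal standalone sketch of the same invariants, using illustrative names and a fixed 512-entry ring in place of the driver's queue type:

#include <assert.h>

#define N_DESC 512u

/* Forward distance from index i0 to index i1 on the ring. */
static unsigned
ring_sub (unsigned i0, unsigned i1)
{
  int d = (int) i1 - (int) i0;
  return d < 0 ? (unsigned) (d + N_DESC) : (unsigned) d;
}

/* Advance index i0 by n slots, wrapping at most once. */
static unsigned
ring_add (unsigned i0, unsigned n)
{
  unsigned d = i0 + n;
  return d >= N_DESC ? d - N_DESC : d;
}

int
main (void)
{
  /* head == tail encodes an empty ring, so at most N_DESC - 1
     slots can ever be filled. */
  assert (ring_sub (10, 9) == N_DESC - 1);	/* tail one behind head: full */
  assert (ring_add (509, 5) == 2);	/* wraps past the end exactly once */
  return 0;
}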
+ +always_inline uword +ixge_ring_sub (ixge_dma_queue_t * q, u32 i0, u32 i1) +{ + i32 d = i1 - i0; + ASSERT (i0 < q->n_descriptors); + ASSERT (i1 < q->n_descriptors); + return d < 0 ? q->n_descriptors + d : d; +} + +always_inline uword +ixge_ring_add (ixge_dma_queue_t * q, u32 i0, u32 i1) +{ + u32 d = i0 + i1; + ASSERT (i0 < q->n_descriptors); + ASSERT (i1 < q->n_descriptors); + d -= d >= q->n_descriptors ? q->n_descriptors : 0; + return d; +} + +always_inline uword +ixge_tx_descriptor_matches_template (ixge_main_t * xm, + ixge_tx_descriptor_t * d) +{ + u32 cmp; + + cmp = ((d->status0 & xm->tx_descriptor_template_mask.status0) + ^ xm->tx_descriptor_template.status0); + if (cmp) + return 0; + cmp = ((d->status1 & xm->tx_descriptor_template_mask.status1) + ^ xm->tx_descriptor_template.status1); + if (cmp) + return 0; + + return 1; +} + +static uword +ixge_tx_no_wrap (ixge_main_t * xm, + ixge_device_t * xd, + ixge_dma_queue_t * dq, + u32 * buffers, + u32 start_descriptor_index, + u32 n_descriptors, ixge_tx_state_t * tx_state) +{ + vlib_main_t *vm = xm->vlib_main; + ixge_tx_descriptor_t *d, *d_sop; + u32 n_left = n_descriptors; + u32 *to_free = vec_end (xm->tx_buffers_pending_free); + u32 *to_tx = + vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index); + u32 is_sop = tx_state->is_start_of_packet; + u32 len_sop = tx_state->n_bytes_in_packet; + u16 template_status = xm->tx_descriptor_template.status0; + u32 descriptor_prefetch_rotor = 0; + + ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors); + d = &dq->descriptors[start_descriptor_index].tx; + d_sop = is_sop ? d : tx_state->start_of_packet_descriptor; + + while (n_left >= 4) + { + vlib_buffer_t *b0, *b1; + u32 bi0, fi0, len0; + u32 bi1, fi1, len1; + u8 is_eop0, is_eop1; + + /* Prefetch next iteration. */ + vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD); + vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD); + + if ((descriptor_prefetch_rotor & 0x3) == 0) + CLIB_PREFETCH (d + 4, CLIB_CACHE_LINE_BYTES, STORE); + + descriptor_prefetch_rotor += 2; + + bi0 = buffers[0]; + bi1 = buffers[1]; + + to_free[0] = fi0 = to_tx[0]; + to_tx[0] = bi0; + to_free += fi0 != 0; + + to_free[0] = fi1 = to_tx[1]; + to_tx[1] = bi1; + to_free += fi1 != 0; + + buffers += 2; + n_left -= 2; + to_tx += 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + is_eop1 = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + len0 = b0->current_length; + len1 = b1->current_length; + + ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0)); + ASSERT (ixge_tx_descriptor_matches_template (xm, d + 1)); + + d[0].buffer_address = + vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data; + d[1].buffer_address = + vlib_get_buffer_data_physical_address (vm, bi1) + b1->current_data; + + d[0].n_bytes_this_buffer = len0; + d[1].n_bytes_this_buffer = len1; + + d[0].status0 = + template_status | (is_eop0 << + IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); + d[1].status0 = + template_status | (is_eop1 << + IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); + + len_sop = (is_sop ? 0 : len_sop) + len0; + d_sop[0].status1 = + IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); + d += 1; + d_sop = is_eop0 ? d : d_sop; + + is_sop = is_eop0; + + len_sop = (is_sop ? 0 : len_sop) + len1; + d_sop[0].status1 = + IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); + d += 1; + d_sop = is_eop1 ? 
d : d_sop; + + is_sop = is_eop1; + } + + while (n_left > 0) + { + vlib_buffer_t *b0; + u32 bi0, fi0, len0; + u8 is_eop0; + + bi0 = buffers[0]; + + to_free[0] = fi0 = to_tx[0]; + to_tx[0] = bi0; + to_free += fi0 != 0; + + buffers += 1; + n_left -= 1; + to_tx += 1; + + b0 = vlib_get_buffer (vm, bi0); + + is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; + + len0 = b0->current_length; + + ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0)); + + d[0].buffer_address = + vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data; + + d[0].n_bytes_this_buffer = len0; + + d[0].status0 = + template_status | (is_eop0 << + IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); + + len_sop = (is_sop ? 0 : len_sop) + len0; + d_sop[0].status1 = + IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); + d += 1; + d_sop = is_eop0 ? d : d_sop; + + is_sop = is_eop0; + } + + if (tx_state->node->flags & VLIB_NODE_FLAG_TRACE) + { + to_tx = + vec_elt_at_index (dq->descriptor_buffer_indices, + start_descriptor_index); + ixge_tx_trace (xm, xd, dq, tx_state, + &dq->descriptors[start_descriptor_index].tx, to_tx, + n_descriptors); + } + + _vec_len (xm->tx_buffers_pending_free) = + to_free - xm->tx_buffers_pending_free; + + /* When we are done d_sop can point to end of ring. Wrap it if so. */ + { + ixge_tx_descriptor_t *d_start = &dq->descriptors[0].tx; + + ASSERT (d_sop - d_start <= dq->n_descriptors); + d_sop = d_sop - d_start == dq->n_descriptors ? d_start : d_sop; + } + + tx_state->is_start_of_packet = is_sop; + tx_state->start_of_packet_descriptor = d_sop; + tx_state->n_bytes_in_packet = len_sop; + + return n_descriptors; +} + +static uword +ixge_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * f) +{ + ixge_main_t *xm = &ixge_main; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + ixge_device_t *xd = vec_elt_at_index (xm->devices, rd->dev_instance); + ixge_dma_queue_t *dq; + u32 *from, n_left_tx, n_descriptors_to_tx, n_tail_drop; + u32 queue_index = 0; /* fixme parameter */ + ixge_tx_state_t tx_state; + + tx_state.node = node; + tx_state.is_start_of_packet = 1; + tx_state.start_of_packet_descriptor = 0; + tx_state.n_bytes_in_packet = 0; + + from = vlib_frame_vector_args (f); + + dq = vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index); + + dq->head_index = dq->tx.head_index_write_back[0]; + + /* Since head == tail means ring is empty we can send up to dq->n_descriptors - 1. 
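E.g. with 512 descriptors, head_index 10 and tail_index 9, ixge_ring_sub (dq, 10, 9) below yields 511 hardware-owned slots and n_left_tx becomes 0: the ring counts as full even though one slot is still empty.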
*/ + n_left_tx = dq->n_descriptors - 1; + n_left_tx -= ixge_ring_sub (dq, dq->head_index, dq->tail_index); + + _vec_len (xm->tx_buffers_pending_free) = 0; + + n_descriptors_to_tx = f->n_vectors; + n_tail_drop = 0; + if (PREDICT_FALSE (n_descriptors_to_tx > n_left_tx)) + { + i32 i, n_ok, i_eop, i_sop; + + i_sop = i_eop = ~0; + for (i = n_left_tx - 1; i >= 0; i--) + { + vlib_buffer_t *b = vlib_get_buffer (vm, from[i]); + if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + if (i_sop != ~0 && i_eop != ~0) + break; + i_eop = i; + i_sop = i + 1; + } + } + if (i == 0) + n_ok = 0; + else + n_ok = i_eop + 1; + + { + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d, ring full to tx %d head %d tail %d",.format_args = + "i2i2i2i2",}; + struct + { + u16 instance, to_tx, head, tail; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->to_tx = n_descriptors_to_tx; + ed->head = dq->head_index; + ed->tail = dq->tail_index; + } + + if (n_ok < n_descriptors_to_tx) + { + n_tail_drop = n_descriptors_to_tx - n_ok; + vec_add (xm->tx_buffers_pending_free, from + n_ok, n_tail_drop); + vlib_error_count (vm, ixge_input_node.index, + IXGE_ERROR_tx_full_drops, n_tail_drop); + } + + n_descriptors_to_tx = n_ok; + } + + dq->tx.n_buffers_on_ring += n_descriptors_to_tx; + + /* Process from tail to end of descriptor ring. */ + if (n_descriptors_to_tx > 0 && dq->tail_index < dq->n_descriptors) + { + u32 n = + clib_min (dq->n_descriptors - dq->tail_index, n_descriptors_to_tx); + n = ixge_tx_no_wrap (xm, xd, dq, from, dq->tail_index, n, &tx_state); + from += n; + n_descriptors_to_tx -= n; + dq->tail_index += n; + ASSERT (dq->tail_index <= dq->n_descriptors); + if (dq->tail_index == dq->n_descriptors) + dq->tail_index = 0; + } + + if (n_descriptors_to_tx > 0) + { + u32 n = + ixge_tx_no_wrap (xm, xd, dq, from, 0, n_descriptors_to_tx, &tx_state); + from += n; + ASSERT (n == n_descriptors_to_tx); + dq->tail_index += n; + ASSERT (dq->tail_index <= dq->n_descriptors); + if (dq->tail_index == dq->n_descriptors) + dq->tail_index = 0; + } + + /* We should only get full packets. */ + ASSERT (tx_state.is_start_of_packet); + + /* Report status when last descriptor is done. */ + { + u32 i = dq->tail_index == 0 ? dq->n_descriptors - 1 : dq->tail_index - 1; + ixge_tx_descriptor_t *d = &dq->descriptors[i].tx; + d->status0 |= IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS; + } + + /* Give new descriptors to hardware. */ + { + ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_TX, queue_index); + + CLIB_MEMORY_BARRIER (); + + dr->tail_index = dq->tail_index; + } + + /* Free any buffers that are done. 
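That is, buffers whose descriptors the hardware has handed back. The n_tail_drop buffers appended to the pending-free vector earlier were never placed on the ring, which is why n_buffers_on_ring below shrinks by n - n_tail_drop rather than n.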
*/ + { + u32 n = _vec_len (xm->tx_buffers_pending_free); + if (n > 0) + { + vlib_buffer_free_no_next (vm, xm->tx_buffers_pending_free, n); + _vec_len (xm->tx_buffers_pending_free) = 0; + ASSERT (dq->tx.n_buffers_on_ring >= n); + dq->tx.n_buffers_on_ring -= (n - n_tail_drop); + } + } + + return f->n_vectors; +} + +static uword +ixge_rx_queue_no_wrap (ixge_main_t * xm, + ixge_device_t * xd, + ixge_dma_queue_t * dq, + u32 start_descriptor_index, u32 n_descriptors) +{ + vlib_main_t *vm = xm->vlib_main; + vlib_node_runtime_t *node = dq->rx.node; + ixge_descriptor_t *d; + static ixge_descriptor_t *d_trace_save; + static u32 *d_trace_buffers; + u32 n_descriptors_left = n_descriptors; + u32 *to_rx = + vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index); + u32 *to_add; + u32 bi_sop = dq->rx.saved_start_of_packet_buffer_index; + u32 bi_last = dq->rx.saved_last_buffer_index; + u32 next_index_sop = dq->rx.saved_start_of_packet_next_index; + u32 is_sop = dq->rx.is_start_of_packet; + u32 next_index, n_left_to_next, *to_next; + u32 n_packets = 0; + u32 n_bytes = 0; + u32 n_trace = vlib_get_trace_count (vm, node); + vlib_buffer_t *b_last, b_dummy; + + ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors); + d = &dq->descriptors[start_descriptor_index]; + + b_last = bi_last != ~0 ? vlib_get_buffer (vm, bi_last) : &b_dummy; + next_index = dq->rx.next_index; + + if (n_trace > 0) + { + u32 n = clib_min (n_trace, n_descriptors); + if (d_trace_save) + { + _vec_len (d_trace_save) = 0; + _vec_len (d_trace_buffers) = 0; + } + vec_add (d_trace_save, (ixge_descriptor_t *) d, n); + vec_add (d_trace_buffers, to_rx, n); + } + + { + uword l = vec_len (xm->rx_buffers_to_add); + + if (l < n_descriptors_left) + { + u32 n_to_alloc = 2 * dq->n_descriptors - l; + u32 n_allocated; + + vec_resize (xm->rx_buffers_to_add, n_to_alloc); + + _vec_len (xm->rx_buffers_to_add) = l; + n_allocated = vlib_buffer_alloc_from_free_list + (vm, xm->rx_buffers_to_add + l, n_to_alloc, + xm->vlib_buffer_free_list_index); + _vec_len (xm->rx_buffers_to_add) += n_allocated; + + /* Handle transient allocation failure */ + if (PREDICT_FALSE (l + n_allocated <= n_descriptors_left)) + { + if (n_allocated == 0) + vlib_error_count (vm, ixge_input_node.index, + IXGE_ERROR_rx_alloc_no_physmem, 1); + else + vlib_error_count (vm, ixge_input_node.index, + IXGE_ERROR_rx_alloc_fail, 1); + + n_descriptors_left = l + n_allocated; + } + n_descriptors = n_descriptors_left; + } + + /* Add buffers from end of vector going backwards. */ + to_add = vec_end (xm->rx_buffers_to_add) - 1; + } + + while (n_descriptors_left > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_descriptors_left >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t *b0, *b1; + u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0; + u32 bi1, fi1, len1, l3_offset1, s21, s01, flags1; + u8 is_eop0, error0, next0; + u8 is_eop1, error1, next1; + ixge_descriptor_t d0, d1; + + vlib_prefetch_buffer_with_index (vm, to_rx[2], STORE); + vlib_prefetch_buffer_with_index (vm, to_rx[3], STORE); + + CLIB_PREFETCH (d + 2, 32, STORE); + + d0.as_u32x4 = d[0].as_u32x4; + d1.as_u32x4 = d[1].as_u32x4; + + s20 = d0.rx_from_hw.status[2]; + s21 = d1.rx_from_hw.status[2]; + + s00 = d0.rx_from_hw.status[0]; + s01 = d1.rx_from_hw.status[0]; + + if (! 
+ ((s20 & s21) & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE)) + goto found_hw_owned_descriptor_x2; + + bi0 = to_rx[0]; + bi1 = to_rx[1]; + + ASSERT (to_add - 1 >= xm->rx_buffers_to_add); + fi0 = to_add[0]; + fi1 = to_add[-1]; + + to_rx[0] = fi0; + to_rx[1] = fi1; + to_rx += 2; + to_add -= 2; + + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, bi0)); + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, bi1)); + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, fi0)); + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, fi1)); + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + + CLIB_PREFETCH (b0->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b1->data, CLIB_CACHE_LINE_BYTES, LOAD); + + is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; + is_eop1 = (s21 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; + + ixge_rx_next_and_error_from_status_x2 (xd, s00, s20, s01, s21, + &next0, &error0, &flags0, + &next1, &error1, &flags1); + + next0 = is_sop ? next0 : next_index_sop; + next1 = is_eop0 ? next1 : next0; + next_index_sop = next1; + + b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT); + b1->flags |= flags1 | (!is_eop1 << VLIB_BUFFER_LOG2_NEXT_PRESENT); + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + b0->error = node->errors[error0]; + b1->error = node->errors[error1]; + + len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor; + len1 = d1.rx_from_hw.n_packet_bytes_this_descriptor; + n_bytes += len0 + len1; + n_packets += is_eop0 + is_eop1; + + /* Give new buffers to hardware. */ + d0.rx_to_hw.tail_address = + vlib_get_buffer_data_physical_address (vm, fi0); + d1.rx_to_hw.tail_address = + vlib_get_buffer_data_physical_address (vm, fi1); + d0.rx_to_hw.head_address = d[0].rx_to_hw.tail_address; + d1.rx_to_hw.head_address = d[1].rx_to_hw.tail_address; + d[0].as_u32x4 = d0.as_u32x4; + d[1].as_u32x4 = d1.as_u32x4; + + d += 2; + n_descriptors_left -= 2; + + /* Point to either l2 or l3 header depending on next. */ + l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT)) + ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0; + l3_offset1 = (is_eop0 && (next1 != IXGE_RX_NEXT_ETHERNET_INPUT)) + ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s01) : 0; + + b0->current_length = len0 - l3_offset0; + b1->current_length = len1 - l3_offset1; + b0->current_data = l3_offset0; + b1->current_data = l3_offset1; + + b_last->next_buffer = is_sop ? ~0 : bi0; + b0->next_buffer = is_eop0 ? ~0 : bi1; + bi_last = bi1; + b_last = b1; + + if (CLIB_DEBUG > 0) + { + u32 bi_sop0 = is_sop ? bi0 : bi_sop; + u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0; + + if (is_eop0) + { + u8 *msg = vlib_validate_buffer (vm, bi_sop0, + /* follow_buffer_next */ 1); + ASSERT (!msg); + } + if (is_eop1) + { + u8 *msg = vlib_validate_buffer (vm, bi_sop1, + /* follow_buffer_next */ 1); + ASSERT (!msg); + } + } + if (0) /* "Dave" version */ + { + u32 bi_sop0 = is_sop ? bi0 : bi_sop; + u32 bi_sop1 = is_eop0 ? 
bi1 : bi_sop0; + + if (is_eop0) + { + to_next[0] = bi_sop0; + to_next++; + n_left_to_next--; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi_sop0, next0); + } + if (is_eop1) + { + to_next[0] = bi_sop1; + to_next++; + n_left_to_next--; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi_sop1, next1); + } + is_sop = is_eop1; + bi_sop = bi_sop1; + } + if (1) /* "Eliot" version */ + { + /* Speculatively enqueue to cached next. */ + u8 saved_is_sop = is_sop; + u32 bi_sop_save = bi_sop; + + bi_sop = saved_is_sop ? bi0 : bi_sop; + to_next[0] = bi_sop; + to_next += is_eop0; + n_left_to_next -= is_eop0; + + bi_sop = is_eop0 ? bi1 : bi_sop; + to_next[0] = bi_sop; + to_next += is_eop1; + n_left_to_next -= is_eop1; + + is_sop = is_eop1; + + if (PREDICT_FALSE + (!(next0 == next_index && next1 == next_index))) + { + /* Undo speculation. */ + to_next -= is_eop0 + is_eop1; + n_left_to_next += is_eop0 + is_eop1; + + /* Re-do both descriptors being careful about where we enqueue. */ + bi_sop = saved_is_sop ? bi0 : bi_sop_save; + if (is_eop0) + { + if (next0 != next_index) + vlib_set_next_frame_buffer (vm, node, next0, bi_sop); + else + { + to_next[0] = bi_sop; + to_next += 1; + n_left_to_next -= 1; + } + } + + bi_sop = is_eop0 ? bi1 : bi_sop; + if (is_eop1) + { + if (next1 != next_index) + vlib_set_next_frame_buffer (vm, node, next1, bi_sop); + else + { + to_next[0] = bi_sop; + to_next += 1; + n_left_to_next -= 1; + } + } + + /* Switch cached next index when next for both packets is the same. */ + if (is_eop0 && is_eop1 && next0 == next1) + { + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + next_index = next0; + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + } + } + } + } + + /* Bail out of dual loop and proceed with single loop. */ + found_hw_owned_descriptor_x2: + + while (n_descriptors_left > 0 && n_left_to_next > 0) + { + vlib_buffer_t *b0; + u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0; + u8 is_eop0, error0, next0; + ixge_descriptor_t d0; + + d0.as_u32x4 = d[0].as_u32x4; + + s20 = d0.rx_from_hw.status[2]; + s00 = d0.rx_from_hw.status[0]; + + if (!(s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE)) + goto found_hw_owned_descriptor_x1; + + bi0 = to_rx[0]; + ASSERT (to_add >= xm->rx_buffers_to_add); + fi0 = to_add[0]; + + to_rx[0] = fi0; + to_rx += 1; + to_add -= 1; + + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, bi0)); + ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == + vlib_buffer_is_known (vm, fi0)); + + b0 = vlib_get_buffer (vm, bi0); + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; + ixge_rx_next_and_error_from_status_x1 + (xd, s00, s20, &next0, &error0, &flags0); + + next0 = is_sop ? next0 : next_index_sop; + next_index_sop = next0; + + b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT); + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + b0->error = node->errors[error0]; + + len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor; + n_bytes += len0; + n_packets += is_eop0; + + /* Give new buffer to hardware. 
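The descriptor is recycled in place: tail_address is pointed at the physical address of the replacement buffer fi0, head_address mirrors it, and writing as_u32x4 back hands ownership to the device again.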
*/ + d0.rx_to_hw.tail_address = + vlib_get_buffer_data_physical_address (vm, fi0); + d0.rx_to_hw.head_address = d0.rx_to_hw.tail_address; + d[0].as_u32x4 = d0.as_u32x4; + + d += 1; + n_descriptors_left -= 1; + + /* Point to either l2 or l3 header depending on next. */ + l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT)) + ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0; + b0->current_length = len0 - l3_offset0; + b0->current_data = l3_offset0; + + b_last->next_buffer = is_sop ? ~0 : bi0; + bi_last = bi0; + b_last = b0; + + bi_sop = is_sop ? bi0 : bi_sop; + + if (CLIB_DEBUG > 0 && is_eop0) + { + u8 *msg = + vlib_validate_buffer (vm, bi_sop, /* follow_buffer_next */ 1); + ASSERT (!msg); + } + + if (0) /* "Dave" version */ + { + if (is_eop0) + { + to_next[0] = bi_sop; + to_next++; + n_left_to_next--; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi_sop, next0); + } + } + if (1) /* "Eliot" version */ + { + if (PREDICT_TRUE (next0 == next_index)) + { + to_next[0] = bi_sop; + to_next += is_eop0; + n_left_to_next -= is_eop0; + } + else + { + if (next0 != next_index && is_eop0) + vlib_set_next_frame_buffer (vm, node, next0, bi_sop); + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + next_index = next0; + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + } + } + is_sop = is_eop0; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + +found_hw_owned_descriptor_x1: + if (n_descriptors_left > 0) + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + + _vec_len (xm->rx_buffers_to_add) = (to_add + 1) - xm->rx_buffers_to_add; + + { + u32 n_done = n_descriptors - n_descriptors_left; + + if (n_trace > 0 && n_done > 0) + { + u32 n = clib_min (n_trace, n_done); + ixge_rx_trace (xm, xd, dq, + d_trace_save, + d_trace_buffers, + &dq->descriptors[start_descriptor_index], n); + vlib_set_trace_count (vm, node, n_trace - n); + } + if (d_trace_save) + { + _vec_len (d_trace_save) = 0; + _vec_len (d_trace_buffers) = 0; + } + + /* Don't keep a reference to b_last if we don't have to. + Otherwise we can over-write a next_buffer pointer after already haven + enqueued a packet. */ + if (is_sop) + { + b_last->next_buffer = ~0; + bi_last = ~0; + } + + dq->rx.n_descriptors_done_this_call = n_done; + dq->rx.n_descriptors_done_total += n_done; + dq->rx.is_start_of_packet = is_sop; + dq->rx.saved_start_of_packet_buffer_index = bi_sop; + dq->rx.saved_last_buffer_index = bi_last; + dq->rx.saved_start_of_packet_next_index = next_index_sop; + dq->rx.next_index = next_index; + dq->rx.n_bytes += n_bytes; + + return n_packets; + } +} + +static uword +ixge_rx_queue (ixge_main_t * xm, + ixge_device_t * xd, + vlib_node_runtime_t * node, u32 queue_index) +{ + ixge_dma_queue_t *dq = + vec_elt_at_index (xd->dma_queues[VLIB_RX], queue_index); + ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, dq->queue_index); + uword n_packets = 0; + u32 hw_head_index, sw_head_index; + + /* One time initialization. */ + if (!dq->rx.node) + { + dq->rx.node = node; + dq->rx.is_start_of_packet = 1; + dq->rx.saved_start_of_packet_buffer_index = ~0; + dq->rx.saved_last_buffer_index = ~0; + } + + dq->rx.next_index = node->cached_next_index; + + dq->rx.n_descriptors_done_total = 0; + dq->rx.n_descriptors_done_this_call = 0; + dq->rx.n_bytes = 0; + + /* Fetch head from hardware and compare to where we think we are. 
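If the hardware head is behind the software head the ring has wrapped, so the work is split into two no-wrap calls below: sw_head_index to the end of the ring, then slot 0 up to hw_head_index.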
*/ + hw_head_index = dr->head_index; + sw_head_index = dq->head_index; + + if (hw_head_index == sw_head_index) + goto done; + + if (hw_head_index < sw_head_index) + { + u32 n_tried = dq->n_descriptors - sw_head_index; + n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried); + sw_head_index = + ixge_ring_add (dq, sw_head_index, + dq->rx.n_descriptors_done_this_call); + + if (dq->rx.n_descriptors_done_this_call != n_tried) + goto done; + } + if (hw_head_index >= sw_head_index) + { + u32 n_tried = hw_head_index - sw_head_index; + n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried); + sw_head_index = + ixge_ring_add (dq, sw_head_index, + dq->rx.n_descriptors_done_this_call); + } + +done: + dq->head_index = sw_head_index; + dq->tail_index = + ixge_ring_add (dq, dq->tail_index, dq->rx.n_descriptors_done_total); + + /* Give tail back to hardware. */ + CLIB_MEMORY_BARRIER (); + + dr->tail_index = dq->tail_index; + + vlib_increment_combined_counter (vnet_main. + interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + 0 /* cpu_index */ , + xd->vlib_sw_if_index, n_packets, + dq->rx.n_bytes); + + return n_packets; +} + +static void +ixge_interrupt (ixge_main_t * xm, ixge_device_t * xd, u32 i) +{ + vlib_main_t *vm = xm->vlib_main; + ixge_regs_t *r = xd->regs; + + if (i != 20) + { + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d, %s",.format_args = "i1t1",.n_enum_strings = + 16,.enum_strings = + { + "flow director", + "rx miss", + "pci exception", + "mailbox", + "link status change", + "linksec key exchange", + "manageability event", + "reserved23", + "sdp0", + "sdp1", + "sdp2", + "sdp3", + "ecc", "descriptor handler error", "tcp timer", "other",},}; + struct + { + u8 instance; + u8 index; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->index = i - 16; + } + else + { + u32 v = r->xge_mac.link_status; + uword is_up = (v & (1 << 30)) != 0; + + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d, link status change 0x%x",.format_args = "i4i4",}; + struct + { + u32 instance, link_status; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->link_status = v; + xd->link_status_at_last_link_change = v; + + vlib_process_signal_event (vm, ixge_process_node.index, + EVENT_SET_FLAGS, + ((is_up << 31) | xd->vlib_hw_if_index)); + } +} + +always_inline u32 +clean_block (u32 * b, u32 * t, u32 n_left) +{ + u32 *t0 = t; + + while (n_left >= 4) + { + u32 bi0, bi1, bi2, bi3; + + t[0] = bi0 = b[0]; + b[0] = 0; + t += bi0 != 0; + + t[0] = bi1 = b[1]; + b[1] = 0; + t += bi1 != 0; + + t[0] = bi2 = b[2]; + b[2] = 0; + t += bi2 != 0; + + t[0] = bi3 = b[3]; + b[3] = 0; + t += bi3 != 0; + + b += 4; + n_left -= 4; + } + + while (n_left > 0) + { + u32 bi0; + + t[0] = bi0 = b[0]; + b[0] = 0; + t += bi0 != 0; + b += 1; + n_left -= 1; + } + + return t - t0; +} + +static void +ixge_tx_queue (ixge_main_t * xm, ixge_device_t * xd, u32 queue_index) +{ + vlib_main_t *vm = xm->vlib_main; + ixge_dma_queue_t *dq = + vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index); + u32 n_clean, *b, *t, *t0; + i32 n_hw_owned_descriptors; + i32 first_to_clean, last_to_clean; + u64 hwbp_race = 0; + + /* Handle case where head write back pointer update + * arrives after the interrupt during high PCI bus loads. 
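* When that happens the loop below simply spins until the write-back location moves, counting iterations in hwbp_race for the event log guarded by IXGE_HWBP_RACE_ELOG.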
+ */ + while ((dq->head_index == dq->tx.head_index_write_back[0]) && + dq->tx.n_buffers_on_ring && (dq->head_index != dq->tail_index)) + { + hwbp_race++; + if (IXGE_HWBP_RACE_ELOG && (hwbp_race == 1)) + { + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d tx head index race: head %4d, tail %4d, buffs %4d",.format_args + = "i4i4i4i4",}; + struct + { + u32 instance, head_index, tail_index, n_buffers_on_ring; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->head_index = dq->head_index; + ed->tail_index = dq->tail_index; + ed->n_buffers_on_ring = dq->tx.n_buffers_on_ring; + } + } + + dq->head_index = dq->tx.head_index_write_back[0]; + n_hw_owned_descriptors = ixge_ring_sub (dq, dq->head_index, dq->tail_index); + ASSERT (dq->tx.n_buffers_on_ring >= n_hw_owned_descriptors); + n_clean = dq->tx.n_buffers_on_ring - n_hw_owned_descriptors; + + if (IXGE_HWBP_RACE_ELOG && hwbp_race) + { + ELOG_TYPE_DECLARE (e) = + { + .function = (char *) __FUNCTION__,.format = + "ixge %d tx head index race: head %4d, hw_owned %4d, n_clean %4d, retries %d",.format_args + = "i4i4i4i4i4",}; + struct + { + u32 instance, head_index, n_hw_owned_descriptors, n_clean, retries; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->instance = xd->device_index; + ed->head_index = dq->head_index; + ed->n_hw_owned_descriptors = n_hw_owned_descriptors; + ed->n_clean = n_clean; + ed->retries = hwbp_race; + } + + /* + * This function used to wait until hardware owned zero descriptors. + * At high PPS rates, that doesn't happen until the TX ring is + * completely full of descriptors which need to be cleaned up. + * That, in turn, causes TX ring-full drops and/or long RX service + * interruptions. + */ + if (n_clean == 0) + return; + + /* Clean the n_clean descriptors prior to the reported hardware head */ + last_to_clean = dq->head_index - 1; + last_to_clean = (last_to_clean < 0) ? last_to_clean + dq->n_descriptors : + last_to_clean; + + first_to_clean = (last_to_clean) - (n_clean - 1); + first_to_clean = (first_to_clean < 0) ? first_to_clean + dq->n_descriptors : + first_to_clean; + + vec_resize (xm->tx_buffers_pending_free, dq->n_descriptors - 1); + t0 = t = xm->tx_buffers_pending_free; + b = dq->descriptor_buffer_indices + first_to_clean; + + /* Wrap case: clean from first to end, then start to last */ + if (first_to_clean > last_to_clean) + { + t += clean_block (b, t, (dq->n_descriptors - 1) - first_to_clean); + first_to_clean = 0; + b = dq->descriptor_buffer_indices; + } + + /* Typical case: clean from first to last */ + if (first_to_clean <= last_to_clean) + t += clean_block (b, t, (last_to_clean - first_to_clean) + 1); + + if (t > t0) + { + u32 n = t - t0; + vlib_buffer_free_no_next (vm, t0, n); + ASSERT (dq->tx.n_buffers_on_ring >= n); + dq->tx.n_buffers_on_ring -= n; + _vec_len (xm->tx_buffers_pending_free) = 0; + } +} + +/* RX queue interrupts 0 thru 7; TX 8 thru 15. 
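Interrupt bit i therefore names rx queue i when i < 8 and tx queue i - 8 when 8 <= i < 16; the always_inline helpers below encode that mapping and its inverse.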
*/ +always_inline uword +ixge_interrupt_is_rx_queue (uword i) +{ + return i < 8; +} + +always_inline uword +ixge_interrupt_is_tx_queue (uword i) +{ + return i >= 8 && i < 16; +} + +always_inline uword +ixge_tx_queue_to_interrupt (uword i) +{ + return 8 + i; +} + +always_inline uword +ixge_rx_queue_to_interrupt (uword i) +{ + return 0 + i; +} + +always_inline uword +ixge_interrupt_rx_queue (uword i) +{ + ASSERT (ixge_interrupt_is_rx_queue (i)); + return i - 0; +} + +always_inline uword +ixge_interrupt_tx_queue (uword i) +{ + ASSERT (ixge_interrupt_is_tx_queue (i)); + return i - 8; +} + +static uword +ixge_device_input (ixge_main_t * xm, + ixge_device_t * xd, vlib_node_runtime_t * node) +{ + ixge_regs_t *r = xd->regs; + u32 i, s; + uword n_rx_packets = 0; + + s = r->interrupt.status_write_1_to_set; + if (s) + r->interrupt.status_write_1_to_clear = s; + + /* *INDENT-OFF* */ + foreach_set_bit (i, s, ({ + if (ixge_interrupt_is_rx_queue (i)) + n_rx_packets += ixge_rx_queue (xm, xd, node, ixge_interrupt_rx_queue (i)); + + else if (ixge_interrupt_is_tx_queue (i)) + ixge_tx_queue (xm, xd, ixge_interrupt_tx_queue (i)); + + else + ixge_interrupt (xm, xd, i); + })); + /* *INDENT-ON* */ + + return n_rx_packets; +} + +static uword +ixge_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) +{ + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd; + uword n_rx_packets = 0; + + if (node->state == VLIB_NODE_STATE_INTERRUPT) + { + uword i; + + /* Loop over devices with interrupts. */ + /* *INDENT-OFF* */ + foreach_set_bit (i, node->runtime_data[0], ({ + xd = vec_elt_at_index (xm->devices, i); + n_rx_packets += ixge_device_input (xm, xd, node); + + /* Re-enable interrupts since we're going to stay in interrupt mode. */ + if (! (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) + xd->regs->interrupt.enable_write_1_to_set = ~0; + })); + /* *INDENT-ON* */ + + /* Clear mask of devices with pending interrupts. */ + node->runtime_data[0] = 0; + } + else + { + /* Poll all devices for input/interrupts. */ + vec_foreach (xd, xm->devices) + { + n_rx_packets += ixge_device_input (xm, xd, node); + + /* Re-enable interrupts when switching out of polling mode. */ + if (node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) + xd->regs->interrupt.enable_write_1_to_set = ~0; + } + } + + return n_rx_packets; +} + +static char *ixge_error_strings[] = { +#define _(n,s) s, + foreach_ixge_error +#undef _ +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ixge_input_node, static) = { + .function = ixge_input, + .type = VLIB_NODE_TYPE_INPUT, + .name = "ixge-input", + + /* Will be enabled if/when hardware is detected. 
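ixge_device_input() above fans the latched interrupt status word out to handlers using a fixed bit layout: bits 0 through 7 are RX queue interrupts, 8 through 15 are TX queue interrupts, and higher bits are miscellaneous causes such as link status change. foreach_set_bit hides the bit scan; a plain-C equivalent of that dispatch (illustrative sketch only):

#include <stdio.h>

typedef unsigned int u32;

static void handle_rx (u32 q) { printf ("rx queue %u\n", q); }
static void handle_tx (u32 q) { printf ("tx queue %u\n", q); }
static void handle_misc (u32 b) { printf ("misc cause %u\n", b); }

static void
dispatch_status (u32 s)
{
  u32 i;
  for (i = 0; s != 0; i++, s >>= 1)	/* walk set bits, low to high */
    {
      if (!(s & 1))
	continue;
      if (i < 8)
	handle_rx (i);			/* bits 0-7: RX queue interrupts */
      else if (i < 16)
	handle_tx (i - 8);		/* bits 8-15: TX queue interrupts */
      else
	handle_misc (i);		/* link status change, etc. */
    }
}

int
main (void)
{
  dispatch_status ((1 << 0) | (1 << 9) | (1 << 20));
  return 0;
}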
*/ + .state = VLIB_NODE_STATE_DISABLED, + + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_ixge_rx_dma_trace, + + .n_errors = IXGE_N_ERROR, + .error_strings = ixge_error_strings, + + .n_next_nodes = IXGE_RX_N_NEXT, + .next_nodes = { + [IXGE_RX_NEXT_DROP] = "error-drop", + [IXGE_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", + [IXGE_RX_NEXT_IP4_INPUT] = "ip4-input", + [IXGE_RX_NEXT_IP6_INPUT] = "ip6-input", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH_CLONE (ixge_input) +CLIB_MULTIARCH_SELECT_FN (ixge_input) +/* *INDENT-ON* */ + +static u8 * +format_ixge_device_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, i); + return format (s, "TenGigabitEthernet%U", + format_vlib_pci_handle, &xd->pci_device.bus_address); +} + +#define IXGE_COUNTER_IS_64_BIT (1 << 0) +#define IXGE_COUNTER_NOT_CLEAR_ON_READ (1 << 1) + +static u8 ixge_counter_flags[] = { +#define _(a,f) 0, +#define _64(a,f) IXGE_COUNTER_IS_64_BIT, + foreach_ixge_counter +#undef _ +#undef _64 +}; + +static void +ixge_update_counters (ixge_device_t * xd) +{ + /* Byte offset for counter registers. */ + static u32 reg_offsets[] = { +#define _(a,f) (a) / sizeof (u32), +#define _64(a,f) _(a,f) + foreach_ixge_counter +#undef _ +#undef _64 + }; + volatile u32 *r = (volatile u32 *) xd->regs; + int i; + + for (i = 0; i < ARRAY_LEN (xd->counters); i++) + { + u32 o = reg_offsets[i]; + xd->counters[i] += r[o]; + if (ixge_counter_flags[i] & IXGE_COUNTER_NOT_CLEAR_ON_READ) + r[o] = 0; + if (ixge_counter_flags[i] & IXGE_COUNTER_IS_64_BIT) + xd->counters[i] += (u64) r[o + 1] << (u64) 32; + } +} + +static u8 * +format_ixge_device_id (u8 * s, va_list * args) +{ + u32 device_id = va_arg (*args, u32); + char *t = 0; + switch (device_id) + { +#define _(f,n) case n: t = #f; break; + foreach_ixge_pci_device_id; +#undef _ + default: + t = 0; + break; + } + if (t == 0) + s = format (s, "unknown 0x%x", device_id); + else + s = format (s, "%s", t); + return s; +} + +static u8 * +format_ixge_link_status (u8 * s, va_list * args) +{ + ixge_device_t *xd = va_arg (*args, ixge_device_t *); + u32 v = xd->link_status_at_last_link_change; + + s = format (s, "%s", (v & (1 << 30)) ? 
"up" : "down"); + + { + char *modes[] = { + "1g", "10g parallel", "10g serial", "autoneg", + }; + char *speeds[] = { + "unknown", "100m", "1g", "10g", + }; + s = format (s, ", mode %s, speed %s", + modes[(v >> 26) & 3], speeds[(v >> 28) & 3]); + } + + return s; +} + +static u8 * +format_ixge_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, dev_instance); + ixge_phy_t *phy = xd->phys + xd->phy_index; + uword indent = format_get_indent (s); + + ixge_update_counters (xd); + xd->link_status_at_last_link_change = xd->regs->xge_mac.link_status; + + s = format (s, "Intel 8259X: id %U\n%Ulink %U", + format_ixge_device_id, xd->device_id, + format_white_space, indent + 2, format_ixge_link_status, xd); + + { + + s = format (s, "\n%UPCIe %U", format_white_space, indent + 2, + format_vlib_pci_link_speed, &xd->pci_device); + } + + s = format (s, "\n%U", format_white_space, indent + 2); + if (phy->mdio_address != ~0) + s = format (s, "PHY address %d, id 0x%x", phy->mdio_address, phy->id); + else if (xd->sfp_eeprom.id == SFP_ID_sfp) + s = format (s, "SFP %U", format_sfp_eeprom, &xd->sfp_eeprom); + else + s = format (s, "PHY not found"); + + /* FIXME */ + { + ixge_dma_queue_t *dq = vec_elt_at_index (xd->dma_queues[VLIB_RX], 0); + ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0); + u32 hw_head_index = dr->head_index; + u32 sw_head_index = dq->head_index; + u32 nitems; + + nitems = ixge_ring_sub (dq, hw_head_index, sw_head_index); + s = format (s, "\n%U%d unprocessed, %d total buffers on rx queue 0 ring", + format_white_space, indent + 2, nitems, dq->n_descriptors); + + s = format (s, "\n%U%d buffers in driver rx cache", + format_white_space, indent + 2, + vec_len (xm->rx_buffers_to_add)); + + s = format (s, "\n%U%d buffers on tx queue 0 ring", + format_white_space, indent + 2, + xd->dma_queues[VLIB_TX][0].tx.n_buffers_on_ring); + } + { + u32 i; + u64 v; + static char *names[] = { +#define _(a,f) #f, +#define _64(a,f) _(a,f) + foreach_ixge_counter +#undef _ +#undef _64 + }; + + for (i = 0; i < ARRAY_LEN (names); i++) + { + v = xd->counters[i] - xd->counters_last_clear[i]; + if (v != 0) + s = format (s, "\n%U%-40U%16Ld", + format_white_space, indent + 2, + format_c_identifier, names[i], v); + } + } + + return s; +} + +static void +ixge_clear_hw_interface_counters (u32 instance) +{ + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd = vec_elt_at_index (xm->devices, instance); + ixge_update_counters (xd); + memcpy (xd->counters_last_clear, xd->counters, sizeof (xd->counters)); +} + +/* + * Dynamically redirect all pkts from a specific interface + * to the specified node + */ +static void +ixge_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + ixge_main_t *xm = &ixge_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + ixge_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + xd->per_interface_next_index = node_index; + return; + } + + xd->per_interface_next_index = + vlib_node_add_next (xm->vlib_main, ixge_input_node.index, node_index); +} + + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (ixge_device_class) = { + .name = "ixge", + .tx_function = ixge_interface_tx, + .format_device_name = format_ixge_device_name, + .format_device = format_ixge_device, + .format_tx_trace = format_ixge_tx_dma_trace, + .clear_counters = 
ixge_clear_hw_interface_counters,
+  .admin_up_down_function = ixge_interface_admin_up_down,
+  .rx_redirect_to_node = ixge_set_interface_next_node,
+  .flatten_output_chains = 1,
+};
+/* *INDENT-ON* */
+
+#define IXGE_N_BYTES_IN_RX_BUFFER  (2048)	// DAW-HACK: Set Rx buffer size so all packets < ETH_MTU_SIZE fit in the buffer (i.e. sop & eop for all descriptors).
+
+static clib_error_t *
+ixge_dma_init (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 queue_index)
+{
+  ixge_main_t *xm = &ixge_main;
+  vlib_main_t *vm = xm->vlib_main;
+  ixge_dma_queue_t *dq;
+  clib_error_t *error = 0;
+
+  vec_validate (xd->dma_queues[rt], queue_index);
+  dq = vec_elt_at_index (xd->dma_queues[rt], queue_index);
+
+  if (!xm->n_descriptors_per_cache_line)
+    xm->n_descriptors_per_cache_line =
+      CLIB_CACHE_LINE_BYTES / sizeof (dq->descriptors[0]);
+
+  if (!xm->n_bytes_in_rx_buffer)
+    xm->n_bytes_in_rx_buffer = IXGE_N_BYTES_IN_RX_BUFFER;
+  xm->n_bytes_in_rx_buffer = round_pow2 (xm->n_bytes_in_rx_buffer, 1024);
+  if (!xm->vlib_buffer_free_list_index)
+    {
+      xm->vlib_buffer_free_list_index =
+	vlib_buffer_get_or_create_free_list (vm, xm->n_bytes_in_rx_buffer,
+					     "ixge rx");
+      ASSERT (xm->vlib_buffer_free_list_index != 0);
+    }
+
+  if (!xm->n_descriptors[rt])
+    xm->n_descriptors[rt] = 4 * VLIB_FRAME_SIZE;
+
+  dq->queue_index = queue_index;
+  dq->n_descriptors =
+    round_pow2 (xm->n_descriptors[rt], xm->n_descriptors_per_cache_line);
+  dq->head_index = dq->tail_index = 0;
+
+  dq->descriptors = vlib_physmem_alloc_aligned (vm, &error,
+						dq->n_descriptors *
+						sizeof (dq->descriptors[0]),
+						128 /* per chip spec */ );
+  if (error)
+    return error;
+
+  memset (dq->descriptors, 0,
+	  dq->n_descriptors * sizeof (dq->descriptors[0]));
+  vec_resize (dq->descriptor_buffer_indices, dq->n_descriptors);
+
+  if (rt == VLIB_RX)
+    {
+      u32 n_alloc, i;
+
+      n_alloc = vlib_buffer_alloc_from_free_list
+	(vm, dq->descriptor_buffer_indices,
+	 vec_len (dq->descriptor_buffer_indices),
+	 xm->vlib_buffer_free_list_index);
+      ASSERT (n_alloc == vec_len (dq->descriptor_buffer_indices));
+      for (i = 0; i < n_alloc; i++)
+	{
+	  vlib_buffer_t *b =
+	    vlib_get_buffer (vm, dq->descriptor_buffer_indices[i]);
+	  dq->descriptors[i].rx_to_hw.tail_address =
+	    vlib_physmem_virtual_to_physical (vm, b->data);
+	}
+    }
+  else
+    {
+      u32 i;
+
+      dq->tx.head_index_write_back =
+	vlib_physmem_alloc (vm, &error, CLIB_CACHE_LINE_BYTES);
+
+      for (i = 0; i < dq->n_descriptors; i++)
+	dq->descriptors[i].tx = xm->tx_descriptor_template;
+
+      vec_validate (xm->tx_buffers_pending_free, dq->n_descriptors - 1);
+    }
+
+  {
+    ixge_dma_regs_t *dr = get_dma_regs (xd, rt, queue_index);
+    u64 a;
+
+    a = vlib_physmem_virtual_to_physical (vm, dq->descriptors);
+    dr->descriptor_address[0] = a & 0xFFFFFFFF;
+    dr->descriptor_address[1] = a >> (u64) 32;
+    dr->n_descriptor_bytes = dq->n_descriptors * sizeof (dq->descriptors[0]);
+    dq->head_index = dq->tail_index = 0;
+
+    if (rt == VLIB_RX)
+      {
+	ASSERT ((xm->n_bytes_in_rx_buffer / 1024) < 32);
+	dr->rx_split_control =
+	  ( /* buffer size */ ((xm->n_bytes_in_rx_buffer / 1024) << 0)
+	   | ( /* lo free descriptor threshold (units of 64 descriptors) */
+	       (1 << 22)) | ( /* descriptor type: advanced one buffer */
+			      (1 << 25)) | ( /* drop if no descriptors available */
+					     (1 << 28)));
+
+	/* Give hardware all but last 16 cache lines' worth of descriptors. */
+	dq->tail_index = dq->n_descriptors -
+	  16 * xm->n_descriptors_per_cache_line;
+      }
+    else
+      {
+	/* Make sure it's initialized before hardware can get to it.
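ixge_dma_init() above rounds the RX buffer size up to a whole number of 1 KiB units and the descriptor count up to a whole number of cache lines, since rx_split_control expresses buffer sizes in 1 KiB units and descriptors are fetched a cache line at a time. round_pow2() rounds up to a multiple of a power of two; the usual bit trick behind such a helper looks like this (standalone sketch, assuming the second argument is a power of two):

#include <stdio.h>

typedef unsigned int u32;

/* Round x up to the next multiple of pow2 (pow2 must be a power of two). */
static u32
round_up_pow2 (u32 x, u32 pow2)
{
  return (x + pow2 - 1) & ~(pow2 - 1);
}

int
main (void)
{
  printf ("%u\n", round_up_pow2 (1500, 1024));	/* 2048: next 1 KiB unit */
  printf ("%u\n", round_up_pow2 (1000, 4));	/* 1000: already aligned */
  return 0;
}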
*/ + dq->tx.head_index_write_back[0] = dq->head_index; + + a = + vlib_physmem_virtual_to_physical (vm, dq->tx.head_index_write_back); + dr->tx.head_index_write_back_address[0] = /* enable bit */ 1 | a; + dr->tx.head_index_write_back_address[1] = (u64) a >> (u64) 32; + } + + /* DMA on 82599 does not work with [13] rx data write relaxed ordering + and [12] undocumented set. */ + if (rt == VLIB_RX) + dr->dca_control &= ~((1 << 13) | (1 << 12)); + + CLIB_MEMORY_BARRIER (); + + if (rt == VLIB_TX) + { + xd->regs->tx_dma_control |= (1 << 0); + dr->control |= ((32 << 0) /* prefetch threshold */ + | (64 << 8) /* host threshold */ + | (0 << 16) /* writeback threshold */ ); + } + + /* Enable this queue and wait for hardware to initialize + before adding to tail. */ + if (rt == VLIB_TX) + { + dr->control |= 1 << 25; + while (!(dr->control & (1 << 25))) + ; + } + + /* Set head/tail indices and enable DMA. */ + dr->head_index = dq->head_index; + dr->tail_index = dq->tail_index; + } + + return error; +} + +static u32 +ixge_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags) +{ + ixge_device_t *xd; + ixge_regs_t *r; + u32 old; + ixge_main_t *xm = &ixge_main; + + xd = vec_elt_at_index (xm->devices, hw->dev_instance); + r = xd->regs; + + old = r->filter_control; + + if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) + r->filter_control = old | (1 << 9) /* unicast promiscuous */ ; + else + r->filter_control = old & ~(1 << 9); + + return old; +} + +static void +ixge_device_init (ixge_main_t * xm) +{ + vnet_main_t *vnm = vnet_get_main (); + ixge_device_t *xd; + + /* Reset chip(s). */ + vec_foreach (xd, xm->devices) + { + ixge_regs_t *r = xd->regs; + const u32 reset_bit = (1 << 26) | (1 << 3); + + r->control |= reset_bit; + + /* No need to suspend. Timed to take ~1e-6 secs */ + while (r->control & reset_bit) + ; + + /* Software loaded. */ + r->extended_control |= (1 << 28); + + ixge_phy_init (xd); + + /* Register ethernet interface. */ + { + u8 addr8[6]; + u32 i, addr32[2]; + clib_error_t *error; + + addr32[0] = r->rx_ethernet_address0[0][0]; + addr32[1] = r->rx_ethernet_address0[0][1]; + for (i = 0; i < 6; i++) + addr8[i] = addr32[i / 4] >> ((i % 4) * 8); + + error = ethernet_register_interface + (vnm, ixge_device_class.index, xd->device_index, + /* ethernet address */ addr8, + &xd->vlib_hw_if_index, ixge_flag_change); + if (error) + clib_error_report (error); + } + + { + vnet_sw_interface_t *sw = + vnet_get_hw_sw_interface (vnm, xd->vlib_hw_if_index); + xd->vlib_sw_if_index = sw->sw_if_index; + } + + ixge_dma_init (xd, VLIB_RX, /* queue_index */ 0); + + xm->n_descriptors[VLIB_TX] = 20 * VLIB_FRAME_SIZE; + + ixge_dma_init (xd, VLIB_TX, /* queue_index */ 0); + + /* RX/TX queue 0 gets mapped to interrupt bits 0 & 8. */ + r->interrupt.queue_mapping[0] = (( /* valid bit */ (1 << 7) | + ixge_rx_queue_to_interrupt (0)) << 0); + + r->interrupt.queue_mapping[0] |= (( /* valid bit */ (1 << 7) | + ixge_tx_queue_to_interrupt (0)) << 8); + + /* No use in getting too many interrupts. + Limit them to one every 3/4 ring size at line rate + min sized packets. + No need for this since kernel/vlib main loop provides adequate interrupt + limiting scheme. */ + if (0) + { + f64 line_rate_max_pps = + 10e9 / (8 * (64 + /* interframe padding */ 20)); + ixge_throttle_queue_interrupt (r, 0, + .75 * xm->n_descriptors[VLIB_RX] / + line_rate_max_pps); + } + + /* Accept all multicast and broadcast packets. Should really add them + to the dst_ethernet_address register array. 
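The ethernet_register_interface() call above first has to recover the station address: the two 32-bit rx_ethernet_address0 registers are unpacked into six bytes with addr32[i / 4] >> ((i % 4) * 8), a little-endian byte extraction. The same unpack as a standalone example (register values are made up):

#include <stdio.h>

typedef unsigned char u8;
typedef unsigned int u32;

int
main (void)
{
  /* Pretend register values: low word holds bytes 0-3, high word 4-5. */
  u32 addr32[2] = { 0x33221100, 0x00005544 };
  u8 addr8[6];
  u32 i;

  for (i = 0; i < 6; i++)
    addr8[i] = addr32[i / 4] >> ((i % 4) * 8);	/* pick byte i % 4 */

  /* Prints 00:11:22:33:44:55. */
  printf ("%02x:%02x:%02x:%02x:%02x:%02x\n",
	  addr8[0], addr8[1], addr8[2], addr8[3], addr8[4], addr8[5]);
  return 0;
}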
*/ + r->filter_control |= (1 << 10) | (1 << 8); + + /* Enable frames up to size in mac frame size register. */ + r->xge_mac.control |= 1 << 2; + r->xge_mac.rx_max_frame_size = (9216 + 14) << 16; + + /* Enable all interrupts. */ + if (!IXGE_ALWAYS_POLL) + r->interrupt.enable_write_1_to_set = ~0; + } +} + +static uword +ixge_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + vnet_main_t *vnm = vnet_get_main (); + ixge_main_t *xm = &ixge_main; + ixge_device_t *xd; + uword event_type, *event_data = 0; + f64 timeout, link_debounce_deadline; + + ixge_device_init (xm); + + /* Clear all counters. */ + vec_foreach (xd, xm->devices) + { + ixge_update_counters (xd); + memset (xd->counters, 0, sizeof (xd->counters)); + } + + timeout = 30.0; + link_debounce_deadline = 1e70; + + while (1) + { + /* 36 bit stat counters could overflow in ~50 secs. + We poll every 30 secs to be conservative. */ + vlib_process_wait_for_event_or_clock (vm, timeout); + + event_type = vlib_process_get_events (vm, &event_data); + + switch (event_type) + { + case EVENT_SET_FLAGS: + /* 1 ms */ + link_debounce_deadline = vlib_time_now (vm) + 1e-3; + timeout = 1e-3; + break; + + case ~0: + /* No events found: timer expired. */ + if (vlib_time_now (vm) > link_debounce_deadline) + { + vec_foreach (xd, xm->devices) + { + ixge_regs_t *r = xd->regs; + u32 v = r->xge_mac.link_status; + uword is_up = (v & (1 << 30)) != 0; + + vnet_hw_interface_set_flags + (vnm, xd->vlib_hw_if_index, + is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + } + link_debounce_deadline = 1e70; + timeout = 30.0; + } + break; + + default: + ASSERT (0); + } + + if (event_data) + _vec_len (event_data) = 0; + + /* Query stats every 30 secs. */ + { + f64 now = vlib_time_now (vm); + if (now - xm->time_last_stats_update > 30) + { + xm->time_last_stats_update = now; + vec_foreach (xd, xm->devices) ixge_update_counters (xd); + } + } + } + + return 0; +} + +static vlib_node_registration_t ixge_process_node = { + .function = ixge_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "ixge-process", +}; + +clib_error_t * +ixge_init (vlib_main_t * vm) +{ + ixge_main_t *xm = &ixge_main; + clib_error_t *error; + + xm->vlib_main = vm; + memset (&xm->tx_descriptor_template, 0, + sizeof (xm->tx_descriptor_template)); + memset (&xm->tx_descriptor_template_mask, 0, + sizeof (xm->tx_descriptor_template_mask)); + xm->tx_descriptor_template.status0 = + (IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED | + IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED | + IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS); + xm->tx_descriptor_template_mask.status0 = 0xffff; + xm->tx_descriptor_template_mask.status1 = 0x00003fff; + + xm->tx_descriptor_template_mask.status0 &= + ~(IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET + | IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS); + xm->tx_descriptor_template_mask.status1 &= + ~(IXGE_TX_DESCRIPTOR_STATUS1_DONE); + + error = vlib_call_init_function (vm, pci_bus_init); + + return error; +} + +VLIB_INIT_FUNCTION (ixge_init); + + +static void +ixge_pci_intr_handler (vlib_pci_device_t * dev) +{ + ixge_main_t *xm = &ixge_main; + vlib_main_t *vm = xm->vlib_main; + + vlib_node_set_interrupt_pending (vm, ixge_input_node.index); + + /* Let node know which device is interrupting. 
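ixge_process() above debounces link-change interrupts: EVENT_SET_FLAGS arms a 1 ms deadline, the interface flags are pushed to vnet only once that deadline passes with no further events, and the process then falls back to its 30 second counter-poll cadence. The debounce pattern in isolation (a sketch, not the vlib process API):

#include <stdio.h>

/* Debounce: act only after a quiet window has passed since the last event. */
typedef struct
{
  double deadline;	/* time after which the pending change is applied */
  int pending;
} debounce_t;

static void
debounce_event (debounce_t * d, double now, double quiet)
{
  d->deadline = now + quiet;	/* every new event restarts the window */
  d->pending = 1;
}

/* Returns 1 exactly once, when the debounced action should fire. */
static int
debounce_poll (debounce_t * d, double now)
{
  if (d->pending && now > d->deadline)
    {
      d->pending = 0;
      return 1;
    }
  return 0;
}

int
main (void)
{
  debounce_t d = { 0, 0 };
  debounce_event (&d, 0.0, 1e-3);		/* link flapped at t = 0 */
  printf ("%d\n", debounce_poll (&d, 0.0005));	/* 0: still settling */
  printf ("%d\n", debounce_poll (&d, 0.002));	/* 1: apply new state */
  return 0;
}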
*/ + { + vlib_node_runtime_t *rt = + vlib_node_get_runtime (vm, ixge_input_node.index); + rt->runtime_data[0] |= 1 << dev->private_data; + } +} + +static clib_error_t * +ixge_pci_init (vlib_main_t * vm, vlib_pci_device_t * dev) +{ + ixge_main_t *xm = &ixge_main; + clib_error_t *error; + void *r; + ixge_device_t *xd; + + /* Device found: make sure we have dma memory. */ + if (unix_physmem_is_fake (vm)) + return clib_error_return (0, "no physical memory available"); + + error = vlib_pci_map_resource (dev, 0, &r); + if (error) + return error; + + vec_add2 (xm->devices, xd, 1); + + if (vec_len (xm->devices) == 1) + { + ixge_input_node.function = ixge_input_multiarch_select (); + } + + xd->pci_device = dev[0]; + xd->device_id = xd->pci_device.config0.header.device_id; + xd->regs = r; + xd->device_index = xd - xm->devices; + xd->pci_function = dev->bus_address.function; + xd->per_interface_next_index = ~0; + + + /* Chip found so enable node. */ + { + vlib_node_set_state (vm, ixge_input_node.index, + (IXGE_ALWAYS_POLL + ? VLIB_NODE_STATE_POLLING + : VLIB_NODE_STATE_INTERRUPT)); + + dev->private_data = xd->device_index; + } + + if (vec_len (xm->devices) == 1) + { + vlib_register_node (vm, &ixge_process_node); + xm->process_node_index = ixge_process_node.index; + } + + error = vlib_pci_bus_master_enable (dev); + + if (error) + return error; + + return vlib_pci_intr_enable (dev); +} + +/* *INDENT-OFF* */ +PCI_REGISTER_DEVICE (ixge_pci_device_registration,static) = { + .init_function = ixge_pci_init, + .interrupt_handler = ixge_pci_intr_handler, + .supported_devices = { +#define _(t,i) { .vendor_id = PCI_VENDOR_ID_INTEL, .device_id = i, }, + foreach_ixge_pci_device_id +#undef _ + { 0 }, + }, +}; +/* *INDENT-ON* */ + +void +ixge_set_next_node (ixge_rx_next_t next, char *name) +{ + vlib_node_registration_t *r = &ixge_input_node; + + switch (next) + { + case IXGE_RX_NEXT_IP4_INPUT: + case IXGE_RX_NEXT_IP6_INPUT: + case IXGE_RX_NEXT_ETHERNET_INPUT: + r->next_nodes[next] = name; + break; + + default: + clib_warning ("%s: illegal next %d\n", __FUNCTION__, next); + break; + } +} +#endif + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .default_disabled = 1, +}; + +/* *INDENT-ON* */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ixge/ixge.h b/src/plugins/ixge/ixge.h new file mode 100644 index 00000000..779603b3 --- /dev/null +++ b/src/plugins/ixge/ixge.h @@ -0,0 +1,1293 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_ixge_h +#define included_ixge_h + +#include +#include +#include +#include +#include +#include + +typedef volatile struct +{ + /* [31:7] 128 byte aligned. 
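The interrupt handler above records which device fired by setting bit dev->private_data in the input node's runtime_data[0]; ixge_input() walks that mask on the next dispatch and then clears it, so one software interrupt can batch work for several devices. The bookkeeping in miniature (illustrative sketch):

typedef unsigned int u32;

static u32 pending_devices;	/* one bit per device index */

/* ISR side: mark device i as needing service. */
static void
mark_pending (u32 i)
{
  pending_devices |= 1u << i;
}

/* Input-node side: service every marked device, then clear the mask. */
static u32
service_pending (u32 (*service) (u32 dev))
{
  u32 n = 0, i, m = pending_devices;
  for (i = 0; m != 0; i++, m >>= 1)
    if (m & 1)
      n += service (i);
  pending_devices = 0;
  return n;
}

static u32
demo_service (u32 dev)
{
  return dev + 1;	/* pretend each device produced some packets */
}

int
main (void)
{
  mark_pending (0);
  mark_pending (3);
  return service_pending (demo_service) ? 0 : 1;
}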
*/ + u32 descriptor_address[2]; + u32 n_descriptor_bytes; + + /* [5] rx/tx descriptor dca enable + [6] rx packet head dca enable + [7] rx packet tail dca enable + [9] rx/tx descriptor relaxed order + [11] rx/tx descriptor write back relaxed order + [13] rx/tx data write/read relaxed order + [15] rx head data write relaxed order + [31:24] apic id for cpu's cache. */ + u32 dca_control; + + u32 head_index; + + /* [4:0] tail buffer size (in 1k byte units) + [13:8] head buffer size (in 64 byte units) + [24:22] lo free descriptors threshold (units of 64 descriptors) + [27:25] descriptor type 0 = legacy, 1 = advanced one buffer (e.g. tail), + 2 = advanced header splitting (head + tail), 5 = advanced header + splitting (head only). + [28] drop if no descriptors available. */ + u32 rx_split_control; + + u32 tail_index; + CLIB_PAD_FROM_TO (0x1c, 0x28); + + /* [7:0] rx/tx prefetch threshold + [15:8] rx/tx host threshold + [24:16] rx/tx write back threshold + [25] rx/tx enable + [26] tx descriptor writeback flush + [30] rx strip vlan enable */ + u32 control; + + u32 rx_coallesce_control; + + union + { + struct + { + /* packets bytes lo hi */ + u32 stats[3]; + + u32 unused; + } rx; + + struct + { + u32 unused[2]; + + /* [0] enables head write back. */ + u32 head_index_write_back_address[2]; + } tx; + }; +} ixge_dma_regs_t; + +/* Only advanced descriptors are supported. */ +typedef struct +{ + u64 tail_address; + u64 head_address; +} ixge_rx_to_hw_descriptor_t; + +typedef struct +{ + u32 status[3]; + u16 n_packet_bytes_this_descriptor; + u16 vlan_tag; +} ixge_rx_from_hw_descriptor_t; + +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2 (1 << (4 + 11)) +/* Valid if not layer2. */ +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4 (1 << (4 + 0)) +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT (1 << (4 + 1)) +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6 (1 << (4 + 2)) +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT (1 << (4 + 3)) +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP (1 << (4 + 4)) +#define IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP (1 << (4 + 5)) +#define IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET(s) (((s) >> 21) & 0x3ff) + +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE (1 << (0 + 0)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET (1 << (0 + 1)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN (1 << (0 + 3)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED (1 << (0 + 4)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED (1 << (0 + 5)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED (1 << (0 + 6)) +#define IXGE_RX_DESCRIPTOR_STATUS2_NOT_UNICAST (1 << (0 + 7)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IS_DOUBLE_VLAN (1 << (0 + 9)) +#define IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR (1 << (0 + 10)) +#define IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR (1 << (20 + 9)) +#define IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR (1 << (20 + 10)) +#define IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR (1 << (20 + 11)) + +/* For layer2 packets stats0 bottom 3 bits give ether type index from filter. 
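These STATUS0/STATUS2 bits are what the RX path tests to classify a completed descriptor and pick the next node (ip4-input, ip6-input or ethernet-input). A standalone decode in the same spirit (shortened names; a sketch, not the driver's exact next-node logic):

typedef unsigned int u32;

#define RX_STATUS0_IS_IP4  (1 << (4 + 0))	/* same positions as above */
#define RX_STATUS0_IS_IP6  (1 << (4 + 2))
#define RX_STATUS2_IS_EOP  (1 << 1)

typedef enum { NEXT_IP4, NEXT_IP6, NEXT_ETHERNET } rx_next_t;

/* Steer a packet from its descriptor status word. */
static rx_next_t
classify (u32 status0)
{
  if (status0 & RX_STATUS0_IS_IP4)
    return NEXT_IP4;
  if (status0 & RX_STATUS0_IS_IP6)
    return NEXT_IP6;
  return NEXT_ETHERNET;		/* layer-2 or unrecognized */
}

int
main (void)
{
  return classify (RX_STATUS0_IS_IP4) == NEXT_IP4 ? 0 : 1;
}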
*/ +#define IXGE_RX_DESCRIPTOR_STATUS0_LAYER2_ETHERNET_TYPE(s) ((s) & 7) + +typedef struct +{ + u64 buffer_address; + u16 n_bytes_this_buffer; + u16 status0; + u32 status1; +#define IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED (3 << 4) +#define IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED (1 << (8 + 5)) +#define IXGE_TX_DESCRIPTOR_STATUS0_LOG2_REPORT_STATUS (8 + 3) +#define IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS (1 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_REPORT_STATUS) +#define IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS (1 << (8 + 1)) +#define IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET (8 + 0) +#define IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET (1 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET) +#define IXGE_TX_DESCRIPTOR_STATUS1_DONE (1 << 0) +#define IXGE_TX_DESCRIPTOR_STATUS1_CONTEXT(i) (/* valid */ (1 << 7) | ((i) << 4)) +#define IXGE_TX_DESCRIPTOR_STATUS1_IPSEC_OFFLOAD (1 << (8 + 2)) +#define IXGE_TX_DESCRIPTOR_STATUS1_INSERT_TCP_UDP_CHECKSUM (1 << (8 + 1)) +#define IXGE_TX_DESCRIPTOR_STATUS1_INSERT_IP4_CHECKSUM (1 << (8 + 0)) +#define IXGE_TX_DESCRIPTOR_STATUS0_N_BYTES_THIS_BUFFER(l) ((l) << 0) +#define IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET(l) ((l) << 14) +} ixge_tx_descriptor_t; + +typedef struct +{ + struct + { + u8 checksum_start_offset; + u8 checksum_insert_offset; + u16 checksum_end_offset; + } ip, tcp; + u32 status0; + + u8 status1; + + /* Byte offset after UDP/TCP header. */ + u8 payload_offset; + + u16 max_tcp_segment_size; +} __attribute__ ((packed)) ixge_tx_context_descriptor_t; + +typedef union +{ + ixge_rx_to_hw_descriptor_t rx_to_hw; + ixge_rx_from_hw_descriptor_t rx_from_hw; + ixge_tx_descriptor_t tx; + u32x4 as_u32x4; +} ixge_descriptor_t; + +typedef volatile struct +{ + /* [2] pcie master disable + [3] mac reset + [26] global device reset */ + u32 control; + u32 control_alias; + /* [3:2] device id (0 or 1 for dual port chips) + [7] link is up + [17:10] num vfs + [18] io active + [19] pcie master enable status */ + u32 status_read_only; + CLIB_PAD_FROM_TO (0xc, 0x18); + /* [14] pf reset done + [17] relaxed ordering disable + [26] extended vlan enable + [28] driver loaded */ + u32 extended_control; + CLIB_PAD_FROM_TO (0x1c, 0x20); + + /* software definable pins. + sdp_data [7:0] + sdp_is_output [15:8] + sdp_is_native [23:16] + sdp_function [31:24]. + */ + u32 sdp_control; + CLIB_PAD_FROM_TO (0x24, 0x28); + + /* [0] i2c clock in + [1] i2c clock out + [2] i2c data in + [3] i2c data out */ + u32 i2c_control; + CLIB_PAD_FROM_TO (0x2c, 0x4c); + u32 tcp_timer; + + CLIB_PAD_FROM_TO (0x50, 0x200); + + u32 led_control; + + CLIB_PAD_FROM_TO (0x204, 0x600); + u32 core_spare; + CLIB_PAD_FROM_TO (0x604, 0x700); + + struct + { + u32 vflr_events_clear[4]; + u32 mailbox_interrupt_status[4]; + u32 mailbox_interrupt_enable[4]; + CLIB_PAD_FROM_TO (0x730, 0x800); + } pf_foo; + + struct + { + u32 status_write_1_to_clear; + CLIB_PAD_FROM_TO (0x804, 0x808); + u32 status_write_1_to_set; + CLIB_PAD_FROM_TO (0x80c, 0x810); + u32 status_auto_clear_enable; + CLIB_PAD_FROM_TO (0x814, 0x820); + + /* [11:3] minimum inter-interrupt interval + (2e-6 units; 20e-6 units for fast ethernet). + [15] low-latency interrupt moderation enable + [20:16] low-latency interrupt credit + [27:21] interval counter + [31] write disable for credit and counter (write only). 
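The TX STATUS0/STATUS1 macros above assemble the advanced-descriptor command words from shifted bit fields; ixge_init() keeps a template with the common bits (ADVANCED, IS_ADVANCED, INSERT_FCS) and the per-packet bits such as end-of-packet are ORed in at enqueue time. Composing one single-buffer descriptor that way (standalone sketch; the bit subset is re-declared locally so the example builds on its own):

typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;

/* Subset of the STATUS bits defined above, copied for a standalone build. */
#define TX_STATUS0_ADVANCED		(3 << 4)
#define TX_STATUS0_IS_ADVANCED		(1 << (8 + 5))
#define TX_STATUS0_INSERT_FCS		(1 << (8 + 1))
#define TX_STATUS0_EOP			(1 << (8 + 0))
#define TX_STATUS1_N_BYTES_IN_PACKET(l)	((l) << 14)

typedef struct
{
  u64 buffer_address;
  u16 n_bytes_this_buffer;
  u16 status0;
  u32 status1;
} tx_desc_t;

/* Fill a single-buffer packet descriptor: template bits plus per-packet
   end-of-packet, the way the driver's enqueue path does. */
static void
fill_tx_desc (tx_desc_t * d, u64 pa, u16 len)
{
  d->buffer_address = pa;
  d->n_bytes_this_buffer = len;
  d->status0 = TX_STATUS0_ADVANCED | TX_STATUS0_IS_ADVANCED
    | TX_STATUS0_INSERT_FCS | TX_STATUS0_EOP;
  d->status1 = TX_STATUS1_N_BYTES_IN_PACKET (len);
}

int
main (void)
{
  tx_desc_t d;
  fill_tx_desc (&d, 0x1000, 64);	/* hypothetical DMA address, length */
  return d.status0 != 0 ? 0 : 1;
}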
*/ + u32 throttle0[24]; + + u32 enable_write_1_to_set; + CLIB_PAD_FROM_TO (0x884, 0x888); + u32 enable_write_1_to_clear; + CLIB_PAD_FROM_TO (0x88c, 0x890); + u32 enable_auto_clear; + u32 msi_to_eitr_select; + /* [3:0] spd 0-3 interrupt detection enable + [4] msi-x enable + [5] other clear disable (makes other bits in status not clear on read) + etc. */ + u32 control; + CLIB_PAD_FROM_TO (0x89c, 0x900); + + /* Defines interrupt mapping for 128 rx + 128 tx queues. + 64 x 4 8 bit entries. + For register [i]: + [5:0] bit in interrupt status for rx queue 2*i + 0 + [7] valid bit + [13:8] bit for tx queue 2*i + 0 + [15] valid bit + similar for rx 2*i + 1 and tx 2*i + 1. */ + u32 queue_mapping[64]; + + /* tcp timer [7:0] and other interrupts [15:8] */ + u32 misc_mapping; + CLIB_PAD_FROM_TO (0xa04, 0xa90); + + /* 64 interrupts determined by mappings. */ + u32 status1_write_1_to_clear[4]; + u32 enable1_write_1_to_set[4]; + u32 enable1_write_1_to_clear[4]; + CLIB_PAD_FROM_TO (0xac0, 0xad0); + u32 status1_enable_auto_clear[4]; + CLIB_PAD_FROM_TO (0xae0, 0x1000); + } interrupt; + + ixge_dma_regs_t rx_dma0[64]; + + CLIB_PAD_FROM_TO (0x2000, 0x2140); + u32 dcb_rx_packet_plane_t4_config[8]; + u32 dcb_rx_packet_plane_t4_status[8]; + CLIB_PAD_FROM_TO (0x2180, 0x2300); + + /* reg i defines mapping for 4 rx queues starting at 4*i + 0. */ + u32 rx_queue_stats_mapping[32]; + u32 rx_queue_stats_control; + + CLIB_PAD_FROM_TO (0x2384, 0x2410); + u32 fc_user_descriptor_ptr[2]; + u32 fc_buffer_control; + CLIB_PAD_FROM_TO (0x241c, 0x2420); + u32 fc_rx_dma; + CLIB_PAD_FROM_TO (0x2424, 0x2430); + u32 dcb_packet_plane_control; + CLIB_PAD_FROM_TO (0x2434, 0x2f00); + + u32 rx_dma_control; + u32 pf_queue_drop_enable; + CLIB_PAD_FROM_TO (0x2f08, 0x2f20); + u32 rx_dma_descriptor_cache_config; + CLIB_PAD_FROM_TO (0x2f24, 0x3000); + + /* 1 bit. */ + u32 rx_enable; + CLIB_PAD_FROM_TO (0x3004, 0x3008); + /* [15:0] ether type (little endian) + [31:16] opcode (big endian) */ + u32 flow_control_control; + CLIB_PAD_FROM_TO (0x300c, 0x3020); + /* 3 bit traffic class for each of 8 priorities. */ + u32 rx_priority_to_traffic_class; + CLIB_PAD_FROM_TO (0x3024, 0x3028); + u32 rx_coallesce_data_buffer_control; + CLIB_PAD_FROM_TO (0x302c, 0x3190); + u32 rx_packet_buffer_flush_detect; + CLIB_PAD_FROM_TO (0x3194, 0x3200); + u32 flow_control_tx_timers[4]; /* 2 timer values */ + CLIB_PAD_FROM_TO (0x3210, 0x3220); + u32 flow_control_rx_threshold_lo[8]; + CLIB_PAD_FROM_TO (0x3240, 0x3260); + u32 flow_control_rx_threshold_hi[8]; + CLIB_PAD_FROM_TO (0x3280, 0x32a0); + u32 flow_control_refresh_threshold; + CLIB_PAD_FROM_TO (0x32a4, 0x3c00); + /* For each of 8 traffic classes (units of bytes). 
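ixge_device_init() earlier in this patch packs queue_mapping register 0 exactly per this layout: each 8-bit lane carries a 6-bit status-bit number plus a valid bit, RX in the low byte and TX in the next byte. The packing as a helper (illustrative sketch):

typedef unsigned int u32;

/* One 8-bit lane per queue: (valid << 7) | status-bit, RX in byte 0 and
   TX in byte 1 of mapping register 0, matching the comment above. */
static u32
queue_mapping_entry (u32 rx_status_bit, u32 tx_status_bit)
{
  u32 rx_lane = (1 << 7) | (rx_status_bit & 0x3f);
  u32 tx_lane = (1 << 7) | (tx_status_bit & 0x3f);
  return (rx_lane << 0) | (tx_lane << 8);
}

int
main (void)
{
  /* RX queue 0 -> status bit 0, TX queue 0 -> status bit 8: 0x8880. */
  u32 reg0 = queue_mapping_entry (0, 8);
  return reg0 == 0x8880 ? 0 : 1;
}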
*/ + u32 rx_packet_buffer_size[8]; + CLIB_PAD_FROM_TO (0x3c20, 0x3d00); + u32 flow_control_config; + CLIB_PAD_FROM_TO (0x3d04, 0x4200); + + struct + { + u32 pcs_config; + CLIB_PAD_FROM_TO (0x4204, 0x4208); + u32 link_control; + u32 link_status; + u32 pcs_debug[2]; + u32 auto_negotiation; + u32 link_partner_ability; + u32 auto_negotiation_tx_next_page; + u32 auto_negotiation_link_partner_next_page; + CLIB_PAD_FROM_TO (0x4228, 0x4240); + } gige_mac; + + struct + { + /* [0] tx crc enable + [2] enable frames up to max frame size register [31:16] + [10] pad frames < 64 bytes if specified by user + [15] loopback enable + [16] mdc hi speed + [17] turn off mdc between mdio packets */ + u32 control; + + /* [5] rx symbol error (all bits clear on read) + [6] rx illegal symbol + [7] rx idle error + [8] rx local fault + [9] rx remote fault */ + u32 status; + + u32 pause_and_pace_control; + CLIB_PAD_FROM_TO (0x424c, 0x425c); + u32 phy_command; + u32 phy_data; + CLIB_PAD_FROM_TO (0x4264, 0x4268); + + /* [31:16] max frame size in bytes. */ + u32 rx_max_frame_size; + CLIB_PAD_FROM_TO (0x426c, 0x4288); + + /* [0] + [2] pcs receive link up? (latch lo) + [7] local fault + [1] + [0] pcs 10g base r capable + [1] pcs 10g base x capable + [2] pcs 10g base w capable + [10] rx local fault + [11] tx local fault + [15:14] 2 => device present at this address (else not present) */ + u32 xgxs_status[2]; + + u32 base_x_pcs_status; + + /* [0] pass unrecognized flow control frames + [1] discard pause frames + [2] rx priority flow control enable (only in dcb mode) + [3] rx flow control enable. */ + u32 flow_control; + + /* [3:0] tx lanes change polarity + [7:4] rx lanes change polarity + [11:8] swizzle tx lanes + [15:12] swizzle rx lanes + 4 x 2 bit tx lane swap + 4 x 2 bit rx lane swap. */ + u32 serdes_control; + + u32 fifo_control; + + /* [0] force link up + [1] autoneg ack2 bit to transmit + [6:2] autoneg selector field to transmit + [8:7] 10g pma/pmd type 0 => xaui, 1 kx4, 2 cx4 + [9] 1g pma/pmd type 0 => sfi, 1 => kx/bx + [10] disable 10g on without main power + [11] restart autoneg on transition to dx power state + [12] restart autoneg + [15:13] link mode: + 0 => 1g no autoneg + 1 => 10g kx4 parallel link no autoneg + 2 => 1g bx autoneg + 3 => 10g sfi serdes + 4 => kx4/kx/kr + 5 => xgmii 1g/100m + 6 => kx4/kx/kr 1g an + 7 kx4/kx/kr sgmii. + [16] kr support + [17] fec requested + [18] fec ability + etc. */ + u32 auto_negotiation_control; + + /* [0] signal detect 1g/100m + [1] fec signal detect + [2] 10g serial pcs fec block lock + [3] 10g serial high error rate + [4] 10g serial pcs block lock + [5] kx/kx4/kr autoneg next page received + [6] kx/kx4/kr backplane autoneg next page received + [7] link status clear to read + [11:8] 10g signal detect (4 lanes) (for serial just lane 0) + [12] 10g serial signal detect + [16:13] 10g parallel lane sync status + [17] 10g parallel align status + [18] 1g sync status + [19] kx/kx4/kr backplane autoneg is idle + [20] 1g autoneg enabled + [21] 1g pcs enabled for sgmii + [22] 10g xgxs enabled + [23] 10g serial fec enabled (forward error detection) + [24] 10g kr pcs enabled + [25] sgmii enabled + [27:26] mac link mode + 0 => 1g + 1 => 10g parallel + 2 => 10g serial + 3 => autoneg + [29:28] link speed + 1 => 100m + 2 => 1g + 3 => 10g + [30] link is up + [31] kx/kx4/kr backplane autoneg completed successfully. 
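format_ixge_link_status() earlier in this patch decodes exactly these fields: bit 30 for link up, bits [27:26] for MAC link mode and [29:28] for speed. The same decode as a standalone example:

#include <stdio.h>

typedef unsigned int u32;

static void
decode_link_status (u32 v)
{
  static const char *modes[] =
    { "1g", "10g parallel", "10g serial", "autoneg" };
  static const char *speeds[] = { "unknown", "100m", "1g", "10g" };

  printf ("link %s, mode %s, speed %s\n",
	  (v & (1 << 30)) ? "up" : "down",	/* bit 30: link is up */
	  modes[(v >> 26) & 3],			/* bits 27:26: link mode */
	  speeds[(v >> 28) & 3]);		/* bits 29:28: link speed */
}

int
main (void)
{
  /* up, 10g serial, 10g */
  decode_link_status ((1u << 30) | (3u << 28) | (2u << 26));
  return 0;
}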
u32 link_status;
+
+    /* [17:16] pma/pmd for 10g serial
+       0 => kr, 2 => sfi
+       [18] disable dme pages */
+    u32 auto_negotiation_control2;
+
+    CLIB_PAD_FROM_TO (0x42ac, 0x42b0);
+    u32 link_partner_ability[2];
+    CLIB_PAD_FROM_TO (0x42b8, 0x42d0);
+    u32 manageability_control;
+    u32 link_partner_next_page[2];
+    CLIB_PAD_FROM_TO (0x42dc, 0x42e0);
+    u32 kr_pcs_control;
+    u32 kr_pcs_status;
+    u32 fec_status[2];
+    CLIB_PAD_FROM_TO (0x42f0, 0x4314);
+    u32 sgmii_control;
+    CLIB_PAD_FROM_TO (0x4318, 0x4324);
+    u32 link_status2;
+    CLIB_PAD_FROM_TO (0x4328, 0x4900);
+  } xge_mac;
+
+  u32 tx_dcb_control;
+  u32 tx_dcb_descriptor_plane_queue_select;
+  u32 tx_dcb_descriptor_plane_t1_config;
+  u32 tx_dcb_descriptor_plane_t1_status;
+  CLIB_PAD_FROM_TO (0x4910, 0x4950);
+
+  /* For each TC in units of 1k bytes. */
+  u32 tx_packet_buffer_thresholds[8];
+  CLIB_PAD_FROM_TO (0x4970, 0x4980);
+  struct
+  {
+    u32 mmw;
+    u32 config;
+    u32 status;
+    u32 rate_drift;
+  } dcb_tx_rate_scheduler;
+  CLIB_PAD_FROM_TO (0x4990, 0x4a80);
+  u32 tx_dma_control;
+  CLIB_PAD_FROM_TO (0x4a84, 0x4a88);
+  u32 tx_dma_tcp_flags_control[2];
+  CLIB_PAD_FROM_TO (0x4a90, 0x4b00);
+  u32 pf_mailbox[64];
+  CLIB_PAD_FROM_TO (0x4c00, 0x5000);
+
+  /* RX */
+  u32 checksum_control;
+  CLIB_PAD_FROM_TO (0x5004, 0x5008);
+  u32 rx_filter_control;
+  CLIB_PAD_FROM_TO (0x500c, 0x5010);
+  u32 management_vlan_tag[8];
+  u32 management_udp_tcp_ports[8];
+  CLIB_PAD_FROM_TO (0x5050, 0x5078);
+  /* little endian. */
+  u32 extended_vlan_ether_type;
+  CLIB_PAD_FROM_TO (0x507c, 0x5080);
+  /* [1] store/dma bad packets
+     [8] accept all multicast
+     [9] accept all unicast
+     [10] accept all broadcast. */
+  u32 filter_control;
+  CLIB_PAD_FROM_TO (0x5084, 0x5088);
+  /* [15:0] vlan ethernet type (0x8100) little endian
+     [28] cfi bit expected
+     [29] drop packets with unexpected cfi bit
+     [30] vlan filter enable. */
+  u32 vlan_control;
+  CLIB_PAD_FROM_TO (0x508c, 0x5090);
+  /* [1:0] hi bit of ethernet address for 12 bit index into multicast table
+     0 => 47, 1 => 46, 2 => 45, 3 => 43.
+     [2] enable multicast filter
+   */
+  u32 multicast_control;
+  CLIB_PAD_FROM_TO (0x5094, 0x5100);
+  u32 fcoe_rx_control;
+  CLIB_PAD_FROM_TO (0x5104, 0x5108);
+  u32 fc_flt_context;
+  CLIB_PAD_FROM_TO (0x510c, 0x5110);
+  u32 fc_filter_control;
+  CLIB_PAD_FROM_TO (0x5114, 0x5120);
+  u32 rx_message_type_lo;
+  CLIB_PAD_FROM_TO (0x5124, 0x5128);
+  /* [15:0] ethernet type (little endian)
+     [18:16] match priority in vlan tag
+     [19] priority match enable
+     [25:20] virtualization pool
+     [26] pool enable
+     [27] is fcoe
+     [30] ieee 1588 timestamp enable
+     [31] filter enable.
+     (See ethernet_type_queue_select.) */
+  u32 ethernet_type_queue_filter[8];
+  CLIB_PAD_FROM_TO (0x5148, 0x5160);
+  /* [7:0] l2 ethernet type and
+     [15:8] l2 ethernet type or */
+  u32 management_decision_filters1[8];
+  u32 vf_vm_tx_switch_loopback_enable[2];
+  u32 rx_time_sync_control;
+  CLIB_PAD_FROM_TO (0x518c, 0x5190);
+  u32 management_ethernet_type_filters[4];
+  u32 rx_timestamp_attributes_lo;
+  u32 rx_timestamp_hi;
+  u32 rx_timestamp_attributes_hi;
+  CLIB_PAD_FROM_TO (0x51ac, 0x51b0);
+  u32 pf_virtual_control;
+  CLIB_PAD_FROM_TO (0x51b4, 0x51d8);
+  u32 fc_offset_parameter;
+  CLIB_PAD_FROM_TO (0x51dc, 0x51e0);
+  u32 vf_rx_enable[2];
+  u32 rx_timestamp_lo;
+  CLIB_PAD_FROM_TO (0x51ec, 0x5200);
+  /* 12 bits determined by multicast_control
+     lookup bits in this vector. */
+  u32 multicast_enable[128];
+
+  /* [0] ethernet address [31:0]
+     [1] [15:0] ethernet address [47:32]
+     [31] valid bit.
+     Index 0 is read from eeprom after reset.
*/ + u32 rx_ethernet_address0[16][2]; + + CLIB_PAD_FROM_TO (0x5480, 0x5800); + u32 wake_up_control; + CLIB_PAD_FROM_TO (0x5804, 0x5808); + u32 wake_up_filter_control; + CLIB_PAD_FROM_TO (0x580c, 0x5818); + u32 multiple_rx_queue_command_82598; + CLIB_PAD_FROM_TO (0x581c, 0x5820); + u32 management_control; + u32 management_filter_control; + CLIB_PAD_FROM_TO (0x5828, 0x5838); + u32 wake_up_ip4_address_valid; + CLIB_PAD_FROM_TO (0x583c, 0x5840); + u32 wake_up_ip4_address_table[4]; + u32 management_control_to_host; + CLIB_PAD_FROM_TO (0x5854, 0x5880); + u32 wake_up_ip6_address_table[4]; + + /* unicast_and broadcast_and vlan_and ip_address_and + etc. */ + u32 management_decision_filters[8]; + + u32 management_ip4_or_ip6_address_filters[4][4]; + CLIB_PAD_FROM_TO (0x58f0, 0x5900); + u32 wake_up_packet_length; + CLIB_PAD_FROM_TO (0x5904, 0x5910); + u32 management_ethernet_address_filters[4][2]; + CLIB_PAD_FROM_TO (0x5930, 0x5a00); + u32 wake_up_packet_memory[32]; + CLIB_PAD_FROM_TO (0x5a80, 0x5c00); + u32 redirection_table_82598[32]; + u32 rss_random_keys_82598[10]; + CLIB_PAD_FROM_TO (0x5ca8, 0x6000); + + ixge_dma_regs_t tx_dma[128]; + + u32 pf_vm_vlan_insert[64]; + u32 tx_dma_tcp_max_alloc_size_requests; + CLIB_PAD_FROM_TO (0x8104, 0x8110); + u32 vf_tx_enable[2]; + CLIB_PAD_FROM_TO (0x8118, 0x8120); + /* [0] dcb mode enable + [1] virtualization mode enable + [3:2] number of tcs/qs per pool. */ + u32 multiple_tx_queues_command; + CLIB_PAD_FROM_TO (0x8124, 0x8200); + u32 pf_vf_anti_spoof[8]; + u32 pf_dma_tx_switch_control; + CLIB_PAD_FROM_TO (0x8224, 0x82e0); + u32 tx_strict_low_latency_queues[4]; + CLIB_PAD_FROM_TO (0x82f0, 0x8600); + u32 tx_queue_stats_mapping_82599[32]; + u32 tx_queue_packet_counts[32]; + u32 tx_queue_byte_counts[32][2]; + + struct + { + u32 control; + u32 status; + u32 buffer_almost_full; + CLIB_PAD_FROM_TO (0x880c, 0x8810); + u32 buffer_min_ifg; + CLIB_PAD_FROM_TO (0x8814, 0x8900); + } tx_security; + + struct + { + u32 index; + u32 salt; + u32 key[4]; + CLIB_PAD_FROM_TO (0x8918, 0x8a00); + } tx_ipsec; + + struct + { + u32 capabilities; + u32 control; + u32 tx_sci[2]; + u32 sa; + u32 sa_pn[2]; + u32 key[2][4]; + /* untagged packets, encrypted packets, protected packets, + encrypted bytes, protected bytes */ + u32 stats[5]; + CLIB_PAD_FROM_TO (0x8a50, 0x8c00); + } tx_link_security; + + struct + { + u32 control; + u32 timestamp_value[2]; + u32 system_time[2]; + u32 increment_attributes; + u32 time_adjustment_offset[2]; + u32 aux_control; + u32 target_time[2][2]; + CLIB_PAD_FROM_TO (0x8c34, 0x8c3c); + u32 aux_time_stamp[2][2]; + CLIB_PAD_FROM_TO (0x8c4c, 0x8d00); + } tx_timesync; + + struct + { + u32 control; + u32 status; + CLIB_PAD_FROM_TO (0x8d08, 0x8e00); + } rx_security; + + struct + { + u32 index; + u32 ip_address[4]; + u32 spi; + u32 ip_index; + u32 key[4]; + u32 salt; + u32 mode; + CLIB_PAD_FROM_TO (0x8e34, 0x8f00); + } rx_ipsec; + + struct + { + u32 capabilities; + u32 control; + u32 sci[2]; + u32 sa[2]; + u32 sa_pn[2]; + u32 key[2][4]; + /* see datasheet */ + u32 stats[17]; + CLIB_PAD_FROM_TO (0x8f84, 0x9000); + } rx_link_security; + + /* 4 wake up, 2 management, 2 wake up. */ + u32 flexible_filters[8][16][4]; + CLIB_PAD_FROM_TO (0x9800, 0xa000); + + /* 4096 bits. */ + u32 vlan_filter[128]; + + /* [0] ethernet address [31:0] + [1] [15:0] ethernet address [47:32] + [31] valid bit. + Index 0 is read from eeprom after reset. */ + u32 rx_ethernet_address1[128][2]; + + /* select one of 64 pools for each rx address. 
u32 rx_ethernet_address_pool_select[128][2];
+  CLIB_PAD_FROM_TO (0xaa00, 0xc800);
+  u32 tx_priority_to_traffic_class;
+  CLIB_PAD_FROM_TO (0xc804, 0xcc00);
+
+  /* In bytes units of 1k. Total packet buffer is 160k. */
+  u32 tx_packet_buffer_size[8];
+
+  CLIB_PAD_FROM_TO (0xcc20, 0xcd10);
+  u32 tx_manageability_tc_mapping;
+  CLIB_PAD_FROM_TO (0xcd14, 0xcd20);
+  u32 dcb_tx_packet_plane_t2_config[8];
+  u32 dcb_tx_packet_plane_t2_status[8];
+  CLIB_PAD_FROM_TO (0xcd60, 0xce00);
+
+  u32 tx_flow_control_status;
+  CLIB_PAD_FROM_TO (0xce04, 0xd000);
+
+  ixge_dma_regs_t rx_dma1[64];
+
+  struct
+  {
+    /* Bigendian ip4 src/dst address. */
+    u32 src_address[128];
+    u32 dst_address[128];
+
+    /* TCP/UDP ports [15:0] src [31:16] dst; bigendian. */
+    u32 tcp_udp_port[128];
+
+    /* [1:0] protocol tcp, udp, sctp, other
+       [4:2] match priority (highest wins)
+       [13:8] pool
+       [25] src address match disable
+       [26] dst address match disable
+       [27] src port match disable
+       [28] dst port match disable
+       [29] protocol match disable
+       [30] pool match disable
+       [31] enable. */
+    u32 control[128];
+
+    /* [12] size bypass
+       [19:13] must be 0x80
+       [20] low-latency interrupt
+       [27:21] rx queue. */
+    u32 interrupt[128];
+  } ip4_filters;
+
+  CLIB_PAD_FROM_TO (0xea00, 0xeb00);
+  /* 4 bit rss output index indexed by 7 bit hash.
+     128 8 bit fields = 32 registers. */
+  u32 redirection_table_82599[32];
+
+  u32 rss_random_key_82599[10];
+  CLIB_PAD_FROM_TO (0xeba8, 0xec00);
+  /* [15:0] reserved
+     [22:16] rx queue index
+     [29] low-latency interrupt on match
+     [31] enable */
+  u32 ethernet_type_queue_select[8];
+  CLIB_PAD_FROM_TO (0xec20, 0xec30);
+  u32 syn_packet_queue_filter;
+  CLIB_PAD_FROM_TO (0xec34, 0xec60);
+  u32 immediate_interrupt_rx_vlan_priority;
+  CLIB_PAD_FROM_TO (0xec64, 0xec70);
+  u32 rss_queues_per_traffic_class;
+  CLIB_PAD_FROM_TO (0xec74, 0xec90);
+  u32 lli_size_threshold;
+  CLIB_PAD_FROM_TO (0xec94, 0xed00);
+
+  struct
+  {
+    u32 control;
+    CLIB_PAD_FROM_TO (0xed04, 0xed10);
+    u32 table[8];
+    CLIB_PAD_FROM_TO (0xed30, 0xee00);
+  } fcoe_redirection;
+
+  struct
+  {
+    /* [1:0] packet buffer allocation 0 => disabled, else 64k*2^(f-1)
+       [3] packet buffer initialization done
+       [4] prefetch match mode
+       [5] report status in rss field of rx descriptors
+       [7] report status always
+       [14:8] drop queue
+       [20:16] flex 2 byte packet offset (units of 2 bytes)
+       [27:24] max linked list length
+       [31:28] full threshold. */
+    u32 control;
+    CLIB_PAD_FROM_TO (0xee04, 0xee0c);
+
+    u32 data[8];
+
+    /* [1:0] 0 => no action, 1 => add, 2 => remove, 3 => query.
+       [2] valid filter found by query command
+       [3] filter update override
+       [4] ip6 address table
+       [6:5] l4 protocol reserved, udp, tcp, sctp
+       [7] is ip6
+       [8] clear head/tail
+       [9] packet drop action
+       [10] matched packet generates low-latency interrupt
+       [11] last in linked list
+       [12] collision
+       [15] rx queue enable
+       [22:16] rx queue
+       [29:24] pool. */
+    u32 command;
+
+    CLIB_PAD_FROM_TO (0xee30, 0xee3c);
+    /* ip4 dst/src address, tcp ports, udp ports.
+       set bits mean bit is ignored. */
+    u32 ip4_masks[4];
+    u32 filter_length;
+    u32 usage_stats;
+    u32 failed_usage_stats;
+    u32 filters_match_stats;
+    u32 filters_miss_stats;
+    CLIB_PAD_FROM_TO (0xee60, 0xee68);
+    /* Lookup, signature. */
+    u32 hash_keys[2];
+    /* [15:0] ip6 src address 1 bit per byte
+       [31:16] ip6 dst address. */
+    u32 ip6_mask;
+    /* [0] vlan id
+       [1] vlan priority
+       [2] pool
+       [3] ip protocol
+       [4] flex
+       [5] dst ip6.
u32 other_mask;
+    CLIB_PAD_FROM_TO (0xee78, 0xf000);
+  } flow_director;
+
+  struct
+  {
+    u32 l2_control[64];
+    u32 vlan_pool_filter[64];
+    u32 vlan_pool_filter_bitmap[128];
+    u32 dst_ethernet_address[128];
+    u32 mirror_rule[4];
+    u32 mirror_rule_vlan[8];
+    u32 mirror_rule_pool[8];
+    CLIB_PAD_FROM_TO (0xf650, 0x10010);
+  } pf_bar;
+
+  u32 eeprom_flash_control;
+  /* [0] start
+     [1] done
+     [15:2] address
+     [31:16] read data. */
+  u32 eeprom_read;
+  CLIB_PAD_FROM_TO (0x10018, 0x1001c);
+  u32 flash_access;
+  CLIB_PAD_FROM_TO (0x10020, 0x10114);
+  u32 flash_data;
+  u32 flash_control;
+  u32 flash_read_data;
+  CLIB_PAD_FROM_TO (0x10120, 0x1013c);
+  u32 flash_opcode;
+  u32 software_semaphore;
+  CLIB_PAD_FROM_TO (0x10144, 0x10148);
+  u32 firmware_semaphore;
+  CLIB_PAD_FROM_TO (0x1014c, 0x10160);
+  u32 software_firmware_sync;
+  CLIB_PAD_FROM_TO (0x10164, 0x10200);
+  u32 general_rx_control;
+  CLIB_PAD_FROM_TO (0x10204, 0x11000);
+
+  struct
+  {
+    u32 control;
+    CLIB_PAD_FROM_TO (0x11004, 0x11010);
+    /* [3:0] enable counters
+       [7:4] leaky bucket counter mode
+       [29] reset
+       [30] stop
+       [31] start. */
+    u32 counter_control;
+    /* [7:0],[15:8],[23:16],[31:24] event for counters 0-3.
+       event codes:
+       0x0 bad tlp
+       0x10 reqs that reached timeout
+       etc. */
+    u32 counter_event;
+    CLIB_PAD_FROM_TO (0x11018, 0x11020);
+    u32 counters_clear_on_read[4];
+    u32 counter_config[4];
+    struct
+    {
+      u32 address;
+      u32 data;
+    } indirect_access;
+    CLIB_PAD_FROM_TO (0x11048, 0x11050);
+    u32 extended_control;
+    CLIB_PAD_FROM_TO (0x11054, 0x11064);
+    u32 mirrored_revision_id;
+    CLIB_PAD_FROM_TO (0x11068, 0x11070);
+    u32 dca_requester_id_information;
+
+    /* [0] global disable
+       [4:1] mode: 0 => legacy, 1 => dca 1.0. */
+    u32 dca_control;
+    CLIB_PAD_FROM_TO (0x11078, 0x110b0);
+    /* [0] pci completion abort
+       [1] unsupported i/o address
+       [2] wrong byte enable
+       [3] pci timeout */
+    u32 pcie_interrupt_status;
+    CLIB_PAD_FROM_TO (0x110b4, 0x110b8);
+    u32 pcie_interrupt_enable;
+    CLIB_PAD_FROM_TO (0x110bc, 0x110c0);
+    u32 msi_x_pba_clear[8];
+    CLIB_PAD_FROM_TO (0x110e0, 0x12300);
+  } pcie;
+
+  u32 interrupt_throttle1[128 - 24];
+  CLIB_PAD_FROM_TO (0x124a0, 0x14f00);
+
+  u32 core_analog_config;
+  CLIB_PAD_FROM_TO (0x14f04, 0x14f10);
+  u32 core_common_config;
+  CLIB_PAD_FROM_TO (0x14f14, 0x15f14);
+
+  u32 link_sec_software_firmware_interface;
+} ixge_regs_t;
+
+typedef union
+{
+  struct
+  {
+    /* Addresses bigendian. */
+    union
+    {
+      struct
+      {
+	ip6_address_t src_address;
+	u32 unused[1];
+      } ip6;
+      struct
+      {
+	u32 unused[3];
+	ip4_address_t src_address, dst_address;
+      } ip4;
+    };
+
+    /* [15:0] src port (little endian).
+       [31:16] dst port. */
+    u32 tcp_udp_ports;
+
+    /* [15:0] vlan (cfi bit set to 0).
+       [31:16] flex bytes.  bigendian. */
+    u32 vlan_and_flex_word;
+
+    /* [14:0] hash
+       [15] bucket valid
+       [31:16] signature (signature filters)/sw-index (perfect match). */
+    u32 hash;
+  };
+
+  u32 as_u32[8];
+} ixge_flow_director_key_t;
+
+always_inline void
+ixge_throttle_queue_interrupt (ixge_regs_t * r,
+			       u32 queue_interrupt_index,
+			       f64 inter_interrupt_interval_in_secs)
+{
+  volatile u32 *tr =
+    (queue_interrupt_index < ARRAY_LEN (r->interrupt.throttle0)
+     ? &r->interrupt.throttle0[queue_interrupt_index]
+     : &r->interrupt_throttle1[queue_interrupt_index]);
+  ASSERT (queue_interrupt_index < 128);
+  u32 v;
+  i32 i, mask = (1 << 9) - 1;
+
+  i = flt_round_nearest (inter_interrupt_interval_in_secs / 2e-6);
+  i = i < 1 ? 1 : i;
+  i = i >= mask ?
mask : i;
+
+  v = tr[0];
+  v &= ~(mask << 3);
+  v |= i << 3;
+  tr[0] = v;
+}
+
+#define foreach_ixge_counter \
+  _ (0x40d0, rx_total_packets) \
+  _64 (0x40c0, rx_total_bytes) \
+  _ (0x41b0, rx_good_packets_before_filtering) \
+  _64 (0x41b4, rx_good_bytes_before_filtering) \
+  _ (0x2f50, rx_dma_good_packets) \
+  _64 (0x2f54, rx_dma_good_bytes) \
+  _ (0x2f5c, rx_dma_duplicated_good_packets) \
+  _64 (0x2f60, rx_dma_duplicated_good_bytes) \
+  _ (0x2f68, rx_dma_good_loopback_packets) \
+  _64 (0x2f6c, rx_dma_good_loopback_bytes) \
+  _ (0x2f74, rx_dma_good_duplicated_loopback_packets) \
+  _64 (0x2f78, rx_dma_good_duplicated_loopback_bytes) \
+  _ (0x4074, rx_good_packets) \
+  _64 (0x4088, rx_good_bytes) \
+  _ (0x407c, rx_multicast_packets) \
+  _ (0x4078, rx_broadcast_packets) \
+  _ (0x405c, rx_64_byte_packets) \
+  _ (0x4060, rx_65_127_byte_packets) \
+  _ (0x4064, rx_128_255_byte_packets) \
+  _ (0x4068, rx_256_511_byte_packets) \
+  _ (0x406c, rx_512_1023_byte_packets) \
+  _ (0x4070, rx_gt_1023_byte_packets) \
+  _ (0x4000, rx_crc_errors) \
+  _ (0x4120, rx_ip_checksum_errors) \
+  _ (0x4004, rx_illegal_symbol_errors) \
+  _ (0x4008, rx_error_symbol_errors) \
+  _ (0x4034, rx_mac_local_faults) \
+  _ (0x4038, rx_mac_remote_faults) \
+  _ (0x4040, rx_length_errors) \
+  _ (0x41a4, rx_xons) \
+  _ (0x41a8, rx_xoffs) \
+  _ (0x40a4, rx_undersize_packets) \
+  _ (0x40a8, rx_fragments) \
+  _ (0x40ac, rx_oversize_packets) \
+  _ (0x40b0, rx_jabbers) \
+  _ (0x40b4, rx_management_packets) \
+  _ (0x40b8, rx_management_drops) \
+  _ (0x3fa0, rx_missed_packets_pool_0) \
+  _ (0x40d4, tx_total_packets) \
+  _ (0x4080, tx_good_packets) \
+  _64 (0x4090, tx_good_bytes) \
+  _ (0x40f0, tx_multicast_packets) \
+  _ (0x40f4, tx_broadcast_packets) \
+  _ (0x87a0, tx_dma_good_packets) \
+  _64 (0x87a4, tx_dma_good_bytes) \
+  _ (0x40d8, tx_64_byte_packets) \
+  _ (0x40dc, tx_65_127_byte_packets) \
+  _ (0x40e0, tx_128_255_byte_packets) \
+  _ (0x40e4, tx_256_511_byte_packets) \
+  _ (0x40e8, tx_512_1023_byte_packets) \
+  _ (0x40ec, tx_gt_1023_byte_packets) \
+  _ (0x4010, tx_undersize_drops) \
+  _ (0x8780, switch_security_violation_packets) \
+  _ (0x5118, fc_crc_errors) \
+  _ (0x241c, fc_rx_drops) \
+  _ (0x2424, fc_last_error_count) \
+  _ (0x2428, fcoe_rx_packets) \
+  _ (0x242c, fcoe_rx_dwords) \
+  _ (0x8784, fcoe_tx_packets) \
+  _ (0x8788, fcoe_tx_dwords) \
+  _ (0x1030, queue_0_rx_count) \
+  _ (0x1430, queue_0_drop_count) \
+  _ (0x1070, queue_1_rx_count) \
+  _ (0x1470, queue_1_drop_count) \
+  _ (0x10b0, queue_2_rx_count) \
+  _ (0x14b0, queue_2_drop_count) \
+  _ (0x10f0, queue_3_rx_count) \
+  _ (0x14f0, queue_3_drop_count) \
+  _ (0x1130, queue_4_rx_count) \
+  _ (0x1530, queue_4_drop_count) \
+  _ (0x1170, queue_5_rx_count) \
+  _ (0x1570, queue_5_drop_count) \
+  _ (0x11b0, queue_6_rx_count) \
+  _ (0x15b0, queue_6_drop_count) \
+  _ (0x11f0, queue_7_rx_count) \
+  _ (0x15f0, queue_7_drop_count) \
+  _ (0x1230, queue_8_rx_count) \
+  _ (0x1630, queue_8_drop_count) \
+  _ (0x1270, queue_9_rx_count) \
+  _ (0x1670, queue_9_drop_count)
+
+
+
+
+typedef enum
+{
+#define _(a,f) IXGE_COUNTER_##f,
+#define _64(a,f) _(a,f)
+  foreach_ixge_counter
+#undef _
+#undef _64
+    IXGE_N_COUNTER,
+} ixge_counter_type_t;
+
+typedef struct
+{
+  u32 mdio_address;
+
+  /* 32 bit ID read from ID registers. */
+  u32 id;
+} ixge_phy_t;
+
+typedef struct
+{
+  /* Cache aligned descriptors. */
+  ixge_descriptor_t *descriptors;
+
+  /* Number of descriptors in table. */
+  u32 n_descriptors;
+
+  /* Software head and tail pointers into descriptor ring.
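foreach_ixge_counter above is an X-macro: a single table of (register offset, name) pairs that this patch expands three ways, into the ixge_counter_type_t enum, the reg_offsets[] array in ixge_update_counters(), and the names[] table used by format_ixge_device(). The technique in miniature (illustrative two-entry table):

#include <stdio.h>

#define foreach_demo_counter \
  _ (0x40d0, rx_total_packets) \
  _ (0x40d4, tx_total_packets)

typedef enum
{
#define _(offset, name) DEMO_COUNTER_##name,
  foreach_demo_counter
#undef _
  DEMO_N_COUNTER,
} demo_counter_t;

static const unsigned demo_offsets[] = {
#define _(offset, name) (offset),
  foreach_demo_counter
#undef _
};

static const char *demo_names[] = {
#define _(offset, name) #name,
  foreach_demo_counter
#undef _
};

int
main (void)
{
  int i;
  for (i = 0; i < DEMO_N_COUNTER; i++)
    printf ("%s @ 0x%x\n", demo_names[i], demo_offsets[i]);
  return 0;
}

Because the enum, the offsets and the names all come from one table, adding a counter is a one-line change and the three views can never drift apart.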
*/ + u32 head_index, tail_index; + + /* Index into dma_queues vector. */ + u32 queue_index; + + /* Buffer indices corresponding to each active descriptor. */ + u32 *descriptor_buffer_indices; + + union + { + struct + { + u32 *volatile head_index_write_back; + + u32 n_buffers_on_ring; + } tx; + + struct + { + /* Buffer indices to use to replenish each descriptor. */ + u32 *replenish_buffer_indices; + + vlib_node_runtime_t *node; + u32 next_index; + + u32 saved_start_of_packet_buffer_index; + + u32 saved_start_of_packet_next_index; + u32 saved_last_buffer_index; + + u32 is_start_of_packet; + + u32 n_descriptors_done_total; + + u32 n_descriptors_done_this_call; + + u32 n_bytes; + } rx; + }; +} ixge_dma_queue_t; + +#define foreach_ixge_pci_device_id \ + _ (82598, 0x10b6) \ + _ (82598_bx, 0x1508) \ + _ (82598af_dual_port, 0x10c6) \ + _ (82598af_single_port, 0x10c7) \ + _ (82598at, 0x10c8) \ + _ (82598at2, 0x150b) \ + _ (82598eb_sfp_lom, 0x10db) \ + _ (82598eb_cx4, 0x10dd) \ + _ (82598_cx4_dual_port, 0x10ec) \ + _ (82598_da_dual_port, 0x10f1) \ + _ (82598_sr_dual_port_em, 0x10e1) \ + _ (82598eb_xf_lr, 0x10f4) \ + _ (82599_kx4, 0x10f7) \ + _ (82599_kx4_mezz, 0x1514) \ + _ (82599_kr, 0x1517) \ + _ (82599_combo_backplane, 0x10f8) \ + _ (82599_cx4, 0x10f9) \ + _ (82599_sfp, 0x10fb) \ + _ (82599_backplane_fcoe, 0x152a) \ + _ (82599_sfp_fcoe, 0x1529) \ + _ (82599_sfp_em, 0x1507) \ + _ (82599_xaui_lom, 0x10fc) \ + _ (82599_t3_lom, 0x151c) \ + _ (x540t, 0x1528) + +typedef enum +{ +#define _(f,n) IXGE_##f = n, + foreach_ixge_pci_device_id +#undef _ +} ixge_pci_device_id_t; + +typedef struct +{ + /* registers */ + ixge_regs_t *regs; + + /* Specific next index when using dynamic redirection */ + u32 per_interface_next_index; + + /* PCI bus info. */ + vlib_pci_device_t pci_device; + + /* From PCI config space header. */ + ixge_pci_device_id_t device_id; + + u16 device_index; + + /* 0 or 1. */ + u16 pci_function; + + /* VLIB interface for this instance. */ + u32 vlib_hw_if_index, vlib_sw_if_index; + + ixge_dma_queue_t *dma_queues[VLIB_N_RX_TX]; + + /* Phy index (0 or 1) and address on MDI bus. */ + u32 phy_index; + ixge_phy_t phys[2]; + + /* Value of link_status register at last link change. */ + u32 link_status_at_last_link_change; + + i2c_bus_t i2c_bus; + sfp_eeprom_t sfp_eeprom; + + /* Counters. */ + u64 counters[IXGE_N_COUNTER], counters_last_clear[IXGE_N_COUNTER]; +} ixge_device_t; + +typedef struct +{ + vlib_main_t *vlib_main; + + /* Vector of devices. */ + ixge_device_t *devices; + + /* Descriptor ring sizes. */ + u32 n_descriptors[VLIB_N_RX_TX]; + + /* RX buffer size. Must be at least 1k; will be rounded to + next largest 1k size. */ + u32 n_bytes_in_rx_buffer; + + u32 n_descriptors_per_cache_line; + + u32 vlib_buffer_free_list_index; + + u32 process_node_index; + + /* Template and mask for initializing/validating TX descriptors. */ + ixge_tx_descriptor_t tx_descriptor_template, tx_descriptor_template_mask; + + /* Vector of buffers for which TX is done and can be freed. 
*/ + u32 *tx_buffers_pending_free; + + u32 *rx_buffers_to_add; + + f64 time_last_stats_update; +} ixge_main_t; + +ixge_main_t ixge_main; +vnet_device_class_t ixge_device_class; + +typedef enum +{ + IXGE_RX_NEXT_IP4_INPUT, + IXGE_RX_NEXT_IP6_INPUT, + IXGE_RX_NEXT_ETHERNET_INPUT, + IXGE_RX_NEXT_DROP, + IXGE_RX_N_NEXT, +} ixge_rx_next_t; + +void ixge_set_next_node (ixge_rx_next_t, char *); + +#endif /* included_ixge_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 6ba82584..a517a597 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -44,6 +44,7 @@ */ #include +#include uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, @@ -583,6 +584,11 @@ alloc_from_free_list (vlib_main_t * vm, dst = alloc_buffers; + /* wait with buffer memory allocation as long as possible + in case external buffer manager takes over */ + if (PREDICT_FALSE (vm->os_physmem_alloc_aligned == 0)) + unix_physmem_init (vm, 0 /* fail_if_physical_memory_not_present */ ); + n_filled = fill_free_list (vm, free_list, n_alloc_buffers); if (n_filled == 0) return 0; diff --git a/src/vnet.am b/src/vnet.am index ca24c9c5..bc9655cc 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -114,14 +114,16 @@ libvnet_la_SOURCES += \ vnet/ethernet/init.c \ vnet/ethernet/interface.c \ vnet/ethernet/node.c \ - vnet/ethernet/pg.c + vnet/ethernet/pg.c \ + vnet/ethernet/sfp.c nobase_include_HEADERS += \ vnet/ethernet/arp_packet.h \ vnet/ethernet/error.def \ vnet/ethernet/ethernet.h \ vnet/ethernet/packet.h \ - vnet/ethernet/types.def + vnet/ethernet/types.def \ + vnet/ethernet/sfp.h ######################################## # Layer 2 protocol: Ethernet bridging @@ -792,14 +794,6 @@ nobase_include_HEADERS += \ vnet/pg/pg.h \ vnet/pg/edit.h -if !WITH_DPDK -libvnet_la_SOURCES += \ - vnet/devices/nic/ixge.c \ - vnet/devices/nic/ixge.h \ - vnet/devices/nic/sfp.c \ - vnet/devices/nic/sfp.h -endif - ######################################## # virtio ######################################## diff --git a/src/vnet/devices/nic/ixge.c b/src/vnet/devices/nic/ixge.c deleted file mode 100644 index d4c4c6b7..00000000 --- a/src/vnet/devices/nic/ixge.c +++ /dev/null @@ -1,2938 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * WARNING! - * This driver is not intended for production use and it is unsupported. - * It is provided for educational use only. - * Please use supported DPDK driver instead. - */ - -#if __x86_64__ -#include - -#ifndef CLIB_HAVE_VEC128 -#warning HACK: ixge driver wont really work, missing u32x4 -typedef unsigned long long u32x4; -#endif - -#include -#include -#include -#include -#include -#include - -#define IXGE_ALWAYS_POLL 0 - -#define EVENT_SET_FLAGS 0 -#define IXGE_HWBP_RACE_ELOG 0 - -#define PCI_VENDOR_ID_INTEL 0x8086 - -/* 10 GIG E (XGE) PHY IEEE 802.3 clause 45 definitions. 
*/ -#define XGE_PHY_DEV_TYPE_PMA_PMD 1 -#define XGE_PHY_DEV_TYPE_PHY_XS 4 -#define XGE_PHY_ID1 0x2 -#define XGE_PHY_ID2 0x3 -#define XGE_PHY_CONTROL 0x0 -#define XGE_PHY_CONTROL_RESET (1 << 15) - -ixge_main_t ixge_main; -static vlib_node_registration_t ixge_input_node; -static vlib_node_registration_t ixge_process_node; - -static void -ixge_semaphore_get (ixge_device_t * xd) -{ - ixge_main_t *xm = &ixge_main; - vlib_main_t *vm = xm->vlib_main; - ixge_regs_t *r = xd->regs; - u32 i; - - i = 0; - while (!(r->software_semaphore & (1 << 0))) - { - if (i > 0) - vlib_process_suspend (vm, 100e-6); - i++; - } - do - { - r->software_semaphore |= 1 << 1; - } - while (!(r->software_semaphore & (1 << 1))); -} - -static void -ixge_semaphore_release (ixge_device_t * xd) -{ - ixge_regs_t *r = xd->regs; - r->software_semaphore &= ~3; -} - -static void -ixge_software_firmware_sync (ixge_device_t * xd, u32 sw_mask) -{ - ixge_main_t *xm = &ixge_main; - vlib_main_t *vm = xm->vlib_main; - ixge_regs_t *r = xd->regs; - u32 fw_mask = sw_mask << 5; - u32 m, done = 0; - - while (!done) - { - ixge_semaphore_get (xd); - m = r->software_firmware_sync; - done = (m & fw_mask) == 0; - if (done) - r->software_firmware_sync = m | sw_mask; - ixge_semaphore_release (xd); - if (!done) - vlib_process_suspend (vm, 10e-3); - } -} - -static void -ixge_software_firmware_sync_release (ixge_device_t * xd, u32 sw_mask) -{ - ixge_regs_t *r = xd->regs; - ixge_semaphore_get (xd); - r->software_firmware_sync &= ~sw_mask; - ixge_semaphore_release (xd); -} - -u32 -ixge_read_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, - u32 v, u32 is_read) -{ - ixge_regs_t *r = xd->regs; - const u32 busy_bit = 1 << 30; - u32 x; - - ASSERT (xd->phy_index < 2); - ixge_software_firmware_sync (xd, 1 << (1 + xd->phy_index)); - - ASSERT (reg_index < (1 << 16)); - ASSERT (dev_type < (1 << 5)); - if (!is_read) - r->xge_mac.phy_data = v; - - /* Address cycle. */ - x = - reg_index | (dev_type << 16) | (xd-> - phys[xd->phy_index].mdio_address << 21); - r->xge_mac.phy_command = x | busy_bit; - /* Busy wait timed to take 28e-6 secs. No suspend. */ - while (r->xge_mac.phy_command & busy_bit) - ; - - r->xge_mac.phy_command = x | ((is_read ? 2 : 1) << 26) | busy_bit; - while (r->xge_mac.phy_command & busy_bit) - ; - - if (is_read) - v = r->xge_mac.phy_data >> 16; - - ixge_software_firmware_sync_release (xd, 1 << (1 + xd->phy_index)); - - return v; -} - -static u32 -ixge_read_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index) -{ - return ixge_read_write_phy_reg (xd, dev_type, reg_index, 0, /* is_read */ - 1); -} - -static void -ixge_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, u32 v) -{ - (void) ixge_read_write_phy_reg (xd, dev_type, reg_index, v, /* is_read */ - 0); -} - -static void -ixge_i2c_put_bits (i2c_bus_t * b, int scl, int sda) -{ - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data); - u32 v; - - v = 0; - v |= (sda != 0) << 3; - v |= (scl != 0) << 1; - xd->regs->i2c_control = v; -} - -static void -ixge_i2c_get_bits (i2c_bus_t * b, int *scl, int *sda) -{ - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data); - u32 v; - - v = xd->regs->i2c_control; - *sda = (v & (1 << 2)) != 0; - *scl = (v & (1 << 0)) != 0; -} - -static u16 -ixge_read_eeprom (ixge_device_t * xd, u32 address) -{ - ixge_regs_t *r = xd->regs; - u32 v; - r->eeprom_read = (( /* start bit */ (1 << 0)) | (address << 2)); - /* Wait for done bit. 
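   The eeprom_read register packs the whole handshake into one word:
   software writes the start bit and a word address, hardware sets the
   done bit and returns the data in the upper half.  Distilled from the
   code below, where bit 0 is the start bit, the address goes in bits 2
   and up, bit 1 is the done flag, and the 16 data bits come back in
   v >> 16 (addresses are 16-bit word indices, as the callers' ++a
   stepping shows):

     u32 v;
     r->eeprom_read = (1 << 0) | (address << 2);
     while (!((v = r->eeprom_read) & (1 << 1)))
       ;
     return v >> 16;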
*/ - while (!((v = r->eeprom_read) & (1 << 1))) - ; - return v >> 16; -} - -static void -ixge_sfp_enable_disable_laser (ixge_device_t * xd, uword enable) -{ - u32 tx_disable_bit = 1 << 3; - if (enable) - xd->regs->sdp_control &= ~tx_disable_bit; - else - xd->regs->sdp_control |= tx_disable_bit; -} - -static void -ixge_sfp_enable_disable_10g (ixge_device_t * xd, uword enable) -{ - u32 is_10g_bit = 1 << 5; - if (enable) - xd->regs->sdp_control |= is_10g_bit; - else - xd->regs->sdp_control &= ~is_10g_bit; -} - -static clib_error_t * -ixge_sfp_phy_init_from_eeprom (ixge_device_t * xd, u16 sfp_type) -{ - u16 a, id, reg_values_addr = 0; - - a = ixge_read_eeprom (xd, 0x2b); - if (a == 0 || a == 0xffff) - return clib_error_create ("no init sequence in eeprom"); - - while (1) - { - id = ixge_read_eeprom (xd, ++a); - if (id == 0xffff) - break; - reg_values_addr = ixge_read_eeprom (xd, ++a); - if (id == sfp_type) - break; - } - if (id != sfp_type) - return clib_error_create ("failed to find id 0x%x", sfp_type); - - ixge_software_firmware_sync (xd, 1 << 3); - while (1) - { - u16 v = ixge_read_eeprom (xd, ++reg_values_addr); - if (v == 0xffff) - break; - xd->regs->core_analog_config = v; - } - ixge_software_firmware_sync_release (xd, 1 << 3); - - /* Make sure laser is off. We'll turn on the laser when - the interface is brought up. */ - ixge_sfp_enable_disable_laser (xd, /* enable */ 0); - ixge_sfp_enable_disable_10g (xd, /* is_10g */ 1); - - return 0; -} - -static void -ixge_sfp_device_up_down (ixge_device_t * xd, uword is_up) -{ - u32 v; - - if (is_up) - { - /* pma/pmd 10g serial SFI. */ - xd->regs->xge_mac.auto_negotiation_control2 &= ~(3 << 16); - xd->regs->xge_mac.auto_negotiation_control2 |= 2 << 16; - - v = xd->regs->xge_mac.auto_negotiation_control; - v &= ~(7 << 13); - v |= (0 << 13); - /* Restart autoneg. */ - v |= (1 << 12); - xd->regs->xge_mac.auto_negotiation_control = v; - - while (!(xd->regs->xge_mac.link_partner_ability[0] & 0xf0000)) - ; - - v = xd->regs->xge_mac.auto_negotiation_control; - - /* link mode 10g sfi serdes */ - v &= ~(7 << 13); - v |= (3 << 13); - - /* Restart autoneg. */ - v |= (1 << 12); - xd->regs->xge_mac.auto_negotiation_control = v; - - xd->regs->xge_mac.link_status; - } - - ixge_sfp_enable_disable_laser (xd, /* enable */ is_up); - - /* Give time for link partner to notice that we're up. */ - if (is_up && vlib_in_process_context (vlib_get_main ())) - { - vlib_process_suspend (vlib_get_main (), 300e-3); - } -} - -always_inline ixge_dma_regs_t * -get_dma_regs (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 qi) -{ - ixge_regs_t *r = xd->regs; - ASSERT (qi < 128); - if (rt == VLIB_RX) - return qi < 64 ? 
&r->rx_dma0[qi] : &r->rx_dma1[qi - 64]; - else - return &r->tx_dma[qi]; -} - -static clib_error_t * -ixge_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) -{ - vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); - uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, hif->dev_instance); - ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0); - - if (is_up) - { - xd->regs->rx_enable |= 1; - xd->regs->tx_dma_control |= 1; - dr->control |= 1 << 25; - while (!(dr->control & (1 << 25))) - ; - } - else - { - xd->regs->rx_enable &= ~1; - xd->regs->tx_dma_control &= ~1; - } - - ixge_sfp_device_up_down (xd, is_up); - - return /* no error */ 0; -} - -static void -ixge_sfp_phy_init (ixge_device_t * xd) -{ - ixge_phy_t *phy = xd->phys + xd->phy_index; - i2c_bus_t *ib = &xd->i2c_bus; - - ib->private_data = xd->device_index; - ib->put_bits = ixge_i2c_put_bits; - ib->get_bits = ixge_i2c_get_bits; - vlib_i2c_init (ib); - - vlib_i2c_read_eeprom (ib, 0x50, 0, 128, (u8 *) & xd->sfp_eeprom); - - if (vlib_i2c_bus_timed_out (ib) || !sfp_eeprom_is_valid (&xd->sfp_eeprom)) - xd->sfp_eeprom.id = SFP_ID_unknown; - else - { - /* FIXME 5 => SR/LR eeprom ID. */ - clib_error_t *e = - ixge_sfp_phy_init_from_eeprom (xd, 5 + xd->pci_function); - if (e) - clib_error_report (e); - } - - phy->mdio_address = ~0; -} - -static void -ixge_phy_init (ixge_device_t * xd) -{ - ixge_main_t *xm = &ixge_main; - vlib_main_t *vm = xm->vlib_main; - ixge_phy_t *phy = xd->phys + xd->phy_index; - - switch (xd->device_id) - { - case IXGE_82599_sfp: - case IXGE_82599_sfp_em: - case IXGE_82599_sfp_fcoe: - /* others? */ - return ixge_sfp_phy_init (xd); - - default: - break; - } - - /* Probe address of phy. */ - { - u32 i, v; - - phy->mdio_address = ~0; - for (i = 0; i < 32; i++) - { - phy->mdio_address = i; - v = ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1); - if (v != 0xffff && v != 0) - break; - } - - /* No PHY found? */ - if (i >= 32) - return; - } - - phy->id = - ((ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1) << 16) | - ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID2)); - - { - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d, phy id 0x%d mdio address %d",.format_args = "i4i4i4",}; - struct - { - u32 instance, id, address; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->id = phy->id; - ed->address = phy->mdio_address; - } - - /* Reset phy. */ - ixge_write_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL, - XGE_PHY_CONTROL_RESET); - - /* Wait for self-clearning reset bit to clear. */ - do - { - vlib_process_suspend (vm, 1e-3); - } - while (ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL) & - XGE_PHY_CONTROL_RESET); -} - -static u8 * -format_ixge_rx_from_hw_descriptor (u8 * s, va_list * va) -{ - ixge_rx_from_hw_descriptor_t *d = - va_arg (*va, ixge_rx_from_hw_descriptor_t *); - u32 s0 = d->status[0], s2 = d->status[2]; - u32 is_ip4, is_ip6, is_ip, is_tcp, is_udp; - uword indent = format_get_indent (s); - - s = format (s, "%s-owned", - (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE) ? 
"sw" : - "hw"); - s = - format (s, ", length this descriptor %d, l3 offset %d", - d->n_packet_bytes_this_descriptor, - IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s0)); - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) - s = format (s, ", end-of-packet"); - - s = format (s, "\n%U", format_white_space, indent); - - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR) - s = format (s, "layer2 error"); - - if (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2) - { - s = format (s, "layer 2 type %d", (s0 & 0x1f)); - return s; - } - - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN) - s = format (s, "vlan header 0x%x\n%U", d->vlan_tag, - format_white_space, indent); - - if ((is_ip4 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4))) - { - s = format (s, "ip4%s", - (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT) ? " options" : - ""); - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED) - s = format (s, " checksum %s", - (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) ? - "bad" : "ok"); - } - if ((is_ip6 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6))) - s = format (s, "ip6%s", - (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT) ? " extended" : - ""); - is_tcp = is_udp = 0; - if ((is_ip = (is_ip4 | is_ip6))) - { - is_tcp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP) != 0; - is_udp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP) != 0; - if (is_tcp) - s = format (s, ", tcp"); - if (is_udp) - s = format (s, ", udp"); - } - - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED) - s = format (s, ", tcp checksum %s", - (s2 & IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR) ? "bad" : - "ok"); - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED) - s = format (s, ", udp checksum %s", - (s2 & IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR) ? "bad" : - "ok"); - - return s; -} - -static u8 * -format_ixge_tx_descriptor (u8 * s, va_list * va) -{ - ixge_tx_descriptor_t *d = va_arg (*va, ixge_tx_descriptor_t *); - u32 s0 = d->status0, s1 = d->status1; - uword indent = format_get_indent (s); - u32 v; - - s = format (s, "buffer 0x%Lx, %d packet bytes, %d bytes this buffer", - d->buffer_address, s1 >> 14, d->n_bytes_this_buffer); - - s = format (s, "\n%U", format_white_space, indent); - - if ((v = (s0 >> 0) & 3)) - s = format (s, "reserved 0x%x, ", v); - - if ((v = (s0 >> 2) & 3)) - s = format (s, "mac 0x%x, ", v); - - if ((v = (s0 >> 4) & 0xf) != 3) - s = format (s, "type 0x%x, ", v); - - s = format (s, "%s%s%s%s%s%s%s%s", - (s0 & (1 << 8)) ? "eop, " : "", - (s0 & (1 << 9)) ? "insert-fcs, " : "", - (s0 & (1 << 10)) ? "reserved26, " : "", - (s0 & (1 << 11)) ? "report-status, " : "", - (s0 & (1 << 12)) ? "reserved28, " : "", - (s0 & (1 << 13)) ? "is-advanced, " : "", - (s0 & (1 << 14)) ? "vlan-enable, " : "", - (s0 & (1 << 15)) ? "tx-segmentation, " : ""); - - if ((v = s1 & 0xf) != 0) - s = format (s, "status 0x%x, ", v); - - if ((v = (s1 >> 4) & 0xf)) - s = format (s, "context 0x%x, ", v); - - if ((v = (s1 >> 8) & 0x3f)) - s = format (s, "options 0x%x, ", v); - - return s; -} - -typedef struct -{ - ixge_descriptor_t before, after; - - u32 buffer_index; - - u16 device_index; - - u8 queue_index; - - u8 is_start_of_packet; - - /* Copy of VLIB buffer; packet data stored in pre_data. 
*/ - vlib_buffer_t buffer; -} ixge_rx_dma_trace_t; - -static u8 * -format_ixge_rx_dma_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - vlib_node_t *node = va_arg (*va, vlib_node_t *); - vnet_main_t *vnm = vnet_get_main (); - ixge_rx_dma_trace_t *t = va_arg (*va, ixge_rx_dma_trace_t *); - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index); - format_function_t *f; - uword indent = format_get_indent (s); - - { - vnet_sw_interface_t *sw = - vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); - s = - format (s, "%U rx queue %d", format_vnet_sw_interface_name, vnm, sw, - t->queue_index); - } - - s = format (s, "\n%Ubefore: %U", - format_white_space, indent, - format_ixge_rx_from_hw_descriptor, &t->before); - s = format (s, "\n%Uafter : head/tail address 0x%Lx/0x%Lx", - format_white_space, indent, - t->after.rx_to_hw.head_address, t->after.rx_to_hw.tail_address); - - s = format (s, "\n%Ubuffer 0x%x: %U", - format_white_space, indent, - t->buffer_index, format_vlib_buffer, &t->buffer); - - s = format (s, "\n%U", format_white_space, indent); - - f = node->format_buffer; - if (!f || !t->is_start_of_packet) - f = format_hex_bytes; - s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); - - return s; -} - -#define foreach_ixge_error \ - _ (none, "no error") \ - _ (tx_full_drops, "tx ring full drops") \ - _ (ip4_checksum_error, "ip4 checksum errors") \ - _ (rx_alloc_fail, "rx buf alloc from free list failed") \ - _ (rx_alloc_no_physmem, "rx buf alloc failed no physmem") - -typedef enum -{ -#define _(f,s) IXGE_ERROR_##f, - foreach_ixge_error -#undef _ - IXGE_N_ERROR, -} ixge_error_t; - -always_inline void -ixge_rx_next_and_error_from_status_x1 (ixge_device_t * xd, - u32 s00, u32 s02, - u8 * next0, u8 * error0, u32 * flags0) -{ - u8 is0_ip4, is0_ip6, n0, e0; - u32 f0; - - e0 = IXGE_ERROR_none; - n0 = IXGE_RX_NEXT_ETHERNET_INPUT; - - is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; - n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0; - - e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) - ? IXGE_ERROR_ip4_checksum_error : e0); - - is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; - n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0; - - n0 = (xd->per_interface_next_index != ~0) ? - xd->per_interface_next_index : n0; - - /* Check for error. */ - n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0; - - f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED - | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) - ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); - - f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR - | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) - ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); - - *error0 = e0; - *next0 = n0; - *flags0 = f0; -} - -always_inline void -ixge_rx_next_and_error_from_status_x2 (ixge_device_t * xd, - u32 s00, u32 s02, - u32 s10, u32 s12, - u8 * next0, u8 * error0, u32 * flags0, - u8 * next1, u8 * error1, u32 * flags1) -{ - u8 is0_ip4, is0_ip6, n0, e0; - u8 is1_ip4, is1_ip6, n1, e1; - u32 f0, f1; - - e0 = e1 = IXGE_ERROR_none; - n0 = n1 = IXGE_RX_NEXT_IP4_INPUT; - - is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; - is1_ip4 = s12 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; - - n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0; - n1 = is1_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n1; - - e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) - ? 
IXGE_ERROR_ip4_checksum_error : e0); - e1 = (is1_ip4 && (s12 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) - ? IXGE_ERROR_ip4_checksum_error : e1); - - is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; - is1_ip6 = s10 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; - - n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0; - n1 = is1_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n1; - - n0 = (xd->per_interface_next_index != ~0) ? - xd->per_interface_next_index : n0; - n1 = (xd->per_interface_next_index != ~0) ? - xd->per_interface_next_index : n1; - - /* Check for error. */ - n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0; - n1 = e1 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n1; - - *error0 = e0; - *error1 = e1; - - *next0 = n0; - *next1 = n1; - - f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED - | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) - ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); - f1 = ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED - | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) - ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); - - f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR - | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) - ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); - f1 |= ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR - | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) - ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); - - *flags0 = f0; - *flags1 = f1; -} - -static void -ixge_rx_trace (ixge_main_t * xm, - ixge_device_t * xd, - ixge_dma_queue_t * dq, - ixge_descriptor_t * before_descriptors, - u32 * before_buffers, - ixge_descriptor_t * after_descriptors, uword n_descriptors) -{ - vlib_main_t *vm = xm->vlib_main; - vlib_node_runtime_t *node = dq->rx.node; - ixge_rx_from_hw_descriptor_t *bd; - ixge_rx_to_hw_descriptor_t *ad; - u32 *b, n_left, is_sop, next_index_sop; - - n_left = n_descriptors; - b = before_buffers; - bd = &before_descriptors->rx_from_hw; - ad = &after_descriptors->rx_to_hw; - is_sop = dq->rx.is_start_of_packet; - next_index_sop = dq->rx.saved_start_of_packet_next_index; - - while (n_left >= 2) - { - u32 bi0, bi1, flags0, flags1; - vlib_buffer_t *b0, *b1; - ixge_rx_dma_trace_t *t0, *t1; - u8 next0, error0, next1, error1; - - bi0 = b[0]; - bi1 = b[1]; - n_left -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - ixge_rx_next_and_error_from_status_x2 (xd, - bd[0].status[0], bd[0].status[2], - bd[1].status[0], bd[1].status[2], - &next0, &error0, &flags0, - &next1, &error1, &flags1); - - next_index_sop = is_sop ? next0 : next_index_sop; - vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0); - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); - t0->is_start_of_packet = is_sop; - is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - next_index_sop = is_sop ? 
next1 : next_index_sop; - vlib_trace_buffer (vm, node, next_index_sop, b1, /* follow_chain */ 0); - t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); - t1->is_start_of_packet = is_sop; - is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - t0->queue_index = dq->queue_index; - t1->queue_index = dq->queue_index; - t0->device_index = xd->device_index; - t1->device_index = xd->device_index; - t0->before.rx_from_hw = bd[0]; - t1->before.rx_from_hw = bd[1]; - t0->after.rx_to_hw = ad[0]; - t1->after.rx_to_hw = ad[1]; - t0->buffer_index = bi0; - t1->buffer_index = bi1; - memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data)); - memcpy (t0->buffer.pre_data, b0->data + b0->current_data, - sizeof (t0->buffer.pre_data)); - memcpy (t1->buffer.pre_data, b1->data + b1->current_data, - sizeof (t1->buffer.pre_data)); - - b += 2; - bd += 2; - ad += 2; - } - - while (n_left >= 1) - { - u32 bi0, flags0; - vlib_buffer_t *b0; - ixge_rx_dma_trace_t *t0; - u8 next0, error0; - - bi0 = b[0]; - n_left -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - ixge_rx_next_and_error_from_status_x1 (xd, - bd[0].status[0], bd[0].status[2], - &next0, &error0, &flags0); - - next_index_sop = is_sop ? next0 : next_index_sop; - vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0); - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); - t0->is_start_of_packet = is_sop; - is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - t0->queue_index = dq->queue_index; - t0->device_index = xd->device_index; - t0->before.rx_from_hw = bd[0]; - t0->after.rx_to_hw = ad[0]; - t0->buffer_index = bi0; - memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - memcpy (t0->buffer.pre_data, b0->data + b0->current_data, - sizeof (t0->buffer.pre_data)); - - b += 1; - bd += 1; - ad += 1; - } -} - -typedef struct -{ - ixge_tx_descriptor_t descriptor; - - u32 buffer_index; - - u16 device_index; - - u8 queue_index; - - u8 is_start_of_packet; - - /* Copy of VLIB buffer; packet data stored in pre_data. 
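   A trace record snapshots the descriptor plus a copy of the buffer
   metadata, with the first payload bytes staged into pre_data so the
   trace formatter can still print headers long after the real buffer
   has been recycled.  The capture pattern, exactly as used by
   ixge_tx_trace() and ixge_rx_trace() in this file (t0 and b0 as in
   those functions):

     memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
     memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
             sizeof (t0->buffer.pre_data));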
*/ - vlib_buffer_t buffer; -} ixge_tx_dma_trace_t; - -static u8 * -format_ixge_tx_dma_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - ixge_tx_dma_trace_t *t = va_arg (*va, ixge_tx_dma_trace_t *); - vnet_main_t *vnm = vnet_get_main (); - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index); - format_function_t *f; - uword indent = format_get_indent (s); - - { - vnet_sw_interface_t *sw = - vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); - s = - format (s, "%U tx queue %d", format_vnet_sw_interface_name, vnm, sw, - t->queue_index); - } - - s = format (s, "\n%Udescriptor: %U", - format_white_space, indent, - format_ixge_tx_descriptor, &t->descriptor); - - s = format (s, "\n%Ubuffer 0x%x: %U", - format_white_space, indent, - t->buffer_index, format_vlib_buffer, &t->buffer); - - s = format (s, "\n%U", format_white_space, indent); - - f = format_ethernet_header_with_length; - if (!f || !t->is_start_of_packet) - f = format_hex_bytes; - s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); - - return s; -} - -typedef struct -{ - vlib_node_runtime_t *node; - - u32 is_start_of_packet; - - u32 n_bytes_in_packet; - - ixge_tx_descriptor_t *start_of_packet_descriptor; -} ixge_tx_state_t; - -static void -ixge_tx_trace (ixge_main_t * xm, - ixge_device_t * xd, - ixge_dma_queue_t * dq, - ixge_tx_state_t * tx_state, - ixge_tx_descriptor_t * descriptors, - u32 * buffers, uword n_descriptors) -{ - vlib_main_t *vm = xm->vlib_main; - vlib_node_runtime_t *node = tx_state->node; - ixge_tx_descriptor_t *d; - u32 *b, n_left, is_sop; - - n_left = n_descriptors; - b = buffers; - d = descriptors; - is_sop = tx_state->is_start_of_packet; - - while (n_left >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - ixge_tx_dma_trace_t *t0, *t1; - - bi0 = b[0]; - bi1 = b[1]; - n_left -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); - t0->is_start_of_packet = is_sop; - is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); - t1->is_start_of_packet = is_sop; - is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - t0->queue_index = dq->queue_index; - t1->queue_index = dq->queue_index; - t0->device_index = xd->device_index; - t1->device_index = xd->device_index; - t0->descriptor = d[0]; - t1->descriptor = d[1]; - t0->buffer_index = bi0; - t1->buffer_index = bi1; - memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data)); - memcpy (t0->buffer.pre_data, b0->data + b0->current_data, - sizeof (t0->buffer.pre_data)); - memcpy (t1->buffer.pre_data, b1->data + b1->current_data, - sizeof (t1->buffer.pre_data)); - - b += 2; - d += 2; - } - - while (n_left >= 1) - { - u32 bi0; - vlib_buffer_t *b0; - ixge_tx_dma_trace_t *t0; - - bi0 = b[0]; - n_left -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); - t0->is_start_of_packet = is_sop; - is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - t0->queue_index = dq->queue_index; - t0->device_index = xd->device_index; - t0->descriptor = d[0]; - t0->buffer_index = bi0; - memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - memcpy (t0->buffer.pre_data, b0->data + b0->current_data, - sizeof (t0->buffer.pre_data)); - - b += 1; - d += 1; - } -} 
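/*
 * A worked example of the ring index arithmetic defined below, with
 * n_descriptors = 8.  Both helpers assume their operands are already
 * smaller than n_descriptors, so a single conditional add or subtract
 * stands in for a modulo:
 *
 *   ixge_ring_sub (q, 6, 2):  d = 2 - 6 = -4 < 0, result 8 - 4 = 4,
 *     i.e. four steps forward with wrap: 6 -> 7 -> 0 -> 1 -> 2.
 *   ixge_ring_add (q, 6, 4):  d = 6 + 4 = 10 >= 8, result 10 - 8 = 2.
 */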
- -always_inline uword -ixge_ring_sub (ixge_dma_queue_t * q, u32 i0, u32 i1) -{ - i32 d = i1 - i0; - ASSERT (i0 < q->n_descriptors); - ASSERT (i1 < q->n_descriptors); - return d < 0 ? q->n_descriptors + d : d; -} - -always_inline uword -ixge_ring_add (ixge_dma_queue_t * q, u32 i0, u32 i1) -{ - u32 d = i0 + i1; - ASSERT (i0 < q->n_descriptors); - ASSERT (i1 < q->n_descriptors); - d -= d >= q->n_descriptors ? q->n_descriptors : 0; - return d; -} - -always_inline uword -ixge_tx_descriptor_matches_template (ixge_main_t * xm, - ixge_tx_descriptor_t * d) -{ - u32 cmp; - - cmp = ((d->status0 & xm->tx_descriptor_template_mask.status0) - ^ xm->tx_descriptor_template.status0); - if (cmp) - return 0; - cmp = ((d->status1 & xm->tx_descriptor_template_mask.status1) - ^ xm->tx_descriptor_template.status1); - if (cmp) - return 0; - - return 1; -} - -static uword -ixge_tx_no_wrap (ixge_main_t * xm, - ixge_device_t * xd, - ixge_dma_queue_t * dq, - u32 * buffers, - u32 start_descriptor_index, - u32 n_descriptors, ixge_tx_state_t * tx_state) -{ - vlib_main_t *vm = xm->vlib_main; - ixge_tx_descriptor_t *d, *d_sop; - u32 n_left = n_descriptors; - u32 *to_free = vec_end (xm->tx_buffers_pending_free); - u32 *to_tx = - vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index); - u32 is_sop = tx_state->is_start_of_packet; - u32 len_sop = tx_state->n_bytes_in_packet; - u16 template_status = xm->tx_descriptor_template.status0; - u32 descriptor_prefetch_rotor = 0; - - ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors); - d = &dq->descriptors[start_descriptor_index].tx; - d_sop = is_sop ? d : tx_state->start_of_packet_descriptor; - - while (n_left >= 4) - { - vlib_buffer_t *b0, *b1; - u32 bi0, fi0, len0; - u32 bi1, fi1, len1; - u8 is_eop0, is_eop1; - - /* Prefetch next iteration. */ - vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD); - vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD); - - if ((descriptor_prefetch_rotor & 0x3) == 0) - CLIB_PREFETCH (d + 4, CLIB_CACHE_LINE_BYTES, STORE); - - descriptor_prefetch_rotor += 2; - - bi0 = buffers[0]; - bi1 = buffers[1]; - - to_free[0] = fi0 = to_tx[0]; - to_tx[0] = bi0; - to_free += fi0 != 0; - - to_free[0] = fi1 = to_tx[1]; - to_tx[1] = bi1; - to_free += fi1 != 0; - - buffers += 2; - n_left -= 2; - to_tx += 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - is_eop1 = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - len0 = b0->current_length; - len1 = b1->current_length; - - ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0)); - ASSERT (ixge_tx_descriptor_matches_template (xm, d + 1)); - - d[0].buffer_address = - vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data; - d[1].buffer_address = - vlib_get_buffer_data_physical_address (vm, bi1) + b1->current_data; - - d[0].n_bytes_this_buffer = len0; - d[1].n_bytes_this_buffer = len1; - - d[0].status0 = - template_status | (is_eop0 << - IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); - d[1].status0 = - template_status | (is_eop1 << - IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); - - len_sop = (is_sop ? 0 : len_sop) + len0; - d_sop[0].status1 = - IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); - d += 1; - d_sop = is_eop0 ? d : d_sop; - - is_sop = is_eop0; - - len_sop = (is_sop ? 0 : len_sop) + len1; - d_sop[0].status1 = - IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); - d += 1; - d_sop = is_eop1 ? 
d : d_sop; - - is_sop = is_eop1; - } - - while (n_left > 0) - { - vlib_buffer_t *b0; - u32 bi0, fi0, len0; - u8 is_eop0; - - bi0 = buffers[0]; - - to_free[0] = fi0 = to_tx[0]; - to_tx[0] = bi0; - to_free += fi0 != 0; - - buffers += 1; - n_left -= 1; - to_tx += 1; - - b0 = vlib_get_buffer (vm, bi0); - - is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - len0 = b0->current_length; - - ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0)); - - d[0].buffer_address = - vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data; - - d[0].n_bytes_this_buffer = len0; - - d[0].status0 = - template_status | (is_eop0 << - IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); - - len_sop = (is_sop ? 0 : len_sop) + len0; - d_sop[0].status1 = - IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); - d += 1; - d_sop = is_eop0 ? d : d_sop; - - is_sop = is_eop0; - } - - if (tx_state->node->flags & VLIB_NODE_FLAG_TRACE) - { - to_tx = - vec_elt_at_index (dq->descriptor_buffer_indices, - start_descriptor_index); - ixge_tx_trace (xm, xd, dq, tx_state, - &dq->descriptors[start_descriptor_index].tx, to_tx, - n_descriptors); - } - - _vec_len (xm->tx_buffers_pending_free) = - to_free - xm->tx_buffers_pending_free; - - /* When we are done d_sop can point to end of ring. Wrap it if so. */ - { - ixge_tx_descriptor_t *d_start = &dq->descriptors[0].tx; - - ASSERT (d_sop - d_start <= dq->n_descriptors); - d_sop = d_sop - d_start == dq->n_descriptors ? d_start : d_sop; - } - - tx_state->is_start_of_packet = is_sop; - tx_state->start_of_packet_descriptor = d_sop; - tx_state->n_bytes_in_packet = len_sop; - - return n_descriptors; -} - -static uword -ixge_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * f) -{ - ixge_main_t *xm = &ixge_main; - vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - ixge_device_t *xd = vec_elt_at_index (xm->devices, rd->dev_instance); - ixge_dma_queue_t *dq; - u32 *from, n_left_tx, n_descriptors_to_tx, n_tail_drop; - u32 queue_index = 0; /* fixme parameter */ - ixge_tx_state_t tx_state; - - tx_state.node = node; - tx_state.is_start_of_packet = 1; - tx_state.start_of_packet_descriptor = 0; - tx_state.n_bytes_in_packet = 0; - - from = vlib_frame_vector_args (f); - - dq = vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index); - - dq->head_index = dq->tx.head_index_write_back[0]; - - /* Since head == tail means ring is empty we can send up to dq->n_descriptors - 1. 
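   Worked example: with n_descriptors = 512, head = 10 and tail = 9 the
   ring already carries ixge_ring_sub (dq, 10, 9) = 511 in-flight
   descriptors and is full; accepting one more would make tail == head
   again, indistinguishable from empty.  Hence the usable capacity
   computed below is n_descriptors - 1.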
*/ - n_left_tx = dq->n_descriptors - 1; - n_left_tx -= ixge_ring_sub (dq, dq->head_index, dq->tail_index); - - _vec_len (xm->tx_buffers_pending_free) = 0; - - n_descriptors_to_tx = f->n_vectors; - n_tail_drop = 0; - if (PREDICT_FALSE (n_descriptors_to_tx > n_left_tx)) - { - i32 i, n_ok, i_eop, i_sop; - - i_sop = i_eop = ~0; - for (i = n_left_tx - 1; i >= 0; i--) - { - vlib_buffer_t *b = vlib_get_buffer (vm, from[i]); - if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - if (i_sop != ~0 && i_eop != ~0) - break; - i_eop = i; - i_sop = i + 1; - } - } - if (i == 0) - n_ok = 0; - else - n_ok = i_eop + 1; - - { - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d, ring full to tx %d head %d tail %d",.format_args = - "i2i2i2i2",}; - struct - { - u16 instance, to_tx, head, tail; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->to_tx = n_descriptors_to_tx; - ed->head = dq->head_index; - ed->tail = dq->tail_index; - } - - if (n_ok < n_descriptors_to_tx) - { - n_tail_drop = n_descriptors_to_tx - n_ok; - vec_add (xm->tx_buffers_pending_free, from + n_ok, n_tail_drop); - vlib_error_count (vm, ixge_input_node.index, - IXGE_ERROR_tx_full_drops, n_tail_drop); - } - - n_descriptors_to_tx = n_ok; - } - - dq->tx.n_buffers_on_ring += n_descriptors_to_tx; - - /* Process from tail to end of descriptor ring. */ - if (n_descriptors_to_tx > 0 && dq->tail_index < dq->n_descriptors) - { - u32 n = - clib_min (dq->n_descriptors - dq->tail_index, n_descriptors_to_tx); - n = ixge_tx_no_wrap (xm, xd, dq, from, dq->tail_index, n, &tx_state); - from += n; - n_descriptors_to_tx -= n; - dq->tail_index += n; - ASSERT (dq->tail_index <= dq->n_descriptors); - if (dq->tail_index == dq->n_descriptors) - dq->tail_index = 0; - } - - if (n_descriptors_to_tx > 0) - { - u32 n = - ixge_tx_no_wrap (xm, xd, dq, from, 0, n_descriptors_to_tx, &tx_state); - from += n; - ASSERT (n == n_descriptors_to_tx); - dq->tail_index += n; - ASSERT (dq->tail_index <= dq->n_descriptors); - if (dq->tail_index == dq->n_descriptors) - dq->tail_index = 0; - } - - /* We should only get full packets. */ - ASSERT (tx_state.is_start_of_packet); - - /* Report status when last descriptor is done. */ - { - u32 i = dq->tail_index == 0 ? dq->n_descriptors - 1 : dq->tail_index - 1; - ixge_tx_descriptor_t *d = &dq->descriptors[i].tx; - d->status0 |= IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS; - } - - /* Give new descriptors to hardware. */ - { - ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_TX, queue_index); - - CLIB_MEMORY_BARRIER (); - - dr->tail_index = dq->tail_index; - } - - /* Free any buffers that are done. 
*/ - { - u32 n = _vec_len (xm->tx_buffers_pending_free); - if (n > 0) - { - vlib_buffer_free_no_next (vm, xm->tx_buffers_pending_free, n); - _vec_len (xm->tx_buffers_pending_free) = 0; - ASSERT (dq->tx.n_buffers_on_ring >= n); - dq->tx.n_buffers_on_ring -= (n - n_tail_drop); - } - } - - return f->n_vectors; -} - -static uword -ixge_rx_queue_no_wrap (ixge_main_t * xm, - ixge_device_t * xd, - ixge_dma_queue_t * dq, - u32 start_descriptor_index, u32 n_descriptors) -{ - vlib_main_t *vm = xm->vlib_main; - vlib_node_runtime_t *node = dq->rx.node; - ixge_descriptor_t *d; - static ixge_descriptor_t *d_trace_save; - static u32 *d_trace_buffers; - u32 n_descriptors_left = n_descriptors; - u32 *to_rx = - vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index); - u32 *to_add; - u32 bi_sop = dq->rx.saved_start_of_packet_buffer_index; - u32 bi_last = dq->rx.saved_last_buffer_index; - u32 next_index_sop = dq->rx.saved_start_of_packet_next_index; - u32 is_sop = dq->rx.is_start_of_packet; - u32 next_index, n_left_to_next, *to_next; - u32 n_packets = 0; - u32 n_bytes = 0; - u32 n_trace = vlib_get_trace_count (vm, node); - vlib_buffer_t *b_last, b_dummy; - - ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors); - d = &dq->descriptors[start_descriptor_index]; - - b_last = bi_last != ~0 ? vlib_get_buffer (vm, bi_last) : &b_dummy; - next_index = dq->rx.next_index; - - if (n_trace > 0) - { - u32 n = clib_min (n_trace, n_descriptors); - if (d_trace_save) - { - _vec_len (d_trace_save) = 0; - _vec_len (d_trace_buffers) = 0; - } - vec_add (d_trace_save, (ixge_descriptor_t *) d, n); - vec_add (d_trace_buffers, to_rx, n); - } - - { - uword l = vec_len (xm->rx_buffers_to_add); - - if (l < n_descriptors_left) - { - u32 n_to_alloc = 2 * dq->n_descriptors - l; - u32 n_allocated; - - vec_resize (xm->rx_buffers_to_add, n_to_alloc); - - _vec_len (xm->rx_buffers_to_add) = l; - n_allocated = vlib_buffer_alloc_from_free_list - (vm, xm->rx_buffers_to_add + l, n_to_alloc, - xm->vlib_buffer_free_list_index); - _vec_len (xm->rx_buffers_to_add) += n_allocated; - - /* Handle transient allocation failure */ - if (PREDICT_FALSE (l + n_allocated <= n_descriptors_left)) - { - if (n_allocated == 0) - vlib_error_count (vm, ixge_input_node.index, - IXGE_ERROR_rx_alloc_no_physmem, 1); - else - vlib_error_count (vm, ixge_input_node.index, - IXGE_ERROR_rx_alloc_fail, 1); - - n_descriptors_left = l + n_allocated; - } - n_descriptors = n_descriptors_left; - } - - /* Add buffers from end of vector going backwards. */ - to_add = vec_end (xm->rx_buffers_to_add) - 1; - } - - while (n_descriptors_left > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_descriptors_left >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t *b0, *b1; - u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0; - u32 bi1, fi1, len1, l3_offset1, s21, s01, flags1; - u8 is_eop0, error0, next0; - u8 is_eop1, error1, next1; - ixge_descriptor_t d0, d1; - - vlib_prefetch_buffer_with_index (vm, to_rx[2], STORE); - vlib_prefetch_buffer_with_index (vm, to_rx[3], STORE); - - CLIB_PREFETCH (d + 2, 32, STORE); - - d0.as_u32x4 = d[0].as_u32x4; - d1.as_u32x4 = d[1].as_u32x4; - - s20 = d0.rx_from_hw.status[2]; - s21 = d1.rx_from_hw.status[2]; - - s00 = d0.rx_from_hw.status[0]; - s01 = d1.rx_from_hw.status[0]; - - if (! 
- ((s20 & s21) & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE)) - goto found_hw_owned_descriptor_x2; - - bi0 = to_rx[0]; - bi1 = to_rx[1]; - - ASSERT (to_add - 1 >= xm->rx_buffers_to_add); - fi0 = to_add[0]; - fi1 = to_add[-1]; - - to_rx[0] = fi0; - to_rx[1] = fi1; - to_rx += 2; - to_add -= 2; - - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == - vlib_buffer_is_known (vm, bi0)); - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == - vlib_buffer_is_known (vm, bi1)); - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == - vlib_buffer_is_known (vm, fi0)); - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == - vlib_buffer_is_known (vm, fi1)); - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... See main.c... - */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); - - CLIB_PREFETCH (b0->data, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (b1->data, CLIB_CACHE_LINE_BYTES, LOAD); - - is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; - is_eop1 = (s21 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; - - ixge_rx_next_and_error_from_status_x2 (xd, s00, s20, s01, s21, - &next0, &error0, &flags0, - &next1, &error1, &flags1); - - next0 = is_sop ? next0 : next_index_sop; - next1 = is_eop0 ? next1 : next0; - next_index_sop = next1; - - b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT); - b1->flags |= flags1 | (!is_eop1 << VLIB_BUFFER_LOG2_NEXT_PRESENT); - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; - - b0->error = node->errors[error0]; - b1->error = node->errors[error1]; - - len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor; - len1 = d1.rx_from_hw.n_packet_bytes_this_descriptor; - n_bytes += len0 + len1; - n_packets += is_eop0 + is_eop1; - - /* Give new buffers to hardware. */ - d0.rx_to_hw.tail_address = - vlib_get_buffer_data_physical_address (vm, fi0); - d1.rx_to_hw.tail_address = - vlib_get_buffer_data_physical_address (vm, fi1); - d0.rx_to_hw.head_address = d[0].rx_to_hw.tail_address; - d1.rx_to_hw.head_address = d[1].rx_to_hw.tail_address; - d[0].as_u32x4 = d0.as_u32x4; - d[1].as_u32x4 = d1.as_u32x4; - - d += 2; - n_descriptors_left -= 2; - - /* Point to either l2 or l3 header depending on next. */ - l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT)) - ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0; - l3_offset1 = (is_eop0 && (next1 != IXGE_RX_NEXT_ETHERNET_INPUT)) - ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s01) : 0; - - b0->current_length = len0 - l3_offset0; - b1->current_length = len1 - l3_offset1; - b0->current_data = l3_offset0; - b1->current_data = l3_offset1; - - b_last->next_buffer = is_sop ? ~0 : bi0; - b0->next_buffer = is_eop0 ? ~0 : bi1; - bi_last = bi1; - b_last = b1; - - if (CLIB_DEBUG > 0) - { - u32 bi_sop0 = is_sop ? bi0 : bi_sop; - u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0; - - if (is_eop0) - { - u8 *msg = vlib_validate_buffer (vm, bi_sop0, - /* follow_buffer_next */ 1); - ASSERT (!msg); - } - if (is_eop1) - { - u8 *msg = vlib_validate_buffer (vm, bi_sop1, - /* follow_buffer_next */ 1); - ASSERT (!msg); - } - } - if (0) /* "Dave" version */ - { - u32 bi_sop0 = is_sop ? bi0 : bi_sop; - u32 bi_sop1 = is_eop0 ? 
bi1 : bi_sop0; - - if (is_eop0) - { - to_next[0] = bi_sop0; - to_next++; - n_left_to_next--; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi_sop0, next0); - } - if (is_eop1) - { - to_next[0] = bi_sop1; - to_next++; - n_left_to_next--; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi_sop1, next1); - } - is_sop = is_eop1; - bi_sop = bi_sop1; - } - if (1) /* "Eliot" version */ - { - /* Speculatively enqueue to cached next. */ - u8 saved_is_sop = is_sop; - u32 bi_sop_save = bi_sop; - - bi_sop = saved_is_sop ? bi0 : bi_sop; - to_next[0] = bi_sop; - to_next += is_eop0; - n_left_to_next -= is_eop0; - - bi_sop = is_eop0 ? bi1 : bi_sop; - to_next[0] = bi_sop; - to_next += is_eop1; - n_left_to_next -= is_eop1; - - is_sop = is_eop1; - - if (PREDICT_FALSE - (!(next0 == next_index && next1 == next_index))) - { - /* Undo speculation. */ - to_next -= is_eop0 + is_eop1; - n_left_to_next += is_eop0 + is_eop1; - - /* Re-do both descriptors being careful about where we enqueue. */ - bi_sop = saved_is_sop ? bi0 : bi_sop_save; - if (is_eop0) - { - if (next0 != next_index) - vlib_set_next_frame_buffer (vm, node, next0, bi_sop); - else - { - to_next[0] = bi_sop; - to_next += 1; - n_left_to_next -= 1; - } - } - - bi_sop = is_eop0 ? bi1 : bi_sop; - if (is_eop1) - { - if (next1 != next_index) - vlib_set_next_frame_buffer (vm, node, next1, bi_sop); - else - { - to_next[0] = bi_sop; - to_next += 1; - n_left_to_next -= 1; - } - } - - /* Switch cached next index when next for both packets is the same. */ - if (is_eop0 && is_eop1 && next0 == next1) - { - vlib_put_next_frame (vm, node, next_index, - n_left_to_next); - next_index = next0; - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - } - } - } - } - - /* Bail out of dual loop and proceed with single loop. */ - found_hw_owned_descriptor_x2: - - while (n_descriptors_left > 0 && n_left_to_next > 0) - { - vlib_buffer_t *b0; - u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0; - u8 is_eop0, error0, next0; - ixge_descriptor_t d0; - - d0.as_u32x4 = d[0].as_u32x4; - - s20 = d0.rx_from_hw.status[2]; - s00 = d0.rx_from_hw.status[0]; - - if (!(s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE)) - goto found_hw_owned_descriptor_x1; - - bi0 = to_rx[0]; - ASSERT (to_add >= xm->rx_buffers_to_add); - fi0 = to_add[0]; - - to_rx[0] = fi0; - to_rx += 1; - to_add -= 1; - - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == - vlib_buffer_is_known (vm, bi0)); - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == - vlib_buffer_is_known (vm, fi0)); - - b0 = vlib_get_buffer (vm, bi0); - - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... - */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - - is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; - ixge_rx_next_and_error_from_status_x1 - (xd, s00, s20, &next0, &error0, &flags0); - - next0 = is_sop ? next0 : next_index_sop; - next_index_sop = next0; - - b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT); - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - - b0->error = node->errors[error0]; - - len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor; - n_bytes += len0; - n_packets += is_eop0; - - /* Give new buffer to hardware. 
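   Each RX ring slot is recycled rather than drained: the received
   buffer index leaves with the packet while a freshly allocated index
   takes over the slot, and only the replacement's physical address is
   handed back to the NIC.  In outline, with bi0 and fi0 as in the code
   below:

     to_rx[0] = fi0;
     d0.rx_to_hw.tail_address =
       vlib_get_buffer_data_physical_address (vm, fi0);

   while bi0 travels on into the graph as part of the current packet.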
*/ - d0.rx_to_hw.tail_address = - vlib_get_buffer_data_physical_address (vm, fi0); - d0.rx_to_hw.head_address = d0.rx_to_hw.tail_address; - d[0].as_u32x4 = d0.as_u32x4; - - d += 1; - n_descriptors_left -= 1; - - /* Point to either l2 or l3 header depending on next. */ - l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT)) - ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0; - b0->current_length = len0 - l3_offset0; - b0->current_data = l3_offset0; - - b_last->next_buffer = is_sop ? ~0 : bi0; - bi_last = bi0; - b_last = b0; - - bi_sop = is_sop ? bi0 : bi_sop; - - if (CLIB_DEBUG > 0 && is_eop0) - { - u8 *msg = - vlib_validate_buffer (vm, bi_sop, /* follow_buffer_next */ 1); - ASSERT (!msg); - } - - if (0) /* "Dave" version */ - { - if (is_eop0) - { - to_next[0] = bi_sop; - to_next++; - n_left_to_next--; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi_sop, next0); - } - } - if (1) /* "Eliot" version */ - { - if (PREDICT_TRUE (next0 == next_index)) - { - to_next[0] = bi_sop; - to_next += is_eop0; - n_left_to_next -= is_eop0; - } - else - { - if (next0 != next_index && is_eop0) - vlib_set_next_frame_buffer (vm, node, next0, bi_sop); - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - next_index = next0; - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - } - } - is_sop = is_eop0; - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - -found_hw_owned_descriptor_x1: - if (n_descriptors_left > 0) - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - - _vec_len (xm->rx_buffers_to_add) = (to_add + 1) - xm->rx_buffers_to_add; - - { - u32 n_done = n_descriptors - n_descriptors_left; - - if (n_trace > 0 && n_done > 0) - { - u32 n = clib_min (n_trace, n_done); - ixge_rx_trace (xm, xd, dq, - d_trace_save, - d_trace_buffers, - &dq->descriptors[start_descriptor_index], n); - vlib_set_trace_count (vm, node, n_trace - n); - } - if (d_trace_save) - { - _vec_len (d_trace_save) = 0; - _vec_len (d_trace_buffers) = 0; - } - - /* Don't keep a reference to b_last if we don't have to. - Otherwise we can over-write a next_buffer pointer after already haven - enqueued a packet. */ - if (is_sop) - { - b_last->next_buffer = ~0; - bi_last = ~0; - } - - dq->rx.n_descriptors_done_this_call = n_done; - dq->rx.n_descriptors_done_total += n_done; - dq->rx.is_start_of_packet = is_sop; - dq->rx.saved_start_of_packet_buffer_index = bi_sop; - dq->rx.saved_last_buffer_index = bi_last; - dq->rx.saved_start_of_packet_next_index = next_index_sop; - dq->rx.next_index = next_index; - dq->rx.n_bytes += n_bytes; - - return n_packets; - } -} - -static uword -ixge_rx_queue (ixge_main_t * xm, - ixge_device_t * xd, - vlib_node_runtime_t * node, u32 queue_index) -{ - ixge_dma_queue_t *dq = - vec_elt_at_index (xd->dma_queues[VLIB_RX], queue_index); - ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, dq->queue_index); - uword n_packets = 0; - u32 hw_head_index, sw_head_index; - - /* One time initialization. */ - if (!dq->rx.node) - { - dq->rx.node = node; - dq->rx.is_start_of_packet = 1; - dq->rx.saved_start_of_packet_buffer_index = ~0; - dq->rx.saved_last_buffer_index = ~0; - } - - dq->rx.next_index = node->cached_next_index; - - dq->rx.n_descriptors_done_total = 0; - dq->rx.n_descriptors_done_this_call = 0; - dq->rx.n_bytes = 0; - - /* Fetch head from hardware and compare to where we think we are. 
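   Worked example of the wrap handling below: with n_descriptors = 512,
   sw_head_index = 500 and hw_head_index = 20, hardware has wrapped past
   the end of the ring.  The first branch processes the 12 descriptors
   500..511, ixge_ring_add() moves the software head to 0, and the
   second branch then processes descriptors 0..19.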
*/ - hw_head_index = dr->head_index; - sw_head_index = dq->head_index; - - if (hw_head_index == sw_head_index) - goto done; - - if (hw_head_index < sw_head_index) - { - u32 n_tried = dq->n_descriptors - sw_head_index; - n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried); - sw_head_index = - ixge_ring_add (dq, sw_head_index, - dq->rx.n_descriptors_done_this_call); - - if (dq->rx.n_descriptors_done_this_call != n_tried) - goto done; - } - if (hw_head_index >= sw_head_index) - { - u32 n_tried = hw_head_index - sw_head_index; - n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried); - sw_head_index = - ixge_ring_add (dq, sw_head_index, - dq->rx.n_descriptors_done_this_call); - } - -done: - dq->head_index = sw_head_index; - dq->tail_index = - ixge_ring_add (dq, dq->tail_index, dq->rx.n_descriptors_done_total); - - /* Give tail back to hardware. */ - CLIB_MEMORY_BARRIER (); - - dr->tail_index = dq->tail_index; - - vlib_increment_combined_counter (vnet_main. - interface_main.combined_sw_if_counters + - VNET_INTERFACE_COUNTER_RX, - 0 /* cpu_index */ , - xd->vlib_sw_if_index, n_packets, - dq->rx.n_bytes); - - return n_packets; -} - -static void -ixge_interrupt (ixge_main_t * xm, ixge_device_t * xd, u32 i) -{ - vlib_main_t *vm = xm->vlib_main; - ixge_regs_t *r = xd->regs; - - if (i != 20) - { - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d, %s",.format_args = "i1t1",.n_enum_strings = - 16,.enum_strings = - { - "flow director", - "rx miss", - "pci exception", - "mailbox", - "link status change", - "linksec key exchange", - "manageability event", - "reserved23", - "sdp0", - "sdp1", - "sdp2", - "sdp3", - "ecc", "descriptor handler error", "tcp timer", "other",},}; - struct - { - u8 instance; - u8 index; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->index = i - 16; - } - else - { - u32 v = r->xge_mac.link_status; - uword is_up = (v & (1 << 30)) != 0; - - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d, link status change 0x%x",.format_args = "i4i4",}; - struct - { - u32 instance, link_status; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->link_status = v; - xd->link_status_at_last_link_change = v; - - vlib_process_signal_event (vm, ixge_process_node.index, - EVENT_SET_FLAGS, - ((is_up << 31) | xd->vlib_hw_if_index)); - } -} - -always_inline u32 -clean_block (u32 * b, u32 * t, u32 n_left) -{ - u32 *t0 = t; - - while (n_left >= 4) - { - u32 bi0, bi1, bi2, bi3; - - t[0] = bi0 = b[0]; - b[0] = 0; - t += bi0 != 0; - - t[0] = bi1 = b[1]; - b[1] = 0; - t += bi1 != 0; - - t[0] = bi2 = b[2]; - b[2] = 0; - t += bi2 != 0; - - t[0] = bi3 = b[3]; - b[3] = 0; - t += bi3 != 0; - - b += 4; - n_left -= 4; - } - - while (n_left > 0) - { - u32 bi0; - - t[0] = bi0 = b[0]; - b[0] = 0; - t += bi0 != 0; - b += 1; - n_left -= 1; - } - - return t - t0; -} - -static void -ixge_tx_queue (ixge_main_t * xm, ixge_device_t * xd, u32 queue_index) -{ - vlib_main_t *vm = xm->vlib_main; - ixge_dma_queue_t *dq = - vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index); - u32 n_clean, *b, *t, *t0; - i32 n_hw_owned_descriptors; - i32 first_to_clean, last_to_clean; - u64 hwbp_race = 0; - - /* Handle case where head write back pointer update - * arrives after the interrupt during high PCI bus loads. 
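   The device DMA-writes its consumed head index into host memory at
   tx.head_index_write_back; under heavy PCI load that write can land
   after the interrupt announcing it.  The loop below therefore spins,
   in essence:

     while (dq->head_index == dq->tx.head_index_write_back[0]
            && dq->tx.n_buffers_on_ring
            && dq->head_index != dq->tail_index)
       ;

   i.e. buffers are believed outstanding and the ring is not empty, yet
   the write-back still shows no progress, until the DMA write becomes
   visible.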
- */ - while ((dq->head_index == dq->tx.head_index_write_back[0]) && - dq->tx.n_buffers_on_ring && (dq->head_index != dq->tail_index)) - { - hwbp_race++; - if (IXGE_HWBP_RACE_ELOG && (hwbp_race == 1)) - { - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d tx head index race: head %4d, tail %4d, buffs %4d",.format_args - = "i4i4i4i4",}; - struct - { - u32 instance, head_index, tail_index, n_buffers_on_ring; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->head_index = dq->head_index; - ed->tail_index = dq->tail_index; - ed->n_buffers_on_ring = dq->tx.n_buffers_on_ring; - } - } - - dq->head_index = dq->tx.head_index_write_back[0]; - n_hw_owned_descriptors = ixge_ring_sub (dq, dq->head_index, dq->tail_index); - ASSERT (dq->tx.n_buffers_on_ring >= n_hw_owned_descriptors); - n_clean = dq->tx.n_buffers_on_ring - n_hw_owned_descriptors; - - if (IXGE_HWBP_RACE_ELOG && hwbp_race) - { - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d tx head index race: head %4d, hw_owned %4d, n_clean %4d, retries %d",.format_args - = "i4i4i4i4i4",}; - struct - { - u32 instance, head_index, n_hw_owned_descriptors, n_clean, retries; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->head_index = dq->head_index; - ed->n_hw_owned_descriptors = n_hw_owned_descriptors; - ed->n_clean = n_clean; - ed->retries = hwbp_race; - } - - /* - * This function used to wait until hardware owned zero descriptors. - * At high PPS rates, that doesn't happen until the TX ring is - * completely full of descriptors which need to be cleaned up. - * That, in turn, causes TX ring-full drops and/or long RX service - * interruptions. - */ - if (n_clean == 0) - return; - - /* Clean the n_clean descriptors prior to the reported hardware head */ - last_to_clean = dq->head_index - 1; - last_to_clean = (last_to_clean < 0) ? last_to_clean + dq->n_descriptors : - last_to_clean; - - first_to_clean = (last_to_clean) - (n_clean - 1); - first_to_clean = (first_to_clean < 0) ? first_to_clean + dq->n_descriptors : - first_to_clean; - - vec_resize (xm->tx_buffers_pending_free, dq->n_descriptors - 1); - t0 = t = xm->tx_buffers_pending_free; - b = dq->descriptor_buffer_indices + first_to_clean; - - /* Wrap case: clean from first to end, then start to last */ - if (first_to_clean > last_to_clean) - { - t += clean_block (b, t, (dq->n_descriptors - 1) - first_to_clean); - first_to_clean = 0; - b = dq->descriptor_buffer_indices; - } - - /* Typical case: clean from first to last */ - if (first_to_clean <= last_to_clean) - t += clean_block (b, t, (last_to_clean - first_to_clean) + 1); - - if (t > t0) - { - u32 n = t - t0; - vlib_buffer_free_no_next (vm, t0, n); - ASSERT (dq->tx.n_buffers_on_ring >= n); - dq->tx.n_buffers_on_ring -= n; - _vec_len (xm->tx_buffers_pending_free) = 0; - } -} - -/* RX queue interrupts 0 thru 7; TX 8 thru 15. 
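   Worked mapping, as dispatched by ixge_device_input() further below:
   interrupt status bit 3 maps to RX queue 3 (i - 0), bit 11 maps to TX
   queue 11 - 8 = 3, and bits 16 and up cover link and management
   events, bit 20 being the link status change handled specially in
   ixge_interrupt() above.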
*/ -always_inline uword -ixge_interrupt_is_rx_queue (uword i) -{ - return i < 8; -} - -always_inline uword -ixge_interrupt_is_tx_queue (uword i) -{ - return i >= 8 && i < 16; -} - -always_inline uword -ixge_tx_queue_to_interrupt (uword i) -{ - return 8 + i; -} - -always_inline uword -ixge_rx_queue_to_interrupt (uword i) -{ - return 0 + i; -} - -always_inline uword -ixge_interrupt_rx_queue (uword i) -{ - ASSERT (ixge_interrupt_is_rx_queue (i)); - return i - 0; -} - -always_inline uword -ixge_interrupt_tx_queue (uword i) -{ - ASSERT (ixge_interrupt_is_tx_queue (i)); - return i - 8; -} - -static uword -ixge_device_input (ixge_main_t * xm, - ixge_device_t * xd, vlib_node_runtime_t * node) -{ - ixge_regs_t *r = xd->regs; - u32 i, s; - uword n_rx_packets = 0; - - s = r->interrupt.status_write_1_to_set; - if (s) - r->interrupt.status_write_1_to_clear = s; - - /* *INDENT-OFF* */ - foreach_set_bit (i, s, ({ - if (ixge_interrupt_is_rx_queue (i)) - n_rx_packets += ixge_rx_queue (xm, xd, node, ixge_interrupt_rx_queue (i)); - - else if (ixge_interrupt_is_tx_queue (i)) - ixge_tx_queue (xm, xd, ixge_interrupt_tx_queue (i)); - - else - ixge_interrupt (xm, xd, i); - })); - /* *INDENT-ON* */ - - return n_rx_packets; -} - -static uword -ixge_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) -{ - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd; - uword n_rx_packets = 0; - - if (node->state == VLIB_NODE_STATE_INTERRUPT) - { - uword i; - - /* Loop over devices with interrupts. */ - /* *INDENT-OFF* */ - foreach_set_bit (i, node->runtime_data[0], ({ - xd = vec_elt_at_index (xm->devices, i); - n_rx_packets += ixge_device_input (xm, xd, node); - - /* Re-enable interrupts since we're going to stay in interrupt mode. */ - if (! (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) - xd->regs->interrupt.enable_write_1_to_set = ~0; - })); - /* *INDENT-ON* */ - - /* Clear mask of devices with pending interrupts. */ - node->runtime_data[0] = 0; - } - else - { - /* Poll all devices for input/interrupts. */ - vec_foreach (xd, xm->devices) - { - n_rx_packets += ixge_device_input (xm, xd, node); - - /* Re-enable interrupts when switching out of polling mode. */ - if (node->flags & - VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) - xd->regs->interrupt.enable_write_1_to_set = ~0; - } - } - - return n_rx_packets; -} - -static char *ixge_error_strings[] = { -#define _(n,s) s, - foreach_ixge_error -#undef _ -}; - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ixge_input_node, static) = { - .function = ixge_input, - .type = VLIB_NODE_TYPE_INPUT, - .name = "ixge-input", - - /* Will be enabled if/when hardware is detected. 
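   A hedged sketch of how such a node is typically switched on once a
   device is actually found; the real call site is in the PCI init code
   outside this hunk, so take the exact form as an assumption:

     vlib_node_set_state (vm, ixge_input_node.index,
                          VLIB_NODE_STATE_POLLING);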
*/ - .state = VLIB_NODE_STATE_DISABLED, - - .format_buffer = format_ethernet_header_with_length, - .format_trace = format_ixge_rx_dma_trace, - - .n_errors = IXGE_N_ERROR, - .error_strings = ixge_error_strings, - - .n_next_nodes = IXGE_RX_N_NEXT, - .next_nodes = { - [IXGE_RX_NEXT_DROP] = "error-drop", - [IXGE_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", - [IXGE_RX_NEXT_IP4_INPUT] = "ip4-input", - [IXGE_RX_NEXT_IP6_INPUT] = "ip6-input", - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH_CLONE (ixge_input) -CLIB_MULTIARCH_SELECT_FN (ixge_input) -/* *INDENT-ON* */ - -static u8 * -format_ixge_device_name (u8 * s, va_list * args) -{ - u32 i = va_arg (*args, u32); - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, i); - return format (s, "TenGigabitEthernet%U", - format_vlib_pci_handle, &xd->pci_device.bus_address); -} - -#define IXGE_COUNTER_IS_64_BIT (1 << 0) -#define IXGE_COUNTER_NOT_CLEAR_ON_READ (1 << 1) - -static u8 ixge_counter_flags[] = { -#define _(a,f) 0, -#define _64(a,f) IXGE_COUNTER_IS_64_BIT, - foreach_ixge_counter -#undef _ -#undef _64 -}; - -static void -ixge_update_counters (ixge_device_t * xd) -{ - /* Byte offset for counter registers. */ - static u32 reg_offsets[] = { -#define _(a,f) (a) / sizeof (u32), -#define _64(a,f) _(a,f) - foreach_ixge_counter -#undef _ -#undef _64 - }; - volatile u32 *r = (volatile u32 *) xd->regs; - int i; - - for (i = 0; i < ARRAY_LEN (xd->counters); i++) - { - u32 o = reg_offsets[i]; - xd->counters[i] += r[o]; - if (ixge_counter_flags[i] & IXGE_COUNTER_NOT_CLEAR_ON_READ) - r[o] = 0; - if (ixge_counter_flags[i] & IXGE_COUNTER_IS_64_BIT) - xd->counters[i] += (u64) r[o + 1] << (u64) 32; - } -} - -static u8 * -format_ixge_device_id (u8 * s, va_list * args) -{ - u32 device_id = va_arg (*args, u32); - char *t = 0; - switch (device_id) - { -#define _(f,n) case n: t = #f; break; - foreach_ixge_pci_device_id; -#undef _ - default: - t = 0; - break; - } - if (t == 0) - s = format (s, "unknown 0x%x", device_id); - else - s = format (s, "%s", t); - return s; -} - -static u8 * -format_ixge_link_status (u8 * s, va_list * args) -{ - ixge_device_t *xd = va_arg (*args, ixge_device_t *); - u32 v = xd->link_status_at_last_link_change; - - s = format (s, "%s", (v & (1 << 30)) ? 
"up" : "down"); - - { - char *modes[] = { - "1g", "10g parallel", "10g serial", "autoneg", - }; - char *speeds[] = { - "unknown", "100m", "1g", "10g", - }; - s = format (s, ", mode %s, speed %s", - modes[(v >> 26) & 3], speeds[(v >> 28) & 3]); - } - - return s; -} - -static u8 * -format_ixge_device (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - CLIB_UNUSED (int verbose) = va_arg (*args, int); - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, dev_instance); - ixge_phy_t *phy = xd->phys + xd->phy_index; - uword indent = format_get_indent (s); - - ixge_update_counters (xd); - xd->link_status_at_last_link_change = xd->regs->xge_mac.link_status; - - s = format (s, "Intel 8259X: id %U\n%Ulink %U", - format_ixge_device_id, xd->device_id, - format_white_space, indent + 2, format_ixge_link_status, xd); - - { - - s = format (s, "\n%UPCIe %U", format_white_space, indent + 2, - format_vlib_pci_link_speed, &xd->pci_device); - } - - s = format (s, "\n%U", format_white_space, indent + 2); - if (phy->mdio_address != ~0) - s = format (s, "PHY address %d, id 0x%x", phy->mdio_address, phy->id); - else if (xd->sfp_eeprom.id == SFP_ID_sfp) - s = format (s, "SFP %U", format_sfp_eeprom, &xd->sfp_eeprom); - else - s = format (s, "PHY not found"); - - /* FIXME */ - { - ixge_dma_queue_t *dq = vec_elt_at_index (xd->dma_queues[VLIB_RX], 0); - ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0); - u32 hw_head_index = dr->head_index; - u32 sw_head_index = dq->head_index; - u32 nitems; - - nitems = ixge_ring_sub (dq, hw_head_index, sw_head_index); - s = format (s, "\n%U%d unprocessed, %d total buffers on rx queue 0 ring", - format_white_space, indent + 2, nitems, dq->n_descriptors); - - s = format (s, "\n%U%d buffers in driver rx cache", - format_white_space, indent + 2, - vec_len (xm->rx_buffers_to_add)); - - s = format (s, "\n%U%d buffers on tx queue 0 ring", - format_white_space, indent + 2, - xd->dma_queues[VLIB_TX][0].tx.n_buffers_on_ring); - } - { - u32 i; - u64 v; - static char *names[] = { -#define _(a,f) #f, -#define _64(a,f) _(a,f) - foreach_ixge_counter -#undef _ -#undef _64 - }; - - for (i = 0; i < ARRAY_LEN (names); i++) - { - v = xd->counters[i] - xd->counters_last_clear[i]; - if (v != 0) - s = format (s, "\n%U%-40U%16Ld", - format_white_space, indent + 2, - format_c_identifier, names[i], v); - } - } - - return s; -} - -static void -ixge_clear_hw_interface_counters (u32 instance) -{ - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, instance); - ixge_update_counters (xd); - memcpy (xd->counters_last_clear, xd->counters, sizeof (xd->counters)); -} - -/* - * Dynamically redirect all pkts from a specific interface - * to the specified node - */ -static void -ixge_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, - u32 node_index) -{ - ixge_main_t *xm = &ixge_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - ixge_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); - - /* Shut off redirection */ - if (node_index == ~0) - { - xd->per_interface_next_index = node_index; - return; - } - - xd->per_interface_next_index = - vlib_node_add_next (xm->vlib_main, ixge_input_node.index, node_index); -} - - -/* *INDENT-OFF* */ -VNET_DEVICE_CLASS (ixge_device_class) = { - .name = "ixge", - .tx_function = ixge_interface_tx, - .format_device_name = format_ixge_device_name, - .format_device = format_ixge_device, - .format_tx_trace = format_ixge_tx_dma_trace, - .clear_counters = 
ixge_clear_hw_interface_counters, - .admin_up_down_function = ixge_interface_admin_up_down, - .rx_redirect_to_node = ixge_set_interface_next_node, - .flatten_output_chains = 1, -}; -/* *INDENT-ON* */ - -#define IXGE_N_BYTES_IN_RX_BUFFER (2048) // DAW-HACK: Set Rx buffer size so all packets < ETH_MTU_SIZE fit in the buffer (i.e. sop & eop for all descriptors). - -static clib_error_t * -ixge_dma_init (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 queue_index) -{ - ixge_main_t *xm = &ixge_main; - vlib_main_t *vm = xm->vlib_main; - ixge_dma_queue_t *dq; - clib_error_t *error = 0; - - vec_validate (xd->dma_queues[rt], queue_index); - dq = vec_elt_at_index (xd->dma_queues[rt], queue_index); - - if (!xm->n_descriptors_per_cache_line) - xm->n_descriptors_per_cache_line = - CLIB_CACHE_LINE_BYTES / sizeof (dq->descriptors[0]); - - if (!xm->n_bytes_in_rx_buffer) - xm->n_bytes_in_rx_buffer = IXGE_N_BYTES_IN_RX_BUFFER; - xm->n_bytes_in_rx_buffer = round_pow2 (xm->n_bytes_in_rx_buffer, 1024); - if (!xm->vlib_buffer_free_list_index) - { - xm->vlib_buffer_free_list_index = - vlib_buffer_get_or_create_free_list (vm, xm->n_bytes_in_rx_buffer, - "ixge rx"); - ASSERT (xm->vlib_buffer_free_list_index != 0); - } - - if (!xm->n_descriptors[rt]) - xm->n_descriptors[rt] = 4 * VLIB_FRAME_SIZE; - - dq->queue_index = queue_index; - dq->n_descriptors = - round_pow2 (xm->n_descriptors[rt], xm->n_descriptors_per_cache_line); - dq->head_index = dq->tail_index = 0; - - dq->descriptors = vlib_physmem_alloc_aligned (vm, &error, - dq->n_descriptors * - sizeof (dq->descriptors[0]), - 128 /* per chip spec */ ); - if (error) - return error; - - memset (dq->descriptors, 0, - dq->n_descriptors * sizeof (dq->descriptors[0])); - vec_resize (dq->descriptor_buffer_indices, dq->n_descriptors); - - if (rt == VLIB_RX) - { - u32 n_alloc, i; - - n_alloc = vlib_buffer_alloc_from_free_list - (vm, dq->descriptor_buffer_indices, - vec_len (dq->descriptor_buffer_indices), - xm->vlib_buffer_free_list_index); - ASSERT (n_alloc == vec_len (dq->descriptor_buffer_indices)); - for (i = 0; i < n_alloc; i++) - { - vlib_buffer_t *b = - vlib_get_buffer (vm, dq->descriptor_buffer_indices[i]); - dq->descriptors[i].rx_to_hw.tail_address = - vlib_physmem_virtual_to_physical (vm, b->data); - } - } - else - { - u32 i; - - dq->tx.head_index_write_back = - vlib_physmem_alloc (vm, &error, CLIB_CACHE_LINE_BYTES); - - for (i = 0; i < dq->n_descriptors; i++) - dq->descriptors[i].tx = xm->tx_descriptor_template; - - vec_validate (xm->tx_buffers_pending_free, dq->n_descriptors - 1); - } - - { - ixge_dma_regs_t *dr = get_dma_regs (xd, rt, queue_index); - u64 a; - - a = vlib_physmem_virtual_to_physical (vm, dq->descriptors); - dr->descriptor_address[0] = a & 0xFFFFFFFF; - dr->descriptor_address[1] = a >> (u64) 32; - dr->n_descriptor_bytes = dq->n_descriptors * sizeof (dq->descriptors[0]); - dq->head_index = dq->tail_index = 0; - - if (rt == VLIB_RX) - { - ASSERT ((xm->n_bytes_in_rx_buffer / 1024) < 32); - dr->rx_split_control = - ( /* buffer size */ ((xm->n_bytes_in_rx_buffer / 1024) << 0) - | ( /* lo free descriptor threshold (units of 64 descriptors) */ - (1 << 22)) | ( /* descriptor type: advanced one buffer */ - (1 << 25)) | ( /* drop if no descriptors available */ - (1 << 28))); - - /* Give hardware all but last 16 cache lines' worth of descriptors. */ - dq->tail_index = dq->n_descriptors - - 16 * xm->n_descriptors_per_cache_line; - } - else - { - /* Make sure its initialized before hardware can get to it. 
*/ - dq->tx.head_index_write_back[0] = dq->head_index; - - a = - vlib_physmem_virtual_to_physical (vm, dq->tx.head_index_write_back); - dr->tx.head_index_write_back_address[0] = /* enable bit */ 1 | a; - dr->tx.head_index_write_back_address[1] = (u64) a >> (u64) 32; - } - - /* DMA on 82599 does not work with [13] rx data write relaxed ordering - and [12] undocumented set. */ - if (rt == VLIB_RX) - dr->dca_control &= ~((1 << 13) | (1 << 12)); - - CLIB_MEMORY_BARRIER (); - - if (rt == VLIB_TX) - { - xd->regs->tx_dma_control |= (1 << 0); - dr->control |= ((32 << 0) /* prefetch threshold */ - | (64 << 8) /* host threshold */ - | (0 << 16) /* writeback threshold */ ); - } - - /* Enable this queue and wait for hardware to initialize - before adding to tail. */ - if (rt == VLIB_TX) - { - dr->control |= 1 << 25; - while (!(dr->control & (1 << 25))) - ; - } - - /* Set head/tail indices and enable DMA. */ - dr->head_index = dq->head_index; - dr->tail_index = dq->tail_index; - } - - return error; -} - -static u32 -ixge_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags) -{ - ixge_device_t *xd; - ixge_regs_t *r; - u32 old; - ixge_main_t *xm = &ixge_main; - - xd = vec_elt_at_index (xm->devices, hw->dev_instance); - r = xd->regs; - - old = r->filter_control; - - if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) - r->filter_control = old | (1 << 9) /* unicast promiscuous */ ; - else - r->filter_control = old & ~(1 << 9); - - return old; -} - -static void -ixge_device_init (ixge_main_t * xm) -{ - vnet_main_t *vnm = vnet_get_main (); - ixge_device_t *xd; - - /* Reset chip(s). */ - vec_foreach (xd, xm->devices) - { - ixge_regs_t *r = xd->regs; - const u32 reset_bit = (1 << 26) | (1 << 3); - - r->control |= reset_bit; - - /* No need to suspend. Timed to take ~1e-6 secs */ - while (r->control & reset_bit) - ; - - /* Software loaded. */ - r->extended_control |= (1 << 28); - - ixge_phy_init (xd); - - /* Register ethernet interface. */ - { - u8 addr8[6]; - u32 i, addr32[2]; - clib_error_t *error; - - addr32[0] = r->rx_ethernet_address0[0][0]; - addr32[1] = r->rx_ethernet_address0[0][1]; - for (i = 0; i < 6; i++) - addr8[i] = addr32[i / 4] >> ((i % 4) * 8); - - error = ethernet_register_interface - (vnm, ixge_device_class.index, xd->device_index, - /* ethernet address */ addr8, - &xd->vlib_hw_if_index, ixge_flag_change); - if (error) - clib_error_report (error); - } - - { - vnet_sw_interface_t *sw = - vnet_get_hw_sw_interface (vnm, xd->vlib_hw_if_index); - xd->vlib_sw_if_index = sw->sw_if_index; - } - - ixge_dma_init (xd, VLIB_RX, /* queue_index */ 0); - - xm->n_descriptors[VLIB_TX] = 20 * VLIB_FRAME_SIZE; - - ixge_dma_init (xd, VLIB_TX, /* queue_index */ 0); - - /* RX/TX queue 0 gets mapped to interrupt bits 0 & 8. */ - r->interrupt.queue_mapping[0] = (( /* valid bit */ (1 << 7) | - ixge_rx_queue_to_interrupt (0)) << 0); - - r->interrupt.queue_mapping[0] |= (( /* valid bit */ (1 << 7) | - ixge_tx_queue_to_interrupt (0)) << 8); - - /* No use in getting too many interrupts. - Limit them to one every 3/4 ring size at line rate - min sized packets. - No need for this since kernel/vlib main loop provides adequate interrupt - limiting scheme. */ - if (0) - { - f64 line_rate_max_pps = - 10e9 / (8 * (64 + /* interframe padding */ 20)); - ixge_throttle_queue_interrupt (r, 0, - .75 * xm->n_descriptors[VLIB_RX] / - line_rate_max_pps); - } - - /* Accept all multicast and broadcast packets. Should really add them - to the dst_ethernet_address register array. 
*/ - r->filter_control |= (1 << 10) | (1 << 8); - - /* Enable frames up to size in mac frame size register. */ - r->xge_mac.control |= 1 << 2; - r->xge_mac.rx_max_frame_size = (9216 + 14) << 16; - - /* Enable all interrupts. */ - if (!IXGE_ALWAYS_POLL) - r->interrupt.enable_write_1_to_set = ~0; - } -} - -static uword -ixge_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - vnet_main_t *vnm = vnet_get_main (); - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd; - uword event_type, *event_data = 0; - f64 timeout, link_debounce_deadline; - - ixge_device_init (xm); - - /* Clear all counters. */ - vec_foreach (xd, xm->devices) - { - ixge_update_counters (xd); - memset (xd->counters, 0, sizeof (xd->counters)); - } - - timeout = 30.0; - link_debounce_deadline = 1e70; - - while (1) - { - /* 36 bit stat counters could overflow in ~50 secs. - We poll every 30 secs to be conservative. */ - vlib_process_wait_for_event_or_clock (vm, timeout); - - event_type = vlib_process_get_events (vm, &event_data); - - switch (event_type) - { - case EVENT_SET_FLAGS: - /* 1 ms */ - link_debounce_deadline = vlib_time_now (vm) + 1e-3; - timeout = 1e-3; - break; - - case ~0: - /* No events found: timer expired. */ - if (vlib_time_now (vm) > link_debounce_deadline) - { - vec_foreach (xd, xm->devices) - { - ixge_regs_t *r = xd->regs; - u32 v = r->xge_mac.link_status; - uword is_up = (v & (1 << 30)) != 0; - - vnet_hw_interface_set_flags - (vnm, xd->vlib_hw_if_index, - is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); - } - link_debounce_deadline = 1e70; - timeout = 30.0; - } - break; - - default: - ASSERT (0); - } - - if (event_data) - _vec_len (event_data) = 0; - - /* Query stats every 30 secs. */ - { - f64 now = vlib_time_now (vm); - if (now - xm->time_last_stats_update > 30) - { - xm->time_last_stats_update = now; - vec_foreach (xd, xm->devices) ixge_update_counters (xd); - } - } - } - - return 0; -} - -static vlib_node_registration_t ixge_process_node = { - .function = ixge_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "ixge-process", -}; - -clib_error_t * -ixge_init (vlib_main_t * vm) -{ - ixge_main_t *xm = &ixge_main; - clib_error_t *error; - - xm->vlib_main = vm; - memset (&xm->tx_descriptor_template, 0, - sizeof (xm->tx_descriptor_template)); - memset (&xm->tx_descriptor_template_mask, 0, - sizeof (xm->tx_descriptor_template_mask)); - xm->tx_descriptor_template.status0 = - (IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED | - IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED | - IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS); - xm->tx_descriptor_template_mask.status0 = 0xffff; - xm->tx_descriptor_template_mask.status1 = 0x00003fff; - - xm->tx_descriptor_template_mask.status0 &= - ~(IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET - | IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS); - xm->tx_descriptor_template_mask.status1 &= - ~(IXGE_TX_DESCRIPTOR_STATUS1_DONE); - - error = vlib_call_init_function (vm, pci_bus_init); - - return error; -} - -VLIB_INIT_FUNCTION (ixge_init); - - -static void -ixge_pci_intr_handler (vlib_pci_device_t * dev) -{ - ixge_main_t *xm = &ixge_main; - vlib_main_t *vm = xm->vlib_main; - - vlib_node_set_interrupt_pending (vm, ixge_input_node.index); - - /* Let node know which device is interrupting. 
*/ - { - vlib_node_runtime_t *rt = - vlib_node_get_runtime (vm, ixge_input_node.index); - rt->runtime_data[0] |= 1 << dev->private_data; - } -} - -static clib_error_t * -ixge_pci_init (vlib_main_t * vm, vlib_pci_device_t * dev) -{ - ixge_main_t *xm = &ixge_main; - clib_error_t *error; - void *r; - ixge_device_t *xd; - - /* Device found: make sure we have dma memory. */ - if (unix_physmem_is_fake (vm)) - return clib_error_return (0, "no physical memory available"); - - error = vlib_pci_map_resource (dev, 0, &r); - if (error) - return error; - - vec_add2 (xm->devices, xd, 1); - - if (vec_len (xm->devices) == 1) - { - ixge_input_node.function = ixge_input_multiarch_select (); - } - - xd->pci_device = dev[0]; - xd->device_id = xd->pci_device.config0.header.device_id; - xd->regs = r; - xd->device_index = xd - xm->devices; - xd->pci_function = dev->bus_address.function; - xd->per_interface_next_index = ~0; - - - /* Chip found so enable node. */ - { - vlib_node_set_state (vm, ixge_input_node.index, - (IXGE_ALWAYS_POLL - ? VLIB_NODE_STATE_POLLING - : VLIB_NODE_STATE_INTERRUPT)); - - dev->private_data = xd->device_index; - } - - if (vec_len (xm->devices) == 1) - { - vlib_register_node (vm, &ixge_process_node); - xm->process_node_index = ixge_process_node.index; - } - - error = vlib_pci_bus_master_enable (dev); - - if (error) - return error; - - return vlib_pci_intr_enable (dev); -} - -/* *INDENT-OFF* */ -PCI_REGISTER_DEVICE (ixge_pci_device_registration,static) = { - .init_function = ixge_pci_init, - .interrupt_handler = ixge_pci_intr_handler, - .supported_devices = { -#define _(t,i) { .vendor_id = PCI_VENDOR_ID_INTEL, .device_id = i, }, - foreach_ixge_pci_device_id -#undef _ - { 0 }, - }, -}; -/* *INDENT-ON* */ - -void -ixge_set_next_node (ixge_rx_next_t next, char *name) -{ - vlib_node_registration_t *r = &ixge_input_node; - - switch (next) - { - case IXGE_RX_NEXT_IP4_INPUT: - case IXGE_RX_NEXT_IP6_INPUT: - case IXGE_RX_NEXT_ETHERNET_INPUT: - r->next_nodes[next] = name; - break; - - default: - clib_warning ("%s: illegal next %d\n", __FUNCTION__, next); - break; - } -} -#endif - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/nic/ixge.h b/src/vnet/devices/nic/ixge.h deleted file mode 100644 index a8e652dc..00000000 --- a/src/vnet/devices/nic/ixge.h +++ /dev/null @@ -1,1293 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_ixge_h -#define included_ixge_h - -#include -#include -#include -#include -#include -#include - -typedef volatile struct -{ - /* [31:7] 128 byte aligned. 
*/ - u32 descriptor_address[2]; - u32 n_descriptor_bytes; - - /* [5] rx/tx descriptor dca enable - [6] rx packet head dca enable - [7] rx packet tail dca enable - [9] rx/tx descriptor relaxed order - [11] rx/tx descriptor write back relaxed order - [13] rx/tx data write/read relaxed order - [15] rx head data write relaxed order - [31:24] apic id for cpu's cache. */ - u32 dca_control; - - u32 head_index; - - /* [4:0] tail buffer size (in 1k byte units) - [13:8] head buffer size (in 64 byte units) - [24:22] lo free descriptors threshold (units of 64 descriptors) - [27:25] descriptor type 0 = legacy, 1 = advanced one buffer (e.g. tail), - 2 = advanced header splitting (head + tail), 5 = advanced header - splitting (head only). - [28] drop if no descriptors available. */ - u32 rx_split_control; - - u32 tail_index; - CLIB_PAD_FROM_TO (0x1c, 0x28); - - /* [7:0] rx/tx prefetch threshold - [15:8] rx/tx host threshold - [24:16] rx/tx write back threshold - [25] rx/tx enable - [26] tx descriptor writeback flush - [30] rx strip vlan enable */ - u32 control; - - u32 rx_coallesce_control; - - union - { - struct - { - /* packets bytes lo hi */ - u32 stats[3]; - - u32 unused; - } rx; - - struct - { - u32 unused[2]; - - /* [0] enables head write back. */ - u32 head_index_write_back_address[2]; - } tx; - }; -} ixge_dma_regs_t; - -/* Only advanced descriptors are supported. */ -typedef struct -{ - u64 tail_address; - u64 head_address; -} ixge_rx_to_hw_descriptor_t; - -typedef struct -{ - u32 status[3]; - u16 n_packet_bytes_this_descriptor; - u16 vlan_tag; -} ixge_rx_from_hw_descriptor_t; - -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2 (1 << (4 + 11)) -/* Valid if not layer2. */ -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4 (1 << (4 + 0)) -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT (1 << (4 + 1)) -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6 (1 << (4 + 2)) -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT (1 << (4 + 3)) -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP (1 << (4 + 4)) -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP (1 << (4 + 5)) -#define IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET(s) (((s) >> 21) & 0x3ff) - -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE (1 << (0 + 0)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET (1 << (0 + 1)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN (1 << (0 + 3)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED (1 << (0 + 4)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED (1 << (0 + 5)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED (1 << (0 + 6)) -#define IXGE_RX_DESCRIPTOR_STATUS2_NOT_UNICAST (1 << (0 + 7)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_DOUBLE_VLAN (1 << (0 + 9)) -#define IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR (1 << (0 + 10)) -#define IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR (1 << (20 + 9)) -#define IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR (1 << (20 + 10)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR (1 << (20 + 11)) - -/* For layer2 packets stats0 bottom 3 bits give ether type index from filter. 
*/ -#define IXGE_RX_DESCRIPTOR_STATUS0_LAYER2_ETHERNET_TYPE(s) ((s) & 7) - -typedef struct -{ - u64 buffer_address; - u16 n_bytes_this_buffer; - u16 status0; - u32 status1; -#define IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED (3 << 4) -#define IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED (1 << (8 + 5)) -#define IXGE_TX_DESCRIPTOR_STATUS0_LOG2_REPORT_STATUS (8 + 3) -#define IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS (1 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_REPORT_STATUS) -#define IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS (1 << (8 + 1)) -#define IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET (8 + 0) -#define IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET (1 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET) -#define IXGE_TX_DESCRIPTOR_STATUS1_DONE (1 << 0) -#define IXGE_TX_DESCRIPTOR_STATUS1_CONTEXT(i) (/* valid */ (1 << 7) | ((i) << 4)) -#define IXGE_TX_DESCRIPTOR_STATUS1_IPSEC_OFFLOAD (1 << (8 + 2)) -#define IXGE_TX_DESCRIPTOR_STATUS1_INSERT_TCP_UDP_CHECKSUM (1 << (8 + 1)) -#define IXGE_TX_DESCRIPTOR_STATUS1_INSERT_IP4_CHECKSUM (1 << (8 + 0)) -#define IXGE_TX_DESCRIPTOR_STATUS0_N_BYTES_THIS_BUFFER(l) ((l) << 0) -#define IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET(l) ((l) << 14) -} ixge_tx_descriptor_t; - -typedef struct -{ - struct - { - u8 checksum_start_offset; - u8 checksum_insert_offset; - u16 checksum_end_offset; - } ip, tcp; - u32 status0; - - u8 status1; - - /* Byte offset after UDP/TCP header. */ - u8 payload_offset; - - u16 max_tcp_segment_size; -} __attribute__ ((packed)) ixge_tx_context_descriptor_t; - -typedef union -{ - ixge_rx_to_hw_descriptor_t rx_to_hw; - ixge_rx_from_hw_descriptor_t rx_from_hw; - ixge_tx_descriptor_t tx; - u32x4 as_u32x4; -} ixge_descriptor_t; - -typedef volatile struct -{ - /* [2] pcie master disable - [3] mac reset - [26] global device reset */ - u32 control; - u32 control_alias; - /* [3:2] device id (0 or 1 for dual port chips) - [7] link is up - [17:10] num vfs - [18] io active - [19] pcie master enable status */ - u32 status_read_only; - CLIB_PAD_FROM_TO (0xc, 0x18); - /* [14] pf reset done - [17] relaxed ordering disable - [26] extended vlan enable - [28] driver loaded */ - u32 extended_control; - CLIB_PAD_FROM_TO (0x1c, 0x20); - - /* software definable pins. - sdp_data [7:0] - sdp_is_output [15:8] - sdp_is_native [23:16] - sdp_function [31:24]. - */ - u32 sdp_control; - CLIB_PAD_FROM_TO (0x24, 0x28); - - /* [0] i2c clock in - [1] i2c clock out - [2] i2c data in - [3] i2c data out */ - u32 i2c_control; - CLIB_PAD_FROM_TO (0x2c, 0x4c); - u32 tcp_timer; - - CLIB_PAD_FROM_TO (0x50, 0x200); - - u32 led_control; - - CLIB_PAD_FROM_TO (0x204, 0x600); - u32 core_spare; - CLIB_PAD_FROM_TO (0x604, 0x700); - - struct - { - u32 vflr_events_clear[4]; - u32 mailbox_interrupt_status[4]; - u32 mailbox_interrupt_enable[4]; - CLIB_PAD_FROM_TO (0x730, 0x800); - } pf_foo; - - struct - { - u32 status_write_1_to_clear; - CLIB_PAD_FROM_TO (0x804, 0x808); - u32 status_write_1_to_set; - CLIB_PAD_FROM_TO (0x80c, 0x810); - u32 status_auto_clear_enable; - CLIB_PAD_FROM_TO (0x814, 0x820); - - /* [11:3] minimum inter-interrupt interval - (2e-6 units; 20e-6 units for fast ethernet). - [15] low-latency interrupt moderation enable - [20:16] low-latency interrupt credit - [27:21] interval counter - [31] write disable for credit and counter (write only). 
*/ - u32 throttle0[24]; - - u32 enable_write_1_to_set; - CLIB_PAD_FROM_TO (0x884, 0x888); - u32 enable_write_1_to_clear; - CLIB_PAD_FROM_TO (0x88c, 0x890); - u32 enable_auto_clear; - u32 msi_to_eitr_select; - /* [3:0] spd 0-3 interrupt detection enable - [4] msi-x enable - [5] other clear disable (makes other bits in status not clear on read) - etc. */ - u32 control; - CLIB_PAD_FROM_TO (0x89c, 0x900); - - /* Defines interrupt mapping for 128 rx + 128 tx queues. - 64 x 4 8 bit entries. - For register [i]: - [5:0] bit in interrupt status for rx queue 2*i + 0 - [7] valid bit - [13:8] bit for tx queue 2*i + 0 - [15] valid bit - similar for rx 2*i + 1 and tx 2*i + 1. */ - u32 queue_mapping[64]; - - /* tcp timer [7:0] and other interrupts [15:8] */ - u32 misc_mapping; - CLIB_PAD_FROM_TO (0xa04, 0xa90); - - /* 64 interrupts determined by mappings. */ - u32 status1_write_1_to_clear[4]; - u32 enable1_write_1_to_set[4]; - u32 enable1_write_1_to_clear[4]; - CLIB_PAD_FROM_TO (0xac0, 0xad0); - u32 status1_enable_auto_clear[4]; - CLIB_PAD_FROM_TO (0xae0, 0x1000); - } interrupt; - - ixge_dma_regs_t rx_dma0[64]; - - CLIB_PAD_FROM_TO (0x2000, 0x2140); - u32 dcb_rx_packet_plane_t4_config[8]; - u32 dcb_rx_packet_plane_t4_status[8]; - CLIB_PAD_FROM_TO (0x2180, 0x2300); - - /* reg i defines mapping for 4 rx queues starting at 4*i + 0. */ - u32 rx_queue_stats_mapping[32]; - u32 rx_queue_stats_control; - - CLIB_PAD_FROM_TO (0x2384, 0x2410); - u32 fc_user_descriptor_ptr[2]; - u32 fc_buffer_control; - CLIB_PAD_FROM_TO (0x241c, 0x2420); - u32 fc_rx_dma; - CLIB_PAD_FROM_TO (0x2424, 0x2430); - u32 dcb_packet_plane_control; - CLIB_PAD_FROM_TO (0x2434, 0x2f00); - - u32 rx_dma_control; - u32 pf_queue_drop_enable; - CLIB_PAD_FROM_TO (0x2f08, 0x2f20); - u32 rx_dma_descriptor_cache_config; - CLIB_PAD_FROM_TO (0x2f24, 0x3000); - - /* 1 bit. */ - u32 rx_enable; - CLIB_PAD_FROM_TO (0x3004, 0x3008); - /* [15:0] ether type (little endian) - [31:16] opcode (big endian) */ - u32 flow_control_control; - CLIB_PAD_FROM_TO (0x300c, 0x3020); - /* 3 bit traffic class for each of 8 priorities. */ - u32 rx_priority_to_traffic_class; - CLIB_PAD_FROM_TO (0x3024, 0x3028); - u32 rx_coallesce_data_buffer_control; - CLIB_PAD_FROM_TO (0x302c, 0x3190); - u32 rx_packet_buffer_flush_detect; - CLIB_PAD_FROM_TO (0x3194, 0x3200); - u32 flow_control_tx_timers[4]; /* 2 timer values */ - CLIB_PAD_FROM_TO (0x3210, 0x3220); - u32 flow_control_rx_threshold_lo[8]; - CLIB_PAD_FROM_TO (0x3240, 0x3260); - u32 flow_control_rx_threshold_hi[8]; - CLIB_PAD_FROM_TO (0x3280, 0x32a0); - u32 flow_control_refresh_threshold; - CLIB_PAD_FROM_TO (0x32a4, 0x3c00); - /* For each of 8 traffic classes (units of bytes). 
*/ - u32 rx_packet_buffer_size[8]; - CLIB_PAD_FROM_TO (0x3c20, 0x3d00); - u32 flow_control_config; - CLIB_PAD_FROM_TO (0x3d04, 0x4200); - - struct - { - u32 pcs_config; - CLIB_PAD_FROM_TO (0x4204, 0x4208); - u32 link_control; - u32 link_status; - u32 pcs_debug[2]; - u32 auto_negotiation; - u32 link_partner_ability; - u32 auto_negotiation_tx_next_page; - u32 auto_negotiation_link_partner_next_page; - CLIB_PAD_FROM_TO (0x4228, 0x4240); - } gige_mac; - - struct - { - /* [0] tx crc enable - [2] enable frames up to max frame size register [31:16] - [10] pad frames < 64 bytes if specified by user - [15] loopback enable - [16] mdc hi speed - [17] turn off mdc between mdio packets */ - u32 control; - - /* [5] rx symbol error (all bits clear on read) - [6] rx illegal symbol - [7] rx idle error - [8] rx local fault - [9] rx remote fault */ - u32 status; - - u32 pause_and_pace_control; - CLIB_PAD_FROM_TO (0x424c, 0x425c); - u32 phy_command; - u32 phy_data; - CLIB_PAD_FROM_TO (0x4264, 0x4268); - - /* [31:16] max frame size in bytes. */ - u32 rx_max_frame_size; - CLIB_PAD_FROM_TO (0x426c, 0x4288); - - /* [0] - [2] pcs receive link up? (latch lo) - [7] local fault - [1] - [0] pcs 10g base r capable - [1] pcs 10g base x capable - [2] pcs 10g base w capable - [10] rx local fault - [11] tx local fault - [15:14] 2 => device present at this address (else not present) */ - u32 xgxs_status[2]; - - u32 base_x_pcs_status; - - /* [0] pass unrecognized flow control frames - [1] discard pause frames - [2] rx priority flow control enable (only in dcb mode) - [3] rx flow control enable. */ - u32 flow_control; - - /* [3:0] tx lanes change polarity - [7:4] rx lanes change polarity - [11:8] swizzle tx lanes - [15:12] swizzle rx lanes - 4 x 2 bit tx lane swap - 4 x 2 bit rx lane swap. */ - u32 serdes_control; - - u32 fifo_control; - - /* [0] force link up - [1] autoneg ack2 bit to transmit - [6:2] autoneg selector field to transmit - [8:7] 10g pma/pmd type 0 => xaui, 1 kx4, 2 cx4 - [9] 1g pma/pmd type 0 => sfi, 1 => kx/bx - [10] disable 10g on without main power - [11] restart autoneg on transition to dx power state - [12] restart autoneg - [15:13] link mode: - 0 => 1g no autoneg - 1 => 10g kx4 parallel link no autoneg - 2 => 1g bx autoneg - 3 => 10g sfi serdes - 4 => kx4/kx/kr - 5 => xgmii 1g/100m - 6 => kx4/kx/kr 1g an - 7 kx4/kx/kr sgmii. - [16] kr support - [17] fec requested - [18] fec ability - etc. */ - u32 auto_negotiation_control; - - /* [0] signal detect 1g/100m - [1] fec signal detect - [2] 10g serial pcs fec block lock - [3] 10g serial high error rate - [4] 10g serial pcs block lock - [5] kx/kx4/kr autoneg next page received - [6] kx/kx4/kr backplane autoneg next page received - [7] link status clear to read - [11:8] 10g signal detect (4 lanes) (for serial just lane 0) - [12] 10g serial signal detect - [16:13] 10g parallel lane sync status - [17] 10g parallel align status - [18] 1g sync status - [19] kx/kx4/kr backplane autoneg is idle - [20] 1g autoneg enabled - [21] 1g pcs enabled for sgmii - [22] 10g xgxs enabled - [23] 10g serial fec enabled (forward error detection) - [24] 10g kr pcs enabled - [25] sgmii enabled - [27:26] mac link mode - 0 => 1g - 1 => 10g parallel - 2 => 10g serial - 3 => autoneg - [29:28] link speed - 1 => 100m - 2 => 1g - 3 => 10g - [30] link is up - [31] kx/kx4/kr backplane autoneg completed successfully. 
*/ - u32 link_status; - - /* [17:16] pma/pmd for 10g serial - 0 => kr, 2 => sfi - [18] disable dme pages */ - u32 auto_negotiation_control2; - - CLIB_PAD_FROM_TO (0x42ac, 0x42b0); - u32 link_partner_ability[2]; - CLIB_PAD_FROM_TO (0x42b8, 0x42d0); - u32 manageability_control; - u32 link_partner_next_page[2]; - CLIB_PAD_FROM_TO (0x42dc, 0x42e0); - u32 kr_pcs_control; - u32 kr_pcs_status; - u32 fec_status[2]; - CLIB_PAD_FROM_TO (0x42f0, 0x4314); - u32 sgmii_control; - CLIB_PAD_FROM_TO (0x4318, 0x4324); - u32 link_status2; - CLIB_PAD_FROM_TO (0x4328, 0x4900); - } xge_mac; - - u32 tx_dcb_control; - u32 tx_dcb_descriptor_plane_queue_select; - u32 tx_dcb_descriptor_plane_t1_config; - u32 tx_dcb_descriptor_plane_t1_status; - CLIB_PAD_FROM_TO (0x4910, 0x4950); - - /* For each TC in units of 1k bytes. */ - u32 tx_packet_buffer_thresholds[8]; - CLIB_PAD_FROM_TO (0x4970, 0x4980); - struct - { - u32 mmw; - u32 config; - u32 status; - u32 rate_drift; - } dcb_tx_rate_scheduler; - CLIB_PAD_FROM_TO (0x4990, 0x4a80); - u32 tx_dma_control; - CLIB_PAD_FROM_TO (0x4a84, 0x4a88); - u32 tx_dma_tcp_flags_control[2]; - CLIB_PAD_FROM_TO (0x4a90, 0x4b00); - u32 pf_mailbox[64]; - CLIB_PAD_FROM_TO (0x4c00, 0x5000); - - /* RX */ - u32 checksum_control; - CLIB_PAD_FROM_TO (0x5004, 0x5008); - u32 rx_filter_control; - CLIB_PAD_FROM_TO (0x500c, 0x5010); - u32 management_vlan_tag[8]; - u32 management_udp_tcp_ports[8]; - CLIB_PAD_FROM_TO (0x5050, 0x5078); - /* little endian. */ - u32 extended_vlan_ether_type; - CLIB_PAD_FROM_TO (0x507c, 0x5080); - /* [1] store/dma bad packets - [8] accept all multicast - [9] accept all unicast - [10] accept all broadcast. */ - u32 filter_control; - CLIB_PAD_FROM_TO (0x5084, 0x5088); - /* [15:0] vlan ethernet type (0x8100) little endian - [28] cfi bit expected - [29] drop packets with unexpected cfi bit - [30] vlan filter enable. */ - u32 vlan_control; - CLIB_PAD_FROM_TO (0x508c, 0x5090); - /* [1:0] hi bit of ethernet address for 12 bit index into multicast table - 0 => 47, 1 => 46, 2 => 45, 3 => 43. - [2] enable multicast filter - */ - u32 multicast_control; - CLIB_PAD_FROM_TO (0x5094, 0x5100); - u32 fcoe_rx_control; - CLIB_PAD_FROM_TO (0x5104, 0x5108); - u32 fc_flt_context; - CLIB_PAD_FROM_TO (0x510c, 0x5110); - u32 fc_filter_control; - CLIB_PAD_FROM_TO (0x5114, 0x5120); - u32 rx_message_type_lo; - CLIB_PAD_FROM_TO (0x5124, 0x5128); - /* [15:0] ethernet type (little endian) - [18:16] matche pri in vlan tag - [19] priority match enable - [25:20] virtualization pool - [26] pool enable - [27] is fcoe - [30] ieee 1588 timestamp enable - [31] filter enable. - (See ethernet_type_queue_select.) */ - u32 ethernet_type_queue_filter[8]; - CLIB_PAD_FROM_TO (0x5148, 0x5160); - /* [7:0] l2 ethernet type and - [15:8] l2 ethernet type or */ - u32 management_decision_filters1[8]; - u32 vf_vm_tx_switch_loopback_enable[2]; - u32 rx_time_sync_control; - CLIB_PAD_FROM_TO (0x518c, 0x5190); - u32 management_ethernet_type_filters[4]; - u32 rx_timestamp_attributes_lo; - u32 rx_timestamp_hi; - u32 rx_timestamp_attributes_hi; - CLIB_PAD_FROM_TO (0x51ac, 0x51b0); - u32 pf_virtual_control; - CLIB_PAD_FROM_TO (0x51b4, 0x51d8); - u32 fc_offset_parameter; - CLIB_PAD_FROM_TO (0x51dc, 0x51e0); - u32 vf_rx_enable[2]; - u32 rx_timestamp_lo; - CLIB_PAD_FROM_TO (0x51ec, 0x5200); - /* 12 bits determined by multicast_control - lookup bits in this vector. */ - u32 multicast_enable[128]; - - /* [0] ethernet address [31:0] - [1] [15:0] ethernet address [47:32] - [31] valid bit. - Index 0 is read from eeprom after reset. 
*/ - u32 rx_ethernet_address0[16][2]; - - CLIB_PAD_FROM_TO (0x5480, 0x5800); - u32 wake_up_control; - CLIB_PAD_FROM_TO (0x5804, 0x5808); - u32 wake_up_filter_control; - CLIB_PAD_FROM_TO (0x580c, 0x5818); - u32 multiple_rx_queue_command_82598; - CLIB_PAD_FROM_TO (0x581c, 0x5820); - u32 management_control; - u32 management_filter_control; - CLIB_PAD_FROM_TO (0x5828, 0x5838); - u32 wake_up_ip4_address_valid; - CLIB_PAD_FROM_TO (0x583c, 0x5840); - u32 wake_up_ip4_address_table[4]; - u32 management_control_to_host; - CLIB_PAD_FROM_TO (0x5854, 0x5880); - u32 wake_up_ip6_address_table[4]; - - /* unicast_and broadcast_and vlan_and ip_address_and - etc. */ - u32 management_decision_filters[8]; - - u32 management_ip4_or_ip6_address_filters[4][4]; - CLIB_PAD_FROM_TO (0x58f0, 0x5900); - u32 wake_up_packet_length; - CLIB_PAD_FROM_TO (0x5904, 0x5910); - u32 management_ethernet_address_filters[4][2]; - CLIB_PAD_FROM_TO (0x5930, 0x5a00); - u32 wake_up_packet_memory[32]; - CLIB_PAD_FROM_TO (0x5a80, 0x5c00); - u32 redirection_table_82598[32]; - u32 rss_random_keys_82598[10]; - CLIB_PAD_FROM_TO (0x5ca8, 0x6000); - - ixge_dma_regs_t tx_dma[128]; - - u32 pf_vm_vlan_insert[64]; - u32 tx_dma_tcp_max_alloc_size_requests; - CLIB_PAD_FROM_TO (0x8104, 0x8110); - u32 vf_tx_enable[2]; - CLIB_PAD_FROM_TO (0x8118, 0x8120); - /* [0] dcb mode enable - [1] virtualization mode enable - [3:2] number of tcs/qs per pool. */ - u32 multiple_tx_queues_command; - CLIB_PAD_FROM_TO (0x8124, 0x8200); - u32 pf_vf_anti_spoof[8]; - u32 pf_dma_tx_switch_control; - CLIB_PAD_FROM_TO (0x8224, 0x82e0); - u32 tx_strict_low_latency_queues[4]; - CLIB_PAD_FROM_TO (0x82f0, 0x8600); - u32 tx_queue_stats_mapping_82599[32]; - u32 tx_queue_packet_counts[32]; - u32 tx_queue_byte_counts[32][2]; - - struct - { - u32 control; - u32 status; - u32 buffer_almost_full; - CLIB_PAD_FROM_TO (0x880c, 0x8810); - u32 buffer_min_ifg; - CLIB_PAD_FROM_TO (0x8814, 0x8900); - } tx_security; - - struct - { - u32 index; - u32 salt; - u32 key[4]; - CLIB_PAD_FROM_TO (0x8918, 0x8a00); - } tx_ipsec; - - struct - { - u32 capabilities; - u32 control; - u32 tx_sci[2]; - u32 sa; - u32 sa_pn[2]; - u32 key[2][4]; - /* untagged packets, encrypted packets, protected packets, - encrypted bytes, protected bytes */ - u32 stats[5]; - CLIB_PAD_FROM_TO (0x8a50, 0x8c00); - } tx_link_security; - - struct - { - u32 control; - u32 timestamp_value[2]; - u32 system_time[2]; - u32 increment_attributes; - u32 time_adjustment_offset[2]; - u32 aux_control; - u32 target_time[2][2]; - CLIB_PAD_FROM_TO (0x8c34, 0x8c3c); - u32 aux_time_stamp[2][2]; - CLIB_PAD_FROM_TO (0x8c4c, 0x8d00); - } tx_timesync; - - struct - { - u32 control; - u32 status; - CLIB_PAD_FROM_TO (0x8d08, 0x8e00); - } rx_security; - - struct - { - u32 index; - u32 ip_address[4]; - u32 spi; - u32 ip_index; - u32 key[4]; - u32 salt; - u32 mode; - CLIB_PAD_FROM_TO (0x8e34, 0x8f00); - } rx_ipsec; - - struct - { - u32 capabilities; - u32 control; - u32 sci[2]; - u32 sa[2]; - u32 sa_pn[2]; - u32 key[2][4]; - /* see datasheet */ - u32 stats[17]; - CLIB_PAD_FROM_TO (0x8f84, 0x9000); - } rx_link_security; - - /* 4 wake up, 2 management, 2 wake up. */ - u32 flexible_filters[8][16][4]; - CLIB_PAD_FROM_TO (0x9800, 0xa000); - - /* 4096 bits. */ - u32 vlan_filter[128]; - - /* [0] ethernet address [31:0] - [1] [15:0] ethernet address [47:32] - [31] valid bit. - Index 0 is read from eeprom after reset. */ - u32 rx_ethernet_address1[128][2]; - - /* select one of 64 pools for each rx address. 
*/ - u32 rx_ethernet_address_pool_select[128][2]; - CLIB_PAD_FROM_TO (0xaa00, 0xc800); - u32 tx_priority_to_traffic_class; - CLIB_PAD_FROM_TO (0xc804, 0xcc00); - - /* In bytes units of 1k. Total packet buffer is 160k. */ - u32 tx_packet_buffer_size[8]; - - CLIB_PAD_FROM_TO (0xcc20, 0xcd10); - u32 tx_manageability_tc_mapping; - CLIB_PAD_FROM_TO (0xcd14, 0xcd20); - u32 dcb_tx_packet_plane_t2_config[8]; - u32 dcb_tx_packet_plane_t2_status[8]; - CLIB_PAD_FROM_TO (0xcd60, 0xce00); - - u32 tx_flow_control_status; - CLIB_PAD_FROM_TO (0xce04, 0xd000); - - ixge_dma_regs_t rx_dma1[64]; - - struct - { - /* Bigendian ip4 src/dst address. */ - u32 src_address[128]; - u32 dst_address[128]; - - /* TCP/UDP ports [15:0] src [31:16] dst; bigendian. */ - u32 tcp_udp_port[128]; - - /* [1:0] protocol tcp, udp, sctp, other - [4:2] match priority (highest wins) - [13:8] pool - [25] src address match disable - [26] dst address match disable - [27] src port match disable - [28] dst port match disable - [29] protocol match disable - [30] pool match disable - [31] enable. */ - u32 control[128]; - - /* [12] size bypass - [19:13] must be 0x80 - [20] low-latency interrupt - [27:21] rx queue. */ - u32 interrupt[128]; - } ip4_filters; - - CLIB_PAD_FROM_TO (0xea00, 0xeb00); - /* 4 bit rss output index indexed by 7 bit hash. - 128 8 bit fields = 32 registers. */ - u32 redirection_table_82599[32]; - - u32 rss_random_key_82599[10]; - CLIB_PAD_FROM_TO (0xeba8, 0xec00); - /* [15:0] reserved - [22:16] rx queue index - [29] low-latency interrupt on match - [31] enable */ - u32 ethernet_type_queue_select[8]; - CLIB_PAD_FROM_TO (0xec20, 0xec30); - u32 syn_packet_queue_filter; - CLIB_PAD_FROM_TO (0xec34, 0xec60); - u32 immediate_interrupt_rx_vlan_priority; - CLIB_PAD_FROM_TO (0xec64, 0xec70); - u32 rss_queues_per_traffic_class; - CLIB_PAD_FROM_TO (0xec74, 0xec90); - u32 lli_size_threshold; - CLIB_PAD_FROM_TO (0xec94, 0xed00); - - struct - { - u32 control; - CLIB_PAD_FROM_TO (0xed04, 0xed10); - u32 table[8]; - CLIB_PAD_FROM_TO (0xed30, 0xee00); - } fcoe_redirection; - - struct - { - /* [1:0] packet buffer allocation 0 => disabled, else 64k*2^(f-1) - [3] packet buffer initialization done - [4] perfetch match mode - [5] report status in rss field of rx descriptors - [7] report status always - [14:8] drop queue - [20:16] flex 2 byte packet offset (units of 2 bytes) - [27:24] max linked list length - [31:28] full threshold. */ - u32 control; - CLIB_PAD_FROM_TO (0xee04, 0xee0c); - - u32 data[8]; - - /* [1:0] 0 => no action, 1 => add, 2 => remove, 3 => query. - [2] valid filter found by query command - [3] filter update override - [4] ip6 adress table - [6:5] l4 protocol reserved, udp, tcp, sctp - [7] is ip6 - [8] clear head/tail - [9] packet drop action - [10] matched packet generates low-latency interrupt - [11] last in linked list - [12] collision - [15] rx queue enable - [22:16] rx queue - [29:24] pool. */ - u32 command; - - CLIB_PAD_FROM_TO (0xee30, 0xee3c); - /* ip4 dst/src address, tcp ports, udp ports. - set bits mean bit is ignored. */ - u32 ip4_masks[4]; - u32 filter_length; - u32 usage_stats; - u32 failed_usage_stats; - u32 filters_match_stats; - u32 filters_miss_stats; - CLIB_PAD_FROM_TO (0xee60, 0xee68); - /* Lookup, signature. */ - u32 hash_keys[2]; - /* [15:0] ip6 src address 1 bit per byte - [31:16] ip6 dst address. */ - u32 ip6_mask; - /* [0] vlan id - [1] vlan priority - [2] pool - [3] ip protocol - [4] flex - [5] dst ip6. 
*/ - u32 other_mask; - CLIB_PAD_FROM_TO (0xee78, 0xf000); - } flow_director; - - struct - { - u32 l2_control[64]; - u32 vlan_pool_filter[64]; - u32 vlan_pool_filter_bitmap[128]; - u32 dst_ethernet_address[128]; - u32 mirror_rule[4]; - u32 mirror_rule_vlan[8]; - u32 mirror_rule_pool[8]; - CLIB_PAD_FROM_TO (0xf650, 0x10010); - } pf_bar; - - u32 eeprom_flash_control; - /* [0] start - [1] done - [15:2] address - [31:16] read data. */ - u32 eeprom_read; - CLIB_PAD_FROM_TO (0x10018, 0x1001c); - u32 flash_access; - CLIB_PAD_FROM_TO (0x10020, 0x10114); - u32 flash_data; - u32 flash_control; - u32 flash_read_data; - CLIB_PAD_FROM_TO (0x10120, 0x1013c); - u32 flash_opcode; - u32 software_semaphore; - CLIB_PAD_FROM_TO (0x10144, 0x10148); - u32 firmware_semaphore; - CLIB_PAD_FROM_TO (0x1014c, 0x10160); - u32 software_firmware_sync; - CLIB_PAD_FROM_TO (0x10164, 0x10200); - u32 general_rx_control; - CLIB_PAD_FROM_TO (0x10204, 0x11000); - - struct - { - u32 control; - CLIB_PAD_FROM_TO (0x11004, 0x11010); - /* [3:0] enable counters - [7:4] leaky bucket counter mode - [29] reset - [30] stop - [31] start. */ - u32 counter_control; - /* [7:0],[15:8],[23:16],[31:24] event for counters 0-3. - event codes: - 0x0 bad tlp - 0x10 reqs that reached timeout - etc. */ - u32 counter_event; - CLIB_PAD_FROM_TO (0x11018, 0x11020); - u32 counters_clear_on_read[4]; - u32 counter_config[4]; - struct - { - u32 address; - u32 data; - } indirect_access; - CLIB_PAD_FROM_TO (0x11048, 0x11050); - u32 extended_control; - CLIB_PAD_FROM_TO (0x11054, 0x11064); - u32 mirrored_revision_id; - CLIB_PAD_FROM_TO (0x11068, 0x11070); - u32 dca_requester_id_information; - - /* [0] global disable - [4:1] mode: 0 => legacy, 1 => dca 1.0. */ - u32 dca_control; - CLIB_PAD_FROM_TO (0x11078, 0x110b0); - /* [0] pci completion abort - [1] unsupported i/o address - [2] wrong byte enable - [3] pci timeout */ - u32 pcie_interrupt_status; - CLIB_PAD_FROM_TO (0x110b4, 0x110b8); - u32 pcie_interrupt_enable; - CLIB_PAD_FROM_TO (0x110bc, 0x110c0); - u32 msi_x_pba_clear[8]; - CLIB_PAD_FROM_TO (0x110e0, 0x12300); - } pcie; - - u32 interrupt_throttle1[128 - 24]; - CLIB_PAD_FROM_TO (0x124a0, 0x14f00); - - u32 core_analog_config; - CLIB_PAD_FROM_TO (0x14f04, 0x14f10); - u32 core_common_config; - CLIB_PAD_FROM_TO (0x14f14, 0x15f14); - - u32 link_sec_software_firmware_interface; -} ixge_regs_t; - -typedef union -{ - struct - { - /* Addresses bigendian. */ - union - { - struct - { - ip6_address_t src_address; - u32 unused[1]; - } ip6; - struct - { - u32 unused[3]; - ip4_address_t src_address, dst_address; - } ip4; - }; - - /* [15:0] src port (little endian). - [31:16] dst port. */ - u32 tcp_udp_ports; - - /* [15:0] vlan (cfi bit set to 0). - [31:16] flex bytes. bigendian. */ - u32 vlan_and_flex_word; - - /* [14:0] hash - [15] bucket valid - [31:16] signature (signature filers)/sw-index (perfect match). */ - u32 hash; - }; - - u32 as_u32[8]; -} ixge_flow_director_key_t; - -always_inline void -ixge_throttle_queue_interrupt (ixge_regs_t * r, - u32 queue_interrupt_index, - f64 inter_interrupt_interval_in_secs) -{ - volatile u32 *tr = - (queue_interrupt_index < ARRAY_LEN (r->interrupt.throttle0) - ? &r->interrupt.throttle0[queue_interrupt_index] - : &r->interrupt_throttle1[queue_interrupt_index]); - ASSERT (queue_interrupt_index < 128); - u32 v; - i32 i, mask = (1 << 9) - 1; - - i = flt_round_nearest (inter_interrupt_interval_in_secs / 2e-6); - i = i < 1 ? 1 : i; - i = i >= mask ? 
mask : i; - - v = tr[0]; - v &= ~(mask << 3); - v |= i << 3; - tr[0] = v; -} - -#define foreach_ixge_counter \ - _ (0x40d0, rx_total_packets) \ - _64 (0x40c0, rx_total_bytes) \ - _ (0x41b0, rx_good_packets_before_filtering) \ - _64 (0x41b4, rx_good_bytes_before_filtering) \ - _ (0x2f50, rx_dma_good_packets) \ - _64 (0x2f54, rx_dma_good_bytes) \ - _ (0x2f5c, rx_dma_duplicated_good_packets) \ - _64 (0x2f60, rx_dma_duplicated_good_bytes) \ - _ (0x2f68, rx_dma_good_loopback_packets) \ - _64 (0x2f6c, rx_dma_good_loopback_bytes) \ - _ (0x2f74, rx_dma_good_duplicated_loopback_packets) \ - _64 (0x2f78, rx_dma_good_duplicated_loopback_bytes) \ - _ (0x4074, rx_good_packets) \ - _64 (0x4088, rx_good_bytes) \ - _ (0x407c, rx_multicast_packets) \ - _ (0x4078, rx_broadcast_packets) \ - _ (0x405c, rx_64_byte_packets) \ - _ (0x4060, rx_65_127_byte_packets) \ - _ (0x4064, rx_128_255_byte_packets) \ - _ (0x4068, rx_256_511_byte_packets) \ - _ (0x406c, rx_512_1023_byte_packets) \ - _ (0x4070, rx_gt_1023_byte_packets) \ - _ (0x4000, rx_crc_errors) \ - _ (0x4120, rx_ip_checksum_errors) \ - _ (0x4004, rx_illegal_symbol_errors) \ - _ (0x4008, rx_error_symbol_errors) \ - _ (0x4034, rx_mac_local_faults) \ - _ (0x4038, rx_mac_remote_faults) \ - _ (0x4040, rx_length_errors) \ - _ (0x41a4, rx_xons) \ - _ (0x41a8, rx_xoffs) \ - _ (0x40a4, rx_undersize_packets) \ - _ (0x40a8, rx_fragments) \ - _ (0x40ac, rx_oversize_packets) \ - _ (0x40b0, rx_jabbers) \ - _ (0x40b4, rx_management_packets) \ - _ (0x40b8, rx_management_drops) \ - _ (0x3fa0, rx_missed_packets_pool_0) \ - _ (0x40d4, tx_total_packets) \ - _ (0x4080, tx_good_packets) \ - _64 (0x4090, tx_good_bytes) \ - _ (0x40f0, tx_multicast_packets) \ - _ (0x40f4, tx_broadcast_packets) \ - _ (0x87a0, tx_dma_good_packets) \ - _64 (0x87a4, tx_dma_good_bytes) \ - _ (0x40d8, tx_64_byte_packets) \ - _ (0x40dc, tx_65_127_byte_packets) \ - _ (0x40e0, tx_128_255_byte_packets) \ - _ (0x40e4, tx_256_511_byte_packets) \ - _ (0x40e8, tx_512_1023_byte_packets) \ - _ (0x40ec, tx_gt_1023_byte_packets) \ - _ (0x4010, tx_undersize_drops) \ - _ (0x8780, switch_security_violation_packets) \ - _ (0x5118, fc_crc_errors) \ - _ (0x241c, fc_rx_drops) \ - _ (0x2424, fc_last_error_count) \ - _ (0x2428, fcoe_rx_packets) \ - _ (0x242c, fcoe_rx_dwords) \ - _ (0x8784, fcoe_tx_packets) \ - _ (0x8788, fcoe_tx_dwords) \ - _ (0x1030, queue_0_rx_count) \ - _ (0x1430, queue_0_drop_count) \ - _ (0x1070, queue_1_rx_count) \ - _ (0x1470, queue_1_drop_count) \ - _ (0x10b0, queue_2_rx_count) \ - _ (0x14b0, queue_2_drop_count) \ - _ (0x10f0, queue_3_rx_count) \ - _ (0x14f0, queue_3_drop_count) \ - _ (0x1130, queue_4_rx_count) \ - _ (0x1530, queue_4_drop_count) \ - _ (0x1170, queue_5_rx_count) \ - _ (0x1570, queue_5_drop_count) \ - _ (0x11b0, queue_6_rx_count) \ - _ (0x15b0, queue_6_drop_count) \ - _ (0x11f0, queue_7_rx_count) \ - _ (0x15f0, queue_7_drop_count) \ - _ (0x1230, queue_8_rx_count) \ - _ (0x1630, queue_8_drop_count) \ - _ (0x1270, queue_9_rx_count) \ - _ (0x1270, queue_9_drop_count) - - - - -typedef enum -{ -#define _(a,f) IXGE_COUNTER_##f, -#define _64(a,f) _(a,f) - foreach_ixge_counter -#undef _ -#undef _64 - IXGE_N_COUNTER, -} ixge_counter_type_t; - -typedef struct -{ - u32 mdio_address; - - /* 32 bit ID read from ID registers. */ - u32 id; -} ixge_phy_t; - -typedef struct -{ - /* Cache aligned descriptors. */ - ixge_descriptor_t *descriptors; - - /* Number of descriptors in table. */ - u32 n_descriptors; - - /* Software head and tail pointers into descriptor ring. 
*/ - u32 head_index, tail_index; - - /* Index into dma_queues vector. */ - u32 queue_index; - - /* Buffer indices corresponding to each active descriptor. */ - u32 *descriptor_buffer_indices; - - union - { - struct - { - u32 *volatile head_index_write_back; - - u32 n_buffers_on_ring; - } tx; - - struct - { - /* Buffer indices to use to replenish each descriptor. */ - u32 *replenish_buffer_indices; - - vlib_node_runtime_t *node; - u32 next_index; - - u32 saved_start_of_packet_buffer_index; - - u32 saved_start_of_packet_next_index; - u32 saved_last_buffer_index; - - u32 is_start_of_packet; - - u32 n_descriptors_done_total; - - u32 n_descriptors_done_this_call; - - u32 n_bytes; - } rx; - }; -} ixge_dma_queue_t; - -#define foreach_ixge_pci_device_id \ - _ (82598, 0x10b6) \ - _ (82598_bx, 0x1508) \ - _ (82598af_dual_port, 0x10c6) \ - _ (82598af_single_port, 0x10c7) \ - _ (82598at, 0x10c8) \ - _ (82598at2, 0x150b) \ - _ (82598eb_sfp_lom, 0x10db) \ - _ (82598eb_cx4, 0x10dd) \ - _ (82598_cx4_dual_port, 0x10ec) \ - _ (82598_da_dual_port, 0x10f1) \ - _ (82598_sr_dual_port_em, 0x10e1) \ - _ (82598eb_xf_lr, 0x10f4) \ - _ (82599_kx4, 0x10f7) \ - _ (82599_kx4_mezz, 0x1514) \ - _ (82599_kr, 0x1517) \ - _ (82599_combo_backplane, 0x10f8) \ - _ (82599_cx4, 0x10f9) \ - _ (82599_sfp, 0x10fb) \ - _ (82599_backplane_fcoe, 0x152a) \ - _ (82599_sfp_fcoe, 0x1529) \ - _ (82599_sfp_em, 0x1507) \ - _ (82599_xaui_lom, 0x10fc) \ - _ (82599_t3_lom, 0x151c) \ - _ (x540t, 0x1528) - -typedef enum -{ -#define _(f,n) IXGE_##f = n, - foreach_ixge_pci_device_id -#undef _ -} ixge_pci_device_id_t; - -typedef struct -{ - /* registers */ - ixge_regs_t *regs; - - /* Specific next index when using dynamic redirection */ - u32 per_interface_next_index; - - /* PCI bus info. */ - vlib_pci_device_t pci_device; - - /* From PCI config space header. */ - ixge_pci_device_id_t device_id; - - u16 device_index; - - /* 0 or 1. */ - u16 pci_function; - - /* VLIB interface for this instance. */ - u32 vlib_hw_if_index, vlib_sw_if_index; - - ixge_dma_queue_t *dma_queues[VLIB_N_RX_TX]; - - /* Phy index (0 or 1) and address on MDI bus. */ - u32 phy_index; - ixge_phy_t phys[2]; - - /* Value of link_status register at last link change. */ - u32 link_status_at_last_link_change; - - i2c_bus_t i2c_bus; - sfp_eeprom_t sfp_eeprom; - - /* Counters. */ - u64 counters[IXGE_N_COUNTER], counters_last_clear[IXGE_N_COUNTER]; -} ixge_device_t; - -typedef struct -{ - vlib_main_t *vlib_main; - - /* Vector of devices. */ - ixge_device_t *devices; - - /* Descriptor ring sizes. */ - u32 n_descriptors[VLIB_N_RX_TX]; - - /* RX buffer size. Must be at least 1k; will be rounded to - next largest 1k size. */ - u32 n_bytes_in_rx_buffer; - - u32 n_descriptors_per_cache_line; - - u32 vlib_buffer_free_list_index; - - u32 process_node_index; - - /* Template and mask for initializing/validating TX descriptors. */ - ixge_tx_descriptor_t tx_descriptor_template, tx_descriptor_template_mask; - - /* Vector of buffers for which TX is done and can be freed. 
*/ - u32 *tx_buffers_pending_free; - - u32 *rx_buffers_to_add; - - f64 time_last_stats_update; -} ixge_main_t; - -ixge_main_t ixge_main; -vnet_device_class_t ixge_device_class; - -typedef enum -{ - IXGE_RX_NEXT_IP4_INPUT, - IXGE_RX_NEXT_IP6_INPUT, - IXGE_RX_NEXT_ETHERNET_INPUT, - IXGE_RX_NEXT_DROP, - IXGE_RX_N_NEXT, -} ixge_rx_next_t; - -void ixge_set_next_node (ixge_rx_next_t, char *); - -#endif /* included_ixge_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/nic/sfp.c b/src/vnet/devices/nic/sfp.c deleted file mode 100644 index 9e9c008d..00000000 --- a/src/vnet/devices/nic/sfp.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -static u8 * -format_space_terminated (u8 * s, va_list * args) -{ - u32 l = va_arg (*args, u32); - u8 *v = va_arg (*args, u8 *); - u8 *p; - - for (p = v + l - 1; p >= v && p[0] == ' '; p--) - ; - vec_add (s, v, clib_min (p - v + 1, l)); - return s; -} - -static u8 * -format_sfp_id (u8 * s, va_list * args) -{ - u32 id = va_arg (*args, u32); - char *t = 0; - switch (id) - { -#define _(f) case SFP_ID_##f: t = #f; break; - foreach_sfp_id -#undef _ - default: - return format (s, "unknown 0x%x", id); - } - return format (s, "%s", t); -} - -static u8 * -format_sfp_compatibility (u8 * s, va_list * args) -{ - u32 c = va_arg (*args, u32); - char *t = 0; - switch (c) - { -#define _(a,b,f) case SFP_COMPATIBILITY_##f: t = #f; break; - foreach_sfp_compatibility -#undef _ - default: - return format (s, "unknown 0x%x", c); - } - return format (s, "%s", t); -} - -u32 -sfp_is_comatible (sfp_eeprom_t * e, sfp_compatibility_t c) -{ - static struct - { - u8 byte, bit; - } t[] = - { -#define _(a,b,f) { .byte = a, .bit = b, }, - foreach_sfp_compatibility -#undef _ - }; - - ASSERT (c < ARRAY_LEN (t)); - return (e->compatibility[t[c].byte] & (1 << t[c].bit)) != 0; -} - -u8 * -format_sfp_eeprom (u8 * s, va_list * args) -{ - sfp_eeprom_t *e = va_arg (*args, sfp_eeprom_t *); - uword indent = format_get_indent (s); - int i; - - if (e->id != SFP_ID_sfp) - s = format (s, "id %U, ", format_sfp_id, e->id); - - s = format (s, "compatibility:"); - for (i = 0; i < SFP_N_COMPATIBILITY; i++) - if (sfp_is_comatible (e, i)) - s = format (s, " %U", format_sfp_compatibility, i); - - s = format (s, "\n%Uvendor: %U, part %U", - format_white_space, indent, - format_space_terminated, sizeof (e->vendor_name), - e->vendor_name, format_space_terminated, - sizeof (e->vendor_part_number), e->vendor_part_number); - s = - format (s, "\n%Urevision: %U, serial: %U, date code: %U", - format_white_space, indent, format_space_terminated, - sizeof (e->vendor_revision), e->vendor_revision, - format_space_terminated, sizeof (e->vendor_serial_number), - e->vendor_serial_number, format_space_terminated, - sizeof (e->vendor_date_code), e->vendor_date_code); - - return s; -} - -/* - * fd.io 
coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/nic/sfp.h b/src/vnet/devices/nic/sfp.h deleted file mode 100644 index a1ac7997..00000000 --- a/src/vnet/devices/nic/sfp.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_vnet_optics_sfp_h -#define included_vnet_optics_sfp_h - -#include - -#define foreach_sfp_id \ - _ (unknown) \ - _ (gbic) \ - _ (on_motherboard) \ - _ (sfp) - -typedef enum -{ -#define _(f) SFP_ID_##f, - foreach_sfp_id -#undef _ -} sfp_id_t; - -typedef struct -{ - u8 id; - u8 extended_id; - u8 connector_type; - u8 compatibility[8]; - u8 encoding; - u8 nominal_bit_rate_100mbits_per_sec; - u8 reserved13; - u8 link_length[5]; - u8 reserved19; - u8 vendor_name[16]; - u8 reserved36; - u8 vendor_oui[3]; - u8 vendor_part_number[16]; - u8 vendor_revision[4]; - /* 16 bit value network byte order. */ - u8 laser_wavelength_in_nm[2]; - u8 reserved62; - u8 checksum_0_to_62; - - u8 options[2]; - u8 max_bit_rate_margin_percent; - u8 min_bit_rate_margin_percent; - u8 vendor_serial_number[16]; - u8 vendor_date_code[8]; - u8 reserved92[3]; - u8 checksum_63_to_94; - u8 vendor_specific[32]; - u8 reserved128[384]; - - /* Vendor specific data follows. */ - u8 vendor_specific1[0]; -} sfp_eeprom_t; - -always_inline uword -sfp_eeprom_is_valid (sfp_eeprom_t * e) -{ - int i; - u8 sum = 0; - for (i = 0; i < 63; i++) - sum += ((u8 *) e)[i]; - return sum == e->checksum_0_to_62; -} - -/* _ (byte_index, bit_index, name) */ -#define foreach_sfp_compatibility \ - _ (0, 4, 10g_base_sr) \ - _ (0, 5, 10g_base_lr) \ - _ (1, 2, oc48_long_reach) \ - _ (1, 1, oc48_intermediate_reach) \ - _ (1, 0, oc48_short_reach) \ - _ (2, 6, oc12_long_reach) \ - _ (2, 5, oc12_intermediate_reach) \ - _ (2, 4, oc12_short_reach) \ - _ (2, 2, oc3_long_reach) \ - _ (2, 1, oc3_intermediate_reach) \ - _ (2, 0, oc3_short_reach) \ - _ (3, 3, 1g_base_t) \ - _ (3, 2, 1g_base_cx) \ - _ (3, 1, 1g_base_lx) \ - _ (3, 0, 1g_base_sx) - -typedef enum -{ -#define _(a,b,f) SFP_COMPATIBILITY_##f, - foreach_sfp_compatibility -#undef _ - SFP_N_COMPATIBILITY, -} sfp_compatibility_t; - -u32 sfp_is_comatible (sfp_eeprom_t * e, sfp_compatibility_t c); - -format_function_t format_sfp_eeprom; - -#endif /* included_vnet_optics_sfp_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ethernet/sfp.c b/src/vnet/ethernet/sfp.c new file mode 100644 index 00000000..624740e3 --- /dev/null +++ b/src/vnet/ethernet/sfp.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +static u8 * +format_space_terminated (u8 * s, va_list * args) +{ + u32 l = va_arg (*args, u32); + u8 *v = va_arg (*args, u8 *); + u8 *p; + + for (p = v + l - 1; p >= v && p[0] == ' '; p--) + ; + vec_add (s, v, clib_min (p - v + 1, l)); + return s; +} + +static u8 * +format_sfp_id (u8 * s, va_list * args) +{ + u32 id = va_arg (*args, u32); + char *t = 0; + switch (id) + { +#define _(f) case SFP_ID_##f: t = #f; break; + foreach_sfp_id +#undef _ + default: + return format (s, "unknown 0x%x", id); + } + return format (s, "%s", t); +} + +static u8 * +format_sfp_compatibility (u8 * s, va_list * args) +{ + u32 c = va_arg (*args, u32); + char *t = 0; + switch (c) + { +#define _(a,b,f) case SFP_COMPATIBILITY_##f: t = #f; break; + foreach_sfp_compatibility +#undef _ + default: + return format (s, "unknown 0x%x", c); + } + return format (s, "%s", t); +} + +u32 +sfp_is_comatible (sfp_eeprom_t * e, sfp_compatibility_t c) +{ + static struct + { + u8 byte, bit; + } t[] = + { +#define _(a,b,f) { .byte = a, .bit = b, }, + foreach_sfp_compatibility +#undef _ + }; + + ASSERT (c < ARRAY_LEN (t)); + return (e->compatibility[t[c].byte] & (1 << t[c].bit)) != 0; +} + +u8 * +format_sfp_eeprom (u8 * s, va_list * args) +{ + sfp_eeprom_t *e = va_arg (*args, sfp_eeprom_t *); + uword indent = format_get_indent (s); + int i; + + if (e->id != SFP_ID_sfp) + s = format (s, "id %U, ", format_sfp_id, e->id); + + s = format (s, "compatibility:"); + for (i = 0; i < SFP_N_COMPATIBILITY; i++) + if (sfp_is_comatible (e, i)) + s = format (s, " %U", format_sfp_compatibility, i); + + s = format (s, "\n%Uvendor: %U, part %U", + format_white_space, indent, + format_space_terminated, sizeof (e->vendor_name), + e->vendor_name, format_space_terminated, + sizeof (e->vendor_part_number), e->vendor_part_number); + s = + format (s, "\n%Urevision: %U, serial: %U, date code: %U", + format_white_space, indent, format_space_terminated, + sizeof (e->vendor_revision), e->vendor_revision, + format_space_terminated, sizeof (e->vendor_serial_number), + e->vendor_serial_number, format_space_terminated, + sizeof (e->vendor_date_code), e->vendor_date_code); + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/sfp.h b/src/vnet/ethernet/sfp.h new file mode 100644 index 00000000..a1ac7997 --- /dev/null +++ b/src/vnet/ethernet/sfp.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_vnet_optics_sfp_h +#define included_vnet_optics_sfp_h + +#include + +#define foreach_sfp_id \ + _ (unknown) \ + _ (gbic) \ + _ (on_motherboard) \ + _ (sfp) + +typedef enum +{ +#define _(f) SFP_ID_##f, + foreach_sfp_id +#undef _ +} sfp_id_t; + +typedef struct +{ + u8 id; + u8 extended_id; + u8 connector_type; + u8 compatibility[8]; + u8 encoding; + u8 nominal_bit_rate_100mbits_per_sec; + u8 reserved13; + u8 link_length[5]; + u8 reserved19; + u8 vendor_name[16]; + u8 reserved36; + u8 vendor_oui[3]; + u8 vendor_part_number[16]; + u8 vendor_revision[4]; + /* 16 bit value network byte order. */ + u8 laser_wavelength_in_nm[2]; + u8 reserved62; + u8 checksum_0_to_62; + + u8 options[2]; + u8 max_bit_rate_margin_percent; + u8 min_bit_rate_margin_percent; + u8 vendor_serial_number[16]; + u8 vendor_date_code[8]; + u8 reserved92[3]; + u8 checksum_63_to_94; + u8 vendor_specific[32]; + u8 reserved128[384]; + + /* Vendor specific data follows. */ + u8 vendor_specific1[0]; +} sfp_eeprom_t; + +always_inline uword +sfp_eeprom_is_valid (sfp_eeprom_t * e) +{ + int i; + u8 sum = 0; + for (i = 0; i < 63; i++) + sum += ((u8 *) e)[i]; + return sum == e->checksum_0_to_62; +} + +/* _ (byte_index, bit_index, name) */ +#define foreach_sfp_compatibility \ + _ (0, 4, 10g_base_sr) \ + _ (0, 5, 10g_base_lr) \ + _ (1, 2, oc48_long_reach) \ + _ (1, 1, oc48_intermediate_reach) \ + _ (1, 0, oc48_short_reach) \ + _ (2, 6, oc12_long_reach) \ + _ (2, 5, oc12_intermediate_reach) \ + _ (2, 4, oc12_short_reach) \ + _ (2, 2, oc3_long_reach) \ + _ (2, 1, oc3_intermediate_reach) \ + _ (2, 0, oc3_short_reach) \ + _ (3, 3, 1g_base_t) \ + _ (3, 2, 1g_base_cx) \ + _ (3, 1, 1g_base_lx) \ + _ (3, 0, 1g_base_sx) + +typedef enum +{ +#define _(a,b,f) SFP_COMPATIBILITY_##f, + foreach_sfp_compatibility +#undef _ + SFP_N_COMPATIBILITY, +} sfp_compatibility_t; + +u32 sfp_is_comatible (sfp_eeprom_t * e, sfp_compatibility_t c); + +format_function_t format_sfp_eeprom; + +#endif /* included_vnet_optics_sfp_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp-api/lua/bench.lua b/src/vpp-api/lua/bench.lua index 8e5a0b4b..c7231b90 100644 --- a/src/vpp-api/lua/bench.lua +++ b/src/vpp-api/lua/bench.lua @@ -53,9 +53,9 @@ function do_bench() end root_dir = "/home/ubuntu/vpp" -pneum_path = root_dir .. "/build-root/install-vpp_lite_debug-native/vpp-api/lib64/libpneum.so" +pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so" vpp:init({ pneum_path = pneum_path }) -vpp:json_api(root_dir .. "/build-root/install-vpp_lite_debug-native/vpp/vpp-api/vpe.api.json") +vpp:json_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api.json") vpp:connect("lua-bench") local n_tests = 10 diff --git a/src/vpp-api/lua/examples/cli/lua-cli.lua b/src/vpp-api/lua/examples/cli/lua-cli.lua index b3a24d7d..4a27af53 100644 --- a/src/vpp-api/lua/examples/cli/lua-cli.lua +++ b/src/vpp-api/lua/examples/cli/lua-cli.lua @@ -557,12 +557,12 @@ end function init_vpp(vpp) local root_dir = "/home/ubuntu/vpp" - local pneum_path = root_dir .. "/build-root/install-vpp_lite_debug-native/vpp-api/lib64/libpneum.so" + local pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so" vpp:init({ pneum_path = pneum_path }) vpp:init({ pneum_path = pneum_path }) - vpp:json_api(root_dir .. "/build-root/install-vpp_lite_debug-native/vpp/vpp-api/vpe.api.json") + vpp:json_api(root_dir .. 
"/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api.json") diff --git a/src/vpp-api/lua/examples/example-classifier.lua b/src/vpp-api/lua/examples/example-classifier.lua index ec9c3d3e..b1270757 100644 --- a/src/vpp-api/lua/examples/example-classifier.lua +++ b/src/vpp-api/lua/examples/example-classifier.lua @@ -20,12 +20,12 @@ local vpp = require "vpp-lapi" local bit = require("bit") root_dir = "/home/ubuntu/vpp" -pneum_path = root_dir .. "/build-root/install-vpp_lite_debug-native/vpp-api/lib64/libpneum.so" +pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so" vpp:init({ pneum_path = pneum_path }) -vpp:json_api(root_dir .. "/build-root/install-vpp_lite_debug-native/vpp/vpp-api/vpe.api.json") +vpp:json_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api.json") vpp:connect("aytest") diff --git a/src/vpp-api/lua/examples/example-cli.lua b/src/vpp-api/lua/examples/example-cli.lua index 8b84989f..85425caf 100644 --- a/src/vpp-api/lua/examples/example-cli.lua +++ b/src/vpp-api/lua/examples/example-cli.lua @@ -18,11 +18,11 @@ vpp = require "vpp-lapi" root_dir = "/home/ubuntu/vpp" -pneum_path = root_dir .. "/build-root/install-vpp_lite_debug-native/vpp-api/lib64/libpneum.so" +pneum_path = root_dir .. "/build-root/install-vpp_debug-native/vpp-api/lib64/libpneum.so" vpp:init({ pneum_path = pneum_path }) -vpp:json_api(root_dir .. "/build-root/install-vpp_lite_debug-native/vpp/vpp-api/vpe.api.json") +vpp:json_api(root_dir .. "/build-root/install-vpp_debug-native/vpp/vpp-api/vpe.api.json") vpp:connect("aytest") diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c index a566d956..d6a12325 100644 --- a/src/vpp/vnet/main.c +++ b/src/vpp/vnet/main.c @@ -199,9 +199,6 @@ defaulted: { vm->init_functions_called = hash_create (0, /* value bytes */ 0); vpe_main_init (vm); -#if DPDK == 0 - unix_physmem_init (vm, 0 /* fail_if_physical_memory_not_present */ ); -#endif return vlib_unix_main (argc, argv); } else diff --git a/test/framework.py b/test/framework.py index 6b1799a5..a0284e37 100644 --- a/test/framework.py +++ b/test/framework.py @@ -157,7 +157,9 @@ class VppTestCase(unittest.TestCase): cls.vpp_cmdline = [cls.vpp_bin, "unix", "{", "nodaemon", debug_cli, coredump_size, "}", "api-trace", "{", "on", "}", - "api-segment", "{", "prefix", cls.shm_prefix, "}"] + "api-segment", "{", "prefix", cls.shm_prefix, "}", + "plugins", "{", "plugin", "dpdk_plugin.so", "{", + "disable", "}", "}"] if cls.plugin_path is not None: cls.vpp_cmdline.extend(["plugin_path", cls.plugin_path]) cls.logger.info("vpp_cmdline: %s" % cls.vpp_cmdline) -- cgit 1.2.3-korg From f6dae05b8b5f4e17d1cf7e108bf8a6af3a2f9785 Mon Sep 17 00:00:00 2001 From: Steven Date: Thu, 9 Mar 2017 23:49:32 -0800 Subject: vhost: wrong value return for VHOST_USER_VRING_GET_BASE When the VM is migrated, the driver sends VHOST_USER_VRING_GET_BASE message to the device to get the vring offset. The device is supposed to shut down the vring, and return the current vring offset. What the code did was to shutdown the vring, initialize the vring, and return 0 to the driver. The fix is to first store last_avail_idx in the message and then close the vring. 
Change-Id: I432e9f50f36d89fe53a45e050edcf5e1218caf7a Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 100ec613..3cbeca9b 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -583,7 +583,10 @@ vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid) vring->callfd_idx = ~0; } if (vring->errfd != -1) - close (vring->errfd); + { + close (vring->errfd); + vring->errfd = -1; + } vhost_user_vring_init (vui, qid); } @@ -1026,12 +1029,16 @@ vhost_user_socket_read (unix_file_t * uf) goto close_socket; } - /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */ - vhost_user_vring_close (vui, msg.state.index); - + /* + * Copy last_avail_idx from the vring before closing it because + * closing the vring also initializes the vring last_avail_idx + */ msg.state.num = vui->vrings[msg.state.index].last_avail_idx; msg.flags |= 4; msg.size = sizeof (msg.state); + + /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */ + vhost_user_vring_close (vui, msg.state.index); break; case VHOST_USER_NONE: -- cgit 1.2.3-korg From 180279b912827c30494ec1b90ee4325a15cb337c Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Thu, 16 Mar 2017 15:49:09 -0400 Subject: Fix IP feature ordering. Drop comes before lookup when enabled. is_first_or_last is not required when setting a feature; the anchor is added in find_config_with_features(). Don't make the PG interfaces automatically L3 enabled; this way we can have tests that check the L3 protocol disabled behaviour. Change-Id: Icef22a920b27ff9cec6ab2da6b05f05c532cb60f Signed-off-by: Neale Ranns --- src/vnet/devices/devices.c | 1 - src/vnet/feature/feature.c | 15 ------- src/vnet/feature/feature.h | 3 -- src/vnet/interface_output.c | 1 - src/vnet/ip/ip4_forward.c | 29 ++++++------- src/vnet/pg/stream.c | 4 -- test/test_ip4.py | 99 ++++++++++++++++++++++++++++++++++++++++++- test/test_ip6.py | 100 +++++++++++++++++++++++++++++++++++++++++++- test/test_ip_mcast.py | 20 +-------- test/test_mpls.py | 83 ++++++++++++++++++++++++++++++++++++ test/vpp_interface.py | 5 +++ test/vpp_ip_route.py | 17 ++++++++ 12 files changed, 316 insertions(+), 61 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index c81043c6..38f3002b 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -55,7 +55,6 @@ VNET_FEATURE_ARC_INIT (device_input, static) = { .arc_name = "device-input", .start_nodes = VNET_FEATURES ("device-input"), - .end_node = "ethernet-input", .arc_index_ptr = &feature_main.device_input_feature_arc_index, }; diff --git a/src/vnet/feature/feature.c b/src/vnet/feature/feature.c index 5a4be029..f0e9004c 100644 --- a/src/vnet/feature/feature.c +++ b/src/vnet/feature/feature.c @@ -109,9 +109,6 @@ vnet_feature_init (vlib_main_t * vm) freg = freg->next; } - cm->end_feature_index = - vnet_get_feature_index (arc_index, areg->end_node); - /* next */ areg = areg->next; arc_index++; @@ -185,7 +182,6 @@ vnet_feature_enable_disable_with_index (u8 arc_index, u32 feature_index, vnet_feature_main_t *fm = &feature_main; vnet_feature_config_main_t *cm; i16 feature_count; - int is_first_or_last; u32 ci; if (arc_index == (u8) ~ 0) @@ -214,19 +210,8 @@ vnet_feature_enable_disable_with_index (u8 arc_index, u32
feature_index, /* update feature count */ enable_disable = (enable_disable > 0); feature_count += enable_disable ? 1 : -1; - is_first_or_last = (feature_count == enable_disable); ASSERT (feature_count >= 0); - if (is_first_or_last && cm->end_feature_index != ~0) - { - /*register end node */ - ci = (enable_disable - ? vnet_config_add_feature - : vnet_config_del_feature) - (vlib_get_main (), &cm->config_main, ci, cm->end_feature_index, 0, 0); - cm->config_index_by_sw_if_index[sw_if_index] = ci; - } - fm->sw_if_index_has_features[arc_index] = clib_bitmap_set (fm->sw_if_index_has_features[arc_index], sw_if_index, (feature_count > 0)); diff --git a/src/vnet/feature/feature.h b/src/vnet/feature/feature.h index 77b1499d..7ec43ea8 100644 --- a/src/vnet/feature/feature.h +++ b/src/vnet/feature/feature.h @@ -30,8 +30,6 @@ typedef struct _vnet_feature_arc_registration /** Start nodes */ char **start_nodes; int n_start_nodes; - /** End node */ - char *end_node; /* Feature arc index, assigned by init function */ u8 feature_arc_index; u8 *arc_index_ptr; @@ -66,7 +64,6 @@ typedef struct vnet_feature_config_main_t_ { vnet_config_main_t config_main; u32 *config_index_by_sw_if_index; - u32 end_feature_index; } vnet_feature_config_main_t; typedef struct diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c index abac50b6..03f2cdca 100644 --- a/src/vnet/interface_output.c +++ b/src/vnet/interface_output.c @@ -1258,7 +1258,6 @@ VNET_FEATURE_ARC_INIT (interface_output, static) = { .arc_name = "interface-output", .start_nodes = VNET_FEATURES (0), - .end_node = "interface-tx", .arc_index_ptr = &vnet_main.interface_main.output_feature_arc_index, }; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 34bc6c5d..0dad61d4 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -847,9 +847,8 @@ ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) !is_enable, 0, 0); - vnet_feature_enable_disable ("ip4-multicast", - "ip4-mfib-forward-lookup", - sw_if_index, is_enable, 0, 0); + vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", + sw_if_index, !is_enable, 0, 0); } static clib_error_t * @@ -954,7 +953,6 @@ VNET_FEATURE_ARC_INIT (ip4_unicast, static) = { .arc_name = "ip4-unicast", .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"), - .end_node = "ip4-lookup", .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index, }; @@ -1021,27 +1019,25 @@ VNET_FEATURE_INIT (ip4_vxlan_bypass, static) = .runs_before = VNET_FEATURES ("ip4-lookup"), }; -VNET_FEATURE_INIT (ip4_lookup, static) = +VNET_FEATURE_INIT (ip4_drop, static) = { .arc_name = "ip4-unicast", - .node_name = "ip4-lookup", - .runs_before = VNET_FEATURES ("ip4-drop"), + .node_name = "ip4-drop", + .runs_before = VNET_FEATURES ("ip4-lookup"), }; -VNET_FEATURE_INIT (ip4_drop, static) = +VNET_FEATURE_INIT (ip4_lookup, static) = { .arc_name = "ip4-unicast", - .node_name = "ip4-drop", + .node_name = "ip4-lookup", .runs_before = 0, /* not before any other features */ }; - /* Built-in ip4 multicast rx feature path definition */ VNET_FEATURE_ARC_INIT (ip4_multicast, static) = { .arc_name = "ip4-multicast", .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"), - .end_node = "ip4-lookup-multicast", .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index, }; @@ -1052,17 +1048,17 @@ VNET_FEATURE_INIT (ip4_vpath_mc, static) = .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), }; -VNET_FEATURE_INIT (ip4_lookup_mc, static) = +VNET_FEATURE_INIT (ip4_mc_drop, static) = 
{ .arc_name = "ip4-multicast", - .node_name = "ip4-mfib-forward-lookup", - .runs_before = VNET_FEATURES ("ip4-drop"), + .node_name = "ip4-drop", + .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), }; -VNET_FEATURE_INIT (ip4_mc_drop, static) = +VNET_FEATURE_INIT (ip4_lookup_mc, static) = { .arc_name = "ip4-multicast", - .node_name = "ip4-drop", + .node_name = "ip4-mfib-forward-lookup", .runs_before = 0, /* last feature */ }; @@ -1071,7 +1067,6 @@ VNET_FEATURE_ARC_INIT (ip4_output, static) = { .arc_name = "ip4-output", .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"), - .end_node = "interface-output", .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index, }; diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c index 560c4b07..05d820a3 100644 --- a/src/vnet/pg/stream.c +++ b/src/vnet/pg/stream.c @@ -223,10 +223,6 @@ pg_interface_add_or_get (pg_main_t * pg, uword if_id) CLIB_CACHE_LINE_BYTES); *pi->lockp = 0; } - - ip4_sw_interface_enable_disable (pi->hw_if_index, 1); - ip6_sw_interface_enable_disable (pi->hw_if_index, 1); - mpls_sw_interface_enable_disable (&mpls_main, pi->hw_if_index, 1); } return i; diff --git a/test/test_ip4.py b/test/test_ip4.py index 7f6e92fa..79af5492 100644 --- a/test/test_ip4.py +++ b/test/test_ip4.py @@ -5,7 +5,8 @@ import unittest from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint -from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip_route import VppIpRoute, VppRoutePath, VppIpMRoute, \ + VppMRoutePath, MRouteItfFlags, MRouteEntryFlags from scapy.packet import Raw from scapy.layers.l2 import Ether, Dot1Q @@ -546,5 +547,101 @@ class TestIPNull(VppTestCase): self.assertEqual(icmp.dst, "10.0.0.2") +class TestIPDisabled(VppTestCase): + """ IPv4 disabled """ + + def setUp(self): + super(TestIPDisabled, self).setUp() + + # create 2 pg interfaces + self.create_pg_interfaces(range(2)) + + # PG0 is IP enalbed + self.pg0.admin_up() + self.pg0.config_ip4() + self.pg0.resolve_arp() + + # PG 1 is not IP enabled + self.pg1.admin_up() + + def tearDown(self): + super(TestIPDisabled, self).tearDown() + for i in self.pg_interfaces: + i.unconfig_ip4() + i.admin_down() + + def send_and_assert_no_replies(self, intf, pkts, remark): + intf.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + for i in self.pg_interfaces: + i.get_capture(0) + i.assert_nothing_captured(remark=remark) + + def test_ip_disabled(self): + """ IP Disabled """ + + # + # An (S,G). 
+ # one accepting interface, pg0, 2 forwarding interfaces + # + route_232_1_1_1 = VppIpMRoute( + self, + "0.0.0.0", + "232.1.1.1", 32, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + [VppMRoutePath(self.pg1.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT), + VppMRoutePath(self.pg0.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)]) + route_232_1_1_1.add_vpp_config() + + pu = (Ether(src=self.pg1.remote_mac, + dst=self.pg1.local_mac) / + IP(src="10.10.10.10", dst=self.pg0.remote_ip4) / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + pm = (Ether(src=self.pg1.remote_mac, + dst=self.pg1.local_mac) / + IP(src="10.10.10.10", dst="232.1.1.1") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + + # + # PG1 does not forward IP traffic + # + self.send_and_assert_no_replies(self.pg1, pu, "IP disabled") + self.send_and_assert_no_replies(self.pg1, pm, "IP disabled") + + # + # IP enable PG1 + # + self.pg1.config_ip4() + + # + # Now we get packets through + # + self.pg1.add_stream(pu) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + rx = self.pg0.get_capture(1) + + self.pg1.add_stream(pm) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + rx = self.pg0.get_capture(1) + + # + # Disable PG1 + # + self.pg1.unconfig_ip4() + + # + # PG1 does not forward IP traffic + # + self.send_and_assert_no_replies(self.pg1, pu, "IP disabled") + self.send_and_assert_no_replies(self.pg1, pm, "IP disabled") + + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/test_ip6.py b/test/test_ip6.py index e57e034d..a8e8d4de 100644 --- a/test/test_ip6.py +++ b/test/test_ip6.py @@ -6,7 +6,8 @@ from socket import AF_INET6 from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppSubInterface, VppDot1QSubint from vpp_pg_interface import is_ipv6_misc -from vpp_ip_route import VppIpRoute, VppRoutePath, find_route +from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, VppIpMRoute, \ + VppMRoutePath, MRouteItfFlags, MRouteEntryFlags from vpp_neighbor import find_nbr, VppNeighbor from scapy.packet import Raw @@ -981,5 +982,102 @@ class TestIPNull(VppTestCase): self.assertEqual(icmp.code, 1) +class TestIPDisabled(VppTestCase): + """ IPv6 disabled """ + + def setUp(self): + super(TestIPDisabled, self).setUp() + + # create 2 pg interfaces + self.create_pg_interfaces(range(2)) + + # PG0 is IP enalbed + self.pg0.admin_up() + self.pg0.config_ip6() + self.pg0.resolve_ndp() + + # PG 1 is not IP enabled + self.pg1.admin_up() + + def tearDown(self): + super(TestIPDisabled, self).tearDown() + for i in self.pg_interfaces: + i.unconfig_ip4() + i.admin_down() + + def send_and_assert_no_replies(self, intf, pkts, remark): + intf.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + for i in self.pg_interfaces: + i.get_capture(0) + i.assert_nothing_captured(remark=remark) + + def test_ip_disabled(self): + """ IP Disabled """ + + # + # An (S,G). 
+ # one accepting interface, pg0, 2 forwarding interfaces + # + route_ff_01 = VppIpMRoute( + self, + "::", + "ffef::1", 128, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + [VppMRoutePath(self.pg1.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT), + VppMRoutePath(self.pg0.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)], + is_ip6=1) + route_ff_01.add_vpp_config() + + pu = (Ether(src=self.pg1.remote_mac, + dst=self.pg1.local_mac) / + IPv6(src="2001::1", dst=self.pg0.remote_ip6) / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + pm = (Ether(src=self.pg1.remote_mac, + dst=self.pg1.local_mac) / + IPv6(src="2001::1", dst="ffef::1") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + + # + # PG1 does not forward IP traffic + # + self.send_and_assert_no_replies(self.pg1, pu, "IPv6 disabled") + self.send_and_assert_no_replies(self.pg1, pm, "IPv6 disabled") + + # + # IP enable PG1 + # + self.pg1.config_ip6() + + # + # Now we get packets through + # + self.pg1.add_stream(pu) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + rx = self.pg0.get_capture(1) + + self.pg1.add_stream(pm) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + rx = self.pg0.get_capture(1) + + # + # Disable PG1 + # + self.pg1.unconfig_ip6() + + # + # PG1 does not forward IP traffic + # + self.send_and_assert_no_replies(self.pg1, pu, "IPv6 disabled") + self.send_and_assert_no_replies(self.pg1, pm, "IPv6 disabled") + + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/test_ip_mcast.py b/test/test_ip_mcast.py index 864cb803..094942b3 100644 --- a/test/test_ip_mcast.py +++ b/test/test_ip_mcast.py @@ -4,7 +4,8 @@ import unittest from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint -from vpp_ip_route import VppIpMRoute, VppMRoutePath, VppMFibSignal +from vpp_ip_route import VppIpMRoute, VppMRoutePath, VppMFibSignal, \ + MRouteItfFlags, MRouteEntryFlags from scapy.packet import Raw from scapy.layers.l2 import Ether @@ -12,23 +13,6 @@ from scapy.layers.inet import IP, UDP, getmacbyip, ICMP from scapy.layers.inet6 import IPv6, getmacbyip6 from util import ppp - -class MRouteItfFlags: - MFIB_ITF_FLAG_NONE = 0 - MFIB_ITF_FLAG_NEGATE_SIGNAL = 1 - MFIB_ITF_FLAG_ACCEPT = 2 - MFIB_ITF_FLAG_FORWARD = 4 - MFIB_ITF_FLAG_SIGNAL_PRESENT = 8 - MFIB_ITF_FLAG_INTERNAL_COPY = 16 - - -class MRouteEntryFlags: - MFIB_ENTRY_FLAG_NONE = 0 - MFIB_ENTRY_FLAG_SIGNAL = 1 - MFIB_ENTRY_FLAG_DROP = 2 - MFIB_ENTRY_FLAG_CONNECTED = 4 - MFIB_ENTRY_FLAG_INHERIT_ACCEPT = 8 - # # The number of packets sent is set to 90 so that when we replicate more than 3 # times, which we do for some entries, we will generate more than 256 packets diff --git a/test/test_mpls.py b/test/test_mpls.py index 9082637b..fc832644 100644 --- a/test/test_mpls.py +++ b/test/test_mpls.py @@ -738,5 +738,88 @@ class TestMPLS(VppTestCase): route_35_eos.remove_vpp_config() route_34_eos.remove_vpp_config() + +class TestMPLSDisabled(VppTestCase): + """ MPLS disabled """ + + def setUp(self): + super(TestMPLSDisabled, self).setUp() + + # create 2 pg interfaces + self.create_pg_interfaces(range(2)) + + # PG0 is MPLS enalbed + self.pg0.admin_up() + self.pg0.config_ip4() + self.pg0.resolve_arp() + self.pg0.enable_mpls() + + # PG 1 is not MPLS enabled + self.pg1.admin_up() + + def tearDown(self): + super(TestMPLSDisabled, self).tearDown() + for i in self.pg_interfaces: + i.unconfig_ip4() + i.admin_down() + + def send_and_assert_no_replies(self, intf, pkts, 
remark): + intf.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + for i in self.pg_interfaces: + i.get_capture(0) + i.assert_nothing_captured(remark=remark) + + def test_mpls_disabled(self): + """ MPLS Disabled """ + + tx = (Ether(src=self.pg1.remote_mac, + dst=self.pg1.local_mac) / + MPLS(label=32, ttl=64) / + IPv6(src="2001::1", dst=self.pg0.remote_ip6) / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + + # + # A simple MPLS xconnect - eos label in label out + # + route_32_eos = VppMplsRoute(self, 32, 1, + [VppRoutePath(self.pg0.remote_ip4, + self.pg0.sw_if_index, + labels=[33])]) + route_32_eos.add_vpp_config() + + # + # PG1 does not forward IP traffic + # + self.send_and_assert_no_replies(self.pg1, tx, "MPLS disabled") + + # + # MPLS enable PG1 + # + self.pg1.enable_mpls() + + # + # Now we get packets through + # + self.pg1.add_stream(tx) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg0.get_capture(1) + + # + # Disable PG1 + # + self.pg1.disable_mpls() + + # + # PG1 does not forward IP traffic + # + self.send_and_assert_no_replies(self.pg1, tx, "IPv6 disabled") + self.send_and_assert_no_replies(self.pg1, tx, "IPv6 disabled") + + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_interface.py b/test/vpp_interface.py index aeaf27a8..5dba0978 100644 --- a/test/vpp_interface.py +++ b/test/vpp_interface.py @@ -321,6 +321,11 @@ class VppInterface(object): self.test.vapi.sw_interface_enable_disable_mpls( self.sw_if_index) + def disable_mpls(self): + """Enable MPLS on the VPP interface.""" + self.test.vapi.sw_interface_enable_disable_mpls( + self.sw_if_index, 0) + def is_ip4_entry_in_fib_dump(self, dump): for i in dump: if i.address == self.local_ip4n and \ diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py index e1c2b4b4..faf5f801 100644 --- a/test/vpp_ip_route.py +++ b/test/vpp_ip_route.py @@ -12,6 +12,23 @@ MPLS_IETF_MAX_LABEL = 0xfffff MPLS_LABEL_INVALID = MPLS_IETF_MAX_LABEL + 1 +class MRouteItfFlags: + MFIB_ITF_FLAG_NONE = 0 + MFIB_ITF_FLAG_NEGATE_SIGNAL = 1 + MFIB_ITF_FLAG_ACCEPT = 2 + MFIB_ITF_FLAG_FORWARD = 4 + MFIB_ITF_FLAG_SIGNAL_PRESENT = 8 + MFIB_ITF_FLAG_INTERNAL_COPY = 16 + + +class MRouteEntryFlags: + MFIB_ENTRY_FLAG_NONE = 0 + MFIB_ENTRY_FLAG_SIGNAL = 1 + MFIB_ENTRY_FLAG_DROP = 2 + MFIB_ENTRY_FLAG_CONNECTED = 4 + MFIB_ENTRY_FLAG_INHERIT_ACCEPT = 8 + + def find_route(test, ip_addr, len, table_id=0, inet=AF_INET): if inet == AF_INET: s = 4 -- cgit 1.2.3-korg From eb743fad56b32cb20ad2d2cadc4760f9c25be5e1 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 20 Mar 2017 16:34:15 +0100 Subject: vnet: add device-input thread placement infra This change adds two new debug CLI commands: - "show interface placement" to display which thread (main or worker) is responsible for processing an interface rx queue vpp# show interface placement Thread 0 (vpp_main): node af-packet-input: host-vpp1 queue 0 Thread 1 (vpp_wk_0): node af-packet-input: host-virbr0 queue 0 Thread 2 (vpp_wk_1): node af-packet-input: host-vpp2 queue 0 host-lxcbr0 queue 0 - "set interface placement" to assign the thread (main or worker) which processes a specific interface rx queue vpp# set interface placement host-vpp1 queue 0 main Change-Id: Id4dd00cf2b05e10fae2125ac7cb4411b446c5e9c Signed-off-by: Damjan Marion --- src/vlib/threads.c | 14 +- src/vnet/devices/af_packet/af_packet.c | 54 +------- src/vnet/devices/af_packet/af_packet.h | 6 - src/vnet/devices/af_packet/node.c | 23 ++-- src/vnet/devices/devices.c | 240
+++++++++++++++++++++++++++++++++ src/vnet/devices/devices.h | 45 +++++++ src/vnet/interface.h | 6 + 7 files changed, 310 insertions(+), 78 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 40789f59..ef3a24d3 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -685,9 +685,6 @@ start_workers (vlib_main_t * vm) clib_memcpy (rt->runtime_data, n->runtime_data, clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, n->runtime_data_bytes)); - else if (CLIB_DEBUG > 0) - memset (rt->runtime_data, 0xfe, - VLIB_NODE_RUNTIME_DATA_SIZE); } nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = @@ -701,9 +698,6 @@ start_workers (vlib_main_t * vm) clib_memcpy (rt->runtime_data, n->runtime_data, clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, n->runtime_data_bytes)); - else if (CLIB_DEBUG > 0) - memset (rt->runtime_data, 0xfe, - VLIB_NODE_RUNTIME_DATA_SIZE); } nm_clone->processes = vec_dup (nm->processes); @@ -1405,15 +1399,15 @@ vlib_worker_thread_fn (void *arg) clib_time_init (&vm->clib_time); clib_mem_set_heap (w->thread_mheap); + /* Wait until the dpdk init sequence is complete */ + while (tm->extern_thread_mgmt && tm->worker_thread_release == 0) + vlib_worker_thread_barrier_check (); + e = vlib_call_init_exit_functions (vm, vm->worker_init_function_registrations, 1 /* call_once */ ); if (e) clib_error_report (e); - /* Wait until the dpdk init sequence is complete */ - while (tm->extern_thread_mgmt && tm->worker_thread_release == 0) - vlib_worker_thread_barrier_check (); - vlib_worker_loop (vm); } diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index e491ba47..5fdc59f2 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -67,15 +67,16 @@ af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, static clib_error_t * af_packet_fd_read_ready (unix_file_t * uf) { - vlib_main_t *vm = vlib_get_main (); af_packet_main_t *apm = &af_packet_main; + vnet_main_t *vnm = vnet_get_main (); u32 idx = uf->private_data; + af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, idx); apm->pending_input_bitmap = clib_bitmap_set (apm->pending_input_bitmap, idx, 1); /* Schedule the rx node */ - vlib_node_set_interrupt_pending (vm, af_packet_input_node.index); + vnet_device_input_set_interrupt_pending (vnm, apif->hw_if_index, 0); return 0; } @@ -171,31 +172,6 @@ error: return ret; } -static void -af_packet_worker_thread_enable () -{ - /* If worker threads are enabled, switch to polling mode */ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - af_packet_input_node.index, - VLIB_NODE_STATE_POLLING); - })); - -} - -static void -af_packet_worker_thread_disable () -{ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - af_packet_input_node.index, - VLIB_NODE_STATE_INTERRUPT); - })); - -} - int af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, u32 * sw_if_index) @@ -298,6 +274,9 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index); apif->sw_if_index = sw->sw_if_index; + vnet_set_device_input_node (apif->hw_if_index, af_packet_input_node.index); + vnet_device_input_assign_thread (apif->hw_if_index, 0, /* queue */ + ~0 /* any cpu */ ); vnet_hw_interface_set_flags (vnm, apif->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); @@ -307,9 +286,6 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, if (sw_if_index) *sw_if_index = 
apif->sw_if_index; - if (tm->n_vlib_mains > 1 && pool_elts (apm->interfaces) == 1) - af_packet_worker_thread_enable (); - return 0; error: @@ -323,7 +299,6 @@ int af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name) { vnet_main_t *vnm = vnet_get_main (); - vlib_thread_main_t *tm = vlib_get_thread_main (); af_packet_main_t *apm = &af_packet_main; af_packet_if_t *apif; uword *p; @@ -373,8 +348,6 @@ af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name) ethernet_delete_interface (vnm, apif->hw_if_index); pool_put (apm->interfaces, apif); - if (tm->n_vlib_mains > 1 && pool_elts (apm->interfaces) == 0) - af_packet_worker_thread_disable (); return 0; } @@ -384,24 +357,9 @@ af_packet_init (vlib_main_t * vm) { af_packet_main_t *apm = &af_packet_main; vlib_thread_main_t *tm = vlib_get_thread_main (); - vlib_thread_registration_t *tr; - uword *p; memset (apm, 0, sizeof (af_packet_main_t)); - apm->input_cpu_first_index = 0; - apm->input_cpu_count = 1; - - /* find out which cpus will be used for input */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - apm->input_cpu_first_index = tr->first_index; - apm->input_cpu_count = tr->count; - } - mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword)); vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1, diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h index e00e5cb4..50ec2378 100644 --- a/src/vnet/devices/af_packet/af_packet.h +++ b/src/vnet/devices/af_packet/af_packet.h @@ -51,12 +51,6 @@ typedef struct /* hash of host interface names */ mhash_t if_index_by_host_if_name; - - /* first cpu index */ - u32 input_cpu_first_index; - - /* total cpu count */ - u32 input_cpu_count; } af_packet_main_t; af_packet_main_t af_packet_main; diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index ab7fd800..ba337f3f 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -246,20 +246,18 @@ static uword af_packet_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - int i; u32 n_rx_packets = 0; - u32 cpu_index = os_get_cpu_number (); af_packet_main_t *apm = &af_packet_main; - af_packet_if_t *apif; + vnet_device_input_runtime_t *rt = (void *) node->runtime_data; + vnet_device_and_queue_t *dq; - for (i = 0; i < vec_len (apm->interfaces); i++) - { - apif = vec_elt_at_index (apm->interfaces, i); - if (apif->is_admin_up && - (i % apm->input_cpu_count) == - (cpu_index - apm->input_cpu_first_index)) - n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif); - } + vec_foreach (dq, rt->devices_and_queues) + { + af_packet_if_t *apif; + apif = vec_elt_at_index (apm->interfaces, dq->dev_instance); + if (apif->is_admin_up) + n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif); + } return n_rx_packets; } @@ -271,9 +269,6 @@ VLIB_REGISTER_NODE (af_packet_input_node) = { .sibling_of = "device-input", .format_trace = format_af_packet_input_trace, .type = VLIB_NODE_TYPE_INPUT, - /** - * default state is INTERRUPT mode, switch to POLLING if worker threads are enabled - */ .state = VLIB_NODE_STATE_INTERRUPT, .n_errors = AF_PACKET_INPUT_N_ERROR, .error_strings = af_packet_input_error_strings, diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index 38f3002b..41645220 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -32,6 +32,7 @@ device_input_fn (vlib_main_t * 
vm, vlib_node_runtime_t * node, VLIB_REGISTER_NODE (device_input_node) = { .function = device_input_fn, .name = "device-input", + .runtime_data_bytes = sizeof (vnet_device_input_runtime_t), .type = VLIB_NODE_TYPE_INPUT, .state = VLIB_NODE_STATE_DISABLED, .n_next_nodes = VNET_DEVICE_INPUT_N_NEXT_NODES, @@ -83,18 +84,257 @@ VNET_FEATURE_INIT (ethernet_input, static) = { }; /* *INDENT-ON* */ +static int +vnet_device_queue_sort (void *a1, void *a2) +{ + vnet_device_and_queue_t *dq1 = a1; + vnet_device_and_queue_t *dq2 = a2; + + if (dq1->dev_instance > dq2->dev_instance) + return 1; + else if (dq1->dev_instance < dq2->dev_instance) + return -1; + else if (dq1->queue_id > dq2->queue_id) + return 1; + else if (dq1->queue_id < dq2->queue_id) + return -1; + else + return 0; +} + +void +vnet_device_input_assign_thread (u32 hw_if_index, + u16 queue_id, uword cpu_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_device_main_t *vdm = &vnet_device_main; + vlib_main_t *vm; + vnet_device_input_runtime_t *rt; + vnet_device_and_queue_t *dq; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + + ASSERT (hw->input_node_index > 0); + + if (vdm->first_worker_cpu_index == 0) + cpu_index = 0; + + if (cpu_index != 0 && + (cpu_index < vdm->first_worker_cpu_index || + cpu_index > vdm->last_worker_cpu_index)) + { + cpu_index = vdm->next_worker_cpu_index++; + if (vdm->next_worker_cpu_index > vdm->last_worker_cpu_index) + vdm->next_worker_cpu_index = vdm->first_worker_cpu_index; + } + + vm = vlib_mains[cpu_index]; + rt = vlib_node_get_runtime_data (vm, hw->input_node_index); + + vec_add2 (rt->devices_and_queues, dq, 1); + dq->hw_if_index = hw_if_index; + dq->dev_instance = hw->dev_instance; + dq->queue_id = queue_id; + + vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort); + vec_validate (hw->input_node_cpu_index_by_queue, queue_id); + hw->input_node_cpu_index_by_queue[queue_id] = cpu_index; +} + +static int +vnet_device_input_unassign_thread (u32 hw_if_index, u16 queue_id, + uword cpu_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + vnet_device_input_runtime_t *rt; + vnet_device_and_queue_t *dq; + uword old_cpu_index; + + if (hw->input_node_cpu_index_by_queue == 0) + return VNET_API_ERROR_INVALID_INTERFACE; + + if (vec_len (hw->input_node_cpu_index_by_queue) < queue_id + 1) + return VNET_API_ERROR_INVALID_INTERFACE; + + old_cpu_index = hw->input_node_cpu_index_by_queue[queue_id]; + + if (old_cpu_index == cpu_index) + return 0; + + rt = + vlib_node_get_runtime_data (vlib_mains[old_cpu_index], + hw->input_node_index); + + vec_foreach (dq, rt->devices_and_queues) + if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id) + { + vec_del1 (rt->devices_and_queues, dq - rt->devices_and_queues); + goto deleted; + } + + return VNET_API_ERROR_INVALID_INTERFACE; + +deleted: + vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort); + + return 0; +} + +static clib_error_t * +show_device_placement_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *s = 0; + vnet_main_t *vnm = vnet_get_main (); + vnet_device_input_runtime_t *rt; + vnet_device_and_queue_t *dq; + vlib_node_t *pn = vlib_get_node_by_name (vm, (u8 *) "device-input"); + uword si; + int index = 0; + + /* *INDENT-OFF* */ + foreach_vlib_main (({ + clib_bitmap_foreach (si, pn->sibling_bitmap, + ({ + rt = vlib_node_get_runtime_data (this_vlib_main, si); + + if (vec_len (rt->devices_and_queues)) + s = format 
(s, " node %U:\n", format_vlib_node_name, vm, si); + + vec_foreach (dq, rt->devices_and_queues) + { + s = format (s, " %U queue %u\n", + format_vnet_sw_if_index_name, vnm, dq->hw_if_index, + dq->queue_id); + } + })); + if (vec_len (s) > 0) + { + vlib_cli_output(vm, "Thread %u (%v):\n%v", index, + vlib_worker_threads[index].name, s); + vec_reset_length (s); + } + index++; + })); + /* *INDENT-ON* */ + + vec_free (s); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (memif_delete_command, static) = { + .path = "show interface placement", + .short_help = "show interface placement", + .function = show_device_placement_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_device_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + vnet_device_main_t *vdm = &vnet_device_main; + u32 hw_if_index = (u32) ~ 0; + u32 queue_id = (u32) 0; + u32 cpu_index = (u32) ~ 0; + int rv; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + ; + else if (unformat (line_input, "queue %d", &queue_id)) + ; + else if (unformat (line_input, "main", &cpu_index)) + cpu_index = 0; + else if (unformat (line_input, "worker %d", &cpu_index)) + cpu_index += vdm->first_worker_cpu_index; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + unformat_free (line_input); + return error; + } + } + + unformat_free (line_input); + + if (hw_if_index == (u32) ~ 0) + return clib_error_return (0, "please specify valid interface name"); + + if (cpu_index > vdm->last_worker_cpu_index) + return clib_error_return (0, + "please specify valid worker thread or main"); + + rv = vnet_device_input_unassign_thread (hw_if_index, queue_id, cpu_index); + + if (rv) + return clib_error_return (0, "not found"); + + vnet_device_input_assign_thread (hw_if_index, queue_id, cpu_index); + + return 0; +} + +/*? + * This command is used to assign a given interface, and optionally a + * given queue, to a different thread. If the 'queue' is not provided, + * it defaults to 0. + * + * @cliexpar + * Example of how to display the interface placement: + * @cliexstart{show interface placement} + * Thread 1 (vpp_wk_0): + * GigabitEthernet0/8/0 queue 0 + * GigabitEthernet0/9/0 queue 0 + * Thread 2 (vpp_wk_1): + * GigabitEthernet0/8/0 queue 1 + * GigabitEthernet0/9/0 queue 1 + * @cliexend + * Example of how to assign a interface and queue to a thread: + * @cliexcmd{set interface placement GigabitEthernet0/8/0 queue 1 thread 1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { + .path = "set interface placement", + .short_help = "set interface placement [queue ] [thread | main]", + .function = set_device_placement, +}; +/* *INDENT-ON* */ + static clib_error_t * vnet_device_init (vlib_main_t * vm) { vnet_device_main_t *vdm = &vnet_device_main; vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + uword *p; vec_validate_aligned (vdm->workers, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); + + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? 
(vlib_thread_registration_t *) p[0] : 0; + if (tr && tr->count > 0) + { + vdm->first_worker_cpu_index = tr->first_index; + vdm->next_worker_cpu_index = tr->first_index; + vdm->last_worker_cpu_index = tr->first_index + tr->count - 1; + } return 0; } VLIB_INIT_FUNCTION (vnet_device_init); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index a5cbc35e..bbb29fe3 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -50,12 +50,38 @@ typedef struct typedef struct { vnet_device_per_worker_data_t *workers; + uword first_worker_cpu_index; + uword last_worker_cpu_index; + uword next_worker_cpu_index; } vnet_device_main_t; +typedef struct +{ + u32 hw_if_index; + u32 dev_instance; + u16 queue_id; +} vnet_device_and_queue_t; + +typedef struct +{ + vnet_device_and_queue_t *devices_and_queues; +} vnet_device_input_runtime_t; + extern vnet_device_main_t vnet_device_main; extern vlib_node_registration_t device_input_node; extern const u32 device_input_next_node_advance[]; +static inline void +vnet_set_device_input_node (u32 hw_if_index, u32 node_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + hw->input_node_index = node_index; +} + +void vnet_device_input_assign_thread (u32 hw_if_index, u16 queue_id, + uword cpu_index); + static inline u64 vnet_get_aggregate_rx_packets (void) { @@ -78,6 +104,25 @@ vnet_device_increment_rx_packets (u32 cpu_index, u64 count) pwd->aggregate_rx_packets += count; } +static_always_inline vnet_device_and_queue_t * +vnet_get_device_and_queue (vlib_main_t * vm, vlib_node_runtime_t * node) +{ + vnet_device_input_runtime_t *rt = (void *) node->runtime_data; + return rt->devices_and_queues; +} + +static_always_inline void +vnet_device_input_set_interrupt_pending (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id) +{ + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + + ASSERT (queue_id < vec_len (hw->input_node_cpu_index_by_queue)); + u32 cpu_index = hw->input_node_cpu_index_by_queue[queue_id]; + vlib_node_set_interrupt_pending (vlib_mains[cpu_index], + hw->input_node_index); +} + #endif /* included_vnet_vnet_device_h */ /* diff --git a/src/vnet/interface.h b/src/vnet/interface.h index ef8f9118..a1ea2d61 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -464,6 +464,12 @@ typedef struct vnet_hw_interface_t #define VNET_HW_INTERFACE_BOND_INFO_NONE ((uword *) 0) #define VNET_HW_INTERFACE_BOND_INFO_SLAVE ((uword *) ~0) + /* Input node */ + u32 input_node_index; + + /* input node cpu index by queue */ + u32 *input_node_cpu_index_by_queue; + } vnet_hw_interface_t; extern vnet_device_class_t vnet_local_interface_device_class; -- cgit 1.2.3-korg From 7312cc7785a9d1198519e1091a645fecc019a6b8 Mon Sep 17 00:00:00 2001 From: Steven Date: Wed, 15 Mar 2017 21:18:55 -0700 Subject: vhost: support interrupt mode vhost currently supports only polling mode. This patch adds interrupt mode. When the interface is configured for interrupt mode, our input node does not get called unless there is a packet in the vring. If a particular CPU has one interface configured for polling mode and another in interrupt mode, the input node is set to polling for that CPU. This diff also includes fixes for two crashes in vlib's dispatch_node. One is included in https://gerrit.fd.io/r/#/c/5516. The other crash is in the ASSERT. The ASSERT can become true when the caller of dispatch_node is in a loop.
The first call converted the node to polling. The second call thereafter will hit the ASSERT. Change-Id: If17b6d48b20d7d8605c6a161459828637173cd32 Signed-off-by: Steven --- src/vat/api_format.c | 67 ++++++- src/vlib/main.c | 13 +- src/vnet/devices/virtio/vhost-user.c | 333 ++++++++++++++++++++++++++++++++--- src/vnet/devices/virtio/vhost-user.h | 11 ++ 4 files changed, 392 insertions(+), 32 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 37b7f93e..3b57ac61 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -403,6 +403,46 @@ api_unformat_sw_if_index (unformat_input_t * input, va_list * args) } #endif /* VPP_API_TEST_BUILTIN */ +#define VHOST_USER_POLLING_MODE 0 +#define VHOST_USER_INTERRUPT_MODE 1 +#define VHOST_USER_ADAPTIVE_MODE 2 + +static u8 * +api_format_vhost_user_operation_mode (u8 * s, va_list * va) +{ + int operation_mode = va_arg (*va, int); + + switch (operation_mode) + { + case VHOST_USER_POLLING_MODE: + s = format (s, "%-9s", "polling"); + break; + case VHOST_USER_INTERRUPT_MODE: + s = format (s, "%-9s", "interrupt"); + break; + default: + s = format (s, "%-9s", "invalid"); + } + return s; +} + +static uword +api_unformat_vhost_user_operation_mode (unformat_input_t * input, + va_list * args) +{ + u8 *operation_mode = va_arg (*args, u8 *); + uword rc = 1; + + if (unformat (input, "interrupt")) + *operation_mode = VHOST_USER_INTERRUPT_MODE; + else if (unformat (input, "polling")) + *operation_mode = VHOST_USER_POLLING_MODE; + else + rc = 0; + + return rc; +} + static uword unformat_policer_rate_type (unformat_input_t * input, va_list * args) { @@ -11174,6 +11214,7 @@ api_create_vhost_user_if (vat_main_t * vam) u8 use_custom_mac = 0; u8 *tag = 0; int ret; + u8 operation_mode = VHOST_USER_POLLING_MODE; /* Shut up coverity */ memset (hwaddr, 0, sizeof (hwaddr)); @@ -11192,6 +11233,10 @@ api_create_vhost_user_if (vat_main_t * vam) is_server = 1; else if (unformat (i, "tag %s", &tag)) ; + else if (unformat (i, "mode %U", + api_unformat_vhost_user_operation_mode, + &operation_mode)) + ; else break; } @@ -11211,6 +11256,7 @@ api_create_vhost_user_if (vat_main_t * vam) M (CREATE_VHOST_USER_IF, mp); + mp->operation_mode = operation_mode; mp->is_server = is_server; clib_memcpy (mp->sock_filename, file_name, vec_len (file_name)); vec_free (file_name); @@ -11242,6 +11288,7 @@ api_modify_vhost_user_if (vat_main_t * vam) u8 sw_if_index_set = 0; u32 sw_if_index = (u32) ~ 0; int ret; + u8 operation_mode = VHOST_USER_POLLING_MODE; while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { @@ -11257,6 +11304,10 @@ api_modify_vhost_user_if (vat_main_t * vam) ; else if (unformat (i, "server")) is_server = 1; + else if (unformat (i, "mode %U", + api_unformat_vhost_user_operation_mode, + &operation_mode)) + ; else break; } @@ -11282,6 +11333,7 @@ api_modify_vhost_user_if (vat_main_t * vam) M (MODIFY_VHOST_USER_IF, mp); + mp->operation_mode = operation_mode; mp->sw_if_index = ntohl (sw_if_index); mp->is_server = is_server; clib_memcpy (mp->sock_filename, file_name, vec_len (file_name)); @@ -11337,11 +11389,12 @@ static void vl_api_sw_interface_vhost_user_details_t_handler { vat_main_t *vam = &vat_main; - print (vam->ofp, "%-25s %3" PRIu32 " %6" PRIu32 " %8x %6d %7d %s", + print (vam->ofp, "%-25s %3" PRIu32 " %6" PRIu32 " %8x %6d %7d %U %s", (char *) mp->interface_name, ntohl (mp->sw_if_index), ntohl (mp->virtio_net_hdr_sz), clib_net_to_host_u64 (mp->features), mp->is_server, - ntohl (mp->num_regions), (char *) mp->sock_filename); 
+ ntohl (mp->num_regions), api_format_vhost_user_operation_mode, + mp->operation_mode, (char *) mp->sock_filename); print (vam->ofp, " Status: '%s'", strerror (ntohl (mp->sock_errno))); } @@ -11370,6 +11423,7 @@ static void vl_api_sw_interface_vhost_user_details_t_handler_json vat_json_object_add_string_copy (node, "sock_filename", mp->sock_filename); vat_json_object_add_uint (node, "num_regions", ntohl (mp->num_regions)); vat_json_object_add_uint (node, "sock_errno", ntohl (mp->sock_errno)); + vat_json_object_add_uint (node, "mode", mp->operation_mode); } static int @@ -11379,7 +11433,8 @@ api_sw_interface_vhost_user_dump (vat_main_t * vam) vl_api_control_ping_t *mp_ping; int ret; print (vam->ofp, - "Interface name idx hdr_sz features server regions filename"); + "Interface name idx hdr_sz features server regions mode" + " filename"); /* Get list of vhost-user interfaces */ M (SW_INTERFACE_VHOST_USER_DUMP, mp); @@ -18492,10 +18547,12 @@ _(l2_interface_vlan_tag_rewrite, \ "[translate-2-[1|2]] [push_dot1q 0] tag1 tag2 ") \ _(create_vhost_user_if, \ "socket [server] [renumber ] " \ - "[mac ]") \ + "[mac ] " \ + "[mode ]") \ _(modify_vhost_user_if, \ " | sw_if_index socket \n" \ - "[server] [renumber ]") \ + "[server] [renumber ] " \ + "[mode ]") \ _(delete_vhost_user_if, " | sw_if_index ") \ _(sw_interface_vhost_user_dump, "") \ _(show_version, "") \ diff --git a/src/vlib/main.c b/src/vlib/main.c index 605771c8..50f0b162 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -1032,15 +1032,14 @@ dispatch_node (vlib_main_t * vm, vlib_worker_thread_t *w = vlib_worker_threads + vm->cpu_index; #endif - if (dispatch_state == VLIB_NODE_STATE_INTERRUPT - && v >= nm->polling_threshold_vector_length) + if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT + && v >= nm->polling_threshold_vector_length) && + !(node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) { vlib_node_t *n = vlib_get_node (vm, node->node_index); n->state = VLIB_NODE_STATE_POLLING; node->state = VLIB_NODE_STATE_POLLING; - ASSERT (! - (node->flags & - VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)); node->flags &= ~VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE; node->flags |= @@ -1445,6 +1444,10 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) /* Pre-allocate expired nodes. 
*/ vec_alloc (nm->pending_interrupt_node_runtime_indices, 32); + if (!nm->polling_threshold_vector_length) + nm->polling_threshold_vector_length = 10; + if (!nm->interrupt_threshold_vector_length) + nm->interrupt_threshold_vector_length = 5; if (is_main) { diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 3cbeca9b..5a5beb15 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -368,6 +368,8 @@ vhost_user_rx_thread_placement () vhost_user_intf_t *vui; vhost_cpu_t *vhc; u32 *workers = 0; + u32 cpu_index; + vlib_main_t *vm; //Let's list all workers cpu indexes u32 i; @@ -398,19 +400,59 @@ vhost_user_rx_thread_placement () continue; i %= vec_len (vui_workers); - u32 cpu_index = vui_workers[i]; + cpu_index = vui_workers[i]; i++; vhc = &vum->cpus[cpu_index]; iaq.qid = qid; iaq.vhost_iface_index = vui - vum->vhost_user_interfaces; vec_add1 (vhc->rx_queues, iaq); - vlib_node_set_state (vlib_mains[cpu_index], - vhost_user_input_node.index, - VLIB_NODE_STATE_POLLING); } }); /* *INDENT-ON* */ + + vec_foreach (vhc, vum->cpus) + { + vhost_iface_and_queue_t *vhiq; + u8 mode = VHOST_USER_INTERRUPT_MODE; + + vec_foreach (vhiq, vhc->rx_queues) + { + vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; + if (vui->operation_mode == VHOST_USER_POLLING_MODE) + { + /* At least one interface is polling, cpu is set to polling */ + mode = VHOST_USER_POLLING_MODE; + break; + } + } + vhc->operation_mode = mode; + } + + for (cpu_index = vum->input_cpu_first_index; + cpu_index < vum->input_cpu_first_index + vum->input_cpu_count; + cpu_index++) + { + vlib_node_state_t state = VLIB_NODE_STATE_POLLING; + + vhc = &vum->cpus[cpu_index]; + vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main; + switch (vhc->operation_mode) + { + case VHOST_USER_INTERRUPT_MODE: + state = VLIB_NODE_STATE_INTERRUPT; + break; + case VHOST_USER_POLLING_MODE: + state = VLIB_NODE_STATE_POLLING; + break; + default: + clib_warning ("BUG: bad operation mode %d", vhc->operation_mode); + break; + } + vlib_node_set_state (vm, vhost_user_input_node.index, state); + } + + vec_free (workers); } static int @@ -485,12 +527,68 @@ vhost_user_update_iface_state (vhost_user_intf_t * vui) vhost_user_tx_thread_placement (vui); } +static void +vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq) +{ + vhost_user_main_t *vum = &vhost_user_main; + vhost_cpu_t *vhc; + u32 cpu_index; + vhost_iface_and_queue_t *vhiq; + vlib_main_t *vm; + u32 ifq2; + u8 done = 0; + + if (vhost_user_intf_ready (vui)) + { + vec_foreach (vhc, vum->cpus) + { + if (vhc->operation_mode == VHOST_USER_POLLING_MODE) + continue; + + vec_foreach (vhiq, vhc->rx_queues) + { + /* + * Match the interface and the virtqueue number + */ + if ((vhiq->vhost_iface_index == (ifq >> 8)) && + (VHOST_VRING_IDX_TX (vhiq->qid) == (ifq & 0xff))) + { + cpu_index = vhc - vum->cpus; + vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main; + /* + * Convert RX virtqueue number in the lower byte to vring + * queue index for the input node process. Top bytes contain + * the interface, lower byte contains the queue index. 
+ */ + ifq2 = ((ifq >> 8) << 8) | vhiq->qid; + vhc->pending_input_bitmap = + clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1); + vlib_node_set_interrupt_pending (vm, + vhost_user_input_node.index); + done = 1; + break; + } + } + if (done) + break; + } + } +} + static clib_error_t * vhost_user_callfd_read_ready (unix_file_t * uf) { __attribute__ ((unused)) int n; u8 buff[8]; + vhost_user_intf_t *vui = + pool_elt_at_index (vhost_user_main.vhost_user_interfaces, + uf->private_data >> 8); + n = read (uf->file_descriptor, ((char *) &buff), 8); + DBG_SOCK ("if %d CALL queue %d", uf->private_data >> 8, + uf->private_data & 0xff); + vhost_user_set_interrupt_pending (vui, uf->private_data); + return 0; } @@ -503,13 +601,20 @@ vhost_user_kickfd_read_ready (unix_file_t * uf) pool_elt_at_index (vhost_user_main.vhost_user_interfaces, uf->private_data >> 8); u32 qid = uf->private_data & 0xff; + n = read (uf->file_descriptor, ((char *) &buff), 8); DBG_SOCK ("if %d KICK queue %d", uf->private_data >> 8, qid); vlib_worker_thread_barrier_sync (vlib_get_main ()); - vui->vrings[qid].started = 1; - vhost_user_update_iface_state (vui); + if (!vui->vrings[qid].started || + (vhost_user_intf_ready (vui) != vui->is_up)) + { + vui->vrings[qid].started = 1; + vhost_user_update_iface_state (vui); + } vlib_worker_thread_barrier_release (vlib_get_main ()); + + vhost_user_set_interrupt_pending (vui, uf->private_data); return 0; } @@ -907,8 +1012,12 @@ vhost_user_socket_read (unix_file_t * uf) vui->vrings[msg.state.index].last_avail_idx = vui->vrings[msg.state.index].used->idx; - /* tell driver that we don't want interrupts */ - vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY; + if (vui->operation_mode == VHOST_USER_POLLING_MODE) + /* tell driver that we don't want interrupts */ + vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY; + else + /* tell driver that we want interrupts */ + vui->vrings[msg.state.index].used->flags = 0; break; case VHOST_USER_SET_OWNER: @@ -1811,7 +1920,8 @@ vhost_user_if_input (vlib_main_t * vm, vhost_user_log_dirty_ring (vui, txvq, idx); /* interrupt (call) handling */ - if ((txvq->callfd_idx != ~0) && !(txvq->avail->flags & 1)) + if ((txvq->callfd_idx != ~0) && + !(txvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { txvq->n_since_last_int += n_rx_packets; @@ -1837,16 +1947,33 @@ vhost_user_input (vlib_main_t * vm, vhost_user_main_t *vum = &vhost_user_main; uword n_rx_packets = 0; u32 cpu_index = os_get_cpu_number (); + vhost_iface_and_queue_t *vhiq; + vhost_user_intf_t *vui; + vhost_cpu_t *vhc; + vhc = &vum->cpus[cpu_index]; + if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE)) + { + vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues) + { + vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; + n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node); + } + } + else + { + int i; - vhost_iface_and_queue_t *vhiq; - vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues) - { - vhost_user_intf_t *vui = - &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; - n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node); - } + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, vhc->pending_input_bitmap, ({ + int qid = i & 0xff; + clib_bitmap_set (vhc->pending_input_bitmap, i, 0); + vui = pool_elt_at_index (vum->vhost_user_interfaces, i >> 8); + n_rx_packets += vhost_user_if_input (vm, vum, vui, qid, node); + })); + /* *INDENT-ON* */ + } return n_rx_packets; } @@ -2241,7 +2368,8 @@ done: } /* interrupt (call) handling */ - if 
((rxvq->callfd_idx != ~0) && !(rxvq->avail->flags & 1)) + if ((rxvq->callfd_idx != ~0) && + !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { rxvq->n_since_last_int += frame->n_vectors - n_left; @@ -2595,6 +2723,95 @@ vhost_user_vui_init (vnet_main_t * vnm, vhost_user_tx_thread_placement (vui); } +static uword +vhost_user_send_interrupt_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + vhost_user_intf_t *vui; + f64 timeout = 3153600000.0 /* 100 years */ ; + uword event_type, *event_data = 0; + vhost_user_main_t *vum = &vhost_user_main; + vhost_iface_and_queue_t *vhiq; + vhost_cpu_t *vhc; + f64 now, poll_time_remaining; + + while (1) + { + poll_time_remaining = + vlib_process_wait_for_event_or_clock (vm, timeout); + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + /* + * Use the remaining timeout if it is less than coalesce time to avoid + * resetting the existing timer in the middle of expiration + */ + timeout = poll_time_remaining; + if (vlib_process_suspend_time_is_zero (timeout) || + (timeout > vum->coalesce_time)) + timeout = vum->coalesce_time; + + now = vlib_time_now (vm); + switch (event_type) + { + case VHOST_USER_EVENT_START_TIMER: + if (!vlib_process_suspend_time_is_zero (poll_time_remaining)) + break; + /* fall through */ + + case ~0: + vec_foreach (vhc, vum->cpus) + { + u32 cpu_index = vhc - vum->cpus; + f64 next_timeout; + + next_timeout = timeout; + vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues) + { + vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; + vhost_user_vring_t *rxvq = + &vui->vrings[VHOST_VRING_IDX_RX (vhiq->qid)]; + vhost_user_vring_t *txvq = + &vui->vrings[VHOST_VRING_IDX_TX (vhiq->qid)]; + + if (txvq->n_since_last_int) + { + if (now >= txvq->int_deadline) + vhost_user_send_call (vm, txvq); + else + next_timeout = txvq->int_deadline - now; + } + + if (rxvq->n_since_last_int) + { + if (now >= rxvq->int_deadline) + vhost_user_send_call (vm, rxvq); + else + next_timeout = rxvq->int_deadline - now; + } + + if ((next_timeout < timeout) && (next_timeout > 0.0)) + timeout = next_timeout; + } + } + break; + + default: + clib_warning ("BUG: unhandled event type %d", event_type); + break; + } + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = { + .function = vhost_user_send_interrupt_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "vhost-user-send-interrupt-process", +}; +/* *INDENT-ON* */ + int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, @@ -2608,8 +2825,10 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_idx = ~0; int rv = 0; int server_sock_fd = -1; + vhost_user_main_t *vum = &vhost_user_main; - if (operation_mode != VHOST_USER_POLLING_MODE) + if ((operation_mode != VHOST_USER_POLLING_MODE) && + (operation_mode != VHOST_USER_INTERRUPT_MODE)) return VNET_API_ERROR_UNIMPLEMENTED; if (sock_filename == NULL || !(strlen (sock_filename) > 0)) @@ -2640,6 +2859,15 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, // Process node must connect vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0); + + if ((operation_mode == VHOST_USER_INTERRUPT_MODE) && + !vum->interrupt_mode && (vum->coalesce_time > 0.0) && + (vum->coalesce_frames > 0)) + { + vum->interrupt_mode = 1; + vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_START_TIMER, 0); + } return rv; } @@ -2658,7 +2886,8 @@ vhost_user_modify_if (vnet_main_t 
* vnm, vlib_main_t * vm, int rv = 0; vnet_hw_interface_t *hwif; - if (operation_mode != VHOST_USER_POLLING_MODE) + if ((operation_mode != VHOST_USER_POLLING_MODE) && + (operation_mode != VHOST_USER_INTERRUPT_MODE)) return VNET_API_ERROR_UNIMPLEMENTED; if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) || hwif->dev_class_index != vhost_user_dev_class.index) @@ -2682,9 +2911,34 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, // Process node must connect vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0); + + if ((operation_mode == VHOST_USER_INTERRUPT_MODE) && + !vum->interrupt_mode && (vum->coalesce_time > 0.0) && + (vum->coalesce_frames > 0)) + { + vum->interrupt_mode = 1; + vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_START_TIMER, 0); + } return rv; } +static uword +unformat_vhost_user_operation_mode (unformat_input_t * input, va_list * args) +{ + u8 *operation_mode = va_arg (*args, u8 *); + uword rc = 1; + + if (unformat (input, "interrupt")) + *operation_mode = VHOST_USER_INTERRUPT_MODE; + else if (unformat (input, "polling")) + *operation_mode = VHOST_USER_POLLING_MODE; + else + rc = 0; + + return rc; +} + clib_error_t * vhost_user_connect_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -2722,6 +2976,9 @@ vhost_user_connect_command_fn (vlib_main_t * vm, { renumber = 1; } + else if (unformat (line_input, "mode %U", + unformat_vhost_user_operation_mode, &operation_mode)) + ; else { error = clib_error_return (0, "unknown input `%U'", @@ -2851,6 +3108,25 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, return rv; } +static u8 * +format_vhost_user_operation_mode (u8 * s, va_list * va) +{ + int operation_mode = va_arg (*va, int); + + switch (operation_mode) + { + case VHOST_USER_POLLING_MODE: + s = format (s, "%s", "polling"); + break; + case VHOST_USER_INTERRUPT_MODE: + s = format (s, "%s", "interrupt"); + break; + default: + s = format (s, "%s", "invalid"); + } + return s; +} + clib_error_t * show_vhost_user_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -2959,14 +3235,22 @@ show_vhost_user_command_fn (vlib_main_t * vm, (vui->unix_server_index != ~0) ? "server" : "client", strerror (vui->sock_errno)); + vlib_cli_output (vm, " configured mode: %U\n", + format_vhost_user_operation_mode, vui->operation_mode); vlib_cli_output (vm, " rx placement: "); vec_foreach (vhc, vum->cpus) { vec_foreach (vhiq, vhc->rx_queues) { if (vhiq->vhost_iface_index == vui - vum->vhost_user_interfaces) - vlib_cli_output (vm, " thread %d on vring %d\n", - vhc - vum->cpus, VHOST_VRING_IDX_TX (vhiq->qid)); + { + vlib_cli_output (vm, " thread %d on vring %d\n", + vhc - vum->cpus, + VHOST_VRING_IDX_TX (vhiq->qid)); + vlib_cli_output (vm, " mode: %U\n", + format_vhost_user_operation_mode, + vhc->operation_mode); + } } } @@ -3096,6 +3380,9 @@ done: * in the name to be specified. If instance already exists, name will be used * anyway and multiple instances will have the same name. Use with caution. * + * - mode [interrupt | polling] - Optional parameter specifying + * the input thread polling policy. 
+ * * @cliexpar * Example of how to create a vhost interface with VPP as the client and all features enabled: * @cliexstart{create vhost-user socket /tmp/vhost1.sock} @@ -3112,7 +3399,9 @@ done: /* *INDENT-OFF* */ VLIB_CLI_COMMAND (vhost_user_connect_command, static) = { .path = "create vhost-user", - .short_help = "create vhost-user socket [server] [feature-mask ] [hwaddr ] [renumber ]", + .short_help = "create vhost-user socket [server] " + "[feature-mask ] [hwaddr ] [renumber ] " + "[mode {interrupt | polling}]", .function = vhost_user_connect_command_fn, }; /* *INDENT-ON* */ diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index 6b928f05..67f18b8e 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -216,6 +216,8 @@ typedef struct #define VHOST_USER_INTERRUPT_MODE 1 #define VHOST_USER_ADAPTIVE_MODE 2 +#define VHOST_USER_EVENT_START_TIMER 1 + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -298,6 +300,12 @@ typedef struct /* This is here so it doesn't end-up * using stack or registers. */ vhost_trace_t *current_trace; + + /* bitmap of pending rx interfaces */ + uword *pending_input_bitmap; + + /* The operation mode computed per cpu based on interface setting */ + u8 operation_mode; } vhost_cpu_t; typedef struct @@ -320,6 +328,9 @@ typedef struct /** Pseudo random iterator */ u32 random; + + /* Node is in interrupt mode */ + u8 interrupt_mode; } vhost_user_main_t; typedef struct -- cgit 1.2.3-korg From 13ad1f02922858177915b1cb1450041d2e4d85de Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Sun, 26 Mar 2017 19:36:18 -0400 Subject: Rename "show interfaces" -> "show interface" To line up with "show interface placement," recently added. Otherwise, "show int" refers only to "show interface placement," which tends to annoy the cash customers... Change-Id: Iea9e3681aeb051e2b0e1ecbf06706d98af9a3abf Signed-off-by: Dave Barach --- src/vnet/devices/af_packet/cli.c | 2 +- src/vnet/devices/virtio/vhost-user.c | 2 +- src/vnet/interface_cli.c | 4 ++-- src/vnet/span/span.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c index d4aa7016..44dc5179 100644 --- a/src/vnet/devices/af_packet/cli.c +++ b/src/vnet/devices/af_packet/cli.c @@ -113,7 +113,7 @@ done: * exist. Once created, a new host interface will exist in VPP * with the name 'host-', where '' * is the name of the specified veth pair. Use the - * 'show interfaces' command to display host interface details. + * 'show interface' command to display host interface details. * * This command has the following optional parameters: * diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 5a5beb15..5ad4cb62 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -3408,7 +3408,7 @@ VLIB_CLI_COMMAND (vhost_user_connect_command, static) = { /*? * Delete a vHost User interface using the interface name or the - * software interface index. Use the 'show interfaces' + * software interface index. Use the 'show interface' * command to determine the software interface index. On deletion, * the linux socket will not be deleted. 
* diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index c1566551..5640966c 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -416,8 +416,8 @@ done: /* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_sw_interfaces_command, static) = { - .path = "show interfaces", - .short_help = "show interfaces [address|addr|features|feat] [ ...]", + .path = "show interface", + .short_help = "show interface [address|addr|features|feat] [ ...]", .function = show_sw_interfaces, }; /* *INDENT-ON* */ diff --git a/src/vnet/span/span.c b/src/vnet/span/span.c index bc244eff..c5b43e34 100644 --- a/src/vnet/span/span.c +++ b/src/vnet/span/span.c @@ -169,7 +169,7 @@ show_interfaces_span_command_fn (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_interfaces_span_command, static) = { - .path = "show interfaces span", + .path = "show interface span", .short_help = "Shows SPAN mirror table", .function = show_interfaces_span_command_fn, }; -- cgit 1.2.3-korg From 22db11b491f80539438418eaaa0aa864202dadf6 Mon Sep 17 00:00:00 2001 From: Jim Gibson Date: Mon, 27 Mar 2017 19:46:12 +0000 Subject: af_packet driver needs to check VLIB_BUFFER_NEXT_PRESENT flag is set when walking vlib_buffer_t next_buffer chain on transmit. On buffer allocation: - next_buffer is not initialized and may contain a stale, invalid value that should be ignored unless overwritten by a valid value. - VLIB_BUFFER_NEXT_PRESENT flag is cleared and only set if a valid value is written to next_buffer. Change-Id: I9b0ccdc54f4f7456f8328ce7c4a0d52d0fba8caa Signed-off-by: Jim Gibson --- src/vnet/devices/af_packet/device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c index e3bf9bbc..9a94fc5e 100644 --- a/src/vnet/devices/af_packet/device.c +++ b/src/vnet/devices/af_packet/device.c @@ -125,7 +125,8 @@ af_packet_interface_tx (vlib_main_t * vm, vlib_buffer_get_current (b0), len); offset += len; } - while ((bi = b0->next_buffer)); + while ((bi = + (b0->flags & VLIB_BUFFER_NEXT_PRESENT) ? b0->next_buffer : 0)); tph->tp_len = tph->tp_snaplen = offset; tph->tp_status = TP_STATUS_SEND_REQUEST; -- cgit 1.2.3-korg From 0d150bb132b81c20b38a0cefd82c8f718435515d Mon Sep 17 00:00:00 2001 From: Steven Date: Wed, 22 Mar 2017 12:05:19 -0700 Subject: vhost: vhost-user component may become unusable with too many open files (VPP-668) When the system limit on open files is reached, vhost may encounter a failure in the socket() call and return from vhost-user-process. That return terminates all future attempts to accept incoming socket connections, even if the condition is later resolved. The fix is to not return from vhost-user-process, but to record the error in the interface, spit out the error, and retry the connection every 3 seconds.
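The retry policy described above reduces to the following standalone sketch (plain POSIX C with simplified, hypothetical names; the real logic lives in vhost_user_process() in the diff below):

/*
 * Sketch only: retry forever, log a given errno once, never give up.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

static int
try_connect (const char *path, int *last_errno)
{
  struct sockaddr_un sun = { .sun_family = AF_UNIX };
  int err, fd = socket (AF_UNIX, SOCK_STREAM, 0);

  if (fd >= 0)
    {
      strncpy (sun.sun_path, path, sizeof (sun.sun_path) - 1);
      if (connect (fd, (struct sockaddr *) &sun, sizeof (sun)) == 0)
        return fd;
      err = errno;              /* keep connect()'s errno, not close()'s */
      close (fd);
    }
  else
    err = errno;

  /* Log only when the error changes, so a persistent EMFILE/ENFILE
   * condition does not flood the log while we keep retrying. */
  if (*last_errno != err)
    fprintf (stderr, "connect to %s: %s (will retry)\n", path,
             strerror (err));
  *last_errno = err;
  return -1;
}

int
main (void)
{
  int fd, last_errno = 0;

  while ((fd = try_connect ("/tmp/vhost1.sock", &last_errno)) < 0)
    sleep (3);                  /* retry every 3 seconds, as in the fix */
  close (fd);
  return 0;
}

The key design point is deferring socket creation into the polling loop itself: a transient open-file failure then costs one missed tick instead of permanently disabling the process node.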
Change-Id: I806baedf13e8c9b73e7c7820c094240f39949950 Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 5ad4cb62..00807dc0 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2440,14 +2440,11 @@ vhost_user_process (vlib_main_t * vm, f64 timeout = 3153600000.0 /* 100 years */ ; uword *event_data = 0; - sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + sockfd = -1; sun.sun_family = AF_UNIX; template.read_function = vhost_user_socket_read; template.error_function = vhost_user_socket_error; - if (sockfd < 0) - return 0; - while (1) { vlib_process_wait_for_event_or_clock (vm, timeout); @@ -2462,6 +2459,23 @@ vhost_user_process (vlib_main_t * vm, if (vui->unix_server_index == ~0) { //Nothing to do for server sockets if (vui->unix_file_index == ~0) { + if ((sockfd < 0) && + ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)) + { + /* + * 1st time error or new error for this interface, + * spit out the message and record the error + */ + if (!vui->sock_errno || (vui->sock_errno != errno)) + { + clib_unix_warning + ("Error: Could not open unix socket for %s", + vui->sock_filename); + vui->sock_errno = errno; + } + continue; + } + /* try to connect */ strncpy (sun.sun_path, (char *) vui->sock_filename, sizeof (sun.sun_path) - 1); @@ -2483,11 +2497,8 @@ vhost_user_process (vlib_main_t * vm, vui - vhost_user_main.vhost_user_interfaces; vui->unix_file_index = unix_file_add (&unix_main, &template); - //Re-open for next connect - if ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) { - clib_warning("Critical: Could not open unix socket"); - return 0; - } + /* This sockfd is considered consumed */ + sockfd = -1; } else { -- cgit 1.2.3-korg From 1927da29ccbe1d4cc8e59ccfa197eb41c257814f Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 27 Mar 2017 17:08:20 +0200 Subject: vppinfra: add spinlock inline functions Change-Id: I86089e9bb604adfc260a111685001be1c897ce53 Signed-off-by: Damjan Marion --- src/plugins/memif/device.c | 21 +------- src/plugins/memif/memif.c | 12 +---- src/plugins/memif/memif.h | 4 +- src/vnet/devices/af_packet/af_packet.c | 6 +-- src/vnet/devices/af_packet/af_packet.h | 4 +- src/vnet/devices/af_packet/device.c | 9 +--- src/vnet/devices/netmap/device.c | 9 +--- src/vnet/devices/netmap/netmap.c | 6 +-- src/vnet/devices/netmap/netmap.h | 4 +- src/vppinfra.am | 1 + src/vppinfra/lock.h | 97 ++++++++++++++++++++++++++++++++++ 11 files changed, 117 insertions(+), 56 deletions(-) create mode 100644 src/vppinfra/lock.h (limited to 'src/vnet/devices') diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c index 446537a3..4faeb055 100644 --- a/src/plugins/memif/device.c +++ b/src/plugins/memif/device.c @@ -78,23 +78,6 @@ format_memif_tx_trace (u8 * s, va_list * args) return s; } -static_always_inline void -memif_interface_lock (memif_if_t * mif) -{ - if (PREDICT_FALSE (mif->lockp != 0)) - { - while (__sync_lock_test_and_set (mif->lockp, 1)) - ; - } -} - -static_always_inline void -memif_interface_unlock (memif_if_t * mif) -{ - if (PREDICT_FALSE (mif->lockp != 0)) - *mif->lockp = 0; -} - static_always_inline void memif_prefetch_buffer_and_data (vlib_main_t * vm, u32 bi) { @@ -117,7 +100,7 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u16 head, tail; u16 free_slots; - memif_interface_lock 
(mif); + clib_spinlock_lock_if_init (&mif->lockp); /* free consumed buffers */ @@ -210,7 +193,7 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, CLIB_MEMORY_STORE_BARRIER (); ring->head = head; - memif_interface_unlock (mif); + clib_spinlock_unlock (&mif->lockp); if (n_left) { diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 7ba67c5b..cf8ca577 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -716,11 +716,7 @@ memif_close_if (memif_main_t * mm, memif_if_t * mif) } } - if (mif->lockp != 0) - { - clib_mem_free ((void *) mif->lockp); - mif->lockp = 0; - } + clib_spinlock_free (&mif->lockp); mhash_unset (&mm->if_index_by_key, &mif->key, &mif->if_index); vec_free (mif->socket_filename); @@ -783,11 +779,7 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) mif->connection.fd = mif->interrupt_line.fd = -1; if (tm->n_vlib_mains > 1) - { - mif->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); - memset ((void *) mif->lockp, 0, CLIB_CACHE_LINE_BYTES); - } + clib_spinlock_init (&mif->lockp); if (!args->hw_addr_set) { diff --git a/src/plugins/memif/memif.h b/src/plugins/memif/memif.h index a7a88e07..f57170f8 100644 --- a/src/plugins/memif/memif.h +++ b/src/plugins/memif/memif.h @@ -15,6 +15,8 @@ *------------------------------------------------------------------ */ +#include + typedef struct { u16 version; @@ -98,7 +100,7 @@ typedef struct typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - volatile u32 *lockp; + clib_spinlock_t lockp; u32 flags; #define MEMIF_IF_FLAG_ADMIN_UP (1 << 0) #define MEMIF_IF_FLAG_IS_SLAVE (1 << 1) diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 5fdc59f2..20285107 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -229,11 +229,7 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, apif->next_rx_frame = 0; if (tm->n_vlib_mains > 1) - { - apif->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); - memset ((void *) apif->lockp, 0, CLIB_CACHE_LINE_BYTES); - } + clib_spinlock_init (&apif->lockp); { unix_file_t template = { 0 }; diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h index 50ec2378..77a2c7a3 100644 --- a/src/vnet/devices/af_packet/af_packet.h +++ b/src/vnet/devices/af_packet/af_packet.h @@ -17,10 +17,12 @@ *------------------------------------------------------------------ */ +#include + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - volatile u32 *lockp; + clib_spinlock_t lockp; u8 *host_if_name; int fd; struct tpacket_req *rx_req; diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c index 9a94fc5e..2ba3f579 100644 --- a/src/vnet/devices/af_packet/device.c +++ b/src/vnet/devices/af_packet/device.c @@ -92,11 +92,7 @@ af_packet_interface_tx (vlib_main_t * vm, struct tpacket2_hdr *tph; u32 frame_not_ready = 0; - if (PREDICT_FALSE (apif->lockp != 0)) - { - while (__sync_lock_test_and_set (apif->lockp, 1)) - ; - } + clib_spinlock_lock_if_init (&apif->lockp); while (n_left > 0) { @@ -159,8 +155,7 @@ af_packet_interface_tx (vlib_main_t * vm, } } - if (PREDICT_FALSE (apif->lockp != 0)) - *apif->lockp = 0; + clib_spinlock_unlock_if_init (&apif->lockp); if (PREDICT_FALSE (frame_not_ready)) vlib_error_count (vm, node->node_index, diff --git a/src/vnet/devices/netmap/device.c b/src/vnet/devices/netmap/device.c 
index 2152824f..aea9ddf4 100644 --- a/src/vnet/devices/netmap/device.c +++ b/src/vnet/devices/netmap/device.c @@ -105,11 +105,7 @@ netmap_interface_tx (vlib_main_t * vm, netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance); int cur_ring; - if (PREDICT_FALSE (nif->lockp != 0)) - { - while (__sync_lock_test_and_set (nif->lockp, 1)) - ; - } + clib_spinlock_lock_if_init (&nif->lockp); cur_ring = nif->first_tx_ring; @@ -165,8 +161,7 @@ netmap_interface_tx (vlib_main_t * vm, if (n_left < frame->n_vectors) ioctl (nif->fd, NIOCTXSYNC, NULL); - if (PREDICT_FALSE (nif->lockp != 0)) - *nif->lockp = 0; + clib_spinlock_unlock_if_init (&nif->lockp); if (n_left) vlib_error_count (vm, node->node_index, diff --git a/src/vnet/devices/netmap/netmap.c b/src/vnet/devices/netmap/netmap.c index 3bdb442d..09afc764 100644 --- a/src/vnet/devices/netmap/netmap.c +++ b/src/vnet/devices/netmap/netmap.c @@ -185,11 +185,7 @@ netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set, nif->per_interface_next_index = ~0; if (tm->n_vlib_mains > 1) - { - nif->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); - memset ((void *) nif->lockp, 0, CLIB_CACHE_LINE_BYTES); - } + clib_spinlock_init (&nif->lockp); { unix_file_t template = { 0 }; diff --git a/src/vnet/devices/netmap/netmap.h b/src/vnet/devices/netmap/netmap.h index 39a94043..e04f045d 100644 --- a/src/vnet/devices/netmap/netmap.h +++ b/src/vnet/devices/netmap/netmap.h @@ -40,10 +40,12 @@ * SUCH DAMAGE. */ +#include + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - volatile u32 *lockp; + clib_spinlock_t lockp; u8 *host_if_name; uword if_index; u32 hw_if_index; diff --git a/src/vppinfra.am b/src/vppinfra.am index 4b9f0c29..fed1981e 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -180,6 +180,7 @@ nobase_include_HEADERS = \ vppinfra/graph.h \ vppinfra/hash.h \ vppinfra/heap.h \ + vppinfra/lock.h \ vppinfra/longjmp.h \ vppinfra/macros.h \ vppinfra/math.h \ diff --git a/src/vppinfra/lock.h b/src/vppinfra/lock.h new file mode 100644 index 00000000..c60ff414 --- /dev/null +++ b/src/vppinfra/lock.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_clib_lock_h +#define included_clib_lock_h + +#include + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u32 lock; +#if CLIB_DEBUG > 0 + pid_t pid; + uword cpu_index; + void *frame_address; +#endif +} *clib_spinlock_t; + +static inline void +clib_spinlock_init (clib_spinlock_t * p) +{ + *p = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + memset ((void *) *p, 0, CLIB_CACHE_LINE_BYTES); +} + +static inline void +clib_spinlock_free (clib_spinlock_t * p) +{ + if (*p) + { + clib_mem_free ((void *) *p); + *p = 0; + } +} + +static_always_inline void +clib_spinlock_lock (clib_spinlock_t * p) +{ + while (__sync_lock_test_and_set (&(*p)->lock, 1)) +#if __x86_64__ + __builtin_ia32_pause () +#endif + ; +#if CLIB_DEBUG > 0 + (*p)->frame_address = __builtin_frame_address (0); + (*p)->pid = getpid (); + (*p)->cpu_index = os_get_cpu_number (); +#endif +} + +static_always_inline void +clib_spinlock_lock_if_init (clib_spinlock_t * p) +{ + if (PREDICT_FALSE (*p != 0)) + clib_spinlock_lock (p); +} + +static_always_inline void +clib_spinlock_unlock (clib_spinlock_t * p) +{ + (*p)->lock = 0; +#if CLIB_DEBUG > 0 + (*p)->frame_address = 0; + (*p)->pid = 0; + (*p)->cpu_index = 0; +#endif +} + +static_always_inline void +clib_spinlock_unlock_if_init (clib_spinlock_t * p) +{ + if (PREDICT_FALSE (*p != 0)) + clib_spinlock_unlock (p); +} + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 586afd762bfa149f5ca167bd5fd5a0cd59ce94fe Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 5 Apr 2017 19:18:20 +0200 Subject: Use thread local storage for thread index This patch deprecates stack-based thread identification and also removes the requirement that thread stacks be adjacent. Finally, possibly annoying for some folks, it renames all occurrences of cpu_index and cpu_number to thread_index. Using the word "cpu" is misleading here, as a thread can be migrated to a different CPU, and it is also unrelated to the Linux CPU index.
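Conceptually, the change amounts to the following standalone sketch (hypothetical names; the accessor actually introduced by this patch is vlib_get_thread_index(), visible throughout the diff below). Each thread publishes its index in thread-local storage once at startup, so a lookup becomes a plain TLS read with no dependency on where the thread's stack happens to live:

/*
 * Sketch only: publish a per-thread index via TLS at thread start.
 * Compile with: cc sketch.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>

static __thread unsigned long my_thread_index; /* one instance per thread */

static inline unsigned long
get_thread_index (void)
{
  return my_thread_index;       /* no stack-address arithmetic needed */
}

static void *
worker (void *arg)
{
  my_thread_index = (unsigned long) arg; /* set once, at thread startup */
  printf ("worker sees thread index %lu\n", get_thread_index ());
  return 0;
}

int
main (void)
{
  pthread_t threads[2];
  unsigned long i;

  for (i = 0; i < 2; i++)
    pthread_create (&threads[i], 0, worker, (void *) (i + 1));
  for (i = 0; i < 2; i++)
    pthread_join (threads[i], 0);
  return 0;
}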
Change-Id: I68cdaf661e701d2336fc953dcb9978d10a70f7c1 Signed-off-by: Damjan Marion --- src/examples/srv6-sample-localsid/node.c | 4 +- src/plugins/dpdk/buffer.c | 2 +- src/plugins/dpdk/device/device.c | 8 +- src/plugins/dpdk/device/dpdk_priv.h | 8 +- src/plugins/dpdk/device/init.c | 2 +- src/plugins/dpdk/device/node.c | 32 +++--- src/plugins/dpdk/hqos/hqos.c | 16 +-- src/plugins/dpdk/ipsec/cli.c | 8 +- src/plugins/dpdk/ipsec/crypto_node.c | 4 +- src/plugins/dpdk/ipsec/esp.h | 4 +- src/plugins/dpdk/ipsec/esp_decrypt.c | 4 +- src/plugins/dpdk/ipsec/esp_encrypt.c | 5 +- src/plugins/dpdk/ipsec/ipsec.c | 2 +- src/plugins/dpdk/ipsec/ipsec.h | 4 +- src/plugins/dpdk/main.c | 2 +- src/plugins/flowperpkt/l2_node.c | 2 +- src/plugins/flowperpkt/node.c | 2 +- src/plugins/ioam/export-common/ioam_export.h | 6 +- .../ioam/ip6/ioam_cache_tunnel_select_node.c | 16 +-- src/plugins/ixge/ixge.c | 2 +- src/plugins/lb/lb.c | 8 +- src/plugins/lb/node.c | 22 ++-- src/plugins/lb/refcount.c | 8 +- src/plugins/lb/refcount.h | 4 +- src/plugins/memif/node.c | 35 +++--- src/plugins/snat/in2out.c | 110 +++++++++--------- src/plugins/snat/out2in.c | 102 ++++++++--------- src/plugins/snat/snat.h | 10 +- src/vlib/buffer.c | 6 +- src/vlib/buffer_funcs.h | 4 +- src/vlib/cli.c | 6 +- src/vlib/counter.h | 16 +-- src/vlib/error.c | 2 +- src/vlib/global_funcs.h | 2 +- src/vlib/main.c | 14 +-- src/vlib/main.h | 2 +- src/vlib/node.c | 2 +- src/vlib/node.h | 6 +- src/vlib/node_funcs.h | 8 +- src/vlib/threads.c | 69 ++++------- src/vlib/threads.h | 21 ++-- src/vlib/unix/cj.c | 7 +- src/vlib/unix/cj.h | 2 +- src/vlib/unix/main.c | 43 +++---- src/vnet/adj/adj_l2.c | 4 +- src/vnet/adj/adj_midchain.c | 8 +- src/vnet/adj/adj_nsh.c | 4 +- src/vnet/classify/vnet_classify.c | 16 +-- src/vnet/cop/ip4_whitelist.c | 8 +- src/vnet/cop/ip6_whitelist.c | 8 +- src/vnet/devices/af_packet/node.c | 20 ++-- src/vnet/devices/devices.c | 61 +++++----- src/vnet/devices/devices.h | 18 +-- src/vnet/devices/netmap/node.c | 24 ++-- src/vnet/devices/ssvm/node.c | 6 +- src/vnet/devices/virtio/vhost-user.c | 127 +++++++++++---------- src/vnet/dpo/lookup_dpo.c | 20 ++-- src/vnet/dpo/replicate_dpo.c | 12 +- src/vnet/ethernet/arp.c | 2 +- src/vnet/ethernet/interface.c | 7 +- src/vnet/ethernet/node.c | 14 +-- src/vnet/gre/node.c | 8 +- src/vnet/interface.h | 2 +- src/vnet/interface_output.c | 53 ++++----- src/vnet/ip/ip4_forward.c | 34 +++--- src/vnet/ip/ip4_input.c | 8 +- src/vnet/ip/ip6_forward.c | 24 ++-- src/vnet/ip/ip6_input.c | 8 +- src/vnet/ip/ip6_neighbor.c | 4 +- src/vnet/ipsec/esp.h | 8 +- src/vnet/ipsec/esp_decrypt.c | 13 ++- src/vnet/ipsec/esp_encrypt.c | 13 ++- src/vnet/ipsec/ikev2.c | 64 ++++++----- src/vnet/ipsec/ipsec.h | 12 +- src/vnet/ipsec/ipsec_if.c | 2 +- src/vnet/l2/l2_bvi.h | 2 +- src/vnet/l2/l2_input.c | 14 +-- src/vnet/l2/l2_output.c | 6 +- src/vnet/l2tp/decap.c | 2 +- src/vnet/l2tp/encap.c | 2 +- src/vnet/l2tp/l2tp.c | 6 +- src/vnet/lisp-gpe/decap.c | 16 +-- src/vnet/lldp/lldp_input.c | 2 +- src/vnet/map/ip4_map.c | 14 +-- src/vnet/map/ip4_map_t.c | 12 +- src/vnet/map/ip6_map.c | 19 +-- src/vnet/map/ip6_map_t.c | 12 +- src/vnet/mpls/mpls_input.c | 8 +- src/vnet/mpls/mpls_lookup.c | 20 ++-- src/vnet/mpls/mpls_output.c | 10 +- src/vnet/pg/input.c | 4 +- src/vnet/replication.c | 20 ++-- src/vnet/replication.h | 2 +- src/vnet/session/node.c | 2 +- src/vnet/sr/sr_localsid.c | 44 +++---- src/vnet/tcp/builtin_client.c | 2 +- src/vnet/tcp/tcp.c | 8 +- src/vnet/tcp/tcp_debug.h | 2 +- src/vnet/tcp/tcp_input.c | 10 +- src/vnet/tcp/tcp_output.c | 20 
++-- src/vnet/udp/udp_input.c | 2 +- src/vnet/unix/tapcli.c | 2 +- src/vnet/unix/tuntap.c | 4 +- src/vnet/vxlan-gpe/decap.c | 10 +- src/vnet/vxlan-gpe/encap.c | 12 +- src/vnet/vxlan/decap.c | 10 +- src/vnet/vxlan/encap.c | 12 +- src/vpp/stats/stats.c | 14 +-- src/vpp/stats/stats.h | 2 +- 109 files changed, 790 insertions(+), 791 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/examples/srv6-sample-localsid/node.c b/src/examples/srv6-sample-localsid/node.c index 7bae9cd7..e83e2352 100644 --- a/src/examples/srv6-sample-localsid/node.c +++ b/src/examples/srv6-sample-localsid/node.c @@ -114,7 +114,7 @@ srv6_localsid_sample_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_fram from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -168,7 +168,7 @@ srv6_localsid_sample_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_fram /* This increments the SRv6 per LocalSID counters.*/ vlib_increment_combined_counter (((next0 == SRV6_SAMPLE_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : &(sm->sr_ls_valid_counters)), - cpu_index, + thread_index, ls0 - sm->localsids, 1, vlib_buffer_length_in_chain (vm, b0)); diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c index 2765c292..c80b3fa8 100644 --- a/src/plugins/dpdk/buffer.c +++ b/src/plugins/dpdk/buffer.c @@ -132,7 +132,7 @@ dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) u32 merge_index; int i; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); f = vlib_buffer_get_free_list (vm, free_list_index); diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c index 50b26689..91661246 100644 --- a/src/plugins/dpdk/device/device.c +++ b/src/plugins/dpdk/device/device.c @@ -243,7 +243,7 @@ static_always_inline ASSERT (ring->tx_tail == 0); n_retry = 16; - queue_id = vm->cpu_index; + queue_id = vm->thread_index; do { @@ -266,7 +266,7 @@ static_always_inline { /* no wrap, transmit in one burst */ dpdk_device_hqos_per_worker_thread_t *hqos = - &xd->hqos_wt[vm->cpu_index]; + &xd->hqos_wt[vm->thread_index]; ASSERT (hqos->swq != NULL); @@ -332,7 +332,7 @@ dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp) { dpdk_main_t *dm = &dpdk_main; - u32 my_cpu = vm->cpu_index; + u32 my_cpu = vm->thread_index; struct rte_mbuf *mb_new; if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0) @@ -376,7 +376,7 @@ dpdk_interface_tx (vlib_main_t * vm, tx_ring_hdr_t *ring; u32 n_on_ring; - my_cpu = vm->cpu_index; + my_cpu = vm->thread_index; queue_id = my_cpu; diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h index dd40ff48..52b4ca4b 100644 --- a/src/plugins/dpdk/device/dpdk_priv.h +++ b/src/plugins/dpdk/device/dpdk_priv.h @@ -79,7 +79,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now) { vlib_simple_counter_main_t *cm; vnet_main_t *vnm = vnet_get_main (); - u32 my_cpu = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u64 rxerrors, last_rxerrors; /* only update counters for PMD interfaces */ @@ -96,7 +96,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now) cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, VNET_INTERFACE_COUNTER_RX_NO_BUF); - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, 
xd->vlib_sw_if_index, xd->stats.rx_nombuf - xd->last_stats.rx_nombuf); } @@ -107,7 +107,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now) cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, VNET_INTERFACE_COUNTER_RX_MISS); - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index, xd->stats.imissed - xd->last_stats.imissed); } @@ -119,7 +119,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now) cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, VNET_INTERFACE_COUNTER_RX_ERROR); - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index, rxerrors - last_rxerrors); } diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 538db6cb..7eaf8da7 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -324,7 +324,7 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) int rv; int j; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) { diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index e740fd18..b10e0fad 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -283,7 +283,7 @@ dpdk_buffer_init_from_template (void *d0, void *d1, void *d2, void *d3, */ static_always_inline u32 dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, - vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id, + vlib_node_runtime_t * node, u32 thread_index, u16 queue_id, int maybe_multiseg) { u32 n_buffers; @@ -294,7 +294,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, uword n_rx_bytes = 0; u32 n_trace, trace_cnt __attribute__ ((unused)); vlib_buffer_free_list_t *fl; - vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, cpu_index); + vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, thread_index); if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) return 0; @@ -306,7 +306,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, return 0; } - vec_reset_length (xd->d_trace_buffers[cpu_index]); + vec_reset_length (xd->d_trace_buffers[thread_index]); trace_cnt = n_trace = vlib_get_trace_count (vm, node); if (n_trace > 0) @@ -318,7 +318,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, { struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++]; vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - vec_add1 (xd->d_trace_buffers[cpu_index], + vec_add1 (xd->d_trace_buffers[thread_index], vlib_get_buffer_index (vm, b)); } } @@ -546,20 +546,22 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, vlib_put_next_frame (vm, node, next_index, n_left_to_next); } - if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0)) + if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[thread_index]) > 0)) { - dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index], - vec_len (xd->d_trace_buffers[cpu_index])); - vlib_set_trace_count (vm, node, n_trace - - vec_len (xd->d_trace_buffers[cpu_index])); + dpdk_rx_trace (dm, node, xd, queue_id, + xd->d_trace_buffers[thread_index], + vec_len (xd->d_trace_buffers[thread_index])); + vlib_set_trace_count (vm, node, + n_trace - + vec_len (xd->d_trace_buffers[thread_index])); } vlib_increment_combined_counter (vnet_get_main ()->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); + thread_index, 
xd->vlib_sw_if_index, mb_index, n_rx_bytes); - vnet_device_increment_rx_packets (cpu_index, mb_index); + vnet_device_increment_rx_packets (thread_index, mb_index); return mb_index; } @@ -630,19 +632,19 @@ dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) dpdk_device_t *xd; uword n_rx_packets = 0; dpdk_device_and_queue_t *dq; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); /* * Poll all devices on this cpu for input/interrupts. */ /* *INDENT-OFF* */ - vec_foreach (dq, dm->devices_by_cpu[cpu_index]) + vec_foreach (dq, dm->devices_by_cpu[thread_index]) { xd = vec_elt_at_index(dm->devices, dq->device); if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 1); + n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 1); else - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 0); + n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 0); } /* *INDENT-ON* */ diff --git a/src/plugins/dpdk/hqos/hqos.c b/src/plugins/dpdk/hqos/hqos.c index a288fca7..8b251beb 100644 --- a/src/plugins/dpdk/hqos/hqos.c +++ b/src/plugins/dpdk/hqos/hqos.c @@ -397,7 +397,7 @@ static_always_inline void dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm) { dpdk_main_t *dm = &dpdk_main; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; u32 dev_pos; dev_pos = 0; @@ -405,12 +405,12 @@ dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm) { vlib_worker_thread_barrier_check (); - u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[thread_index]); if (dev_pos >= n_devs) dev_pos = 0; dpdk_device_and_queue_t *dq = - vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + vec_elt_at_index (dm->devices_by_hqos_cpu[thread_index], dev_pos); dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; @@ -479,7 +479,7 @@ static_always_inline void dpdk_hqos_thread_internal (vlib_main_t * vm) { dpdk_main_t *dm = &dpdk_main; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; u32 dev_pos; dev_pos = 0; @@ -487,7 +487,7 @@ dpdk_hqos_thread_internal (vlib_main_t * vm) { vlib_worker_thread_barrier_check (); - u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[thread_index]); if (PREDICT_FALSE (n_devs == 0)) { dev_pos = 0; @@ -497,7 +497,7 @@ dpdk_hqos_thread_internal (vlib_main_t * vm) dev_pos = 0; dpdk_device_and_queue_t *dq = - vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + vec_elt_at_index (dm->devices_by_hqos_cpu[thread_index], dev_pos); dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; @@ -586,7 +586,7 @@ dpdk_hqos_thread (vlib_worker_thread_t * w) vm = vlib_get_main (); - ASSERT (vm->cpu_index == os_get_cpu_number ()); + ASSERT (vm->thread_index == vlib_get_thread_index ()); clib_time_init (&vm->clib_time); clib_mem_set_heap (w->thread_mheap); @@ -595,7 +595,7 @@ dpdk_hqos_thread (vlib_worker_thread_t * w) while (tm->worker_thread_release == 0) vlib_worker_thread_barrier_check (); - if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0) + if (vec_len (dm->devices_by_hqos_cpu[vm->thread_index]) == 0) return clib_error ("current 
I/O TX thread does not have any devices assigned to it"); diff --git a/src/plugins/dpdk/ipsec/cli.c b/src/plugins/dpdk/ipsec/cli.c index cd0a6037..3ae8c9b8 100644 --- a/src/plugins/dpdk/ipsec/cli.c +++ b/src/plugins/dpdk/ipsec/cli.c @@ -42,8 +42,8 @@ dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) for (i = 0; i < tm->n_vlib_mains; i++) { uword key, data; - u32 cpu_index = vlib_mains[i]->cpu_index; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + u32 thread_index = vlib_mains[i]->thread_index; + crypto_worker_main_t *cwm = &dcm->workers_main[thread_index]; u8 *s = 0; if (skip_master) @@ -57,7 +57,7 @@ dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) i32 last_cdev = -1; crypto_qp_data_t *qpd; - s = format (s, "%u\t", cpu_index); + s = format (s, "%u\t", thread_index); /* *INDENT-OFF* */ vec_foreach (qpd, cwm->qp_data) @@ -95,7 +95,7 @@ dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) cap.sym.auth.algo = p_key->auth_algo; check_algo_is_supported (&cap, auth_str); vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n", - vlib_mains[i]->cpu_index, cipher_str, auth_str, + vlib_mains[i]->thread_index, cipher_str, auth_str, p_key->is_outbound ? "out" : "in", cwm->qp_data[data].dev_id, cwm->qp_data[data].qp_id); diff --git a/src/plugins/dpdk/ipsec/crypto_node.c b/src/plugins/dpdk/ipsec/crypto_node.c index dc3452b2..a3c45902 100644 --- a/src/plugins/dpdk/ipsec/crypto_node.c +++ b/src/plugins/dpdk/ipsec/crypto_node.c @@ -171,9 +171,9 @@ static uword dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + crypto_worker_main_t *cwm = &dcm->workers_main[thread_index]; crypto_qp_data_t *qpd; u32 n_deq = 0; diff --git a/src/plugins/dpdk/ipsec/esp.h b/src/plugins/dpdk/ipsec/esp.h index 320295b1..56f0c756 100644 --- a/src/plugins/dpdk/ipsec/esp.h +++ b/src/plugins/dpdk/ipsec/esp.h @@ -170,9 +170,9 @@ static_always_inline int create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, u8 is_outbound) { - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + crypto_worker_main_t *cwm = &dcm->workers_main[thread_index]; struct rte_crypto_sym_xform cipher_xform = { 0 }; struct rte_crypto_sym_xform auth_xform = { 0 }; struct rte_crypto_sym_xform *xfs; diff --git a/src/plugins/dpdk/ipsec/esp_decrypt.c b/src/plugins/dpdk/ipsec/esp_decrypt.c index 286e03f8..bab76e3b 100644 --- a/src/plugins/dpdk/ipsec/esp_decrypt.c +++ b/src/plugins/dpdk/ipsec/esp_decrypt.c @@ -88,7 +88,7 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm, { u32 n_left_from, *from, *to_next, next_index; ipsec_main_t *im = &ipsec_main; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); dpdk_crypto_main_t * dcm = &dpdk_crypto_main; dpdk_esp_main_t * em = &dpdk_esp_main; u32 i; @@ -104,7 +104,7 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm, return n_left_from; } - crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index); + crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, thread_index); u32 n_qps = vec_len(cwm->qp_data); struct rte_crypto_op ** cops_to_enq[n_qps]; u32 n_cop_qp[n_qps], * bi_to_enq[n_qps]; diff --git a/src/plugins/dpdk/ipsec/esp_encrypt.c 
b/src/plugins/dpdk/ipsec/esp_encrypt.c index 5b03de73..f996d7df 100644 --- a/src/plugins/dpdk/ipsec/esp_encrypt.c +++ b/src/plugins/dpdk/ipsec/esp_encrypt.c @@ -93,7 +93,7 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm, { u32 n_left_from, *from, *to_next, next_index; ipsec_main_t *im = &ipsec_main; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); dpdk_crypto_main_t *dcm = &dpdk_crypto_main; dpdk_esp_main_t *em = &dpdk_esp_main; u32 i; @@ -111,7 +111,8 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm, return n_left_from; } - crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index); + crypto_worker_main_t *cwm = + vec_elt_at_index (dcm->workers_main, thread_index); u32 n_qps = vec_len (cwm->qp_data); struct rte_crypto_op **cops_to_enq[n_qps]; u32 n_cop_qp[n_qps], *bi_to_enq[n_qps]; diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c index b0aaaaec..5d8f4fba 100644 --- a/src/plugins/dpdk/ipsec/ipsec.c +++ b/src/plugins/dpdk/ipsec/ipsec.c @@ -289,7 +289,7 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (!map) { clib_warning ("unable to create hash table for worker %u", - vlib_mains[i]->cpu_index); + vlib_mains[i]->thread_index); goto error; } cwm->algo_qp_map = map; diff --git a/src/plugins/dpdk/ipsec/ipsec.h b/src/plugins/dpdk/ipsec/ipsec.h index 28bffc80..f0f793c0 100644 --- a/src/plugins/dpdk/ipsec/ipsec.h +++ b/src/plugins/dpdk/ipsec/ipsec.h @@ -95,8 +95,8 @@ static_always_inline void crypto_alloc_cops () { dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - u32 cpu_index = os_get_cpu_number (); - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + u32 thread_index = vlib_get_thread_index (); + crypto_worker_main_t *cwm = &dcm->workers_main[thread_index]; unsigned socket_id = rte_socket_id (); crypto_qp_data_t *qpd; diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c index 7ee2a785..942b8b2d 100644 --- a/src/plugins/dpdk/main.c +++ b/src/plugins/dpdk/main.c @@ -39,7 +39,7 @@ rte_delay_us_override (unsigned us) * thread then do not intercept. (Must not be called from an * independent pthread). */ - if (os_get_cpu_number () == 0) + if (vlib_get_thread_index () == 0) { /* * We're in the vlib main thread or a vlib process. 
Make sure diff --git a/src/plugins/flowperpkt/l2_node.c b/src/plugins/flowperpkt/l2_node.c index 1c2f681e..fdaf81d1 100644 --- a/src/plugins/flowperpkt/l2_node.c +++ b/src/plugins/flowperpkt/l2_node.c @@ -102,7 +102,7 @@ add_to_flow_record_l2 (vlib_main_t * vm, u8 * src_mac, u8 * dst_mac, u16 ethertype, u64 timestamp, u16 length, int do_flush) { - u32 my_cpu_number = vm->cpu_index; + u32 my_cpu_number = vm->thread_index; flow_report_main_t *frm = &flow_report_main; ip4_header_t *ip; udp_header_t *udp; diff --git a/src/plugins/flowperpkt/node.c b/src/plugins/flowperpkt/node.c index f77f087d..0277682d 100644 --- a/src/plugins/flowperpkt/node.c +++ b/src/plugins/flowperpkt/node.c @@ -101,7 +101,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, u32 src_address, u32 dst_address, u8 tos, u64 timestamp, u16 length, int do_flush) { - u32 my_cpu_number = vm->cpu_index; + u32 my_cpu_number = vm->thread_index; flow_report_main_t *frm = &flow_report_main; ip4_header_t *ip; udp_header_t *udp; diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h index 2bf3fd54..9de0d13b 100644 --- a/src/plugins/ioam/export-common/ioam_export.h +++ b/src/plugins/ioam/export-common/ioam_export.h @@ -477,8 +477,8 @@ do { \ from = vlib_frame_vector_args (F); \ n_left_from = (F)->n_vectors; \ next_index = (N)->cached_next_index; \ - while (__sync_lock_test_and_set ((EM)->lockp[(VM)->cpu_index], 1)); \ - my_buf = ioam_export_get_my_buffer (EM, (VM)->cpu_index); \ + while (__sync_lock_test_and_set ((EM)->lockp[(VM)->thread_index], 1)); \ + my_buf = ioam_export_get_my_buffer (EM, (VM)->thread_index); \ my_buf->touched_at = vlib_time_now (VM); \ while (n_left_from > 0) \ { \ @@ -620,7 +620,7 @@ do { \ } \ vlib_node_increment_counter (VM, export_node.index, \ EXPORT_ERROR_RECORDED, pkts_recorded); \ - *(EM)->lockp[(VM)->cpu_index] = 0; \ + *(EM)->lockp[(VM)->thread_index] = 0; \ } while(0) #endif /* __included_ioam_export_h__ */ diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c index a56dc040..0cf742c9 100644 --- a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c +++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c @@ -396,7 +396,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, clib_net_to_host_u32 (tcp0->seq_number) + 1, no_of_responses, now, - vm->cpu_index, &pool_index0)) + vm->thread_index, &pool_index0)) { cache_ts_added++; } @@ -419,7 +419,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, e2e = (ioam_e2e_cache_option_t *) ((u8 *) hbh0 + cm->rewrite_pool_index_offset); - e2e->pool_id = (u8) vm->cpu_index; + e2e->pool_id = (u8) vm->thread_index; e2e->pool_index = pool_index0; ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *) ((u8 *) e2e + @@ -455,7 +455,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, clib_net_to_host_u32 (tcp1->seq_number) + 1, no_of_responses, now, - vm->cpu_index, &pool_index1)) + vm->thread_index, &pool_index1)) { cache_ts_added++; } @@ -479,7 +479,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, e2e = (ioam_e2e_cache_option_t *) ((u8 *) hbh1 + cm->rewrite_pool_index_offset); - e2e->pool_id = (u8) vm->cpu_index; + e2e->pool_id = (u8) vm->thread_index; e2e->pool_index = pool_index1; ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *) ((u8 *) e2e + @@ -562,7 +562,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, clib_net_to_host_u32 (tcp0->seq_number) + 1, no_of_responses, now, - vm->cpu_index, &pool_index0)) + vm->thread_index, &pool_index0)) { cache_ts_added++; } @@ -585,7 
+585,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, e2e = (ioam_e2e_cache_option_t *) ((u8 *) hbh0 + cm->rewrite_pool_index_offset); - e2e->pool_id = (u8) vm->cpu_index; + e2e->pool_id = (u8) vm->thread_index; e2e->pool_index = pool_index0; ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *) ((u8 *) e2e + @@ -701,7 +701,7 @@ expired_cache_ts_timer_callback (u32 * expired_timers) ioam_cache_main_t *cm = &ioam_cache_main; int i; u32 pool_index; - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 count = 0; for (i = 0; i < vec_len (expired_timers); i++) @@ -724,7 +724,7 @@ ioam_cache_ts_timer_tick_node_fn (vlib_main_t * vm, vlib_frame_t * f) { ioam_cache_main_t *cm = &ioam_cache_main; - u32 my_thread_index = os_get_cpu_number (); + u32 my_thread_index = vlib_get_thread_index (); struct timespec ts, tsrem; tw_timer_expire_timers_16t_2w_512sl (&cm->timer_wheels[my_thread_index], diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c index f3c5cc09..08f5b692 100644 --- a/src/plugins/ixge/ixge.c +++ b/src/plugins/ixge/ixge.c @@ -1887,7 +1887,7 @@ done: vlib_increment_combined_counter (vnet_main. interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - 0 /* cpu_index */ , + 0 /* thread_index */ , xd->vlib_sw_if_index, n_packets, dq->rx.n_bytes); diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c index add81236..addc2a42 100644 --- a/src/plugins/lb/lb.c +++ b/src/plugins/lb/lb.c @@ -63,11 +63,11 @@ u8 *format_lb_main (u8 * s, va_list * args) s = format(s, " #vips: %u\n", pool_elts(lbm->vips)); s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1); - u32 cpu_index; - for(cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++ ) { - lb_hash_t *h = lbm->per_cpu[cpu_index].sticky_ht; + u32 thread_index; + for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) { + lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht; if (h) { - s = format(s, "core %d\n", cpu_index); + s = format(s, "core %d\n", thread_index); s = format(s, " timeout: %ds\n", h->timeout); s = format(s, " usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())), lb_hash_size(h)); } diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c index 8b763c53..3171148b 100644 --- a/src/plugins/lb/node.c +++ b/src/plugins/lb/node.c @@ -60,10 +60,10 @@ format_lb_trace (u8 * s, va_list * args) return s; } -lb_hash_t *lb_get_sticky_table(u32 cpu_index) +lb_hash_t *lb_get_sticky_table(u32 thread_index) { lb_main_t *lbm = &lb_main; - lb_hash_t *sticky_ht = lbm->per_cpu[cpu_index].sticky_ht; + lb_hash_t *sticky_ht = lbm->per_cpu[thread_index].sticky_ht; //Check if size changed if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht)))) { @@ -71,8 +71,8 @@ lb_hash_t *lb_get_sticky_table(u32 cpu_index) lb_hash_bucket_t *b; u32 i; lb_hash_foreach_entry(sticky_ht, b, i) { - vlib_refcount_add(&lbm->as_refcount, cpu_index, b->value[i], -1); - vlib_refcount_add(&lbm->as_refcount, cpu_index, 0, 1); + vlib_refcount_add(&lbm->as_refcount, thread_index, b->value[i], -1); + vlib_refcount_add(&lbm->as_refcount, thread_index, 0, 1); } lb_hash_free(sticky_ht); @@ -81,8 +81,8 @@ lb_hash_t *lb_get_sticky_table(u32 cpu_index) //Create if necessary if (PREDICT_FALSE(sticky_ht == NULL)) { - lbm->per_cpu[cpu_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout); - sticky_ht = lbm->per_cpu[cpu_index].sticky_ht; + lbm->per_cpu[thread_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout); 
+ sticky_ht = lbm->per_cpu[thread_index].sticky_ht; clib_warning("Regenerated sticky table %p", sticky_ht); } @@ -153,10 +153,10 @@ lb_node_fn (vlib_main_t * vm, { lb_main_t *lbm = &lb_main; u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); u32 lb_time = lb_hash_time_now(vm); - lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index); + lb_hash_t *sticky_ht = lb_get_sticky_table(thread_index); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; @@ -240,9 +240,9 @@ lb_node_fn (vlib_main_t * vm, //Configuration may be changed, vectors resized, etc... //Dereference previously used - vlib_refcount_add(&lbm->as_refcount, cpu_index, + vlib_refcount_add(&lbm->as_refcount, thread_index, lb_hash_available_value(sticky_ht, hash0, available_index0), -1); - vlib_refcount_add(&lbm->as_refcount, cpu_index, + vlib_refcount_add(&lbm->as_refcount, thread_index, asindex0, 1); //Add sticky entry @@ -260,7 +260,7 @@ lb_node_fn (vlib_main_t * vm, } vlib_increment_simple_counter(&lbm->vip_counters[counter], - cpu_index, + thread_index, vnet_buffer (p0)->ip.adj_index[VLIB_TX], 1); diff --git a/src/plugins/lb/refcount.c b/src/plugins/lb/refcount.c index 22415c88..6f01ab5a 100644 --- a/src/plugins/lb/refcount.c +++ b/src/plugins/lb/refcount.c @@ -31,10 +31,10 @@ u64 vlib_refcount_get(vlib_refcount_t *r, u32 index) { u64 count = 0; vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 cpu_index; - for (cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++) { - if (r->per_cpu[cpu_index].length > index) - count += r->per_cpu[cpu_index].counters[index]; + u32 thread_index; + for (thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++) { + if (r->per_cpu[thread_index].length > index) + count += r->per_cpu[thread_index].counters[index]; } return count; } diff --git a/src/plugins/lb/refcount.h b/src/plugins/lb/refcount.h index 8c26e7be..dcfcb3fe 100644 --- a/src/plugins/lb/refcount.h +++ b/src/plugins/lb/refcount.h @@ -45,9 +45,9 @@ typedef struct { void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size); static_always_inline -void vlib_refcount_add(vlib_refcount_t *r, u32 cpu_index, u32 counter_index, i32 v) +void vlib_refcount_add(vlib_refcount_t *r, u32 thread_index, u32 counter_index, i32 v) { - vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[cpu_index]; + vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[thread_index]; if (PREDICT_FALSE(counter_index >= per_cpu->length)) __vlib_refcount_resize(per_cpu, clib_max(counter_index + 16, per_cpu->length * 2)); diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index 659d5dfb..cee1f3d1 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -94,7 +94,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_rx_bytes = 0; u32 *to_next = 0; u32 n_free_bufs; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 bi0, bi1; vlib_buffer_t *b0, *b1; u16 ring_size = 1 << mif->log2_ring_size; @@ -105,14 +105,15 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (mif->per_interface_next_index != ~0) next_index = mif->per_interface_next_index; - n_free_bufs = vec_len (nm->rx_buffers[cpu_index]); + n_free_bufs = vec_len (nm->rx_buffers[thread_index]); if (PREDICT_FALSE (n_free_bufs < ring_size)) { - vec_validate (nm->rx_buffers[cpu_index], ring_size + n_free_bufs - 1); + vec_validate 
(nm->rx_buffers[thread_index], + ring_size + n_free_bufs - 1); n_free_bufs += - vlib_buffer_alloc (vm, &nm->rx_buffers[cpu_index][n_free_bufs], + vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs], ring_size); - _vec_len (nm->rx_buffers[cpu_index]) = n_free_bufs; + _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs; } head = ring->head; @@ -158,15 +159,15 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, CLIB_CACHE_LINE_BYTES, LOAD); } /* get empty buffer */ - u32 last_buf = vec_len (nm->rx_buffers[cpu_index]) - 1; - bi0 = nm->rx_buffers[cpu_index][last_buf]; - bi1 = nm->rx_buffers[cpu_index][last_buf - 1]; - _vec_len (nm->rx_buffers[cpu_index]) -= 2; + u32 last_buf = vec_len (nm->rx_buffers[thread_index]) - 1; + bi0 = nm->rx_buffers[thread_index][last_buf]; + bi1 = nm->rx_buffers[thread_index][last_buf - 1]; + _vec_len (nm->rx_buffers[thread_index]) -= 2; if (last_buf > 4) { - memif_prefetch (vm, nm->rx_buffers[cpu_index][last_buf - 2]); - memif_prefetch (vm, nm->rx_buffers[cpu_index][last_buf - 3]); + memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 2]); + memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 3]); } /* enqueue buffer */ @@ -256,9 +257,9 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (num_slots && n_left_to_next) { /* get empty buffer */ - u32 last_buf = vec_len (nm->rx_buffers[cpu_index]) - 1; - bi0 = nm->rx_buffers[cpu_index][last_buf]; - _vec_len (nm->rx_buffers[cpu_index]) = last_buf; + u32 last_buf = vec_len (nm->rx_buffers[thread_index]) - 1; + bi0 = nm->rx_buffers[thread_index][last_buf]; + _vec_len (nm->rx_buffers[thread_index]) = last_buf; /* enqueue buffer */ to_next[0] = bi0; @@ -315,7 +316,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, ring->tail = head; vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, cpu_index, + + VNET_INTERFACE_COUNTER_RX, thread_index, mif->hw_if_index, n_rx_packets, n_rx_bytes); @@ -327,7 +328,7 @@ memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_rx_packets = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); memif_main_t *nm = &memif_main; memif_if_t *mif; @@ -337,7 +338,7 @@ memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, if (mif->flags & MEMIF_IF_FLAG_ADMIN_UP && mif->flags & MEMIF_IF_FLAG_CONNECTED && (mif->if_index % nm->input_cpu_count) == - (cpu_index - nm->input_cpu_first_index)) + (thread_index - nm->input_cpu_first_index)) { if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) n_rx_packets += diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index b4961365..e5ee965f 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -212,7 +212,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, snat_session_t ** sessionp, vlib_node_runtime_t * node, u32 next0, - u32 cpu_index) + u32 thread_index) { snat_user_t *u; snat_user_key_t user_key; @@ -246,27 +246,27 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) { /* no, make a new one */ - pool_get (sm->per_thread_data[cpu_index].users, u); + pool_get (sm->per_thread_data[thread_index].users, u); memset (u, 0, sizeof (*u)); u->addr = ip0->src_address; u->fib_index = rx_fib_index0; - pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt); + pool_get 
(sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt); u->sessions_per_user_list_head_index = per_user_list_head_elt - - sm->per_thread_data[cpu_index].list_pool; + sm->per_thread_data[thread_index].list_pool; - clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_init (sm->per_thread_data[thread_index].list_pool, u->sessions_per_user_list_head_index); - kv0.value = u - sm->per_thread_data[cpu_index].users; + kv0.value = u - sm->per_thread_data[thread_index].users; /* add user */ clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); } else { - u = pool_elt_at_index (sm->per_thread_data[cpu_index].users, + u = pool_elt_at_index (sm->per_thread_data[thread_index].users, value0.value); } @@ -276,25 +276,25 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, /* Remove the oldest dynamic translation */ do { oldest_per_user_translation_list_index = - clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool, u->sessions_per_user_list_head_index); ASSERT (oldest_per_user_translation_list_index != ~0); /* add it back to the end of the LRU list */ - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, u->sessions_per_user_list_head_index, oldest_per_user_translation_list_index); /* Get the list element */ oldest_per_user_translation_list_elt = - pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool, + pool_elt_at_index (sm->per_thread_data[thread_index].list_pool, oldest_per_user_translation_list_index); /* Get the session index from the list element */ session_index = oldest_per_user_translation_list_elt->value; /* Get the session */ - s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, session_index); } while (snat_is_session_static (s)); @@ -346,7 +346,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, } /* Create a new session */ - pool_get (sm->per_thread_data[cpu_index].sessions, s); + pool_get (sm->per_thread_data[thread_index].sessions, s); memset (s, 0, sizeof (*s)); s->outside_address_index = address_index; @@ -362,22 +362,22 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, } /* Create list elts */ - pool_get (sm->per_thread_data[cpu_index].list_pool, + pool_get (sm->per_thread_data[thread_index].list_pool, per_user_translation_list_elt); - clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_init (sm->per_thread_data[thread_index].list_pool, per_user_translation_list_elt - - sm->per_thread_data[cpu_index].list_pool); + sm->per_thread_data[thread_index].list_pool); per_user_translation_list_elt->value = - s - sm->per_thread_data[cpu_index].sessions; + s - sm->per_thread_data[thread_index].sessions; s->per_user_index = per_user_translation_list_elt - - sm->per_thread_data[cpu_index].list_pool; + sm->per_thread_data[thread_index].list_pool; s->per_user_list_head_index = u->sessions_per_user_list_head_index; - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s->per_user_list_head_index, per_user_translation_list_elt - - sm->per_thread_data[cpu_index].list_pool); + sm->per_thread_data[thread_index].list_pool); } s->in2out = *key0; @@ -388,12 +388,12 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, /* Add to translation hashes */ kv0.key = s->in2out.as_u64; - kv0.value = 
s - sm->per_thread_data[cpu_index].sessions; + kv0.value = s - sm->per_thread_data[thread_index].sessions; if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) clib_warning ("in2out key add failed"); kv0.key = s->out2in.as_u64; - kv0.value = s - sm->per_thread_data[cpu_index].sessions; + kv0.value = s - sm->per_thread_data[thread_index].sessions; if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) clib_warning ("out2in key add failed"); @@ -403,7 +403,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, worker_by_out_key.port = s->out2in.port; worker_by_out_key.fib_index = s->out2in.fib_index; kv0.key = worker_by_out_key.as_u64; - kv0.value = cpu_index; + kv0.value = thread_index; clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); /* log NAT event */ @@ -465,7 +465,7 @@ snat_in2out_error_t icmp_get_key(icmp46_header_t *icmp0, * * @param[in,out] sm SNAT main * @param[in,out] node SNAT node runtime - * @param[in] cpu_index CPU index + * @param[in] thread_index thread index * @param[in,out] b0 buffer containing packet to be translated * @param[out] p_key address and port before NAT translation * @param[out] p_value address and port after NAT translation @@ -473,7 +473,7 @@ snat_in2out_error_t icmp_get_key(icmp46_header_t *icmp0, * @param d optional parameter */ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node, - u32 cpu_index, vlib_buffer_t *b0, + u32 thread_index, vlib_buffer_t *b0, snat_session_key_t *p_key, snat_session_key_t *p_value, u8 *p_dont_translate, void *d) @@ -524,13 +524,13 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node, } next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, - &s0, node, next0, cpu_index); + &s0, node, next0, thread_index); if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto out; } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value0.value); out: @@ -548,7 +548,7 @@ out: * * @param[in] sm SNAT main * @param[in,out] node SNAT node runtime - * @param[in] cpu_index CPU index + * @param[in] thread_index thread index * @param[in,out] b0 buffer containing packet to be translated * @param[out] p_key address and port before NAT translation * @param[out] p_value address and port after NAT translation @@ -556,7 +556,7 @@ out: * @param d optional parameter */ u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node, - u32 cpu_index, vlib_buffer_t *b0, + u32 thread_index, vlib_buffer_t *b0, snat_session_key_t *p_key, snat_session_key_t *p_value, u8 *p_dont_translate, void *d) @@ -624,7 +624,7 @@ static inline u32 icmp_in2out (snat_main_t *sm, u32 rx_fib_index0, vlib_node_runtime_t * node, u32 next0, - u32 cpu_index, + u32 thread_index, void *d) { snat_session_key_t key0, sm0; @@ -641,7 +641,7 @@ static inline u32 icmp_in2out (snat_main_t *sm, echo0 = (icmp_echo_header_t *)(icmp0+1); - next0_tmp = sm->icmp_match_in2out_cb(sm, node, cpu_index, b0, + next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, &key0, &sm0, &dont_translate, d); if (next0_tmp != ~0) next0 = next0_tmp; @@ -847,11 +847,11 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, vlib_node_runtime_t * node, u32 next0, f64 now, - u32 cpu_index, + u32 thread_index, snat_session_t ** p_s0) { next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, cpu_index, p_s0); + next0, thread_index, p_s0); snat_session_t * s0 = *p_s0; if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP 
&& s0)) { @@ -862,9 +862,9 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, /* Per-user LRU list maintenance for dynamic translations */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } @@ -884,7 +884,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; f64 now = vlib_time_now (vm); u32 stats_node_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index : snat_in2out_node.index; @@ -977,7 +977,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { next0 = icmp_in2out_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, - node, next0, now, cpu_index, &s0); + node, next0, now, thread_index, &s0); goto trace00; } } @@ -1006,7 +1006,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, goto trace00; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, - &s0, node, next0, cpu_index); + &s0, node, next0, thread_index); if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto trace00; } @@ -1017,7 +1017,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value0.value); old_addr0 = ip0->src_address.as_u32; @@ -1063,9 +1063,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } @@ -1081,7 +1081,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, t->next_index = next0; t->session_index = ~0; if (s0) - t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s0 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; @@ -1117,7 +1117,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { next1 = icmp_in2out_slow_path (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, - next1, now, cpu_index, &s1); + next1, now, thread_index, &s1); goto trace01; } } @@ -1146,7 +1146,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, goto trace01; next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1, - &s1, node, next1, cpu_index); + &s1, node, next1, thread_index); if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP)) goto trace01; } @@ -1157,7 +1157,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value1.value); old_addr1 = ip1->src_address.as_u32; @@ -1203,9 +1203,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s1)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s1->per_user_index); 
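
The remove/addtail pairs above are the whole LRU mechanism: whenever a dynamic session translates a packet it is unlinked and re-linked at the tail of its user's list, so the element right after the head is always the least recently used session and is what slow_path() evicts. A minimal standalone sketch of the same index-linked-list technique (plain C, a circular variant, not the clib_dlist API itself):

#include <assert.h>
#include <stdio.h>

/* Elements live in a pool and link by index, like list_pool above. */
typedef struct { unsigned prev, next; } delt_t;

static void dlist_init (delt_t *p, unsigned i) { p[i].prev = p[i].next = i; }

static void dlist_remove (delt_t *p, unsigned i)
{
  p[p[i].prev].next = p[i].next;
  p[p[i].next].prev = p[i].prev;
}

static void dlist_addtail (delt_t *p, unsigned h, unsigned i)
{
  unsigned last = p[h].prev;          /* current tail */
  p[i].prev = last;  p[i].next = h;
  p[last].next = i;  p[h].prev = i;
}

int main (void)
{
  delt_t pool[4];
  dlist_init (pool, 0);               /* per-user list head           */
  dlist_addtail (pool, 0, 1);         /* three sessions, oldest first */
  dlist_addtail (pool, 0, 2);
  dlist_addtail (pool, 0, 3);

  /* Session 1 just translated a packet: move it to the tail.  The
     head's successor is now the eviction candidate. */
  dlist_remove (pool, 1);
  dlist_addtail (pool, 0, 1);
  assert (pool[0].next == 2 && pool[0].prev == 1);
  printf ("LRU victim: session %u\n", pool[0].next);
  return 0;
}
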
- clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s1->per_user_list_head_index, s1->per_user_index); } @@ -1220,7 +1220,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, t->next_index = next1; t->session_index = ~0; if (s1) - t->session_index = s1 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s1 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP; @@ -1292,7 +1292,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { next0 = icmp_in2out_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, cpu_index, &s0); + next0, now, thread_index, &s0); goto trace0; } } @@ -1321,7 +1321,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, goto trace0; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, - &s0, node, next0, cpu_index); + &s0, node, next0, thread_index); if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto trace0; @@ -1333,7 +1333,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value0.value); old_addr0 = ip0->src_address.as_u32; @@ -1379,9 +1379,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } @@ -1397,7 +1397,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, t->next_index = next0; t->session_index = ~0; if (s0) - t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s0 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; @@ -2010,7 +2010,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, u32 n_left_to_next_worker = 0, *to_next_worker = 0; u32 next_worker_index = 0; u32 current_worker_index = ~0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); ASSERT (vec_len (sm->workers)); @@ -2048,7 +2048,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0); - if (PREDICT_FALSE (next_worker_index != cpu_index)) + if (PREDICT_FALSE (next_worker_index != thread_index)) { do_handoff = 1; diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c index 656e42db..5d308d78 100644 --- a/src/plugins/snat/out2in.c +++ b/src/plugins/snat/out2in.c @@ -129,7 +129,7 @@ create_session_for_static_mapping (snat_main_t *sm, snat_session_key_t in2out, snat_session_key_t out2in, vlib_node_runtime_t * node, - u32 cpu_index) + u32 thread_index) { snat_user_t *u; snat_user_key_t user_key; @@ -146,36 +146,36 @@ create_session_for_static_mapping (snat_main_t *sm, if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) { /* no, make a new one */ - pool_get (sm->per_thread_data[cpu_index].users, u); + pool_get (sm->per_thread_data[thread_index].users, u); memset (u, 0, sizeof (*u)); u->addr = in2out.addr; u->fib_index = in2out.fib_index; - pool_get (sm->per_thread_data[cpu_index].list_pool, + pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt); 
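
Every pool_get / pool_elt_at_index in these hunks goes through sm->per_thread_data[thread_index], so each worker only ever touches its own pools; that partitioning is why no lock appears anywhere in the fast path. A hedged plain-C sketch of the layout (session_t and the growth policy here are invented for illustration):

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for the VPP types used in the hunks above. */
typedef struct { unsigned short in2out_port, out2in_port; } session_t;

typedef struct
{
  session_t *sessions;      /* this thread's private session pool */
  unsigned n_sessions, capacity;
} per_thread_data_t;

static per_thread_data_t *per_thread_data;  /* one element per thread */

static session_t *
session_alloc (unsigned thread_index)
{
  per_thread_data_t *td = &per_thread_data[thread_index];
  if (td->n_sessions == td->capacity)
    {
      td->capacity = td->capacity ? 2 * td->capacity : 64;
      td->sessions = realloc (td->sessions, td->capacity * sizeof (session_t));
    }
  return &td->sessions[td->n_sessions++];
}

int main (void)
{
  per_thread_data = calloc (4, sizeof (per_thread_data_t));
  session_t *s = session_alloc (2);   /* worker on thread 2 */

  /* As in "s - sm->per_thread_data[thread_index].sessions" above,
     what goes into the bihash is the pool index, not the pointer,
     so pool reallocation cannot invalidate it. */
  printf ("session index: %ld\n", (long) (s - per_thread_data[2].sessions));
  return 0;
}
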
u->sessions_per_user_list_head_index = per_user_list_head_elt - - sm->per_thread_data[cpu_index].list_pool; + sm->per_thread_data[thread_index].list_pool; - clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_init (sm->per_thread_data[thread_index].list_pool, u->sessions_per_user_list_head_index); - kv0.value = u - sm->per_thread_data[cpu_index].users; + kv0.value = u - sm->per_thread_data[thread_index].users; /* add user */ clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); /* add non-translated packets worker lookup */ - kv0.value = cpu_index; + kv0.value = thread_index; clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); } else { - u = pool_elt_at_index (sm->per_thread_data[cpu_index].users, + u = pool_elt_at_index (sm->per_thread_data[thread_index].users, value0.value); } - pool_get (sm->per_thread_data[cpu_index].sessions, s); + pool_get (sm->per_thread_data[thread_index].sessions, s); memset (s, 0, sizeof (*s)); s->outside_address_index = ~0; @@ -183,22 +183,22 @@ create_session_for_static_mapping (snat_main_t *sm, u->nstaticsessions++; /* Create list elts */ - pool_get (sm->per_thread_data[cpu_index].list_pool, + pool_get (sm->per_thread_data[thread_index].list_pool, per_user_translation_list_elt); - clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_init (sm->per_thread_data[thread_index].list_pool, per_user_translation_list_elt - - sm->per_thread_data[cpu_index].list_pool); + sm->per_thread_data[thread_index].list_pool); per_user_translation_list_elt->value = - s - sm->per_thread_data[cpu_index].sessions; + s - sm->per_thread_data[thread_index].sessions; s->per_user_index = - per_user_translation_list_elt - sm->per_thread_data[cpu_index].list_pool; + per_user_translation_list_elt - sm->per_thread_data[thread_index].list_pool; s->per_user_list_head_index = u->sessions_per_user_list_head_index; - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s->per_user_list_head_index, per_user_translation_list_elt - - sm->per_thread_data[cpu_index].list_pool); + sm->per_thread_data[thread_index].list_pool); s->in2out = in2out; s->out2in = out2in; @@ -206,12 +206,12 @@ create_session_for_static_mapping (snat_main_t *sm, /* Add to translation hashes */ kv0.key = s->in2out.as_u64; - kv0.value = s - sm->per_thread_data[cpu_index].sessions; + kv0.value = s - sm->per_thread_data[thread_index].sessions; if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) clib_warning ("in2out key add failed"); kv0.key = s->out2in.as_u64; - kv0.value = s - sm->per_thread_data[cpu_index].sessions; + kv0.value = s - sm->per_thread_data[thread_index].sessions; if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) clib_warning ("out2in key add failed"); @@ -298,7 +298,7 @@ is_interface_addr(snat_main_t *sm, vlib_node_runtime_t *node, u32 sw_if_index0, * * @param[in,out] sm SNAT main * @param[in,out] node SNAT node runtime - * @param[in] cpu_index CPU index + * @param[in] thread_index thread index * @param[in,out] b0 buffer containing packet to be translated * @param[out] p_key address and port before NAT translation * @param[out] p_value address and port after NAT translation @@ -306,7 +306,7 @@ is_interface_addr(snat_main_t *sm, vlib_node_runtime_t *node, u32 sw_if_index0, * @param d optional parameter */ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node, - u32 cpu_index, vlib_buffer_t *b0, + u32 thread_index, vlib_buffer_t *b0, 
snat_session_key_t *p_key, snat_session_key_t *p_value, u8 *p_dont_translate, void *d) @@ -366,7 +366,7 @@ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node, /* Create session initiated by host from external network */ s0 = create_session_for_static_mapping(sm, b0, sm0, key0, - node, cpu_index); + node, thread_index); if (!s0) { @@ -375,7 +375,7 @@ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node, } } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value0.value); out: @@ -393,7 +393,7 @@ out: * * @param[in] sm SNAT main * @param[in,out] node SNAT node runtime - * @param[in] cpu_index CPU index + * @param[in] thread_index thread index * @param[in,out] b0 buffer containing packet to be translated * @param[out] p_key address and port before NAT translation * @param[out] p_value address and port after NAT translation @@ -401,7 +401,7 @@ out: * @param d optional parameter */ u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node, - u32 cpu_index, vlib_buffer_t *b0, + u32 thread_index, vlib_buffer_t *b0, snat_session_key_t *p_key, snat_session_key_t *p_value, u8 *p_dont_translate, void *d) @@ -460,7 +460,7 @@ static inline u32 icmp_out2in (snat_main_t *sm, u32 rx_fib_index0, vlib_node_runtime_t * node, u32 next0, - u32 cpu_index, + u32 thread_index, void *d) { snat_session_key_t key0, sm0; @@ -477,7 +477,7 @@ static inline u32 icmp_out2in (snat_main_t *sm, echo0 = (icmp_echo_header_t *)(icmp0+1); - next0_tmp = sm->icmp_match_out2in_cb(sm, node, cpu_index, b0, + next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, &key0, &sm0, &dont_translate, d); if (next0_tmp != ~0) next0 = next0_tmp; @@ -589,11 +589,11 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm, u32 rx_fib_index0, vlib_node_runtime_t * node, u32 next0, f64 now, - u32 cpu_index, + u32 thread_index, snat_session_t ** p_s0) { next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, cpu_index, p_s0); + next0, thread_index, p_s0); snat_session_t * s0 = *p_s0; if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0)) { @@ -604,9 +604,9 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } @@ -624,7 +624,7 @@ snat_out2in_node_fn (vlib_main_t * vm, u32 pkts_processed = 0; snat_main_t * sm = &snat_main; f64 now = vlib_time_now (vm); - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -712,7 +712,7 @@ snat_out2in_node_fn (vlib_main_t * vm, { next0 = icmp_out2in_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, cpu_index, &s0); + next0, now, thread_index, &s0); goto trace0; } @@ -743,7 +743,7 @@ snat_out2in_node_fn (vlib_main_t * vm, /* Create session initiated by host from external network */ s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node, - cpu_index); + thread_index); if (!s0) { b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; @@ -752,7 +752,7 @@ 
snat_out2in_node_fn (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value0.value); old_addr0 = ip0->dst_address.as_u32; @@ -796,9 +796,9 @@ snat_out2in_node_fn (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } @@ -813,7 +813,7 @@ snat_out2in_node_fn (vlib_main_t * vm, t->next_index = next0; t->session_index = ~0; if (s0) - t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s0 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; @@ -847,7 +847,7 @@ snat_out2in_node_fn (vlib_main_t * vm, { next1 = icmp_out2in_slow_path (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, - next1, now, cpu_index, &s1); + next1, now, thread_index, &s1); goto trace1; } @@ -878,7 +878,7 @@ snat_out2in_node_fn (vlib_main_t * vm, /* Create session initiated by host from external network */ s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node, - cpu_index); + thread_index); if (!s1) { b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; @@ -887,7 +887,7 @@ snat_out2in_node_fn (vlib_main_t * vm, } } else - s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value1.value); old_addr1 = ip1->dst_address.as_u32; @@ -931,9 +931,9 @@ snat_out2in_node_fn (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s1)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s1->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s1->per_user_list_head_index, s1->per_user_index); } @@ -948,7 +948,7 @@ snat_out2in_node_fn (vlib_main_t * vm, t->next_index = next1; t->session_index = ~0; if (s1) - t->session_index = s1 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s1 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP; @@ -1016,7 +1016,7 @@ snat_out2in_node_fn (vlib_main_t * vm, { next0 = icmp_out2in_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, cpu_index, &s0); + next0, now, thread_index, &s0); goto trace00; } @@ -1048,7 +1048,7 @@ snat_out2in_node_fn (vlib_main_t * vm, /* Create session initiated by host from external network */ s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node, - cpu_index); + thread_index); if (!s0) { b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; @@ -1057,7 +1057,7 @@ snat_out2in_node_fn (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value0.value); old_addr0 = ip0->dst_address.as_u32; @@ -1101,9 +1101,9 @@ snat_out2in_node_fn (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove 
(sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } @@ -1118,7 +1118,7 @@ snat_out2in_node_fn (vlib_main_t * vm, t->next_index = next0; t->session_index = ~0; if (s0) - t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s0 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; @@ -1599,7 +1599,7 @@ snat_out2in_worker_handoff_fn (vlib_main_t * vm, u32 n_left_to_next_worker = 0, *to_next_worker = 0; u32 next_worker_index = 0; u32 current_worker_index = ~0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); ASSERT (vec_len (sm->workers)); @@ -1637,7 +1637,7 @@ snat_out2in_worker_handoff_fn (vlib_main_t * vm, next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0); - if (PREDICT_FALSE (next_worker_index != cpu_index)) + if (PREDICT_FALSE (next_worker_index != thread_index)) { do_handoff = 1; diff --git a/src/plugins/snat/snat.h b/src/plugins/snat/snat.h index 017825c0..f4e1c5c0 100644 --- a/src/plugins/snat/snat.h +++ b/src/plugins/snat/snat.h @@ -221,7 +221,7 @@ struct snat_main_s; typedef u32 snat_icmp_match_function_t (struct snat_main_s *sm, vlib_node_runtime_t *node, - u32 cpu_index, + u32 thread_index, vlib_buffer_t *b0, snat_session_key_t *p_key, snat_session_key_t *p_value, @@ -402,22 +402,22 @@ typedef struct { } tcp_udp_header_t; u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node, - u32 cpu_index, vlib_buffer_t *b0, + u32 thread_index, vlib_buffer_t *b0, snat_session_key_t *p_key, snat_session_key_t *p_value, u8 *p_dont_translate, void *d); u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node, - u32 cpu_index, vlib_buffer_t *b0, + u32 thread_index, vlib_buffer_t *b0, snat_session_key_t *p_key, snat_session_key_t *p_value, u8 *p_dont_translate, void *d); u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node, - u32 cpu_index, vlib_buffer_t *b0, + u32 thread_index, vlib_buffer_t *b0, snat_session_key_t *p_key, snat_session_key_t *p_value, u8 *p_dont_translate, void *d); u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node, - u32 cpu_index, vlib_buffer_t *b0, + u32 thread_index, vlib_buffer_t *b0, snat_session_key_t *p_key, snat_session_key_t *p_value, u8 *p_dont_translate, void *d); diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index a517a597..be3b41ef 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -299,7 +299,7 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, if (CLIB_DEBUG == 0) return; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); /* smp disaster check */ if (vec_len (vlib_mains) > 1) @@ -355,7 +355,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, vlib_buffer_free_list_t *f; int i; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0) { @@ -474,7 +474,7 @@ vlib_buffer_delete_free_list_internal (vlib_main_t * vm, u32 free_list_index) u32 merge_index; int i; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); f = vlib_buffer_get_free_list (vm, free_list_index); diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 394c336a..328660a3 100644 --- 
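
The snat.h hunk above shows why the rename touches so many signatures at once: the ICMP matcher is a callback (snat_icmp_match_function_t), so the slow-path and fast-path variants must keep identical prototypes. A minimal sketch of that function-pointer dispatch, with invented names:

#include <stdio.h>

/* The callback keeps one node body generic over slow/fast matching,
   in the style of sm->icmp_match_in2out_cb above. */
typedef unsigned icmp_match_fn_t (unsigned thread_index, void *b0);

static unsigned
match_slow (unsigned thread_index, void *b0)
{
  (void) thread_index; (void) b0;
  return 1;                     /* e.g. may create a dynamic session */
}

static unsigned
match_fast (unsigned thread_index, void *b0)
{
  (void) thread_index; (void) b0;
  return 2;                     /* e.g. static mappings only */
}

typedef struct { icmp_match_fn_t *icmp_match_cb; } main_t;

int main (void)
{
  main_t sm = { match_slow };
  printf ("%u\n", sm.icmp_match_cb (0, 0));
  sm.icmp_match_cb = match_fast;          /* switched at plugin init */
  printf ("%u\n", sm.icmp_match_cb (0, 0));
  return 0;
}
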
a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -209,7 +209,7 @@ always_inline vlib_buffer_known_state_t vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index) { vlib_buffer_main_t *bm = vm->buffer_main; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); uword *p = hash_get (bm->buffer_known_hash, buffer_index); return p ? p[0] : VLIB_BUFFER_UNKNOWN; @@ -221,7 +221,7 @@ vlib_buffer_set_known_state (vlib_main_t * vm, vlib_buffer_known_state_t state) { vlib_buffer_main_t *bm = vm->buffer_main; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); hash_set (bm->buffer_known_hash, buffer_index, state); } diff --git a/src/vlib/cli.c b/src/vlib/cli.c index f853f655..3cc95076 100644 --- a/src/vlib/cli.c +++ b/src/vlib/cli.c @@ -709,7 +709,7 @@ test_heap_validate (vlib_main_t * vm, unformat_input_t * input, { /* *INDENT-OFF* */ foreach_vlib_main({ - heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index]; + heap = clib_per_cpu_mheaps[this_vlib_main->thread_index]; mheap = mheap_header(heap); mheap->flags |= MHEAP_FLAG_VALIDATE; // Turn off small object cache because it delays detection of errors @@ -722,7 +722,7 @@ test_heap_validate (vlib_main_t * vm, unformat_input_t * input, { /* *INDENT-OFF* */ foreach_vlib_main({ - heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index]; + heap = clib_per_cpu_mheaps[this_vlib_main->thread_index]; mheap = mheap_header(heap); mheap->flags &= ~MHEAP_FLAG_VALIDATE; mheap->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE; @@ -733,7 +733,7 @@ test_heap_validate (vlib_main_t * vm, unformat_input_t * input, { /* *INDENT-OFF* */ foreach_vlib_main({ - heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index]; + heap = clib_per_cpu_mheaps[this_vlib_main->thread_index]; mheap = mheap_header(heap); mheap_validate(heap); }); diff --git a/src/vlib/counter.h b/src/vlib/counter.h index 17a85217..60e2055d 100644 --- a/src/vlib/counter.h +++ b/src/vlib/counter.h @@ -70,17 +70,17 @@ u32 vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm); /** Increment a simple counter @param cm - (vlib_simple_counter_main_t *) simple counter main pointer - @param cpu_index - (u32) the current cpu index + @param thread_index - (u32) the current thread index @param index - (u32) index of the counter to increment @param increment - (u64) quantity to add to the counter */ always_inline void vlib_increment_simple_counter (vlib_simple_counter_main_t * cm, - u32 cpu_index, u32 index, u64 increment) + u32 thread_index, u32 index, u64 increment) { counter_t *my_counters; - my_counters = cm->counters[cpu_index]; + my_counters = cm->counters[thread_index]; my_counters[index] += increment; } @@ -201,7 +201,7 @@ void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm); /** Increment a combined counter @param cm - (vlib_combined_counter_main_t *) combined counter main pointer - @param cpu_index - (u32) the current cpu index + @param thread_index - (u32) the current thread index @param index - (u32) index of the counter to increment @param packet_increment - (u64) number of packets to add to the counter @param byte_increment - (u64) number of bytes to add to the counter @@ -209,13 +209,13 @@ void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm); always_inline void vlib_increment_combined_counter (vlib_combined_counter_main_t * cm, - u32 cpu_index, + u32 thread_index, u32 index, u64 n_packets, u64 n_bytes) { vlib_counter_t *my_counters; /* Use this CPU's counter array */ - my_counters = cm->counters[cpu_index]; + 
my_counters = cm->counters[thread_index]; my_counters[index].packets += n_packets; my_counters[index].bytes += n_bytes; @@ -224,14 +224,14 @@ vlib_increment_combined_counter (vlib_combined_counter_main_t * cm, /** Pre-fetch a per-thread combined counter for the given object index */ always_inline void vlib_prefetch_combined_counter (const vlib_combined_counter_main_t * cm, - u32 cpu_index, u32 index) + u32 thread_index, u32 index) { vlib_counter_t *cpu_counters; /* * This CPU's index is assumed to already be in cache */ - cpu_counters = cm->counters[cpu_index]; + cpu_counters = cm->counters[thread_index]; CLIB_PREFETCH (cpu_counters + index, CLIB_CACHE_LINE_BYTES, STORE); } diff --git a/src/vlib/error.c b/src/vlib/error.c index a2c23176..e4ed4ee3 100644 --- a/src/vlib/error.c +++ b/src/vlib/error.c @@ -149,7 +149,7 @@ vlib_register_errors (vlib_main_t * vm, vlib_node_t *n = vlib_get_node (vm, node_index); uword l; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); /* Free up any previous error strings. */ if (n->n_errors > 0) diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h index f51ec381..9dd01fbf 100644 --- a/src/vlib/global_funcs.h +++ b/src/vlib/global_funcs.h @@ -23,7 +23,7 @@ always_inline vlib_main_t * vlib_get_main (void) { vlib_main_t *vm; - vm = vlib_mains[os_get_cpu_number ()]; + vm = vlib_mains[vlib_get_thread_index ()]; ASSERT (vm); return vm; } diff --git a/src/vlib/main.c b/src/vlib/main.c index b22203f0..422d3e26 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -136,18 +136,18 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, else { f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN); - f->cpu_index = vm->cpu_index; + f->thread_index = vm->thread_index; fi = vlib_frame_index_no_check (vm, f); } /* Poison frame when debugging. */ if (CLIB_DEBUG > 0) { - u32 save_cpu_index = f->cpu_index; + u32 save_thread_index = f->thread_index; memset (f, 0xfe, n); - f->cpu_index = save_cpu_index; + f->thread_index = save_thread_index; } /* Insert magic number. */ @@ -517,7 +517,7 @@ vlib_put_next_frame (vlib_main_t * vm, * a dangling frame reference. Each thread has its own copy of * the next_frames vector. */ - if (0 && r->cpu_index != next_runtime->cpu_index) + if (0 && r->thread_index != next_runtime->thread_index) { nf->frame_index = ~0; nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED); @@ -866,7 +866,7 @@ vlib_elog_main_loop_event (vlib_main_t * vm, : evm->node_call_elog_event_types, node_index), /* track */ - (vm->cpu_index ? &vlib_worker_threads[vm->cpu_index]. + (vm->thread_index ? &vlib_worker_threads[vm->thread_index]. 
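
In the counter.h hunks above, cm->counters[] is indexed by thread first and object second: each thread increments only its own row, so no atomics are needed, and a reader obtains a total by summing the column. A minimal sketch of that layout (the real per-thread arrays are also cache-line separated, which is omitted here):

#include <stdio.h>

#define N_THREADS 4
#define N_COUNTERS 8

/* counters[thread][object]: every thread owns a full row, so an
   increment is a plain read-modify-write with no locking. */
static unsigned long counters[N_THREADS][N_COUNTERS];

static void
increment (unsigned thread_index, unsigned index, unsigned long n)
{
  counters[thread_index][index] += n;
}

static unsigned long
total (unsigned index)            /* reader sums the column */
{
  unsigned long sum = 0;
  for (unsigned t = 0; t < N_THREADS; t++)
    sum += counters[t][index];
  return sum;
}

int main (void)
{
  increment (0, 3, 10);           /* main thread     */
  increment (2, 3, 5);            /* worker thread 2 */
  printf ("counter 3 total = %lu\n", total (3));   /* 15 */
  return 0;
}
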
elog_track : &em->default_track), /* data to log */ n_vectors); } @@ -963,7 +963,7 @@ dispatch_node (vlib_main_t * vm, vm->cpu_time_last_node_dispatch = last_time_stamp; - if (1 /* || vm->cpu_index == node->cpu_index */ ) + if (1 /* || vm->thread_index == node->thread_index */ ) { vlib_main_t *stat_vm; @@ -1029,7 +1029,7 @@ dispatch_node (vlib_main_t * vm, { u32 node_name, vector_length, is_polling; } *ed; - vlib_worker_thread_t *w = vlib_worker_threads + vm->cpu_index; + vlib_worker_thread_t *w = vlib_worker_threads + vm->thread_index; #endif if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT diff --git a/src/vlib/main.h b/src/vlib/main.h index 0197b4f3..329bf073 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -156,7 +156,7 @@ typedef struct vlib_main_t uword *init_functions_called; /* to compare with node runtime */ - u32 cpu_index; + u32 thread_index; void **mbuf_alloc_list; diff --git a/src/vlib/node.c b/src/vlib/node.c index dc0a4de5..bbd3a42e 100644 --- a/src/vlib/node.c +++ b/src/vlib/node.c @@ -99,7 +99,7 @@ vlib_node_runtime_update (vlib_main_t * vm, u32 node_index, u32 next_index) vlib_pending_frame_t *pf; i32 i, j, n_insert; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); vlib_worker_thread_barrier_sync (vm); diff --git a/src/vlib/node.h b/src/vlib/node.h index fc7e7da2..1e2f4c38 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -344,8 +344,8 @@ typedef struct vlib_frame_t /* Number of vector elements currently in frame. */ u16 n_vectors; - /* Owner cpuid / heap id */ - u16 cpu_index; + /* Owner thread / heap id */ + u16 thread_index; /* Scalar and vector arguments to next node. */ u8 arguments[0]; @@ -459,7 +459,7 @@ typedef struct vlib_node_runtime_t zero before first run of this node. */ - u16 cpu_index; /**< CPU this node runs on */ + u16 thread_index; /**< thread this node runs on */ u8 runtime_data[0]; /**< Function dependent node-runtime data. This data is diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index 1f7d94e1..54e36874 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -201,9 +201,9 @@ always_inline vlib_frame_t * vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index) { vlib_frame_t *f; - u32 cpu_index = frame_index & VLIB_CPU_MASK; + u32 thread_index = frame_index & VLIB_CPU_MASK; u32 offset = frame_index & VLIB_OFFSET_MASK; - vm = vlib_mains[cpu_index]; + vm = vlib_mains[thread_index]; f = vm->heap_base + offset; return f; } @@ -215,10 +215,10 @@ vlib_frame_index_no_check (vlib_main_t * vm, vlib_frame_t * f) ASSERT (((uword) f & VLIB_CPU_MASK) == 0); - vm = vlib_mains[f->cpu_index]; + vm = vlib_mains[f->thread_index]; i = ((u8 *) f - (u8 *) vm->heap_base); - return i | f->cpu_index; + return i | f->thread_index; } always_inline vlib_frame_t * diff --git a/src/vlib/threads.c b/src/vlib/threads.c index ef3a24d3..4a111f8d 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -35,27 +35,12 @@ vl (void *p) vlib_worker_thread_t *vlib_worker_threads; vlib_thread_main_t vlib_thread_main; +__thread uword vlib_thread_index = 0; + uword os_get_cpu_number (void) { - void *sp; - uword n; - u32 len; - - len = vec_len (vlib_thread_stacks); - if (len == 0) - return 0; - - /* Get any old stack address. */ - sp = &sp; - - n = ((uword) sp - (uword) vlib_thread_stacks[0]) - >> VLIB_LOG2_THREAD_STACK_SIZE; - - /* "processes" have their own stacks, and they always run in thread 0 */ - n = n >= len ? 
0 : n; - - return n; + return vlib_thread_index; } uword @@ -275,21 +260,6 @@ vlib_thread_init (vlib_main_t * vm) return 0; } -vlib_worker_thread_t * -vlib_alloc_thread (vlib_main_t * vm) -{ - vlib_worker_thread_t *w; - - if (vec_len (vlib_worker_threads) >= vec_len (vlib_thread_stacks)) - { - clib_warning ("out of worker threads... Quitting..."); - exit (1); - } - vec_add2 (vlib_worker_threads, w, 1); - w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads]; - return w; -} - vlib_frame_queue_t * vlib_frame_queue_alloc (int nelts) { @@ -427,7 +397,7 @@ vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index, f64 b4 = vlib_time_now_ticks (vm, before); vlib_worker_thread_barrier_check (vm, b4); /* Bad idea. Dequeue -> enqueue -> dequeue -> trouble */ - // vlib_frame_queue_dequeue (vm->cpu_index, vm, nm); + // vlib_frame_queue_dequeue (vm->thread_index, vm, nm); } elt = fq->elts + (new_tail & (fq->nelts - 1)); @@ -497,6 +467,8 @@ vlib_worker_thread_bootstrap_fn (void *arg) w->lwp = syscall (SYS_gettid); w->thread_id = pthread_self (); + vlib_thread_index = w - vlib_worker_threads; + rv = (void *) clib_calljmp ((uword (*)(uword)) w->thread_function, (uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE); @@ -610,7 +582,9 @@ start_workers (vlib_main_t * vm) mheap_alloc (0 /* use VM */ , tr->mheap_size); else w->thread_mheap = main_heap; - w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads]; + + w->thread_stack = + vlib_thread_stack_init (w - vlib_worker_threads); w->thread_function = tr->function; w->thread_function_arg = w; w->instance_id = k; @@ -630,7 +604,7 @@ start_workers (vlib_main_t * vm) vm_clone = clib_mem_alloc (sizeof (*vm_clone)); clib_memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone)); - vm_clone->cpu_index = worker_thread_index; + vm_clone->thread_index = worker_thread_index; vm_clone->heap_base = w->thread_mheap; vm_clone->mbuf_alloc_list = 0; vm_clone->init_functions_called = @@ -679,7 +653,7 @@ start_workers (vlib_main_t * vm) vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->cpu_index = vm_clone->cpu_index; + rt->thread_index = vm_clone->thread_index; /* copy initial runtime_data from node */ if (n->runtime_data && n->runtime_data_bytes > 0) clib_memcpy (rt->runtime_data, n->runtime_data, @@ -692,7 +666,7 @@ start_workers (vlib_main_t * vm) vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->cpu_index = vm_clone->cpu_index; + rt->thread_index = vm_clone->thread_index; /* copy initial runtime_data from node */ if (n->runtime_data && n->runtime_data_bytes > 0) clib_memcpy (rt->runtime_data, n->runtime_data, @@ -756,7 +730,8 @@ start_workers (vlib_main_t * vm) mheap_alloc (0 /* use VM */ , tr->mheap_size); else w->thread_mheap = main_heap; - w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads]; + w->thread_stack = + vlib_thread_stack_init (w - vlib_worker_threads); w->thread_function = tr->function; w->thread_function_arg = w; w->instance_id = j; @@ -827,7 +802,7 @@ vlib_worker_thread_node_runtime_update (void) uword n_calls, uword n_vectors, uword n_clocks); - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); if (vec_len (vlib_mains) == 1) return; @@ -835,7 +810,7 @@ vlib_worker_thread_node_runtime_update (void) vm = vlib_mains[0]; nm = &vm->node_main; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); ASSERT 
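
The deleted os_get_cpu_number() body above recovered the thread number by arithmetic on the current stack address, which breaks as soon as code runs on a stack that was not carved out of vlib_thread_stacks; the replacement is a thread-local variable written once in the bootstrap function. A runnable sketch of the same technique:

#include <pthread.h>
#include <stdio.h>

/* Written once per thread at bootstrap, read everywhere else --
   the pattern behind vlib_thread_index above. */
static __thread unsigned long my_thread_index = 0;

static unsigned long
get_thread_index (void)
{
  return my_thread_index;
}

static void *
worker_bootstrap (void *arg)
{
  my_thread_index = (unsigned long) arg;  /* like w - vlib_worker_threads */
  printf ("worker sees thread index %lu\n", get_thread_index ());
  return 0;
}

int main (void)                   /* build with -pthread */
{
  pthread_t t1, t2;
  pthread_create (&t1, 0, worker_bootstrap, (void *) 1UL);
  pthread_create (&t2, 0, worker_bootstrap, (void *) 2UL);
  pthread_join (t1, 0);
  pthread_join (t2, 0);
  printf ("main thread index %lu\n", get_thread_index ());  /* 0 */
  return 0;
}
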
(*vlib_worker_threads->wait_at_barrier == 1); /* @@ -955,7 +930,7 @@ vlib_worker_thread_node_runtime_update (void) vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->cpu_index = vm_clone->cpu_index; + rt->thread_index = vm_clone->thread_index; /* copy runtime_data, will be overwritten later for existing rt */ if (n->runtime_data && n->runtime_data_bytes > 0) clib_memcpy (rt->runtime_data, n->runtime_data, @@ -981,7 +956,7 @@ vlib_worker_thread_node_runtime_update (void) vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->cpu_index = vm_clone->cpu_index; + rt->thread_index = vm_clone->thread_index; /* copy runtime_data, will be overwritten later for existing rt */ if (n->runtime_data && n->runtime_data_bytes > 0) clib_memcpy (rt->runtime_data, n->runtime_data, @@ -1180,7 +1155,7 @@ vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which) if (vlib_mains == 0) return; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); vlib_worker_thread_barrier_sync (vm); switch (which) @@ -1212,7 +1187,7 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm) vlib_worker_threads[0].barrier_sync_count++; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT; @@ -1260,7 +1235,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) int vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm) { - u32 thread_id = vm->cpu_index; + u32 thread_id = vm->thread_index; vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id]; vlib_frame_queue_elt_t *elt; u32 *from, *to; @@ -1393,7 +1368,7 @@ vlib_worker_thread_fn (void *arg) vlib_main_t *vm = vlib_get_main (); clib_error_t *e; - ASSERT (vm->cpu_index == os_get_cpu_number ()); + ASSERT (vm->thread_index == vlib_get_thread_index ()); vlib_worker_thread_init (w); clib_time_init (&vm->clib_time); diff --git a/src/vlib/threads.h b/src/vlib/threads.h index eca4fc26..101d3d4a 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -153,8 +153,6 @@ typedef struct /* Called early, in thread 0's context */ clib_error_t *vlib_thread_init (vlib_main_t * vm); -vlib_worker_thread_t *vlib_alloc_thread (vlib_main_t * vm); - int vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index, u32 frame_queue_index, vlib_frame_t * frame, vlib_frame_queue_msg_type_t type); @@ -183,12 +181,19 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts); void vlib_worker_thread_barrier_sync (vlib_main_t * vm); void vlib_worker_thread_barrier_release (vlib_main_t * vm); +extern __thread uword vlib_thread_index; +static_always_inline uword +vlib_get_thread_index (void) +{ + return vlib_thread_index; +} + always_inline void vlib_smp_unsafe_warning (void) { if (CLIB_DEBUG > 0) { - if (os_get_cpu_number ()) + if (vlib_get_thread_index ()) fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__); } } @@ -331,21 +336,21 @@ vlib_num_workers () } always_inline u32 -vlib_get_worker_cpu_index (u32 worker_index) +vlib_get_worker_thread_index (u32 worker_index) { return worker_index + 1; } always_inline u32 -vlib_get_worker_index (u32 cpu_index) +vlib_get_worker_index (u32 thread_index) { - return cpu_index - 1; + return thread_index - 1; } always_inline u32 vlib_get_current_worker_index () { - return os_get_cpu_number () - 1; + return vlib_get_thread_index () - 1; } static inline void @@ -467,6 +472,8 @@ 
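
The renamed vlib_get_worker_thread_index / vlib_get_worker_index helpers above encode the convention that thread 0 is the main thread and workers occupy thread indices 1..n, so the two numbering schemes differ by exactly one. Sketch:

#include <assert.h>

/* Thread 0 is the main thread; workers are threads 1..n. */
static unsigned
worker_to_thread (unsigned worker_index)
{
  return worker_index + 1;
}

static unsigned
thread_to_worker (unsigned thread_index)
{
  assert (thread_index > 0);    /* the main thread is not a worker */
  return thread_index - 1;
}

int main (void)
{
  assert (worker_to_thread (0) == 1);
  assert (thread_to_worker (worker_to_thread (5)) == 5);
  return 0;
}
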
vlib_get_worker_handoff_queue_elt (u32 frame_queue_index, return elt; } +u8 *vlib_thread_stack_init (uword thread_index); + int vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb); diff --git a/src/vlib/unix/cj.c b/src/vlib/unix/cj.c index 33ba163a..7c1e9475 100644 --- a/src/vlib/unix/cj.c +++ b/src/vlib/unix/cj.c @@ -48,7 +48,7 @@ cj_log (u32 type, void *data0, void *data1) r = (cj_record_t *) & (cjm->records[new_tail & (cjm->num_records - 1)]); r->time = vlib_time_now (cjm->vlib_main); - r->cpu = os_get_cpu_number (); + r->thread_index = vlib_get_thread_index (); r->type = type; r->data[0] = pointer_to_uword (data0); r->data[1] = pointer_to_uword (data1); @@ -133,7 +133,8 @@ static inline void cj_dump_one_record (cj_record_t * r) { fprintf (stderr, "[%d]: %10.6f T%02d %llx %llx\n", - r->cpu, r->time, r->type, (long long unsigned int) r->data[0], + r->thread_index, r->time, r->type, + (long long unsigned int) r->data[0], (long long unsigned int) r->data[1]); } @@ -161,7 +162,7 @@ cj_dump_internal (u8 filter0_enable, u64 filter0, index = (cjm->tail + 1) & (cjm->num_records - 1); r = &(cjm->records[index]); - if (r->cpu != (u32) ~ 0) + if (r->thread_index != (u32) ~ 0) { /* Yes, dump from tail + 1 to the end */ for (i = index; i < cjm->num_records; i++) diff --git a/src/vlib/unix/cj.h b/src/vlib/unix/cj.h index 67626afe..d0a1d46e 100644 --- a/src/vlib/unix/cj.h +++ b/src/vlib/unix/cj.h @@ -23,7 +23,7 @@ typedef struct { f64 time; - u32 cpu; + u32 thread_index; u32 type; u64 data[2]; } cj_record_t; diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index 6b96cc0d..db5ddd64 100644 --- a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -510,13 +510,28 @@ thread0 (uword arg) return i; } +u8 * +vlib_thread_stack_init (uword thread_index) +{ + vec_validate (vlib_thread_stacks, thread_index); + vlib_thread_stacks[thread_index] = clib_mem_alloc_aligned + (VLIB_THREAD_STACK_SIZE, VLIB_THREAD_STACK_SIZE); + + /* + * Disallow writes to the bottom page of the stack, to + * catch stack overflows. + */ + if (mprotect (vlib_thread_stacks[thread_index], + clib_mem_get_page_size (), PROT_READ) < 0) + clib_unix_warning ("thread stack"); + return vlib_thread_stacks[thread_index]; +} + int vlib_unix_main (int argc, char *argv[]) { vlib_main_t *vm = &vlib_global_main; /* one and only time for this! */ - vlib_thread_main_t *tm = &vlib_thread_main; unformat_input_t input; - u8 *thread_stacks; clib_error_t *e; int i; @@ -548,29 +563,9 @@ vlib_unix_main (int argc, char *argv[]) } unformat_free (&input); - /* - * allocate n x VLIB_THREAD_STACK_SIZE stacks, aligned to a - * VLIB_THREAD_STACK_SIZE boundary - * See also: os_get_cpu_number() in vlib/vlib/threads.c - */ - thread_stacks = clib_mem_alloc_aligned - ((uword) tm->n_thread_stacks * VLIB_THREAD_STACK_SIZE, - VLIB_THREAD_STACK_SIZE); - - vec_validate (vlib_thread_stacks, tm->n_thread_stacks - 1); - for (i = 0; i < vec_len (vlib_thread_stacks); i++) - { - vlib_thread_stacks[i] = thread_stacks; - - /* - * Disallow writes to the bottom page of the stack, to - * catch stack overflows. 
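
vlib_thread_stack_init above allocates each stack aligned to its own size and write-protects the lowest page, so running off the end of a stack faults immediately instead of silently corrupting whatever sits below it. A standalone sketch, assuming posix_memalign in place of clib_mem_alloc_aligned:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define STACK_SIZE (1 << 20)    /* stand-in for VLIB_THREAD_STACK_SIZE */

static unsigned char *
stack_init (void)
{
  void *stack;
  long page = sysconf (_SC_PAGESIZE);

  /* Size-aligned, like clib_mem_alloc_aligned in the hunk above. */
  if (posix_memalign (&stack, STACK_SIZE, STACK_SIZE))
    return 0;
  memset (stack, 0, STACK_SIZE);

  /* The bottom page becomes read-only: growth past the end of the
     stack now raises SIGSEGV instead of scribbling on the heap. */
  if (mprotect (stack, page, PROT_READ) < 0)
    perror ("mprotect");
  return stack;
}

int main (void)
{
  unsigned char *s = stack_init ();
  printf ("stack at %p, guard page at its base\n", (void *) s);
  /* s[0] = 1; would now fault. */
  return 0;
}
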
- */ - if (mprotect (thread_stacks, clib_mem_get_page_size (), PROT_READ) < 0) - clib_unix_warning ("thread stack"); + vlib_thread_stack_init (0); - thread_stacks += VLIB_THREAD_STACK_SIZE; - } + vlib_thread_index = 0; i = clib_calljmp (thread0, (uword) vm, (void *) (vlib_thread_stacks[0] + diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c index f68e54e0..20d70dd4 100644 --- a/src/vnet/adj/adj_l2.c +++ b/src/vnet/adj/adj_l2.c @@ -52,7 +52,7 @@ adj_l2_rewrite_inline (vlib_main_t * vm, { u32 * from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, * to_next, next_index; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); ethernet_main_t * em = &ethernet_main; n_left_from = frame->n_vectors; @@ -93,7 +93,7 @@ adj_l2_rewrite_inline (vlib_main_t * vm, vnet_buffer(p0)->sw_if_index[VLIB_TX] = adj0->rewrite_header.sw_if_index; vlib_increment_combined_counter(&adjacency_counters, - cpu_index, + thread_index, adj_index0, /* packet increment */ 0, /* byte increment */ rw_len0); diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c index e8087f08..5756de43 100644 --- a/src/vnet/adj/adj_midchain.c +++ b/src/vnet/adj/adj_midchain.c @@ -49,7 +49,7 @@ adj_midchain_tx_inline (vlib_main_t * vm, u32 next_index; vnet_main_t *vnm = vnet_get_main (); vnet_interface_main_t *im = &vnm->interface_main; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; /* Vector of buffer / pkt indices we're supposed to process */ from = vlib_frame_vector_args (frame); @@ -124,13 +124,13 @@ adj_midchain_tx_inline (vlib_main_t * vm, { vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, + thread_index, adj0->rewrite_header.sw_if_index, 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, + thread_index, adj1->rewrite_header.sw_if_index, 1, vlib_buffer_length_in_chain (vm, b1)); @@ -181,7 +181,7 @@ adj_midchain_tx_inline (vlib_main_t * vm, { vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, + thread_index, adj0->rewrite_header.sw_if_index, 1, vlib_buffer_length_in_chain (vm, b0)); diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c index 9a0f9d8b..128570b0 100644 --- a/src/vnet/adj/adj_nsh.c +++ b/src/vnet/adj/adj_nsh.c @@ -53,7 +53,7 @@ adj_nsh_rewrite_inline (vlib_main_t * vm, { u32 * from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, * to_next, next_index; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); n_left_from = frame->n_vectors; next_index = node->cached_next_index; @@ -94,7 +94,7 @@ adj_nsh_rewrite_inline (vlib_main_t * vm, vnet_buffer(p0)->ip.save_rewrite_length = rw_len0; vlib_increment_combined_counter(&adjacency_counters, - cpu_index, + thread_index, adj_index0, /* packet increment */ 0, /* byte increment */ rw_len0); diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c index 98842a48..70a189b0 100644 --- a/src/vnet/classify/vnet_classify.c +++ b/src/vnet/classify/vnet_classify.c @@ -251,12 +251,12 @@ static inline void make_working_copy vnet_classify_entry_##size##_t * working_copy##size = 0; foreach_size_in_u32x4; #undef _ - u32 cpu_number = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); - if (cpu_number >= vec_len (t->working_copies)) + if (thread_index >= vec_len (t->working_copies)) { oldheap = clib_mem_set_heap 
(t->mheap); - vec_validate (t->working_copies, cpu_number); + vec_validate (t->working_copies, thread_index); clib_mem_set_heap (oldheap); } @@ -265,7 +265,7 @@ static inline void make_working_copy * updates from multiple threads will not result in sporadic, spurious * lookup failures. */ - working_copy = t->working_copies[cpu_number]; + working_copy = t->working_copies[thread_index]; t->saved_bucket.as_u64 = b->as_u64; oldheap = clib_mem_set_heap (t->mheap); @@ -290,7 +290,7 @@ static inline void make_working_copy default: abort(); } - t->working_copies[cpu_number] = working_copy; + t->working_copies[thread_index] = working_copy; } _vec_len(working_copy) = (1<<b->log2_pages)*t->entries_per_page; @@ -318,7 +318,7 @@ static inline void make_working_copy working_bucket.offset = vnet_classify_get_offset (t, working_copy); CLIB_MEMORY_BARRIER(); b->as_u64 = working_bucket.as_u64; - t->working_copies[cpu_number] = working_copy; + t->working_copies[thread_index] = working_copy; } static vnet_classify_entry_t * @@ -387,7 +387,7 @@ int vnet_classify_add_del (vnet_classify_table_t * t, int i; u64 hash, new_hash; u32 new_log2_pages; - u32 cpu_number = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); u8 * key_minus_skip; ASSERT ((add_v->flags & VNET_CLASSIFY_ENTRY_FREE) == 0); @@ -498,7 +498,7 @@ int vnet_classify_add_del (vnet_classify_table_t * t, new_log2_pages = t->saved_bucket.log2_pages + 1; expand_again: - working_copy = t->working_copies[cpu_number]; + working_copy = t->working_copies[thread_index]; new_v = split_and_rehash (t, working_copy, new_log2_pages); if (new_v == 0) diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c index 6ef3d7d7..1b5e336b 100644 --- a/src/vnet/cop/ip4_whitelist.c +++ b/src/vnet/cop/ip4_whitelist.c @@ -60,7 +60,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, cop_feature_type_t next_index; cop_main_t *cm = &cop_main; vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -177,12 +177,12 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, dpo1 = load_balance_get_bucket_i(lb1, 0); vlib_increment_combined_counter - (vcm, cpu_index, lb_index0, 1, + (vcm, thread_index, lb_index0, 1, vlib_buffer_length_in_chain (vm, b0) + sizeof(ethernet_header_t)); vlib_increment_combined_counter - (vcm, cpu_index, lb_index1, 1, + (vcm, thread_index, lb_index1, 1, vlib_buffer_length_in_chain (vm, b1) + sizeof(ethernet_header_t)); @@ -273,7 +273,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, dpo0 = load_balance_get_bucket_i(lb0, 0); vlib_increment_combined_counter - (vcm, cpu_index, lb_index0, 1, + (vcm, thread_index, lb_index0, 1, vlib_buffer_length_in_chain (vm, b0) + sizeof(ethernet_header_t)); diff --git a/src/vnet/cop/ip6_whitelist.c b/src/vnet/cop/ip6_whitelist.c index c2e16ccf..f3fe62e3 100644 --- a/src/vnet/cop/ip6_whitelist.c +++ b/src/vnet/cop/ip6_whitelist.c @@ -61,7 +61,7 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm, cop_main_t *cm = &cop_main; ip6_main_t * im6 = &ip6_main; vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -153,12 +153,12 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm, dpo1 = load_balance_get_bucket_i(lb1, 0); vlib_increment_combined_counter - (vcm, cpu_index, lb_index0, 1, + (vcm, thread_index, 
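
make_working_copy above keeps one scratch copy of the bucket per thread (t->working_copies[thread_index], grown on demand by vec_validate), so near-simultaneous updates from different threads never share scratch state. A plain-C sketch of the grow-on-demand slot vector:

#include <stdio.h>
#include <stdlib.h>

typedef struct { void **working_copies; unsigned len; } table_t;

/* Ensure the per-thread slot exists, like the vec_validate call
   above; each thread then reuses its own scratch copy. */
static void *
get_working_copy (table_t *t, unsigned thread_index, size_t sz)
{
  if (thread_index >= t->len)
    {
      t->working_copies =
        realloc (t->working_copies, (thread_index + 1) * sizeof (void *));
      for (unsigned i = t->len; i <= thread_index; i++)
        t->working_copies[i] = 0;
      t->len = thread_index + 1;
    }
  if (!t->working_copies[thread_index])
    t->working_copies[thread_index] = malloc (sz);
  return t->working_copies[thread_index];
}

int main (void)
{
  table_t t = { 0, 0 };
  printf ("thread 3 scratch: %p\n", get_working_copy (&t, 3, 256));
  printf ("thread 3 again:   %p\n", get_working_copy (&t, 3, 256));
  return 0;
}
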
lb_index0, 1, vlib_buffer_length_in_chain (vm, b0) + sizeof(ethernet_header_t)); vlib_increment_combined_counter - (vcm, cpu_index, lb_index1, 1, + (vcm, thread_index, lb_index1, 1, vlib_buffer_length_in_chain (vm, b1) + sizeof(ethernet_header_t)); @@ -233,7 +233,7 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm, dpo0 = load_balance_get_bucket_i(lb0, 0); vlib_increment_combined_counter - (vcm, cpu_index, lb_index0, 1, + (vcm, thread_index, lb_index0, 1, vlib_buffer_length_in_chain (vm, b0) + sizeof(ethernet_header_t)); diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index ba337f3f..76980102 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -124,7 +124,7 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 frame_num = apif->rx_req->tp_frame_nr; u8 *block_start = apif->rx_ring + block * block_size; uword n_trace = vlib_get_trace_count (vm, node); - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes; @@ -132,15 +132,15 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, if (apif->per_interface_next_index != ~0) next_index = apif->per_interface_next_index; - n_free_bufs = vec_len (apm->rx_buffers[cpu_index]); + n_free_bufs = vec_len (apm->rx_buffers[thread_index]); if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) { - vec_validate (apm->rx_buffers[cpu_index], + vec_validate (apm->rx_buffers[thread_index], VLIB_FRAME_SIZE + n_free_bufs - 1); n_free_bufs += - vlib_buffer_alloc (vm, &apm->rx_buffers[cpu_index][n_free_bufs], + vlib_buffer_alloc (vm, &apm->rx_buffers[thread_index][n_free_bufs], VLIB_FRAME_SIZE); - _vec_len (apm->rx_buffers[cpu_index]) = n_free_bufs; + _vec_len (apm->rx_buffers[thread_index]) = n_free_bufs; } rx_frame = apif->next_rx_frame; @@ -163,11 +163,11 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, { /* grab free buffer */ u32 last_empty_buffer = - vec_len (apm->rx_buffers[cpu_index]) - 1; + vec_len (apm->rx_buffers[thread_index]) - 1; prev_bi0 = bi0; - bi0 = apm->rx_buffers[cpu_index][last_empty_buffer]; + bi0 = apm->rx_buffers[thread_index][last_empty_buffer]; b0 = vlib_get_buffer (vm, bi0); - _vec_len (apm->rx_buffers[cpu_index]) = last_empty_buffer; + _vec_len (apm->rx_buffers[thread_index]) = last_empty_buffer; n_free_bufs--; /* copy data */ @@ -236,9 +236,9 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_increment_combined_counter (vnet_get_main ()->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number (), apif->hw_if_index, n_rx_packets, n_rx_bytes); + vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes); - vnet_device_increment_rx_packets (cpu_index, n_rx_packets); + vnet_device_increment_rx_packets (thread_index, n_rx_packets); return n_rx_packets; } diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index 41645220..5e5e812c 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -104,7 +104,7 @@ vnet_device_queue_sort (void *a1, void *a2) void vnet_device_input_assign_thread (u32 hw_if_index, - u16 queue_id, uword cpu_index) + u16 queue_id, uword thread_index) { vnet_main_t *vnm = vnet_get_main (); vnet_device_main_t *vdm = &vnet_device_main; @@ -115,19 +115,19 @@ vnet_device_input_assign_thread (u32 
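
The af_packet input path above (and the netmap path below) keeps a per-thread vector of free buffer indices, refilled VLIB_FRAME_SIZE at a time and popped from the back, so the allocator runs once per frame instead of once per packet. Sketch of that refill/pop pattern:

#include <stdio.h>

#define FRAME_SIZE 256          /* stand-in for VLIB_FRAME_SIZE */

typedef struct { unsigned idx[2 * FRAME_SIZE]; unsigned n; } buf_cache_t;

/* Stand-in for vlib_buffer_alloc: hand out fake buffer indices. */
static unsigned
buffer_alloc (unsigned *dst, unsigned want)
{
  static unsigned next = 1;
  for (unsigned i = 0; i < want; i++)
    dst[i] = next++;
  return want;
}

/* Refill in bulk when low, then pop from the tail -- one amortized
   allocator call per frame, like rx_buffers[thread_index] above. */
static unsigned
get_buffer (buf_cache_t *c)
{
  if (c->n < FRAME_SIZE)
    c->n += buffer_alloc (c->idx + c->n, FRAME_SIZE);
  return c->idx[--c->n];
}

int main (void)
{
  buf_cache_t cache = { { 0 }, 0 };   /* one cache per thread */
  printf ("buffer %u, %u left in cache\n", get_buffer (&cache), cache.n);
  return 0;
}
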
hw_if_index, ASSERT (hw->input_node_index > 0); - if (vdm->first_worker_cpu_index == 0) - cpu_index = 0; + if (vdm->first_worker_thread_index == 0) + thread_index = 0; - if (cpu_index != 0 && - (cpu_index < vdm->first_worker_cpu_index || - cpu_index > vdm->last_worker_cpu_index)) + if (thread_index != 0 && + (thread_index < vdm->first_worker_thread_index || + thread_index > vdm->last_worker_thread_index)) { - cpu_index = vdm->next_worker_cpu_index++; - if (vdm->next_worker_cpu_index > vdm->last_worker_cpu_index) - vdm->next_worker_cpu_index = vdm->first_worker_cpu_index; + thread_index = vdm->next_worker_thread_index++; + if (vdm->next_worker_thread_index > vdm->last_worker_thread_index) + vdm->next_worker_thread_index = vdm->first_worker_thread_index; } - vm = vlib_mains[cpu_index]; + vm = vlib_mains[thread_index]; rt = vlib_node_get_runtime_data (vm, hw->input_node_index); vec_add2 (rt->devices_and_queues, dq, 1); @@ -136,33 +136,33 @@ vnet_device_input_assign_thread (u32 hw_if_index, dq->queue_id = queue_id; vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort); - vec_validate (hw->input_node_cpu_index_by_queue, queue_id); - hw->input_node_cpu_index_by_queue[queue_id] = cpu_index; + vec_validate (hw->input_node_thread_index_by_queue, queue_id); + hw->input_node_thread_index_by_queue[queue_id] = thread_index; } static int vnet_device_input_unassign_thread (u32 hw_if_index, u16 queue_id, - uword cpu_index) + uword thread_index) { vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); vnet_device_input_runtime_t *rt; vnet_device_and_queue_t *dq; - uword old_cpu_index; + uword old_thread_index; - if (hw->input_node_cpu_index_by_queue == 0) + if (hw->input_node_thread_index_by_queue == 0) return VNET_API_ERROR_INVALID_INTERFACE; - if (vec_len (hw->input_node_cpu_index_by_queue) < queue_id + 1) + if (vec_len (hw->input_node_thread_index_by_queue) < queue_id + 1) return VNET_API_ERROR_INVALID_INTERFACE; - old_cpu_index = hw->input_node_cpu_index_by_queue[queue_id]; + old_thread_index = hw->input_node_thread_index_by_queue[queue_id]; - if (old_cpu_index == cpu_index) + if (old_thread_index == thread_index) return 0; rt = - vlib_node_get_runtime_data (vlib_mains[old_cpu_index], + vlib_node_get_runtime_data (vlib_mains[old_thread_index], hw->input_node_index); vec_foreach (dq, rt->devices_and_queues) @@ -240,7 +240,7 @@ set_device_placement (vlib_main_t * vm, unformat_input_t * input, vnet_device_main_t *vdm = &vnet_device_main; u32 hw_if_index = (u32) ~ 0; u32 queue_id = (u32) 0; - u32 cpu_index = (u32) ~ 0; + u32 thread_index = (u32) ~ 0; int rv; if (!unformat_user (input, unformat_line_input, line_input)) @@ -253,10 +253,10 @@ set_device_placement (vlib_main_t * vm, unformat_input_t * input, ; else if (unformat (line_input, "queue %d", &queue_id)) ; - else if (unformat (line_input, "main", &cpu_index)) - cpu_index = 0; - else if (unformat (line_input, "worker %d", &cpu_index)) - cpu_index += vdm->first_worker_cpu_index; + else if (unformat (line_input, "main", &thread_index)) + thread_index = 0; + else if (unformat (line_input, "worker %d", &thread_index)) + thread_index += vdm->first_worker_thread_index; else { error = clib_error_return (0, "parse error: '%U'", @@ -271,16 +271,17 @@ set_device_placement (vlib_main_t * vm, unformat_input_t * input, if (hw_if_index == (u32) ~ 0) return clib_error_return (0, "please specify valid interface name"); - if (cpu_index > vdm->last_worker_cpu_index) + if (thread_index > 
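
When the CLI does not pin a queue explicitly, vnet_device_input_assign_thread above sprays queues across the worker range with a wrapping next_worker_thread_index. A minimal sketch of that round-robin:

#include <stdio.h>

typedef struct { unsigned first, last, next; } worker_range_t;

/* Pick the next worker thread, wrapping back to the first -- the
   next_worker_thread_index logic in the hunk above. */
static unsigned
assign_round_robin (worker_range_t *r)
{
  unsigned thread_index = r->next++;
  if (r->next > r->last)
    r->next = r->first;
  return thread_index;
}

int main (void)
{
  worker_range_t r = { 1, 3, 1 };     /* workers on threads 1..3 */
  for (unsigned q = 0; q < 5; q++)
    printf ("queue %u -> thread %u\n", q, assign_round_robin (&r));
  /* 1, 2, 3, 1, 2 */
  return 0;
}
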
vdm->last_worker_thread_index) return clib_error_return (0, "please specify valid worker thread or main"); - rv = vnet_device_input_unassign_thread (hw_if_index, queue_id, cpu_index); + rv = + vnet_device_input_unassign_thread (hw_if_index, queue_id, thread_index); if (rv) return clib_error_return (0, "not found"); - vnet_device_input_assign_thread (hw_if_index, queue_id, cpu_index); + vnet_device_input_assign_thread (hw_if_index, queue_id, thread_index); return 0; } @@ -326,9 +327,9 @@ vnet_device_init (vlib_main_t * vm) tr = p ? (vlib_thread_registration_t *) p[0] : 0; if (tr && tr->count > 0) { - vdm->first_worker_cpu_index = tr->first_index; - vdm->next_worker_cpu_index = tr->first_index; - vdm->last_worker_cpu_index = tr->first_index + tr->count - 1; + vdm->first_worker_thread_index = tr->first_index; + vdm->next_worker_thread_index = tr->first_index; + vdm->last_worker_thread_index = tr->first_index + tr->count - 1; } return 0; } diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index bbb29fe3..966f8302 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -50,9 +50,9 @@ typedef struct typedef struct { vnet_device_per_worker_data_t *workers; - uword first_worker_cpu_index; - uword last_worker_cpu_index; - uword next_worker_cpu_index; + uword first_worker_thread_index; + uword last_worker_thread_index; + uword next_worker_thread_index; } vnet_device_main_t; typedef struct @@ -80,7 +80,7 @@ vnet_set_device_input_node (u32 hw_if_index, u32 node_index) } void vnet_device_input_assign_thread (u32 hw_if_index, u16 queue_id, - uword cpu_index); + uword thread_index); static inline u64 vnet_get_aggregate_rx_packets (void) @@ -95,12 +95,12 @@ vnet_get_aggregate_rx_packets (void) } static inline void -vnet_device_increment_rx_packets (u32 cpu_index, u64 count) +vnet_device_increment_rx_packets (u32 thread_index, u64 count) { vnet_device_main_t *vdm = &vnet_device_main; vnet_device_per_worker_data_t *pwd; - pwd = vec_elt_at_index (vdm->workers, cpu_index); + pwd = vec_elt_at_index (vdm->workers, thread_index); pwd->aggregate_rx_packets += count; } @@ -117,9 +117,9 @@ vnet_device_input_set_interrupt_pending (vnet_main_t * vnm, u32 hw_if_index, { vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - ASSERT (queue_id < vec_len (hw->input_node_cpu_index_by_queue)); - u32 cpu_index = hw->input_node_cpu_index_by_queue[queue_id]; - vlib_node_set_interrupt_pending (vlib_mains[cpu_index], + ASSERT (queue_id < vec_len (hw->input_node_thread_index_by_queue)); + u32 thread_index = hw->input_node_thread_index_by_queue[queue_id]; + vlib_node_set_interrupt_pending (vlib_mains[thread_index], hw->input_node_index); } diff --git a/src/vnet/devices/netmap/node.c b/src/vnet/devices/netmap/node.c index 68ea7832..e120eeae 100644 --- a/src/vnet/devices/netmap/node.c +++ b/src/vnet/devices/netmap/node.c @@ -98,22 +98,22 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_free_bufs; struct netmap_ring *ring; int cur_ring; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); if (nif->per_interface_next_index != ~0) next_index = nif->per_interface_next_index; - n_free_bufs = vec_len (nm->rx_buffers[cpu_index]); + n_free_bufs = vec_len (nm->rx_buffers[thread_index]); if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) { - vec_validate (nm->rx_buffers[cpu_index], + vec_validate (nm->rx_buffers[thread_index], 
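/* [editorial sketch] devices.h above keeps, per hardware interface, a map
   from rx queue id to the thread that polls it; interrupt-mode drivers use
   it to wake exactly the right input node instance instead of broadcasting.
   The inline from the hunk shown standalone under a hypothetical name. */
static inline void
kick_rx_queue (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id)
{
  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
  /* the map is sized by vnet_device_input_assign_thread () */
  ASSERT (queue_id < vec_len (hw->input_node_thread_index_by_queue));
  u32 thread_index = hw->input_node_thread_index_by_queue[queue_id];
  /* schedule the device input node on the owning thread only */
  vlib_node_set_interrupt_pending (vlib_mains[thread_index],
                                   hw->input_node_index);
}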
VLIB_FRAME_SIZE + n_free_bufs - 1); n_free_bufs += - vlib_buffer_alloc (vm, &nm->rx_buffers[cpu_index][n_free_bufs], + vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs], VLIB_FRAME_SIZE); - _vec_len (nm->rx_buffers[cpu_index]) = n_free_bufs; + _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs; } cur_ring = nif->first_rx_ring; @@ -163,11 +163,11 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *b0; /* grab free buffer */ u32 last_empty_buffer = - vec_len (nm->rx_buffers[cpu_index]) - 1; + vec_len (nm->rx_buffers[thread_index]) - 1; prev_bi0 = bi0; - bi0 = nm->rx_buffers[cpu_index][last_empty_buffer]; + bi0 = nm->rx_buffers[thread_index][last_empty_buffer]; b0 = vlib_get_buffer (vm, bi0); - _vec_len (nm->rx_buffers[cpu_index]) = last_empty_buffer; + _vec_len (nm->rx_buffers[thread_index]) = last_empty_buffer; n_free_bufs--; /* copy data */ @@ -247,9 +247,9 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_increment_combined_counter (vnet_get_main ()->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number (), nif->hw_if_index, n_rx_packets, n_rx_bytes); + vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes); - vnet_device_increment_rx_packets (cpu_index, n_rx_packets); + vnet_device_increment_rx_packets (thread_index, n_rx_packets); return n_rx_packets; } @@ -260,7 +260,7 @@ netmap_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, { int i; u32 n_rx_packets = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); netmap_main_t *nm = &netmap_main; netmap_if_t *nmi; @@ -269,7 +269,7 @@ netmap_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, nmi = vec_elt_at_index (nm->interfaces, i); if (nmi->is_admin_up && (i % nm->input_cpu_count) == - (cpu_index - nm->input_cpu_first_index)) + (thread_index - nm->input_cpu_first_index)) n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi); } diff --git a/src/vnet/devices/ssvm/node.c b/src/vnet/devices/ssvm/node.c index a6c9dfd7..539b4161 100644 --- a/src/vnet/devices/ssvm/node.c +++ b/src/vnet/devices/ssvm/node.c @@ -89,7 +89,7 @@ ssvm_eth_device_input (ssvm_eth_main_t * em, ethernet_header_t *eh0; u16 type0; u32 n_rx_bytes = 0, l3_offset0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 trace_cnt __attribute__ ((unused)) = vlib_get_trace_count (vm, node); volatile u32 *lock; u32 *elt_indices; @@ -284,10 +284,10 @@ out: vlib_increment_combined_counter (vnet_get_main ()->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, cpu_index, + + VNET_INTERFACE_COUNTER_RX, thread_index, intfc->vlib_hw_if_index, rx_queue_index, n_rx_bytes); - vnet_device_increment_rx_packets (cpu_index, rx_queue_index); + vnet_device_increment_rx_packets (thread_index, rx_queue_index); return rx_queue_index; } diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 00807dc0..5e720f65 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -331,7 +331,7 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui) { //Let's try to assign one queue to each thread u32 qid = 0; - u32 cpu_index = 0; + u32 thread_index = 0; vui->use_tx_spinlock = 0; while (1) { @@ -341,20 +341,21 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui) if (!rxvq->started || !rxvq->enabled) continue; - vui->per_cpu_tx_qid[cpu_index] = qid; - cpu_index++; - if (cpu_index == 
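/* [editorial sketch] netmap_input_fn above statically shards interfaces
   across the input threads: interface i belongs to the thread whose
   zero-based input index equals i modulo the input thread count. A
   predicate form of that test; owns_interface is a hypothetical name, the
   fields mirror netmap_main_t above. */
static inline int
owns_interface (u32 i, u32 thread_index,
                u32 input_cpu_first_index, u32 input_cpu_count)
{
  /* threads [first, first + count) take interfaces round-robin by index */
  return (i % input_cpu_count) == (thread_index - input_cpu_first_index);
}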
vlib_get_thread_main ()->n_vlib_mains) + vui->per_cpu_tx_qid[thread_index] = qid; + thread_index++; + if (thread_index == vlib_get_thread_main ()->n_vlib_mains) return; } //We need to loop, meaning the spinlock has to be used vui->use_tx_spinlock = 1; - if (cpu_index == 0) + if (thread_index == 0) { //Could not find a single valid one - for (cpu_index = 0; - cpu_index < vlib_get_thread_main ()->n_vlib_mains; cpu_index++) + for (thread_index = 0; + thread_index < vlib_get_thread_main ()->n_vlib_mains; + thread_index++) { - vui->per_cpu_tx_qid[cpu_index] = 0; + vui->per_cpu_tx_qid[thread_index] = 0; } return; } @@ -368,7 +369,7 @@ vhost_user_rx_thread_placement () vhost_user_intf_t *vui; vhost_cpu_t *vhc; u32 *workers = 0; - u32 cpu_index; + u32 thread_index; vlib_main_t *vm; //Let's list all workers cpu indexes @@ -400,9 +401,9 @@ vhost_user_rx_thread_placement () continue; i %= vec_len (vui_workers); - cpu_index = vui_workers[i]; + thread_index = vui_workers[i]; i++; - vhc = &vum->cpus[cpu_index]; + vhc = &vum->cpus[thread_index]; iaq.qid = qid; iaq.vhost_iface_index = vui - vum->vhost_user_interfaces; @@ -429,14 +430,14 @@ vhost_user_rx_thread_placement () vhc->operation_mode = mode; } - for (cpu_index = vum->input_cpu_first_index; - cpu_index < vum->input_cpu_first_index + vum->input_cpu_count; - cpu_index++) + for (thread_index = vum->input_cpu_first_index; + thread_index < vum->input_cpu_first_index + vum->input_cpu_count; + thread_index++) { vlib_node_state_t state = VLIB_NODE_STATE_POLLING; - vhc = &vum->cpus[cpu_index]; - vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main; + vhc = &vum->cpus[thread_index]; + vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main; switch (vhc->operation_mode) { case VHOST_USER_INTERRUPT_MODE: @@ -532,7 +533,7 @@ vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq) { vhost_user_main_t *vum = &vhost_user_main; vhost_cpu_t *vhc; - u32 cpu_index; + u32 thread_index; vhost_iface_and_queue_t *vhiq; vlib_main_t *vm; u32 ifq2; @@ -553,8 +554,8 @@ vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq) if ((vhiq->vhost_iface_index == (ifq >> 8)) && (VHOST_VRING_IDX_TX (vhiq->qid) == (ifq & 0xff))) { - cpu_index = vhc - vum->cpus; - vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main; + thread_index = vhc - vum->cpus; + vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main; /* * Convert RX virtqueue number in the lower byte to vring * queue index for the input node process. Top bytes contain @@ -1592,7 +1593,7 @@ vhost_user_if_input (vlib_main_t * vm, u32 n_trace = vlib_get_trace_count (vm, node); u16 qsz_mask; u32 map_hint = 0; - u16 cpu_index = os_get_cpu_number (); + u16 thread_index = vlib_get_thread_index (); u16 copy_len = 0; { @@ -1651,32 +1652,32 @@ vhost_user_if_input (vlib_main_t * vm, * in the loop and come back later. This is not an issue as for big packet, * processing cost really comes from the memory copy. 
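/* [editorial sketch] vhost_user_tx_thread_placement above walks the started
   and enabled rx virtqueues, handing one tx qid to each thread; when it
   runs out of usable queues it wraps and sets use_tx_spinlock, since two
   threads may now share a queue. A simplified standalone model of that
   policy, ignoring the started/enabled filtering; all names are local. */
static void
place_tx_queues (u32 n_threads, u32 n_usable_qids,
                 u32 * per_thread_tx_qid, u8 * use_tx_spinlock)
{
  u32 t;
  /* fewer usable queues than threads means sharing, hence locking */
  *use_tx_spinlock = (n_usable_qids < n_threads);
  for (t = 0; t < n_threads; t++)
    per_thread_tx_qid[t] = n_usable_qids ? t % n_usable_qids : 0;
}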
*/ - if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len < n_left + 1)) + if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1)) { - u32 curr_len = vum->cpus[cpu_index].rx_buffers_len; - vum->cpus[cpu_index].rx_buffers_len += + u32 curr_len = vum->cpus[thread_index].rx_buffers_len; + vum->cpus[thread_index].rx_buffers_len += vlib_buffer_alloc_from_free_list (vm, - vum->cpus[cpu_index].rx_buffers + + vum->cpus[thread_index].rx_buffers + curr_len, VHOST_USER_RX_BUFFERS_N - curr_len, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); if (PREDICT_FALSE - (vum->cpus[cpu_index].rx_buffers_len < + (vum->cpus[thread_index].rx_buffers_len < VHOST_USER_RX_BUFFER_STARVATION)) { /* In case of buffer starvation, discard some packets from the queue * and log the event. * We keep doing best effort for the remaining packets. */ - u32 flush = (n_left + 1 > vum->cpus[cpu_index].rx_buffers_len) ? - n_left + 1 - vum->cpus[cpu_index].rx_buffers_len : 1; + u32 flush = (n_left + 1 > vum->cpus[thread_index].rx_buffers_len) ? + n_left + 1 - vum->cpus[thread_index].rx_buffers_len : 1; flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush); n_left -= flush; vlib_increment_simple_counter (vnet_main. interface_main.sw_if_counters + VNET_INTERFACE_COUNTER_DROP, - os_get_cpu_number (), + vlib_get_thread_index (), vui->sw_if_index, flush); vlib_error_count (vm, vhost_user_input_node.index, @@ -1696,7 +1697,7 @@ vhost_user_if_input (vlib_main_t * vm, u32 desc_data_offset; vring_desc_t *desc_table = txvq->desc; - if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len <= 1)) + if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len <= 1)) { /* Not enough rx_buffers * Note: We yeld on 1 so we don't need to do an additional @@ -1707,17 +1708,18 @@ vhost_user_if_input (vlib_main_t * vm, } desc_current = txvq->avail->ring[txvq->last_avail_idx & qsz_mask]; - vum->cpus[cpu_index].rx_buffers_len--; - bi_current = (vum->cpus[cpu_index].rx_buffers) - [vum->cpus[cpu_index].rx_buffers_len]; + vum->cpus[thread_index].rx_buffers_len--; + bi_current = (vum->cpus[thread_index].rx_buffers) + [vum->cpus[thread_index].rx_buffers_len]; b_head = b_current = vlib_get_buffer (vm, bi_current); to_next[0] = bi_current; //We do that now so we can forget about bi_current to_next++; n_left_to_next--; vlib_prefetch_buffer_with_index (vm, - (vum->cpus[cpu_index].rx_buffers) - [vum->cpus[cpu_index]. + (vum-> + cpus[thread_index].rx_buffers) + [vum->cpus[thread_index]. rx_buffers_len - 1], LOAD); /* Just preset the used descriptor id and length for later */ @@ -1791,7 +1793,7 @@ vhost_user_if_input (vlib_main_t * vm, (b_current->current_length == VLIB_BUFFER_DATA_SIZE)) { if (PREDICT_FALSE - (vum->cpus[cpu_index].rx_buffers_len == 0)) + (vum->cpus[thread_index].rx_buffers_len == 0)) { /* Cancel speculation */ to_next--; @@ -1805,17 +1807,18 @@ vhost_user_if_input (vlib_main_t * vm, * but valid. 
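/* [editorial sketch] The starvation path above discards just enough
   descriptors to guarantee forward progress: when fewer buffers remain than
   packets pending (plus one spare for chaining), the shortfall is flushed,
   and never fewer than one packet. The arithmetic in isolation; the helper
   name is hypothetical. */
static inline u32
rx_starvation_flush (u32 n_left, u32 rx_buffers_len)
{
  /* need n_left packets plus one spare buffer; flush the shortfall */
  return (n_left + 1 > rx_buffers_len) ? n_left + 1 - rx_buffers_len : 1;
}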
*/ vhost_user_input_rewind_buffers (vm, - &vum->cpus[cpu_index], + &vum->cpus + [thread_index], b_head); n_left = 0; goto stop; } /* Get next output */ - vum->cpus[cpu_index].rx_buffers_len--; + vum->cpus[thread_index].rx_buffers_len--; u32 bi_next = - (vum->cpus[cpu_index].rx_buffers)[vum->cpus - [cpu_index].rx_buffers_len]; + (vum->cpus[thread_index].rx_buffers)[vum->cpus + [thread_index].rx_buffers_len]; b_current->next_buffer = bi_next; b_current->flags |= VLIB_BUFFER_NEXT_PRESENT; bi_current = bi_next; @@ -1823,7 +1826,7 @@ vhost_user_if_input (vlib_main_t * vm, } /* Prepare a copy order executed later for the data */ - vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len]; + vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len]; copy_len++; u32 desc_data_l = desc_table[desc_current].len - desc_data_offset; @@ -1880,7 +1883,7 @@ vhost_user_if_input (vlib_main_t * vm, if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD)) { if (PREDICT_FALSE - (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy, + (vhost_user_input_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { clib_warning @@ -1905,7 +1908,7 @@ vhost_user_if_input (vlib_main_t * vm, /* Do the memory copies */ if (PREDICT_FALSE - (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy, + (vhost_user_input_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { clib_warning ("Memory mapping error on interface hw_if_index=%d " @@ -1933,9 +1936,9 @@ vhost_user_if_input (vlib_main_t * vm, vlib_increment_combined_counter (vnet_main.interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number (), vui->sw_if_index, n_rx_packets, n_rx_bytes); + vlib_get_thread_index (), vui->sw_if_index, n_rx_packets, n_rx_bytes); - vnet_device_increment_rx_packets (cpu_index, n_rx_packets); + vnet_device_increment_rx_packets (thread_index, n_rx_packets); return n_rx_packets; } @@ -1946,15 +1949,15 @@ vhost_user_input (vlib_main_t * vm, { vhost_user_main_t *vum = &vhost_user_main; uword n_rx_packets = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); vhost_iface_and_queue_t *vhiq; vhost_user_intf_t *vui; vhost_cpu_t *vhc; - vhc = &vum->cpus[cpu_index]; + vhc = &vum->cpus[thread_index]; if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE)) { - vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues) + vec_foreach (vhiq, vum->cpus[thread_index].rx_queues) { vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node); @@ -2096,7 +2099,7 @@ vhost_user_tx (vlib_main_t * vm, vhost_user_vring_t *rxvq; u16 qsz_mask; u8 error; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 map_hint = 0; u8 retry = 8; u16 copy_len; @@ -2116,7 +2119,7 @@ vhost_user_tx (vlib_main_t * vm, qid = VHOST_VRING_IDX_RX (*vec_elt_at_index - (vui->per_cpu_tx_qid, os_get_cpu_number ())); + (vui->per_cpu_tx_qid, vlib_get_thread_index ())); rxvq = &vui->vrings[qid]; if (PREDICT_FALSE (vui->use_tx_spinlock)) vhost_user_vring_lock (vui, qid); @@ -2143,10 +2146,10 @@ retry: if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - vum->cpus[cpu_index].current_trace = + vum->cpus[thread_index].current_trace = vlib_add_trace (vm, node, b0, - sizeof (*vum->cpus[cpu_index].current_trace)); - vhost_user_tx_trace (vum->cpus[cpu_index].current_trace, + sizeof (*vum->cpus[thread_index].current_trace)); + vhost_user_tx_trace (vum->cpus[thread_index].current_trace, vui, qid / 2, b0, 
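/* [editorial sketch] Both vhost directions queue guest/host copies into a
   per-thread vhost_copy_t array and execute them in one batch (here once
   copy_len reaches VHOST_USER_RX_COPY_THRESHOLD), keeping the descriptor
   walk separate from the memcpy work. A minimal sketch of the recording
   side; copy_order_t is a simplified stand-in for vhost_copy_t. */
typedef struct
{
  uword src, dst;
  u32 len;
} copy_order_t;

static inline void
record_copy (copy_order_t * orders, u16 * copy_len,
             uword dst, uword src, u32 len)
{
  /* just remember the copy; a later pass resolves mappings and memcpys */
  copy_order_t *cpy = &orders[(*copy_len)++];
  cpy->dst = dst;
  cpy->src = src;
  cpy->len = len;
}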
rxvq); } @@ -2188,14 +2191,14 @@ retry: { // Get a header from the header array virtio_net_hdr_mrg_rxbuf_t *hdr = - &vum->cpus[cpu_index].tx_headers[tx_headers_len]; + &vum->cpus[thread_index].tx_headers[tx_headers_len]; tx_headers_len++; hdr->hdr.flags = 0; hdr->hdr.gso_type = 0; hdr->num_buffers = 1; //This is local, no need to check // Prepare a copy order executed later for the header - vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len]; + vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len]; copy_len++; cpy->len = vui->virtio_net_hdr_sz; cpy->dst = buffer_map_addr; @@ -2220,7 +2223,7 @@ retry: else if (vui->virtio_net_hdr_sz == 12) //MRG is available { virtio_net_hdr_mrg_rxbuf_t *hdr = - &vum->cpus[cpu_index].tx_headers[tx_headers_len - 1]; + &vum->cpus[thread_index].tx_headers[tx_headers_len - 1]; //Move from available to used buffer rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id = @@ -2282,7 +2285,7 @@ retry: } { - vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len]; + vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len]; copy_len++; cpy->len = bytes_left; cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len; @@ -2325,8 +2328,8 @@ retry: if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - vum->cpus[cpu_index].current_trace->hdr = - vum->cpus[cpu_index].tx_headers[tx_headers_len - 1]; + vum->cpus[thread_index].current_trace->hdr = + vum->cpus[thread_index].tx_headers[tx_headers_len - 1]; } n_left--; //At the end for error counting when 'goto done' is invoked @@ -2336,7 +2339,7 @@ retry: done: //Do the memory copies if (PREDICT_FALSE - (vhost_user_tx_copy (vui, vum->cpus[cpu_index].copy, + (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { clib_warning ("Memory mapping error on interface hw_if_index=%d " @@ -2386,7 +2389,7 @@ done3: vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + VNET_INTERFACE_COUNTER_DROP, - os_get_cpu_number (), vui->sw_if_index, n_left); + vlib_get_thread_index (), vui->sw_if_index, n_left); } vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); @@ -2773,11 +2776,11 @@ vhost_user_send_interrupt_process (vlib_main_t * vm, case ~0: vec_foreach (vhc, vum->cpus) { - u32 cpu_index = vhc - vum->cpus; + u32 thread_index = vhc - vum->cpus; f64 next_timeout; next_timeout = timeout; - vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues) + vec_foreach (vhiq, vum->cpus[thread_index].rx_queues) { vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; vhost_user_vring_t *rxvq = diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index e94e871c..97ad0a44 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -266,7 +266,7 @@ lookup_dpo_ip4_inline (vlib_main_t * vm, int table_from_interface) { u32 n_left_from, next_index, * from, * to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; from = vlib_frame_vector_args (from_frame); @@ -407,10 +407,10 @@ lookup_dpo_ip4_inline (vlib_main_t * vm, vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b1)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -511,7 +511,7 @@ lookup_dpo_ip4_inline (vlib_main_t * vm, 
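/* [editorial note] The lookup_dpo hunks that follow show the common shape of
   this whole rename: combined counters are sharded per thread, so each
   caller passes its own thread index and the add needs no atomics. The
   idiom wrapped in a tiny helper with a hypothetical name: */
static inline void
count_lb_hit (vlib_combined_counter_main_t * cm, vlib_main_t * vm,
              u32 lbi, vlib_buffer_t * b)
{
  /* per-thread counter shard: a plain add, no locked instruction */
  vlib_increment_combined_counter
    (cm, vlib_get_thread_index (), lbi, 1 /* packets */,
     vlib_buffer_length_in_chain (vm, b) /* bytes */);
}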
vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -606,7 +606,7 @@ lookup_dpo_ip6_inline (vlib_main_t * vm, { vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, next_index, * from, * to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -749,10 +749,10 @@ lookup_dpo_ip6_inline (vlib_main_t * vm, vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b1)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -853,7 +853,7 @@ lookup_dpo_ip6_inline (vlib_main_t * vm, vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -930,7 +930,7 @@ lookup_dpo_mpls_inline (vlib_main_t * vm, int table_from_interface) { u32 n_left_from, next_index, * from, * to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; from = vlib_frame_vector_args (from_frame); @@ -994,7 +994,7 @@ lookup_dpo_mpls_inline (vlib_main_t * vm, vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c index a9f334be..e25ceae9 100644 --- a/src/vnet/dpo/replicate_dpo.c +++ b/src/vnet/dpo/replicate_dpo.c @@ -627,7 +627,7 @@ replicate_inline (vlib_main_t * vm, vlib_combined_counter_main_t * cm = &replicate_main.repm_counters; replicate_main_t * rm = &replicate_main; u32 n_left_from, * from, * to_next, next_index; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -657,12 +657,12 @@ replicate_inline (vlib_main_t * vm, rep0 = replicate_get(repi0); vlib_increment_combined_counter( - cm, cpu_index, repi0, 1, + cm, thread_index, repi0, 1, vlib_buffer_length_in_chain(vm, b0)); - vec_validate (rm->clones[cpu_index], rep0->rep_n_buckets - 1); + vec_validate (rm->clones[thread_index], rep0->rep_n_buckets - 1); - num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[cpu_index], rep0->rep_n_buckets, 128); + num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[thread_index], rep0->rep_n_buckets, 128); if (num_cloned != rep0->rep_n_buckets) { @@ -673,7 +673,7 @@ replicate_inline (vlib_main_t * vm, for (bucket = 0; bucket < num_cloned; bucket++) { - ci0 = rm->clones[cpu_index][bucket]; + ci0 = rm->clones[thread_index][bucket]; c0 = vlib_get_buffer(vm, ci0); to_next[0] = ci0; @@ -700,7 +700,7 @@ replicate_inline (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); } } - vec_reset_length (rm->clones[cpu_index]); + vec_reset_length (rm->clones[thread_index]); } 
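/* [editorial sketch] replicate_inline above clones one buffer into a
   per-thread scratch vector, enqueues each clone, then resets the vector
   length so the scratch space is reused for the next packet. The clone step
   in isolation; the 128 head offset matches the call above, and the helper
   name is hypothetical. */
static inline u16
clone_to_buckets (vlib_main_t * vm, u32 bi0,
                  u32 ** clones_by_thread, u16 n_buckets)
{
  u32 thread_index = vlib_get_thread_index ();
  vec_validate (clones_by_thread[thread_index], n_buckets - 1);
  /* may return fewer than n_buckets if buffers run out; caller drops then */
  return vlib_buffer_clone (vm, bi0, clones_by_thread[thread_index],
                            n_buckets, 128);
}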
vlib_put_next_frame (vm, node, next_index, n_left_to_next); diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index ee757505..c74a097e 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -1771,7 +1771,7 @@ set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t * a) { vnet_main_t *vm = vnet_get_main (); - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); if (a->flags & ETHERNET_ARP_ARGS_REMOVE) vnet_arp_unset_ip4_over_ethernet_internal (vm, a); diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index 9894e3c8..335e3f9f 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -362,7 +362,7 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, u32 next_index = VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT; u32 i, next_node_index, bvi_flag, sw_if_index; u32 n_pkts = 0, n_bytes = 0; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; vnet_main_t *vnm = vnet_get_main (); vnet_interface_main_t *im = &vnm->interface_main; vlib_node_main_t *nm = &vm->node_main; @@ -420,8 +420,9 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, /* increment TX interface stat */ vlib_increment_combined_counter (im->combined_sw_if_counters + - VNET_INTERFACE_COUNTER_TX, cpu_index, - sw_if_index, n_pkts, n_bytes); + VNET_INTERFACE_COUNTER_TX, + thread_index, sw_if_index, n_pkts, + n_bytes); } return n_left_from; diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index b699e381..f7787ed2 100755 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -291,7 +291,7 @@ ethernet_input_inline (vlib_main_t * vm, vlib_node_runtime_t *error_node; u32 n_left_from, next_index, *from, *to_next; u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 cached_sw_if_index = ~0; u32 cached_is_l2 = 0; /* shut up gcc */ vnet_hw_interface_t *hi = NULL; /* used for main interface only */ @@ -510,7 +510,7 @@ ethernet_input_inline (vlib_main_t * vm, interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, new_sw_if_index0, 1, len0); if (new_sw_if_index1 != old_sw_if_index1 @@ -519,7 +519,7 @@ ethernet_input_inline (vlib_main_t * vm, interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, new_sw_if_index1, 1, len1); @@ -530,7 +530,7 @@ ethernet_input_inline (vlib_main_t * vm, vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = stats_n_bytes = 0; @@ -696,13 +696,13 @@ ethernet_input_inline (vlib_main_t * vm, vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, new_sw_if_index0, 1, len0); + thread_index, new_sw_if_index0, 1, len0); if (stats_n_packets > 0) { vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = stats_n_bytes = 0; } @@ -734,7 +734,7 @@ ethernet_input_inline (vlib_main_t * vm, vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); 
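/* [editorial sketch] ethernet_input above batches rx stats: packets for the
   same sw_if_index accumulate in stats_n_packets/stats_n_bytes and are only
   flushed to the per-thread combined counter when the interface changes or
   the loop ends. The flush step alone; the helper name is hypothetical. */
static inline void
flush_rx_stats (vnet_main_t * vnm, u32 thread_index, u32 stats_sw_if_index,
                u32 * stats_n_packets, u32 * stats_n_bytes)
{
  if (*stats_n_packets == 0)
    return;
  vlib_increment_combined_counter
    (vnm->interface_main.combined_sw_if_counters +
     VNET_INTERFACE_COUNTER_RX, thread_index, stats_sw_if_index,
     *stats_n_packets, *stats_n_bytes);
  *stats_n_packets = *stats_n_bytes = 0;
}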
node->runtime_data[0] = stats_sw_if_index; } diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c index 2683586e..acf15f24 100644 --- a/src/vnet/gre/node.c +++ b/src/vnet/gre/node.c @@ -75,7 +75,7 @@ gre_input (vlib_main_t * vm, u64 cached_tunnel_key6[4]; u32 cached_tunnel_sw_if_index = 0, tunnel_sw_if_index = 0; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); u32 len; vnet_interface_main_t *im = &gm->vnet_main->interface_main; @@ -257,7 +257,7 @@ gre_input (vlib_main_t * vm, len = vlib_buffer_length_in_chain (vm, b0); vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, tunnel_sw_if_index, 1 /* packets */, len /* bytes */); @@ -324,7 +324,7 @@ drop0: len = vlib_buffer_length_in_chain (vm, b1); vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, tunnel_sw_if_index, 1 /* packets */, len /* bytes */); @@ -502,7 +502,7 @@ drop1: len = vlib_buffer_length_in_chain (vm, b0); vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, tunnel_sw_if_index, 1 /* packets */, len /* bytes */); diff --git a/src/vnet/interface.h b/src/vnet/interface.h index a1ea2d61..08f08b10 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -468,7 +468,7 @@ typedef struct vnet_hw_interface_t u32 input_node_index; /* input node cpu index by queue */ - u32 *input_node_cpu_index_by_queue; + u32 *input_node_thread_index_by_queue; } vnet_hw_interface_t; diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c index 03f2cdca..663dc309 100644 --- a/src/vnet/interface_output.c +++ b/src/vnet/interface_output.c @@ -196,7 +196,7 @@ slow_path (vlib_main_t * vm, */ static_always_inline void incr_output_stats (vnet_main_t * vnm, - u32 cpu_index, + u32 thread_index, u32 length, u32 sw_if_index, u32 * last_sw_if_index, u32 * n_packets, u32 * n_bytes) @@ -216,7 +216,7 @@ incr_output_stats (vnet_main_t * vnm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, + thread_index, *last_sw_if_index, *n_packets, *n_bytes); } @@ -240,7 +240,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm, u32 n_left_to_tx, *from, *from_end, *to_tx; u32 n_bytes, n_buffers, n_packets; u32 last_sw_if_index; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; n_buffers = frame->n_vectors; @@ -266,7 +266,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm, cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, VNET_INTERFACE_COUNTER_TX_ERROR); - vlib_increment_simple_counter (cm, cpu_index, + vlib_increment_simple_counter (cm, thread_index, rt->sw_if_index, n_buffers); return vlib_error_drop_buffers (vm, node, from, /* buffer stride */ 1, @@ -341,18 +341,18 @@ vnet_interface_output_node_flatten (vlib_main_t * vm, from += 1; to_tx += n_buffers; n_left_to_tx -= n_buffers; - incr_output_stats (vnm, cpu_index, n_slow_bytes, + incr_output_stats (vnm, thread_index, n_slow_bytes, vnet_buffer (b)->sw_if_index[VLIB_TX], &last_sw_if_index, &n_packets, &n_bytes); } } else { - incr_output_stats (vnm, cpu_index, + incr_output_stats (vnm, thread_index, vlib_buffer_length_in_chain (vm, b0), vnet_buffer (b0)->sw_if_index[VLIB_TX], &last_sw_if_index, &n_packets, &n_bytes); - incr_output_stats (vnm, cpu_index, + incr_output_stats (vnm, thread_index, vlib_buffer_length_in_chain (vm, b0), vnet_buffer (b1)->sw_if_index[VLIB_TX], 
&last_sw_if_index, &n_packets, &n_bytes); @@ -396,7 +396,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm, to_tx += n_buffers; n_left_to_tx -= n_buffers; } - incr_output_stats (vnm, cpu_index, + incr_output_stats (vnm, thread_index, vlib_buffer_length_in_chain (vm, b0), vnet_buffer (b0)->sw_if_index[VLIB_TX], &last_sw_if_index, &n_packets, &n_bytes); @@ -408,7 +408,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm, } /* Final update of interface stats. */ - incr_output_stats (vnm, cpu_index, 0, ~0, /* ~0 will flush stats */ + incr_output_stats (vnm, thread_index, 0, ~0, /* ~0 will flush stats */ &last_sw_if_index, &n_packets, &n_bytes); return n_buffers; @@ -428,7 +428,7 @@ vnet_interface_output_node (vlib_main_t * vm, u32 n_left_to_tx, *from, *from_end, *to_tx; u32 n_bytes, n_buffers, n_packets; u32 n_bytes_b0, n_bytes_b1, n_bytes_b2, n_bytes_b3; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; vnet_interface_main_t *im = &vnm->interface_main; u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX; u32 current_config_index = ~0; @@ -458,7 +458,7 @@ vnet_interface_output_node (vlib_main_t * vm, cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, VNET_INTERFACE_COUNTER_TX_ERROR); - vlib_increment_simple_counter (cm, cpu_index, + vlib_increment_simple_counter (cm, thread_index, rt->sw_if_index, n_buffers); return vlib_error_drop_buffers (vm, node, from, @@ -558,7 +558,7 @@ vnet_interface_output_node (vlib_main_t * vm, { vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, tx_swif0, 1, + thread_index, tx_swif0, 1, n_bytes_b0); } @@ -567,7 +567,7 @@ vnet_interface_output_node (vlib_main_t * vm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, tx_swif1, 1, + thread_index, tx_swif1, 1, n_bytes_b1); } @@ -576,7 +576,7 @@ vnet_interface_output_node (vlib_main_t * vm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, tx_swif2, 1, + thread_index, tx_swif2, 1, n_bytes_b2); } if (PREDICT_FALSE (tx_swif3 != rt->sw_if_index)) @@ -584,7 +584,7 @@ vnet_interface_output_node (vlib_main_t * vm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, tx_swif3, 1, + thread_index, tx_swif3, 1, n_bytes_b3); } } @@ -623,7 +623,7 @@ vnet_interface_output_node (vlib_main_t * vm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, tx_swif0, 1, + thread_index, tx_swif0, 1, n_bytes_b0); } } @@ -634,7 +634,7 @@ vnet_interface_output_node (vlib_main_t * vm, /* Update main interface stats. 
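/* [editorial note] interface_output.c drains its per-interface tx
   accumulator with one twist visible above: the final call passes
   sw_if_index = ~0, a value no packet can carry, so the "interface changed"
   branch inside incr_output_stats always fires and the remaining counts are
   pushed out. A hypothetical end-of-frame wrapper around the call: */
static inline void
flush_output_stats (vnet_main_t * vnm, u32 * last_sw_if_index,
                    u32 * n_packets, u32 * n_bytes)
{
  /* ~0 can never equal a real sw_if_index, so the accumulator drains */
  incr_output_stats (vnm, vlib_get_thread_index (), 0 /* length */,
                     ~0 /* sw_if_index */, last_sw_if_index,
                     n_packets, n_bytes);
}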
*/ vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, + thread_index, rt->sw_if_index, n_packets, n_bytes); return n_buffers; } @@ -893,7 +893,7 @@ process_drop_punt (vlib_main_t * vm, u32 current_sw_if_index, n_errors_current_sw_if_index; u64 current_counter; vlib_simple_counter_main_t *cm; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; static vlib_error_t memory[VNET_ERROR_N_DISPOSITION]; static char memory_init[VNET_ERROR_N_DISPOSITION]; @@ -965,19 +965,19 @@ process_drop_punt (vlib_main_t * vm, current_counter -= 2; n_errors_current_sw_if_index -= 2; - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1); /* Increment super-interface drop/punt counters for sub-interfaces. */ sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0); vlib_increment_simple_counter - (cm, cpu_index, sw_if0->sup_sw_if_index, + (cm, thread_index, sw_if0->sup_sw_if_index, sw_if0->sup_sw_if_index != sw_if_index0); sw_if1 = vnet_get_sw_interface (vnm, sw_if_index1); vlib_increment_simple_counter - (cm, cpu_index, sw_if1->sup_sw_if_index, + (cm, thread_index, sw_if1->sup_sw_if_index, sw_if1->sup_sw_if_index != sw_if_index1); em->counters[current_counter_index] = current_counter; @@ -1013,11 +1013,12 @@ process_drop_punt (vlib_main_t * vm, sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; /* Increment drop/punt counters. */ - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); /* Increment super-interface drop/punt counters for sub-interfaces. 
*/ sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0); - vlib_increment_simple_counter (cm, cpu_index, sw_if0->sup_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, + sw_if0->sup_sw_if_index, sw_if0->sup_sw_if_index != sw_if_index0); if (PREDICT_FALSE (e0 != current_error)) @@ -1041,12 +1042,12 @@ process_drop_punt (vlib_main_t * vm, { vnet_sw_interface_t *si; - vlib_increment_simple_counter (cm, cpu_index, current_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, current_sw_if_index, n_errors_current_sw_if_index); si = vnet_get_sw_interface (vnm, current_sw_if_index); if (si->sup_sw_if_index != current_sw_if_index) - vlib_increment_simple_counter (cm, cpu_index, si->sup_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, si->sup_sw_if_index, n_errors_current_sw_if_index); } diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index ee1703e7..fdfe7f63 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -75,7 +75,7 @@ ip4_lookup_inline (vlib_main_t * vm, vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, *from, *to_next; ip_lookup_next_t next; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -292,19 +292,19 @@ ip4_lookup_inline (vlib_main_t * vm, vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lb_index0, 1, + (cm, thread_index, lb_index0, 1, vlib_buffer_length_in_chain (vm, p0) + sizeof (ethernet_header_t)); vlib_increment_combined_counter - (cm, cpu_index, lb_index1, 1, + (cm, thread_index, lb_index1, 1, vlib_buffer_length_in_chain (vm, p1) + sizeof (ethernet_header_t)); vlib_increment_combined_counter - (cm, cpu_index, lb_index2, 1, + (cm, thread_index, lb_index2, 1, vlib_buffer_length_in_chain (vm, p2) + sizeof (ethernet_header_t)); vlib_increment_combined_counter - (cm, cpu_index, lb_index3, 1, + (cm, thread_index, lb_index3, 1, vlib_buffer_length_in_chain (vm, p3) + sizeof (ethernet_header_t)); @@ -392,7 +392,7 @@ ip4_lookup_inline (vlib_main_t * vm, vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); from += 1; to_next += 1; @@ -479,7 +479,7 @@ ip4_load_balance (vlib_main_t * vm, vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; u32 n_left_from, n_left_to_next, *from, *to_next; ip_lookup_next_t next; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -584,9 +584,9 @@ ip4_load_balance (vlib_main_t * vm, vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); vlib_validate_buffer_enqueue_x2 (vm, node, next, to_next, n_left_to_next, @@ -639,7 +639,7 @@ ip4_load_balance (vlib_main_t * vm, vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, 
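/* [editorial note] The drop/punt hunks above use a small branch-free idiom:
   the super-interface counter is incremented by the boolean
   (sup_sw_if_index != sw_if_index), i.e. by 1 only when the packet arrived
   on a sub-interface and by 0 otherwise. Spelled out under a hypothetical
   name: */
static inline void
count_super_if_drop (vlib_simple_counter_main_t * cm, u32 thread_index,
                     vnet_sw_interface_t * sw_if, u32 sw_if_index)
{
  /* adds 0 for a plain interface, 1 for a sub-interface: no branch */
  vlib_increment_simple_counter (cm, thread_index, sw_if->sup_sw_if_index,
                                 sw_if->sup_sw_if_index != sw_if_index);
}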
thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next, @@ -2330,7 +2330,7 @@ ip4_rewrite_inline (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -2379,9 +2379,9 @@ ip4_rewrite_inline (vlib_main_t * vm, if (do_counters) { vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index0); + thread_index, adj_index0); vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index1); + thread_index, adj_index1); } ip0 = vlib_buffer_get_current (p0); @@ -2527,13 +2527,13 @@ ip4_rewrite_inline (vlib_main_t * vm, { vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index1, 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1); } @@ -2618,7 +2618,7 @@ ip4_rewrite_inline (vlib_main_t * vm, if (do_counters) vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index0); + thread_index, adj_index0); /* Guess we are only writing on simple Ethernet header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); @@ -2637,7 +2637,7 @@ ip4_rewrite_inline (vlib_main_t * vm, if (do_counters) vlib_increment_combined_counter (&adjacency_counters, - cpu_index, adj_index0, 1, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); /* Check MTU of outgoing interface. */ diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c index ba200a9f..3b08f4b0 100644 --- a/src/vnet/ip/ip4_input.c +++ b/src/vnet/ip/ip4_input.c @@ -85,7 +85,7 @@ ip4_input_inline (vlib_main_t * vm, vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip4_input_node.index); vlib_simple_counter_main_t *cm; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -178,8 +178,8 @@ ip4_input_inline (vlib_main_t * vm, vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1); /* Punt packets with options or wrong version. */ if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45)) @@ -299,7 +299,7 @@ ip4_input_inline (vlib_main_t * vm, vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); /* Punt packets with options or wrong version. 
*/ if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45)) diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index c120f12c..c2fc4f87 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -74,7 +74,7 @@ ip6_lookup_inline (vlib_main_t * vm, vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, *from, *to_next; ip_lookup_next_t next; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -185,9 +185,9 @@ ip6_lookup_inline (vlib_main_t * vm, vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); from += 2; to_next += 2; @@ -291,7 +291,7 @@ ip6_lookup_inline (vlib_main_t * vm, vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); from += 1; to_next += 1; @@ -703,7 +703,7 @@ ip6_load_balance (vlib_main_t * vm, vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; u32 n_left_from, n_left_to_next, *from, *to_next; ip_lookup_next_t next; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); ip6_main_t *im = &ip6_main; from = vlib_frame_vector_args (frame); @@ -824,9 +824,9 @@ ip6_load_balance (vlib_main_t * vm, vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); vlib_validate_buffer_enqueue_x2 (vm, node, next, to_next, n_left_to_next, @@ -886,7 +886,7 @@ ip6_load_balance (vlib_main_t * vm, } vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next, @@ -1897,7 +1897,7 @@ ip6_rewrite_inline (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -2019,11 +2019,11 @@ ip6_rewrite_inline (vlib_main_t * vm, { vlib_increment_combined_counter (&adjacency_counters, - cpu_index, adj_index0, 1, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); vlib_increment_combined_counter (&adjacency_counters, - cpu_index, adj_index1, 1, + thread_index, adj_index1, 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1); } @@ -2156,7 +2156,7 @@ ip6_rewrite_inline (vlib_main_t * vm, { vlib_increment_combined_counter (&adjacency_counters, - cpu_index, adj_index0, 1, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); } diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c index 20306088..ffdc4727 100644 --- a/src/vnet/ip/ip6_input.c +++ 
b/src/vnet/ip/ip6_input.c @@ -82,7 +82,7 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_input_node.index); vlib_simple_counter_main_t *cm; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -171,8 +171,8 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1); error0 = error1 = IP6_ERROR_NONE; @@ -270,7 +270,7 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); error0 = IP6_ERROR_NONE; /* Version != 6? Drop it. */ diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 5d1fb6f8..2af546df 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -581,7 +581,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, u32 next_index; pending_resolution_t *pr, *mc; - if (os_get_cpu_number ()) + if (vlib_get_thread_index ()) { set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address, 1 /* set new neighbor */ , is_static, @@ -722,7 +722,7 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, uword *p; int rv = 0; - if (os_get_cpu_number ()) + if (vlib_get_thread_index ()) { set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address, 0 /* unset */ , 0, 0); diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h index 50cac806..799003b9 100644 --- a/src/vnet/ipsec/esp.h +++ b/src/vnet/ipsec/esp.h @@ -282,8 +282,8 @@ hmac_calc (ipsec_integ_alg_t alg, u8 * data, int data_len, u8 * signature, u8 use_esn, u32 seq_hi) { esp_main_t *em = &esp_main; - u32 cpu_index = os_get_cpu_number (); - HMAC_CTX *ctx = &(em->per_thread_data[cpu_index].hmac_ctx); + u32 thread_index = vlib_get_thread_index (); + HMAC_CTX *ctx = &(em->per_thread_data[thread_index].hmac_ctx); const EVP_MD *md = NULL; unsigned int len; @@ -292,10 +292,10 @@ hmac_calc (ipsec_integ_alg_t alg, if (PREDICT_FALSE (em->esp_integ_algs[alg].md == 0)) return 0; - if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_integ_alg)) + if (PREDICT_FALSE (alg != em->per_thread_data[thread_index].last_integ_alg)) { md = em->esp_integ_algs[alg].md; - em->per_thread_data[cpu_index].last_integ_alg = alg; + em->per_thread_data[thread_index].last_integ_alg = alg; } HMAC_Init (ctx, key, key_len, md); diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c index 7289b260..925d2b45 100644 --- a/src/vnet/ipsec/esp_decrypt.c +++ b/src/vnet/ipsec/esp_decrypt.c @@ -85,8 +85,8 @@ esp_decrypt_aes_cbc (ipsec_crypto_alg_t alg, u8 * in, u8 * out, size_t in_len, u8 * key, u8 * iv) { esp_main_t *em = &esp_main; - u32 cpu_index = os_get_cpu_number (); - EVP_CIPHER_CTX *ctx = &(em->per_thread_data[cpu_index].decrypt_ctx); + u32 thread_index = vlib_get_thread_index (); + EVP_CIPHER_CTX *ctx = 
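/* [editorial sketch] The esp hunks above give each thread its own OpenSSL
   context plus a last_*_alg field, so a cipher (or digest) is re-selected
   only when the algorithm actually changes between packets on that thread;
   an unchanged alg leaves cipher NULL and the later Init_ex call reuses the
   context as-is. The caching step, using the per_thread_data fields renamed
   above; the helper name is hypothetical. */
static inline void
select_decrypt_cipher (esp_main_t * em, ipsec_crypto_alg_t alg,
                       const EVP_CIPHER ** cipher)
{
  u32 thread_index = vlib_get_thread_index ();
  /* only pay for a cipher switch when the alg differs from last time */
  if (PREDICT_FALSE
      (alg != em->per_thread_data[thread_index].last_decrypt_alg))
    {
      *cipher = em->esp_crypto_algs[alg].type;
      em->per_thread_data[thread_index].last_decrypt_alg = alg;
    }
}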
&(em->per_thread_data[thread_index].decrypt_ctx); const EVP_CIPHER *cipher = NULL; int out_len; @@ -95,10 +95,11 @@ esp_decrypt_aes_cbc (ipsec_crypto_alg_t alg, if (PREDICT_FALSE (em->esp_crypto_algs[alg].type == 0)) return; - if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_decrypt_alg)) + if (PREDICT_FALSE + (alg != em->per_thread_data[thread_index].last_decrypt_alg)) { cipher = em->esp_crypto_algs[alg].type; - em->per_thread_data[cpu_index].last_decrypt_alg = alg; + em->per_thread_data[thread_index].last_decrypt_alg = alg; } EVP_DecryptInit_ex (ctx, cipher, NULL, key, iv); @@ -117,11 +118,11 @@ esp_decrypt_node_fn (vlib_main_t * vm, u32 *recycle = 0; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); ipsec_alloc_empty_buffers (vm, im); - u32 *empty_buffers = im->empty_buffers[cpu_index]; + u32 *empty_buffers = im->empty_buffers[thread_index]; if (PREDICT_FALSE (vec_len (empty_buffers) < n_left_from)) { diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c index 44ae2297..b2bc4e0b 100644 --- a/src/vnet/ipsec/esp_encrypt.c +++ b/src/vnet/ipsec/esp_encrypt.c @@ -88,8 +88,8 @@ esp_encrypt_aes_cbc (ipsec_crypto_alg_t alg, u8 * in, u8 * out, size_t in_len, u8 * key, u8 * iv) { esp_main_t *em = &esp_main; - u32 cpu_index = os_get_cpu_number (); - EVP_CIPHER_CTX *ctx = &(em->per_thread_data[cpu_index].encrypt_ctx); + u32 thread_index = vlib_get_thread_index (); + EVP_CIPHER_CTX *ctx = &(em->per_thread_data[thread_index].encrypt_ctx); const EVP_CIPHER *cipher = NULL; int out_len; @@ -98,10 +98,11 @@ esp_encrypt_aes_cbc (ipsec_crypto_alg_t alg, if (PREDICT_FALSE (em->esp_crypto_algs[alg].type == IPSEC_CRYPTO_ALG_NONE)) return; - if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_encrypt_alg)) + if (PREDICT_FALSE + (alg != em->per_thread_data[thread_index].last_encrypt_alg)) { cipher = em->esp_crypto_algs[alg].type; - em->per_thread_data[cpu_index].last_encrypt_alg = alg; + em->per_thread_data[thread_index].last_encrypt_alg = alg; } EVP_EncryptInit_ex (ctx, cipher, NULL, key, iv); @@ -119,11 +120,11 @@ esp_encrypt_node_fn (vlib_main_t * vm, n_left_from = from_frame->n_vectors; ipsec_main_t *im = &ipsec_main; u32 *recycle = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); ipsec_alloc_empty_buffers (vm, im); - u32 *empty_buffers = im->empty_buffers[cpu_index]; + u32 *empty_buffers = im->empty_buffers[thread_index]; if (PREDICT_FALSE (vec_len (empty_buffers) < n_left_from)) { diff --git a/src/vnet/ipsec/ikev2.c b/src/vnet/ipsec/ikev2.c index 2c1074d8..3f9978a7 100644 --- a/src/vnet/ipsec/ikev2.c +++ b/src/vnet/ipsec/ikev2.c @@ -303,16 +303,16 @@ static void ikev2_delete_sa (ikev2_sa_t * sa) { ikev2_main_t *km = &ikev2_main; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); uword *p; ikev2_sa_free_all_vec (sa); - p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi, sa->rspi); + p = hash_get (km->per_thread_data[thread_index].sa_by_rspi, sa->rspi); if (p) { - hash_unset (km->per_thread_data[cpu_index].sa_by_rspi, sa->rspi); - pool_put (km->per_thread_data[cpu_index].sas, sa); + hash_unset (km->per_thread_data[thread_index].sa_by_rspi, sa->rspi); + pool_put (km->per_thread_data[thread_index].sas, sa); } } @@ -776,29 +776,31 @@ ikev2_initial_contact_cleanup (ikev2_sa_t * sa) ikev2_sa_t *tmp; u32 i, *delete = 0; ikev2_child_sa_t *c; - u32 cpu_index = os_get_cpu_number 
(); + u32 thread_index = vlib_get_thread_index (); if (!sa->initial_contact) return; /* find old IKE SAs with the same authenticated identity */ /* *INDENT-OFF* */ - pool_foreach (tmp, km->per_thread_data[cpu_index].sas, ({ + pool_foreach (tmp, km->per_thread_data[thread_index].sas, ({ if (tmp->i_id.type != sa->i_id.type || vec_len(tmp->i_id.data) != vec_len(sa->i_id.data) || memcmp(sa->i_id.data, tmp->i_id.data, vec_len(sa->i_id.data))) continue; if (sa->rspi != tmp->rspi) - vec_add1(delete, tmp - km->per_thread_data[cpu_index].sas); + vec_add1(delete, tmp - km->per_thread_data[thread_index].sas); })); /* *INDENT-ON* */ for (i = 0; i < vec_len (delete); i++) { - tmp = pool_elt_at_index (km->per_thread_data[cpu_index].sas, delete[i]); - vec_foreach (c, tmp->childs) - ikev2_delete_tunnel_interface (km->vnet_main, tmp, c); + tmp = + pool_elt_at_index (km->per_thread_data[thread_index].sas, delete[i]); + vec_foreach (c, + tmp->childs) ikev2_delete_tunnel_interface (km->vnet_main, + tmp, c); ikev2_delete_sa (tmp); } @@ -1922,10 +1924,10 @@ ikev2_retransmit_sa_init (ike_header_t * ike, { ikev2_main_t *km = &ikev2_main; ikev2_sa_t *sa; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); /* *INDENT-OFF* */ - pool_foreach (sa, km->per_thread_data[cpu_index].sas, ({ + pool_foreach (sa, km->per_thread_data[thread_index].sas, ({ if (sa->ispi == clib_net_to_host_u64(ike->ispi) && sa->iaddr.as_u32 == iaddr.as_u32 && sa->raddr.as_u32 == raddr.as_u32) @@ -2036,7 +2038,7 @@ ikev2_node_fn (vlib_main_t * vm, u32 n_left_from, *from, *to_next; ikev2_next_t next_index; ikev2_main_t *km = &ikev2_main; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -2134,11 +2136,14 @@ ikev2_node_fn (vlib_main_t * vm, if (sa0->state == IKEV2_STATE_SA_INIT) { /* add SA to the pool */ - pool_get (km->per_thread_data[cpu_index].sas, sa0); + pool_get (km->per_thread_data[thread_index].sas, + sa0); clib_memcpy (sa0, &sa, sizeof (*sa0)); - hash_set (km->per_thread_data[cpu_index].sa_by_rspi, + hash_set (km-> + per_thread_data[thread_index].sa_by_rspi, sa0->rspi, - sa0 - km->per_thread_data[cpu_index].sas); + sa0 - + km->per_thread_data[thread_index].sas); } else { @@ -2169,11 +2174,11 @@ ikev2_node_fn (vlib_main_t * vm, if (sa0->state == IKEV2_STATE_SA_INIT) { /* add SA to the pool */ - pool_get (km->per_thread_data[cpu_index].sas, sa0); + pool_get (km->per_thread_data[thread_index].sas, sa0); clib_memcpy (sa0, &sa, sizeof (*sa0)); - hash_set (km->per_thread_data[cpu_index].sa_by_rspi, + hash_set (km->per_thread_data[thread_index].sa_by_rspi, sa0->rspi, - sa0 - km->per_thread_data[cpu_index].sas); + sa0 - km->per_thread_data[thread_index].sas); } else { @@ -2184,12 +2189,13 @@ ikev2_node_fn (vlib_main_t * vm, else if (ike0->exchange == IKEV2_EXCHANGE_IKE_AUTH) { uword *p; - p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi, + p = hash_get (km->per_thread_data[thread_index].sa_by_rspi, clib_net_to_host_u64 (ike0->rspi)); if (p) { - sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas, - p[0]); + sa0 = + pool_elt_at_index (km->per_thread_data[thread_index].sas, + p[0]); r = ikev2_retransmit_resp (sa0, ike0); if (r == 1) @@ -2240,12 +2246,13 @@ ikev2_node_fn (vlib_main_t * vm, else if (ike0->exchange == IKEV2_EXCHANGE_INFORMATIONAL) { uword *p; - p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi, + p = hash_get (km->per_thread_data[thread_index].sa_by_rspi, 
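/* [editorial sketch] ikev2 keeps an SA pool and an rspi hash per thread, so
   the node can create and look up SAs without locking: each responder SPI
   is only ever touched by the thread that owns it. The add path from the
   IKE_SA_INIT branch above, wrapped under a hypothetical name: */
static ikev2_sa_t *
ikev2_sa_add_thread_local (ikev2_main_t * km, ikev2_sa_t * sa)
{
  u32 thread_index = vlib_get_thread_index ();
  ikev2_sa_t *sa0;
  /* per-thread pool: no lock needed, this thread owns it */
  pool_get (km->per_thread_data[thread_index].sas, sa0);
  clib_memcpy (sa0, sa, sizeof (*sa0));
  /* index the new SA by responder SPI in this thread's private hash */
  hash_set (km->per_thread_data[thread_index].sa_by_rspi, sa0->rspi,
            sa0 - km->per_thread_data[thread_index].sas);
  return sa0;
}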
clib_net_to_host_u64 (ike0->rspi)); if (p) { - sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas, - p[0]); + sa0 = + pool_elt_at_index (km->per_thread_data[thread_index].sas, + p[0]); r = ikev2_retransmit_resp (sa0, ike0); if (r == 1) @@ -2305,12 +2312,13 @@ ikev2_node_fn (vlib_main_t * vm, else if (ike0->exchange == IKEV2_EXCHANGE_CREATE_CHILD_SA) { uword *p; - p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi, + p = hash_get (km->per_thread_data[thread_index].sa_by_rspi, clib_net_to_host_u64 (ike0->rspi)); if (p) { - sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas, - p[0]); + sa0 = + pool_elt_at_index (km->per_thread_data[thread_index].sas, + p[0]); r = ikev2_retransmit_resp (sa0, ike0); if (r == 1) diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h index 58f0f145..c884e360 100644 --- a/src/vnet/ipsec/ipsec.h +++ b/src/vnet/ipsec/ipsec.h @@ -324,21 +324,21 @@ int ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index, always_inline void ipsec_alloc_empty_buffers (vlib_main_t * vm, ipsec_main_t * im) { - u32 cpu_index = os_get_cpu_number (); - uword l = vec_len (im->empty_buffers[cpu_index]); + u32 thread_index = vlib_get_thread_index (); + uword l = vec_len (im->empty_buffers[thread_index]); uword n_alloc = 0; if (PREDICT_FALSE (l < VLIB_FRAME_SIZE)) { - if (!im->empty_buffers[cpu_index]) + if (!im->empty_buffers[thread_index]) { - vec_alloc (im->empty_buffers[cpu_index], 2 * VLIB_FRAME_SIZE); + vec_alloc (im->empty_buffers[thread_index], 2 * VLIB_FRAME_SIZE); } - n_alloc = vlib_buffer_alloc (vm, im->empty_buffers[cpu_index] + l, + n_alloc = vlib_buffer_alloc (vm, im->empty_buffers[thread_index] + l, 2 * VLIB_FRAME_SIZE - l); - _vec_len (im->empty_buffers[cpu_index]) = l + n_alloc; + _vec_len (im->empty_buffers[thread_index]) = l + n_alloc; } } diff --git a/src/vnet/ipsec/ipsec_if.c b/src/vnet/ipsec/ipsec_if.c index dc882004..ed124894 100644 --- a/src/vnet/ipsec/ipsec_if.c +++ b/src/vnet/ipsec/ipsec_if.c @@ -99,7 +99,7 @@ static int ipsec_add_del_tunnel_if_rpc_callback (ipsec_add_del_tunnel_args_t * a) { vnet_main_t *vnm = vnet_get_main (); - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); return ipsec_add_del_tunnel_if_internal (vnm, a); } diff --git a/src/vnet/l2/l2_bvi.h b/src/vnet/l2/l2_bvi.h index dd1130a6..e21a1616 100644 --- a/src/vnet/l2/l2_bvi.h +++ b/src/vnet/l2/l2_bvi.h @@ -97,7 +97,7 @@ l2_to_bvi (vlib_main_t * vlib_main, vlib_increment_combined_counter (vnet_main->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - vlib_main->cpu_index, + vlib_main->thread_index, vnet_buffer (b0)->sw_if_index[VLIB_RX], 1, vlib_buffer_length_in_chain (vlib_main, b0)); return TO_BVI_ERR_OK; diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index 041ff38d..e5d6878a 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -117,7 +117,7 @@ typedef enum static_always_inline void classify_and_dispatch (vlib_main_t * vm, vlib_node_runtime_t * node, - u32 cpu_index, + u32 thread_index, l2input_main_t * msm, vlib_buffer_t * b0, u32 * next0) { /* @@ -237,7 +237,7 @@ l2input_node_inline (vlib_main_t * vm, u32 n_left_from, *from, *to_next; l2input_next_t next_index; l2input_main_t *msm = &l2input_main; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; /* number of packets to process */ @@ -350,10 +350,10 @@ l2input_node_inline (vlib_main_t * vm, vlib_node_increment_counter (vm, 
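/* [editorial note] A third recurring shape in this patch: control-plane
   entry points that must run on the main thread either assert it, as
   ipsec_add_del_tunnel_if_rpc_callback does above, or, like the ip6
   neighbor code earlier, detect a worker and re-dispatch via RPC. The
   guard distilled; do_work and rpc_to_main are hypothetical names, the
   RPC shim is not a real VPP call. */
static int
main_thread_only (void (*do_work) (void *), void *arg)
{
  if (vlib_get_thread_index ())
    {
      /* on a worker: hand the request to thread 0 instead of running it */
      rpc_to_main (do_work, arg);       /* hypothetical RPC shim */
      return 0;
    }
  ASSERT (vlib_get_thread_index () == 0);
  do_work (arg);
  return 1;
}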
l2input_node.index, L2INPUT_ERROR_L2INPUT, 4); - classify_and_dispatch (vm, node, cpu_index, msm, b0, &next0); - classify_and_dispatch (vm, node, cpu_index, msm, b1, &next1); - classify_and_dispatch (vm, node, cpu_index, msm, b2, &next2); - classify_and_dispatch (vm, node, cpu_index, msm, b3, &next3); + classify_and_dispatch (vm, node, thread_index, msm, b0, &next0); + classify_and_dispatch (vm, node, thread_index, msm, b1, &next1); + classify_and_dispatch (vm, node, thread_index, msm, b2, &next2); + classify_and_dispatch (vm, node, thread_index, msm, b3, &next3); /* verify speculative enqueues, maybe switch current next frame */ /* if next0==next1==next_index then nothing special needs to be done */ @@ -393,7 +393,7 @@ l2input_node_inline (vlib_main_t * vm, vlib_node_increment_counter (vm, l2input_node.index, L2INPUT_ERROR_L2INPUT, 1); - classify_and_dispatch (vm, node, cpu_index, msm, b0, &next0); + classify_and_dispatch (vm, node, thread_index, msm, b0, &next0); /* verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c index 00f22571..e17b2a16 100644 --- a/src/vnet/l2/l2_output.c +++ b/src/vnet/l2/l2_output.c @@ -643,11 +643,11 @@ l2output_create_output_node_mapping (vlib_main_t * vlib_main, vnet_main_t * vnet hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index); - uword cpu_number; + uword thread_index; - cpu_number = os_get_cpu_number (); + thread_index = vlib_get_thread_index (); - if (cpu_number) + if (thread_index) { u32 oldflags; diff --git a/src/vnet/l2tp/decap.c b/src/vnet/l2tp/decap.c index e8986935..46104129 100644 --- a/src/vnet/l2tp/decap.c +++ b/src/vnet/l2tp/decap.c @@ -149,7 +149,7 @@ last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) /* per-mapping byte stats include the ethernet header */ vlib_increment_combined_counter (&lm->counter_main, - os_get_cpu_number (), + vlib_get_thread_index (), counter_index, 1 /* packet_increment */ , vlib_buffer_length_in_chain (vm, b) + sizeof (ethernet_header_t)); diff --git a/src/vnet/l2tp/encap.c b/src/vnet/l2tp/encap.c index ed7a9580..dcdfde4b 100644 --- a/src/vnet/l2tp/encap.c +++ b/src/vnet/l2tp/encap.c @@ -124,7 +124,7 @@ last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) /* per-mapping byte stats include the ethernet header */ vlib_increment_combined_counter (&lm->counter_main, - os_get_cpu_number (), + vlib_get_thread_index (), counter_index, 1 /* packet_increment */ , vlib_buffer_length_in_chain (vm, b)); diff --git a/src/vnet/l2tp/l2tp.c b/src/vnet/l2tp/l2tp.c index cb94d7e7..3dedc447 100644 --- a/src/vnet/l2tp/l2tp.c +++ b/src/vnet/l2tp/l2tp.c @@ -157,7 +157,7 @@ test_counters_command_fn (vlib_main_t * vm, u32 session_index; u32 counter_index; u32 nincr = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); /* *INDENT-OFF* */ pool_foreach (session, lm->sessions, @@ -167,11 +167,11 @@ test_counters_command_fn (vlib_main_t * vm, session_index_to_counter_index (session_index, SESSION_COUNTER_USER_TO_NETWORK); vlib_increment_combined_counter (&lm->counter_main, - cpu_index, + thread_index, counter_index, 1/*pkt*/, 1111 /*bytes*/); vlib_increment_combined_counter (&lm->counter_main, - cpu_index, + thread_index, counter_index+1, 1/*pkt*/, 2222 /*bytes*/); nincr++; diff --git a/src/vnet/lisp-gpe/decap.c b/src/vnet/lisp-gpe/decap.c index d887a95f..68769710 100644 --- a/src/vnet/lisp-gpe/decap.c +++ b/src/vnet/lisp-gpe/decap.c @@ -103,7 
+103,7 @@ next_index_to_iface (lisp_gpe_main_t * lgm, u32 next_index) } static_always_inline void -incr_decap_stats (vnet_main_t * vnm, u32 cpu_index, u32 length, +incr_decap_stats (vnet_main_t * vnm, u32 thread_index, u32 length, u32 sw_if_index, u32 * last_sw_if_index, u32 * n_packets, u32 * n_bytes) { @@ -122,7 +122,7 @@ incr_decap_stats (vnet_main_t * vnm, u32 cpu_index, u32 length, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, *last_sw_if_index, + thread_index, *last_sw_if_index, *n_packets, *n_bytes); } *last_sw_if_index = sw_if_index; @@ -150,11 +150,11 @@ static uword lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, u8 is_v4) { - u32 n_left_from, next_index, *from, *to_next, cpu_index; + u32 n_left_from, next_index, *from, *to_next, thread_index; u32 n_bytes = 0, n_packets = 0, last_sw_if_index = ~0, drops = 0; lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main (); - cpu_index = os_get_cpu_number (); + thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -267,7 +267,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (si0) { - incr_decap_stats (lgm->vnet_main, cpu_index, + incr_decap_stats (lgm->vnet_main, thread_index, vlib_buffer_length_in_chain (vm, b0), si0[0], &last_sw_if_index, &n_packets, &n_bytes); vnet_buffer (b0)->sw_if_index[VLIB_RX] = si0[0]; @@ -282,7 +282,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (si1) { - incr_decap_stats (lgm->vnet_main, cpu_index, + incr_decap_stats (lgm->vnet_main, thread_index, vlib_buffer_length_in_chain (vm, b1), si1[0], &last_sw_if_index, &n_packets, &n_bytes); vnet_buffer (b1)->sw_if_index[VLIB_RX] = si1[0]; @@ -397,7 +397,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (si0) { - incr_decap_stats (lgm->vnet_main, cpu_index, + incr_decap_stats (lgm->vnet_main, thread_index, vlib_buffer_length_in_chain (vm, b0), si0[0], &last_sw_if_index, &n_packets, &n_bytes); vnet_buffer (b0)->sw_if_index[VLIB_RX] = si0[0]; @@ -430,7 +430,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } /* flush iface stats */ - incr_decap_stats (lgm->vnet_main, cpu_index, 0, ~0, &last_sw_if_index, + incr_decap_stats (lgm->vnet_main, thread_index, 0, ~0, &last_sw_if_index, &n_packets, &n_bytes); vlib_node_increment_counter (vm, lisp_gpe_ip4_input_node.index, LISP_GPE_ERROR_NO_TUNNEL, drops); diff --git a/src/vnet/lldp/lldp_input.c b/src/vnet/lldp/lldp_input.c index 762743d0..e88f6fdb 100644 --- a/src/vnet/lldp/lldp_input.c +++ b/src/vnet/lldp/lldp_input.c @@ -35,7 +35,7 @@ typedef struct static void lldp_rpc_update_peer_cb (const lldp_intf_update_t * a) { - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); lldp_intf_t *n = lldp_get_intf (&lldp_main, a->hw_if_index); if (!n) diff --git a/src/vnet/map/ip4_map.c b/src/vnet/map/ip4_map.c index 1a20d704..e39b6f14 100644 --- a/src/vnet/map/ip4_map.c +++ b/src/vnet/map/ip4_map.c @@ -248,7 +248,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next_index = node->cached_next_index; map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -377,7 +377,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip40) ? 
IP4_MAP_NEXT_IP6_REWRITE : next0; vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, map_domain_index0, 1, clib_net_to_host_u16 (ip6h0->payload_length) + @@ -409,7 +409,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip41) ? IP4_MAP_NEXT_IP6_REWRITE : next1; vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, map_domain_index1, 1, clib_net_to_host_u16 (ip6h1->payload_length) + @@ -520,7 +520,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip40) ? IP4_MAP_NEXT_IP6_REWRITE : next0; vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, map_domain_index0, 1, clib_net_to_host_u16 (ip6h0->payload_length) + @@ -564,7 +564,7 @@ ip4_map_reass (vlib_main_t * vm, next_index = node->cached_next_index; map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 *fragments_to_drop = NULL; u32 *fragments_to_loopback = NULL; @@ -694,8 +694,8 @@ ip4_map_reass (vlib_main_t * vm, { if (error0 == MAP_ERROR_NONE) vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, map_domain_index0, - 1, + thread_index, + map_domain_index0, 1, clib_net_to_host_u16 (ip60->payload_length) + 40); next0 = diff --git a/src/vnet/map/ip4_map_t.c b/src/vnet/map/ip4_map_t.c index b63d76bf..5f2bcbf9 100644 --- a/src/vnet/map/ip4_map_t.c +++ b/src/vnet/map/ip4_map_t.c @@ -477,7 +477,7 @@ ip4_map_t_icmp (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -520,7 +520,7 @@ ip4_map_t_icmp (vlib_main_t * vm, if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, vnet_buffer (p0)->map_t. map_domain_index, 1, len0); } @@ -1051,7 +1051,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) n_left_from = frame->n_vectors; next_index = node->cached_next_index; vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -1158,7 +1158,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, vnet_buffer (p0)->map_t. map_domain_index, 1, clib_net_to_host_u16 (ip40-> @@ -1169,7 +1169,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, vnet_buffer (p1)->map_t. map_domain_index, 1, clib_net_to_host_u16 (ip41-> @@ -1252,7 +1252,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, vnet_buffer (p0)->map_t. 
map_domain_index, 1, clib_net_to_host_u16 (ip40-> diff --git a/src/vnet/map/ip6_map.c b/src/vnet/map/ip6_map.c index f7eb768f..63ada962 100644 --- a/src/vnet/map/ip6_map.c +++ b/src/vnet/map/ip6_map.c @@ -172,7 +172,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vlib_node_get_runtime (vm, ip6_map_node.index); map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -319,7 +319,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) IP6_MAP_NEXT_IP4_REWRITE : next0; } vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, map_domain_index0, 1, clib_net_to_host_u16 (ip40->length)); @@ -352,7 +352,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) IP6_MAP_NEXT_IP4_REWRITE : next1; } vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, map_domain_index1, 1, clib_net_to_host_u16 (ip41->length)); @@ -505,7 +505,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) IP6_MAP_NEXT_IP4_REWRITE : next0; } vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, map_domain_index0, 1, clib_net_to_host_u16 (ip40->length)); @@ -820,7 +820,7 @@ ip6_map_ip4_reass (vlib_main_t * vm, vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index); map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 *fragments_to_drop = NULL; u32 *fragments_to_loopback = NULL; @@ -958,8 +958,8 @@ ip6_map_ip4_reass (vlib_main_t * vm, { if (error0 == MAP_ERROR_NONE) vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, map_domain_index0, - 1, + thread_index, + map_domain_index0, 1, clib_net_to_host_u16 (ip40->length)); next0 = @@ -1015,7 +1015,7 @@ ip6_map_icmp_relay (vlib_main_t * vm, vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index); map_main_t *mm = &map_main; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u16 *fragment_ids, *fid; from = vlib_frame_vector_args (frame); @@ -1143,7 +1143,8 @@ ip6_map_icmp_relay (vlib_main_t * vm, ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20); new_icmp40->checksum = ~ip_csum_fold (sum); - vlib_increment_simple_counter (&mm->icmp_relayed, cpu_index, 0, 1); + vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0, + 1); error: if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) diff --git a/src/vnet/map/ip6_map_t.c b/src/vnet/map/ip6_map_t.c index eb3996c2..99151678 100644 --- a/src/vnet/map/ip6_map_t.c +++ b/src/vnet/map/ip6_map_t.c @@ -448,7 +448,7 @@ ip6_map_t_icmp (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -493,7 +493,7 @@ ip6_map_t_icmp (vlib_main_t * vm, if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, vnet_buffer (p0)-> map_t.map_domain_index, 1, len0); @@ -1051,7 +1051,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, 
vlib_frame_t * frame) vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_map_t_node.index); vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -1218,7 +1218,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, vnet_buffer (p0)-> map_t.map_domain_index, 1, clib_net_to_host_u16 @@ -1229,7 +1229,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error1 == MAP_ERROR_NONE && next1 != IP6_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, vnet_buffer (p1)-> map_t.map_domain_index, 1, clib_net_to_host_u16 @@ -1403,7 +1403,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, vnet_buffer (p0)-> map_t.map_domain_index, 1, clib_net_to_host_u16 diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c index 893c4511..1b9bdd05 100644 --- a/src/vnet/mpls/mpls_input.c +++ b/src/vnet/mpls/mpls_input.c @@ -76,7 +76,7 @@ mpls_input_inline (vlib_main_t * vm, u32 n_left_from, next_index, * from, * to_next; mpls_input_runtime_t * rt; mpls_main_t * mm; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); vlib_simple_counter_main_t * cm; vnet_main_t * vnm = vnet_get_main(); @@ -151,7 +151,7 @@ mpls_input_inline (vlib_main_t * vm, next0 = MPLS_INPUT_NEXT_LOOKUP; vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); } if (PREDICT_FALSE(h1[3] == 0)) @@ -164,7 +164,7 @@ mpls_input_inline (vlib_main_t * vm, next1 = MPLS_INPUT_NEXT_LOOKUP; vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index1, &next1, b1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1); } if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -215,7 +215,7 @@ mpls_input_inline (vlib_main_t * vm, { next0 = MPLS_INPUT_NEXT_LOOKUP; vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); } if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c index 475bb204..ace6a70f 100644 --- a/src/vnet/mpls/mpls_lookup.c +++ b/src/vnet/mpls/mpls_lookup.c @@ -67,7 +67,7 @@ mpls_lookup (vlib_main_t * vm, vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, next_index, * from, * to_next; mpls_main_t * mm = &mpls_main; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -220,16 +220,16 @@ mpls_lookup (vlib_main_t * vm, vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, 
lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b1)); vlib_increment_combined_counter - (cm, cpu_index, lbi2, 1, + (cm, thread_index, lbi2, 1, vlib_buffer_length_in_chain (vm, b2)); vlib_increment_combined_counter - (cm, cpu_index, lbi3, 1, + (cm, thread_index, lbi3, 1, vlib_buffer_length_in_chain (vm, b3)); /* @@ -351,7 +351,7 @@ mpls_lookup (vlib_main_t * vm, vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); /* @@ -440,7 +440,7 @@ mpls_load_balance (vlib_main_t * vm, { vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; u32 n_left_from, n_left_to_next, * from, * to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); u32 next; from = vlib_frame_vector_args (frame); @@ -536,10 +536,10 @@ mpls_load_balance (vlib_main_t * vm, vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) @@ -597,7 +597,7 @@ mpls_load_balance (vlib_main_t * vm, vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_validate_buffer_enqueue_x1 (vm, node, next, diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c index 08018fd1..d90dec21 100644 --- a/src/vnet/mpls/mpls_output.c +++ b/src/vnet/mpls/mpls_output.c @@ -64,12 +64,12 @@ mpls_output_inline (vlib_main_t * vm, vlib_frame_t * from_frame, int is_midchain) { - u32 n_left_from, next_index, * from, * to_next, cpu_index; + u32 n_left_from, next_index, * from, * to_next, thread_index; vlib_node_runtime_t * error_node; u32 n_left_to_next; mpls_main_t *mm; - cpu_index = os_get_cpu_number(); + thread_index = vlib_get_thread_index(); error_node = vlib_node_get_runtime (vm, mpls_output_node.index); from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -137,13 +137,13 @@ mpls_output_inline (vlib_main_t * vm, /* Bump the adj counters for packet and bytes */ vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index1, 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1); @@ -245,7 +245,7 @@ mpls_output_inline (vlib_main_t * vm, vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c index 2649798b..597ae060 100644 --- a/src/vnet/pg/input.c +++ b/src/vnet/pg/input.c @@ -893,7 +893,7 @@ pg_generate_set_lengths (pg_main_t * pg, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number (), + vlib_get_thread_index (), si->sw_if_index, n_buffers, length_sum); } @@ -1266,7 +1266,7 @@ pg_stream_fill_helper (pg_main_t * pg, l += vlib_buffer_index_length_in_chain (vm, buffers[i]); vlib_increment_combined_counter 
(im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number (), + vlib_get_thread_index (), si->sw_if_index, n_alloc, l); s->current_replay_packet_index += n_alloc; s->current_replay_packet_index %= diff --git a/src/vnet/replication.c b/src/vnet/replication.c index 86d922b5..233a8c2f 100644 --- a/src/vnet/replication.c +++ b/src/vnet/replication.c @@ -31,16 +31,16 @@ replication_prep (vlib_main_t * vm, { replication_main_t *rm = &replication_main; replication_context_t *ctx; - uword cpu_number = vm->cpu_index; + uword thread_index = vm->thread_index; ip4_header_t *ip; u32 ctx_id; /* Allocate a context, reserve context 0 */ - if (PREDICT_FALSE (rm->contexts[cpu_number] == 0)) - pool_get_aligned (rm->contexts[cpu_number], ctx, CLIB_CACHE_LINE_BYTES); + if (PREDICT_FALSE (rm->contexts[thread_index] == 0)) + pool_get_aligned (rm->contexts[thread_index], ctx, CLIB_CACHE_LINE_BYTES); - pool_get_aligned (rm->contexts[cpu_number], ctx, CLIB_CACHE_LINE_BYTES); - ctx_id = ctx - rm->contexts[cpu_number]; + pool_get_aligned (rm->contexts[thread_index], ctx, CLIB_CACHE_LINE_BYTES); + ctx_id = ctx - rm->contexts[thread_index]; /* Save state from vlib buffer */ ctx->saved_free_list_index = b0->free_list_index; @@ -94,11 +94,11 @@ replication_recycle (vlib_main_t * vm, vlib_buffer_t * b0, u32 is_last) { replication_main_t *rm = &replication_main; replication_context_t *ctx; - uword cpu_number = vm->cpu_index; + uword thread_index = vm->thread_index; ip4_header_t *ip; /* Get access to the replication context */ - ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->recycle_count); + ctx = pool_elt_at_index (rm->contexts[thread_index], b0->recycle_count); /* Restore vnet buffer state */ clib_memcpy (vnet_buffer (b0), ctx->vnet_buffer, @@ -133,7 +133,7 @@ replication_recycle (vlib_main_t * vm, vlib_buffer_t * b0, u32 is_last) b0->flags &= ~VLIB_BUFFER_RECYCLE; /* Free context back to its pool */ - pool_put (rm->contexts[cpu_number], ctx); + pool_put (rm->contexts[thread_index], ctx); } return ctx; @@ -160,7 +160,7 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl) replication_main_t *rm = &replication_main; replication_context_t *ctx; u32 feature_node_index = 0; - uword cpu_number = vm->cpu_index; + uword thread_index = vm->thread_index; /* * All buffers in the list are destined to the same recycle node. @@ -172,7 +172,7 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl) { bi0 = fl->buffers[0]; b0 = vlib_get_buffer (vm, bi0); - ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->recycle_count); + ctx = pool_elt_at_index (rm->contexts[thread_index], b0->recycle_count); feature_node_index = ctx->recycle_node_index; } diff --git a/src/vnet/replication.h b/src/vnet/replication.h index 5dc554c9..ce4b3ff1 100644 --- a/src/vnet/replication.h +++ b/src/vnet/replication.h @@ -100,7 +100,7 @@ replication_get_ctx (vlib_buffer_t * b0) replication_main_t *rm = &replication_main; return replication_is_recycled (b0) ? 
- pool_elt_at_index (rm->contexts[os_get_cpu_number ()], + pool_elt_at_index (rm->contexts[vlib_get_thread_index ()], b0->recycle_count) : 0; } diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c index b86e87d9..dd211c51 100644 --- a/src/vnet/session/node.c +++ b/src/vnet/session/node.c @@ -311,7 +311,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, unix_shared_memory_queue_t *q; application_t *app; int n_tx_packets = 0; - u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; int i, rv; f64 now = vlib_time_now (vm); diff --git a/src/vnet/sr/sr_localsid.c b/src/vnet/sr/sr_localsid.c index 2e3d56de..6d72a506 100755 --- a/src/vnet/sr/sr_localsid.c +++ b/src/vnet/sr/sr_localsid.c @@ -887,7 +887,7 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; next_index = node->cached_next_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -974,26 +974,26 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_increment_combined_counter (((next0 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b0)); + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter (((next1 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b1)); + &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b1)); vlib_increment_combined_counter (((next2 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b2)); + &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b2)); vlib_increment_combined_counter (((next3 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b3)); + &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b3)); vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, n_left_to_next, bi0, bi1, bi2, bi3, @@ -1062,8 +1062,8 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_increment_combined_counter (((next0 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b0)); + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); @@ -1103,7 +1103,7 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; next_index = node->cached_next_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -1205,26 +1205,26 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_increment_combined_counter (((next0 == SR_LOCALSID_NEXT_ERROR) ? 
&(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b0)); + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter (((next1 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b1)); + &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b1)); vlib_increment_combined_counter (((next2 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b2)); + &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b2)); vlib_increment_combined_counter (((next3 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b3)); + &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b3)); vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, n_left_to_next, bi0, bi1, bi2, bi3, @@ -1295,8 +1295,8 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_increment_combined_counter (((next0 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b0)); + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c index e3705060..c1567aa0 100644 --- a/src/vnet/tcp/builtin_client.c +++ b/src/vnet/tcp/builtin_client.c @@ -174,7 +174,7 @@ tclient_thread_fn (void *arg) pthread_sigmask (SIG_SETMASK, &s, 0); } - clib_per_cpu_mheaps[os_get_cpu_number ()] = clib_per_cpu_mheaps[0]; + clib_per_cpu_mheaps[vlib_get_thread_index ()] = clib_per_cpu_mheaps[0]; while (1) { diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index b2a371e2..b6c34828 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -646,10 +646,10 @@ const static transport_proto_vft_t tcp6_proto = { void tcp_timer_keep_handler (u32 conn_index) { - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; - tc = tcp_connection_get (conn_index, cpu_index); + tc = tcp_connection_get (conn_index, thread_index); tc->timers[TCP_TIMER_KEEP] = TCP_TIMER_HANDLE_INVALID; tcp_connection_close (tc); @@ -675,10 +675,10 @@ tcp_timer_establish_handler (u32 conn_index) void tcp_timer_waitclose_handler (u32 conn_index) { - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; - tc = tcp_connection_get (conn_index, cpu_index); + tc = tcp_connection_get (conn_index, thread_index); tc->timers[TCP_TIMER_WAITCLOSE] = TCP_TIMER_HANDLE_INVALID; /* Session didn't come back with a close(). 
Send FIN either way diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index 0090e15e..eaca672c 100644 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -343,7 +343,7 @@ typedef enum _tcp_dbg_evt } \ else \ { \ - u32 _thread_index = os_get_cpu_number (); \ + u32 _thread_index = vlib_get_thread_index (); \ _tc = tcp_connection_get (_tc_index, _thread_index); \ } \ ELOG_TYPE_DECLARE (_e) = \ diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index a8224dc2..7e9fa47b 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -1142,7 +1142,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, int is_ip4) { u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index, errors = 0; + u32 my_thread_index = vm->thread_index, errors = 0; tcp_main_t *tm = vnet_get_tcp_main (); from = vlib_frame_vector_args (from_frame); @@ -1332,7 +1332,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { tcp_main_t *tm = vnet_get_tcp_main (); u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index, errors = 0; + u32 my_thread_index = vm->thread_index, errors = 0; u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; from = vlib_frame_vector_args (from_frame); @@ -1634,7 +1634,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { tcp_main_t *tm = vnet_get_tcp_main (); u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index, errors = 0; + u32 my_thread_index = vm->thread_index, errors = 0; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -1989,7 +1989,7 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, int is_ip4) { u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; tcp_main_t *tm = vnet_get_tcp_main (); u8 sst = is_ip4 ? 
SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; @@ -2243,7 +2243,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, int is_ip4) { u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; tcp_main_t *tm = vnet_get_tcp_main (); from = vlib_frame_vector_args (from_frame); diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index ea157bd7..e18bfad7 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -387,8 +387,8 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, #define tcp_get_free_buffer_index(tm, bidx) \ do { \ u32 *my_tx_buffers, n_free_buffers; \ - u32 cpu_index = os_get_cpu_number(); \ - my_tx_buffers = tm->tx_buffers[cpu_index]; \ + u32 thread_index = vlib_get_thread_index(); \ + my_tx_buffers = tm->tx_buffers[thread_index]; \ if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0)) \ { \ n_free_buffers = 32; /* TODO config or macro */ \ @@ -396,7 +396,7 @@ do { \ _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list ( \ tm->vlib_main, my_tx_buffers, n_free_buffers, \ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); \ - tm->tx_buffers[cpu_index] = my_tx_buffers; \ + tm->tx_buffers[thread_index] = my_tx_buffers; \ } \ /* buffer shortage */ \ if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0)) \ @@ -408,8 +408,8 @@ do { \ #define tcp_return_buffer(tm) \ do { \ u32 *my_tx_buffers; \ - u32 cpu_index = os_get_cpu_number(); \ - my_tx_buffers = tm->tx_buffers[cpu_index]; \ + u32 thread_index = vlib_get_thread_index(); \ + my_tx_buffers = tm->tx_buffers[thread_index]; \ _vec_len (my_tx_buffers) +=1; \ } while (0) @@ -942,7 +942,7 @@ tcp_send_ack (tcp_connection_t * tc) void tcp_timer_delack_handler (u32 index) { - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; tc = tcp_connection_get (index, thread_index); @@ -1022,7 +1022,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) { tcp_main_t *tm = vnet_get_tcp_main (); vlib_main_t *vm = vlib_get_main (); - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; vlib_buffer_t *b; u32 bi, snd_space, n_bytes; @@ -1152,7 +1152,7 @@ tcp_timer_persist_handler (u32 index) { tcp_main_t *tm = vnet_get_tcp_main (); vlib_main_t *vm = vlib_get_main (); - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; vlib_buffer_t *b; u32 bi, n_bytes; @@ -1313,7 +1313,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_frame_t * from_frame, int is_ip4) { u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -1524,7 +1524,7 @@ tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, u8 is_ip4) { u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c index 4b22109b..810278e6 100644 --- a/src/vnet/udp/udp_input.c +++ b/src/vnet/udp/udp_input.c @@ -70,7 +70,7 @@ udp4_uri_input_node_fn (vlib_main_t * vm, udp4_uri_input_next_t next_index; udp_uri_main_t *um = vnet_get_udp_main (); session_manager_main_t *smm = vnet_get_session_manager_main (); - 
u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; u8 my_enqueue_epoch; u32 *session_indices_to_enqueue; static u32 serial_number; diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index fb1a8bac..0fc62f6c 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -366,7 +366,7 @@ static uword tapcli_rx_iface(vlib_main_t * vm, vlib_increment_combined_counter ( vnet_main.interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number(), ti->sw_if_index, + vlib_get_thread_index(), ti->sw_if_index, 1, n_bytes_in_packet); if (PREDICT_FALSE(n_trace > 0)) { diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c index 2cfcc92f..ac674653 100644 --- a/src/vnet/unix/tuntap.c +++ b/src/vnet/unix/tuntap.c @@ -189,7 +189,7 @@ tuntap_tx (vlib_main_t * vm, /* Update tuntap interface output stats. */ vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - vm->cpu_index, + vm->thread_index, tm->sw_if_index, n_packets, n_bytes); @@ -297,7 +297,7 @@ tuntap_rx (vlib_main_t * vm, vlib_increment_combined_counter (vnet_main.interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number(), + vlib_get_thread_index(), tm->sw_if_index, 1, n_bytes_in_packet); diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c index 22ab4b62..d4fe4231 100644 --- a/src/vnet/vxlan-gpe/decap.c +++ b/src/vnet/vxlan-gpe/decap.c @@ -115,7 +115,7 @@ vxlan_gpe_input (vlib_main_t * vm, vxlan4_gpe_tunnel_key_t last_key4; vxlan6_gpe_tunnel_key_t last_key6; u32 pkts_decapsulated = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; if (is_ip4) @@ -342,7 +342,7 @@ vxlan_gpe_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; stats_sw_if_index = sw_if_index0; @@ -427,7 +427,7 @@ vxlan_gpe_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len1; stats_sw_if_index = sw_if_index1; @@ -588,7 +588,7 @@ vxlan_gpe_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; stats_sw_if_index = sw_if_index0; @@ -615,7 +615,7 @@ vxlan_gpe_input (vlib_main_t * vm, if (stats_n_packets) { vlib_increment_combined_counter ( - im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, cpu_index, + im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); node->runtime_data[0] = stats_sw_if_index; } diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c index 3a486e56..67ed94b4 100644 --- a/src/vnet/vxlan-gpe/encap.c +++ b/src/vnet/vxlan-gpe/encap.c @@ -151,7 +151,7 @@ vxlan_gpe_encap (vlib_main_t * vm, vnet_main_t * vnm = ngm->vnet_main; vnet_interface_main_t * im = &vnm->interface_main; 
u32 pkts_encapsulated = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; from = vlib_frame_vector_args (from_frame); @@ -253,7 +253,7 @@ vxlan_gpe_encap (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_sw_if_index = sw_if_index0; stats_n_packets = 2; stats_n_bytes = len0 + len1; @@ -262,10 +262,10 @@ vxlan_gpe_encap (vlib_main_t * vm, { vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, sw_if_index0, 1, len0); + thread_index, sw_if_index0, 1, len0); vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, sw_if_index1, 1, len1); + thread_index, sw_if_index1, 1, len1); } } @@ -335,7 +335,7 @@ vxlan_gpe_encap (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; stats_sw_if_index = sw_if_index0; @@ -359,7 +359,7 @@ vxlan_gpe_encap (vlib_main_t * vm, if (stats_n_packets) { vlib_increment_combined_counter ( - im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, cpu_index, + im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); node->runtime_data[0] = stats_sw_if_index; } diff --git a/src/vnet/vxlan/decap.c b/src/vnet/vxlan/decap.c index 514b2c99..2acb1f6f 100644 --- a/src/vnet/vxlan/decap.c +++ b/src/vnet/vxlan/decap.c @@ -81,7 +81,7 @@ vxlan_input (vlib_main_t * vm, vxlan4_tunnel_key_t last_key4; vxlan6_tunnel_key_t last_key6; u32 pkts_decapsulated = 0; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; if (is_ip4) @@ -314,7 +314,7 @@ vxlan_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; @@ -468,7 +468,7 @@ vxlan_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len1; @@ -674,7 +674,7 @@ vxlan_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; @@ -711,7 +711,7 @@ vxlan_input (vlib_main_t * vm, { vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); node->runtime_data[0] = stats_sw_if_index; } diff --git a/src/vnet/vxlan/encap.c b/src/vnet/vxlan/encap.c index 5b63064a..4cfbbc23 100644 --- a/src/vnet/vxlan/encap.c +++ b/src/vnet/vxlan/encap.c 
@@ -77,7 +77,7 @@ vxlan_encap_inline (vlib_main_t * vm, vnet_interface_main_t * im = &vnm->interface_main; u32 pkts_encapsulated = 0; u16 old_l0 = 0, old_l1 = 0; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; u32 sw_if_index0 = 0, sw_if_index1 = 0; u32 next0 = 0, next1 = 0; @@ -301,7 +301,7 @@ vxlan_encap_inline (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, stats_sw_if_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_sw_if_index = sw_if_index0; stats_n_packets = 2; @@ -311,10 +311,10 @@ vxlan_encap_inline (vlib_main_t * vm, { vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, sw_if_index0, 1, len0); + thread_index, sw_if_index0, 1, len0); vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, sw_if_index1, 1, len1); + thread_index, sw_if_index1, 1, len1); } } @@ -464,7 +464,7 @@ vxlan_encap_inline (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, stats_sw_if_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; @@ -496,7 +496,7 @@ vxlan_encap_inline (vlib_main_t * vm, { vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); node->runtime_data[0] = stats_sw_if_index; } diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index 042d02e2..4309cd51 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -66,14 +66,14 @@ _(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) void dslock (stats_main_t * sm, int release_hint, int tag) { - u32 thread_id; + u32 thread_index; data_structure_lock_t *l = sm->data_structure_lock; if (PREDICT_FALSE (l == 0)) return; - thread_id = os_get_cpu_number (); - if (l->lock && l->thread_id == thread_id) + thread_index = vlib_get_thread_index (); + if (l->lock && l->thread_index == thread_index) { l->count++; return; @@ -85,7 +85,7 @@ dslock (stats_main_t * sm, int release_hint, int tag) while (__sync_lock_test_and_set (&l->lock, 1)) /* zzzz */ ; l->tag = tag; - l->thread_id = thread_id; + l->thread_index = thread_index; l->count = 1; } @@ -99,14 +99,14 @@ stats_dslock_with_hint (int hint, int tag) void dsunlock (stats_main_t * sm) { - u32 thread_id; + u32 thread_index; data_structure_lock_t *l = sm->data_structure_lock; if (PREDICT_FALSE (l == 0)) return; - thread_id = os_get_cpu_number (); - ASSERT (l->lock && l->thread_id == thread_id); + thread_index = vlib_get_thread_index (); + ASSERT (l->lock && l->thread_index == thread_index); l->count--; if (l->count == 0) { diff --git a/src/vpp/stats/stats.h b/src/vpp/stats/stats.h index 118115be..024dc78e 100644 --- a/src/vpp/stats/stats.h +++ b/src/vpp/stats/stats.h @@ -30,7 +30,7 @@ typedef struct { volatile u32 lock; volatile u32 release_hint; - u32 thread_id; + u32 thread_index; u32 count; int tag; } data_structure_lock_t; -- cgit 1.2.3-korg From 0f26c5a0138ac86d7ebd197c31a09d8d624c35fe Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 1 Mar 2017 15:12:11 -0800 Subject: MPLS Mcast 1 - interface-DPO Used in the Data-plane to change a packet's input interface 2 - MPLS 
multicast FIB entry
    Same as a unicast entry but it links to a replicate rather than a
    load-balance DPO
3 - Multicast MPLS tunnel
    Update MPLS tunnels to use a FIB path-list to describe the endpoint[s].
    Use the path-list to generate the forwarding chain (DPOs) to link to.
4 - Resolve a path via a local label (of an mLDP LSP)
    For IP multicast entries to use an LSP in the replication list, we need
    to describe the 'resolve-via-label' where the label is that of a
    multicast LSP.
5 - MPLS disposition path sets RPF-ID
    For an interface-less LSP (i.e. mLDP not RSVP-TE) at the tail of the LSP
    we still need to perform an RPF check. An MPLS disposition DPO performs
    the MPLS pop validation checks and sets the RPF-ID in the packet.
6 - RPF check with per-entry RPF-ID
    An RPF-ID is used instead of a real interface SW if index in the case the
    IP traffic arrives from an LSP that does not have an associated interface.

Change-Id: Ib92e177be919147bafeb599729abf3d1abc2f4b3
Signed-off-by: Neale Ranns
---
 src/plugins/dpdk/device/node.c | 2 +-
 src/vat/api_format.c | 133 ++---
 src/vnet.am | 2 +
 src/vnet/adj/adj.c | 13 +-
 src/vnet/adj/adj.h | 6 +
 src/vnet/adj/adj_internal.h | 14 +-
 src/vnet/adj/adj_mcast.c | 134 ++++-
 src/vnet/adj/adj_mcast.h | 27 +
 src/vnet/adj/adj_midchain.c | 62 ++-
 src/vnet/adj/adj_nbr.c | 2 -
 src/vnet/buffer.h | 3 +
 src/vnet/devices/ssvm/node.c | 2 +-
 src/vnet/dhcp/dhcp6_proxy_node.c | 1 +
 src/vnet/dpo/dpo.c | 10 +
 src/vnet/dpo/dpo.h | 8 +-
 src/vnet/dpo/interface_dpo.c | 416 ++++++++++++++++
 src/vnet/dpo/interface_dpo.h | 67 +++
 src/vnet/dpo/lookup_dpo.c | 211 +++++++-
 src/vnet/dpo/lookup_dpo.h | 20 +
 src/vnet/dpo/mpls_disposition.c | 364 ++++++++++++++
 src/vnet/dpo/mpls_disposition.h | 85 ++++
 src/vnet/dpo/mpls_label_dpo.c | 6 +-
 src/vnet/dpo/replicate_dpo.c | 48 +-
 src/vnet/dpo/replicate_dpo.h | 2 +
 src/vnet/ethernet/arp.c | 1 +
 src/vnet/ethernet/interface.c | 2 +-
 src/vnet/ethernet/node.c | 4 +-
 src/vnet/ethernet/types.def | 4 +-
 src/vnet/fib/fib_api.h | 4 +
 src/vnet/fib/fib_entry.c | 47 +-
 src/vnet/fib/fib_entry.h | 13 +-
 src/vnet/fib/fib_entry_src.c | 154 +++---
 src/vnet/fib/fib_internal.h | 1 +
 src/vnet/fib/fib_path.c | 222 +++++++--
 src/vnet/fib/fib_path.h | 17 +-
 src/vnet/fib/fib_path_ext.c | 4 +-
 src/vnet/fib/fib_path_ext.h | 3 +-
 src/vnet/fib/fib_path_list.c | 270 ++++++----
 src/vnet/fib/fib_path_list.h | 22 +-
 src/vnet/fib/fib_table.c | 47 +-
 src/vnet/fib/fib_test.c | 345 +++++++++++--
 src/vnet/fib/fib_test.h | 111 +++++
 src/vnet/fib/fib_types.c | 15 +-
 src/vnet/fib/fib_types.h | 60 ++-
 src/vnet/fib/mpls_fib.c | 15 +-
 src/vnet/handoff.h | 10 +-
 src/vnet/interface.c | 2 +-
 src/vnet/ip/ip.api | 3 +
 src/vnet/ip/ip4_forward.c | 20 +
 src/vnet/ip/ip6_forward.c | 23 +
 src/vnet/ip/ip6_neighbor.c | 1 +
 src/vnet/ip/ip_api.c | 98 ++--
 src/vnet/ip/lookup.c | 3 +-
 src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c | 1 +
 src/vnet/mfib/ip4_mfib.c | 1 +
 src/vnet/mfib/ip6_mfib.c | 1 +
 src/vnet/mfib/mfib_entry.c | 395 +++++++++------
 src/vnet/mfib/mfib_entry.h | 20 +-
 src/vnet/mfib/mfib_forward.c | 29 +-
 src/vnet/mfib/mfib_table.c | 8 +-
 src/vnet/mfib/mfib_table.h | 1 +
 src/vnet/mfib/mfib_test.c | 127 ++++-
 src/vnet/mpls/mpls.api | 87 ++--
 src/vnet/mpls/mpls.c | 17 +-
 src/vnet/mpls/mpls_api.c | 97 ++--
 src/vnet/mpls/mpls_input.c | 2 +-
 src/vnet/mpls/mpls_lookup.c | 236 ++++++---
 src/vnet/mpls/mpls_tunnel.c | 883 ++++++++++++++++++++++-----------
 src/vnet/mpls/mpls_tunnel.h | 57 ++-
 src/vnet/mpls/mpls_types.h | 20 +
 src/vnet/srp/interface.c | 2 +-
 test/test_ip_mcast.py | 1 +
 test/test_mpls.py | 277 ++++++++++-
test/vpp_ip_route.py | 38 +- test/vpp_mpls_tunnel_interface.py | 46 ++ test/vpp_papi_provider.py | 16 +- 76 files changed, 4393 insertions(+), 1128 deletions(-) create mode 100644 src/vnet/dpo/interface_dpo.c create mode 100644 src/vnet/dpo/interface_dpo.h create mode 100644 src/vnet/dpo/mpls_disposition.c create mode 100644 src/vnet/dpo/mpls_disposition.h create mode 100644 src/vnet/fib/fib_test.h create mode 100644 test/vpp_mpls_tunnel_interface.py (limited to 'src/vnet/devices') diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index b10e0fad..0549ba5d 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -52,7 +52,7 @@ always_inline int vlib_buffer_is_mpls (vlib_buffer_t * b) { ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b); - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)); } always_inline u32 diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 61b8e1d8..107aa012 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -16369,32 +16369,82 @@ api_netmap_delete (vat_main_t * vam) return ret; } -static void vl_api_mpls_tunnel_details_t_handler - (vl_api_mpls_tunnel_details_t * mp) +static void +vl_api_mpls_fib_path_print (vat_main_t * vam, vl_api_fib_path2_t * fp) +{ + if (fp->afi == IP46_TYPE_IP6) + print (vam->ofp, + " weight %d, sw_if_index %d, is_local %d, is_drop %d, " + "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U", + ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local, + fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi, + format_ip6_address, fp->next_hop); + else if (fp->afi == IP46_TYPE_IP4) + print (vam->ofp, + " weight %d, sw_if_index %d, is_local %d, is_drop %d, " + "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U", + ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local, + fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi, + format_ip4_address, fp->next_hop); +} + +static void +vl_api_mpls_fib_path_json_print (vat_json_node_t * node, + vl_api_fib_path2_t * fp) +{ + struct in_addr ip4; + struct in6_addr ip6; + + vat_json_object_add_uint (node, "weight", ntohl (fp->weight)); + vat_json_object_add_uint (node, "sw_if_index", ntohl (fp->sw_if_index)); + vat_json_object_add_uint (node, "is_local", fp->is_local); + vat_json_object_add_uint (node, "is_drop", fp->is_drop); + vat_json_object_add_uint (node, "is_unreach", fp->is_unreach); + vat_json_object_add_uint (node, "is_prohibit", fp->is_prohibit); + vat_json_object_add_uint (node, "next_hop_afi", fp->afi); + if (fp->afi == IP46_TYPE_IP4) + { + clib_memcpy (&ip4, &fp->next_hop, sizeof (ip4)); + vat_json_object_add_ip4 (node, "next_hop", ip4); + } + else if (fp->afi == IP46_TYPE_IP6) + { + clib_memcpy (&ip6, &fp->next_hop, sizeof (ip6)); + vat_json_object_add_ip6 (node, "next_hop", ip6); + } +} + +static void +vl_api_mpls_tunnel_details_t_handler (vl_api_mpls_tunnel_details_t * mp) { vat_main_t *vam = &vat_main; - i32 len = mp->mt_next_hop_n_labels; + int count = ntohl (mp->mt_count); + vl_api_fib_path2_t *fp; i32 i; - print (vam->ofp, "[%d]: via %U %d labels ", - mp->tunnel_index, - format_ip4_address, mp->mt_next_hop, - ntohl (mp->mt_next_hop_sw_if_index)); - for (i = 0; i < len; i++) + print (vam->ofp, "[%d]: sw_if_index %d via:", + ntohl (mp->mt_tunnel_index), ntohl (mp->mt_sw_if_index)); + fp = mp->mt_paths; + for (i = 0; i < count; i++) { - print (vam->ofp, "%u ", ntohl (mp->mt_next_hop_out_labels[i])); + 
vl_api_mpls_fib_path_print (vam, fp); + fp++; } + print (vam->ofp, ""); } -static void vl_api_mpls_tunnel_details_t_handler_json - (vl_api_mpls_tunnel_details_t * mp) +#define vl_api_mpls_tunnel_details_t_endian vl_noop_handler +#define vl_api_mpls_tunnel_details_t_print vl_noop_handler + +static void +vl_api_mpls_tunnel_details_t_handler_json (vl_api_mpls_tunnel_details_t * mp) { vat_main_t *vam = &vat_main; vat_json_node_t *node = NULL; - struct in_addr ip4; + int count = ntohl (mp->mt_count); + vl_api_fib_path2_t *fp; i32 i; - i32 len = mp->mt_next_hop_n_labels; if (VAT_JSON_ARRAY != vam->json_tree.type) { @@ -16404,17 +16454,17 @@ static void vl_api_mpls_tunnel_details_t_handler_json node = vat_json_array_add (&vam->json_tree); vat_json_init_object (node); - vat_json_object_add_uint (node, "tunnel_index", ntohl (mp->tunnel_index)); - clib_memcpy (&ip4, &(mp->mt_next_hop), sizeof (ip4)); - vat_json_object_add_ip4 (node, "next_hop", ip4); - vat_json_object_add_uint (node, "next_hop_sw_if_index", - ntohl (mp->mt_next_hop_sw_if_index)); - vat_json_object_add_uint (node, "l2_only", ntohl (mp->mt_l2_only)); - vat_json_object_add_uint (node, "label_count", len); - for (i = 0; i < len; i++) + vat_json_object_add_uint (node, "tunnel_index", + ntohl (mp->mt_tunnel_index)); + vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->mt_sw_if_index)); + + vat_json_object_add_uint (node, "l2_only", mp->mt_l2_only); + + fp = mp->mt_paths; + for (i = 0; i < count; i++) { - vat_json_object_add_uint (node, "label", - ntohl (mp->mt_next_hop_out_labels[i])); + vl_api_mpls_fib_path_json_print (node, fp); + fp++; } } @@ -16453,6 +16503,7 @@ api_mpls_tunnel_dump (vat_main_t * vam) #define vl_api_mpls_fib_details_t_endian vl_noop_handler #define vl_api_mpls_fib_details_t_print vl_noop_handler + static void vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp) { @@ -16467,20 +16518,7 @@ vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp) fp = mp->path; for (i = 0; i < count; i++) { - if (fp->afi == IP46_TYPE_IP6) - print (vam->ofp, - " weight %d, sw_if_index %d, is_local %d, is_drop %d, " - "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U", - ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local, - fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi, - format_ip6_address, fp->next_hop); - else if (fp->afi == IP46_TYPE_IP4) - print (vam->ofp, - " weight %d, sw_if_index %d, is_local %d, is_drop %d, " - "is_unreach %d, is_prohitbit %d, afi %d, next_hop %U", - ntohl (fp->weight), ntohl (fp->sw_if_index), fp->is_local, - fp->is_drop, fp->is_unreach, fp->is_prohibit, fp->afi, - format_ip4_address, fp->next_hop); + vl_api_mpls_fib_path_print (vam, fp); fp++; } } @@ -16491,8 +16529,6 @@ static void vl_api_mpls_fib_details_t_handler_json vat_main_t *vam = &vat_main; int count = ntohl (mp->count); vat_json_node_t *node = NULL; - struct in_addr ip4; - struct in6_addr ip6; vl_api_fib_path2_t *fp; int i; @@ -16511,23 +16547,8 @@ static void vl_api_mpls_fib_details_t_handler_json fp = mp->path; for (i = 0; i < count; i++) { - vat_json_object_add_uint (node, "weight", ntohl (fp->weight)); - vat_json_object_add_uint (node, "sw_if_index", ntohl (fp->sw_if_index)); - vat_json_object_add_uint (node, "is_local", fp->is_local); - vat_json_object_add_uint (node, "is_drop", fp->is_drop); - vat_json_object_add_uint (node, "is_unreach", fp->is_unreach); - vat_json_object_add_uint (node, "is_prohibit", fp->is_prohibit); - vat_json_object_add_uint (node, "next_hop_afi", fp->afi); - if (fp->afi == 
IP46_TYPE_IP4) - { - clib_memcpy (&ip4, &fp->next_hop, sizeof (ip4)); - vat_json_object_add_ip4 (node, "next_hop", ip4); - } - else if (fp->afi == IP46_TYPE_IP6) - { - clib_memcpy (&ip6, &fp->next_hop, sizeof (ip6)); - vat_json_object_add_ip6 (node, "next_hop", ip6); - } + vl_api_mpls_fib_path_json_print (node, fp); + fp++; } } diff --git a/src/vnet.am b/src/vnet.am index 643ae92e..bed4902b 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -990,6 +990,8 @@ libvnet_la_SOURCES += \ vnet/dpo/lookup_dpo.c \ vnet/dpo/classify_dpo.c \ vnet/dpo/replicate_dpo.c \ + vnet/dpo/interface_dpo.c \ + vnet/dpo/mpls_disposition.c \ vnet/dpo/mpls_label_dpo.c nobase_include_HEADERS += \ diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index 90182006..36dfe500 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -67,6 +67,10 @@ adj_alloc (fib_protocol_t proto) adj->lookup_next_index = 0; adj->ia_delegates = NULL; + /* lest it become a midchain in the future */ + memset(&adj->sub_type.midchain.next_dpo, 0, + sizeof(adj->sub_type.midchain.next_dpo)); + ip4_main.lookup_main.adjacency_heap = adj_pool; ip6_main.lookup_main.adjacency_heap = adj_pool; @@ -118,6 +122,9 @@ format_ip_adjacency (u8 * s, va_list * args) case IP_LOOKUP_NEXT_MCAST: s = format (s, "%U", format_adj_mcast, adj_index, 0); break; + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: + s = format (s, "%U", format_adj_mcast_midchain, adj_index, 0); + break; default: break; } @@ -180,6 +187,7 @@ adj_last_lock_gone (ip_adjacency_t *adj) adj->rewrite_header.sw_if_index); break; case IP_LOOKUP_NEXT_MCAST: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: adj_mcast_remove(adj->ia_nh_proto, adj->rewrite_header.sw_if_index); break; @@ -338,6 +346,7 @@ adj_walk (u32 sw_if_index, FOR_EACH_FIB_IP_PROTOCOL(proto) { adj_nbr_walk(sw_if_index, proto, cb, ctx); + adj_mcast_walk(sw_if_index, proto, cb, ctx); } } @@ -544,9 +553,9 @@ adj_show (vlib_main_t * vm, * [@0] * [@1] glean: loop0 * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc - * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * [@3] mpls via 1.0.0.2 loop0: MPLS: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc - * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc + * [@5] mpls via 1.0.0.3 loop0: MPLS: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc * @cliexend ?*/ VLIB_CLI_COMMAND (adj_show_command, static) = { diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h index 32997c91..ed5eb1f1 100644 --- a/src/vnet/adj/adj.h +++ b/src/vnet/adj/adj.h @@ -81,6 +81,10 @@ typedef enum /** Multicast Adjacency. */ IP_LOOKUP_NEXT_MCAST, + /** Multicast Midchain Adjacency. 
An Adjacency for sending mcast packets + * on a tunnel/virtual interface */ + IP_LOOKUP_NEXT_MCAST_MIDCHAIN, + IP_LOOKUP_N_NEXT, } __attribute__ ((packed)) ip_lookup_next_t; @@ -107,6 +111,7 @@ typedef enum [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite", \ [IP_LOOKUP_NEXT_MCAST] = "ip4-rewrite-mcast", \ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \ + [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip4-mcast-midchain", \ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \ } @@ -119,6 +124,7 @@ typedef enum [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \ [IP_LOOKUP_NEXT_MCAST] = "ip6-rewrite-mcast", \ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \ + [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip6-mcast-midchain", \ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \ [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \ [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \ diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h index 30668625..2c123c54 100644 --- a/src/vnet/adj/adj_internal.h +++ b/src/vnet/adj/adj_internal.h @@ -17,6 +17,7 @@ #define __ADJ_INTERNAL_H__ #include +#include #include #include #include @@ -87,11 +88,14 @@ adj_get_index (ip_adjacency_t *adj) return (adj - adj_pool); } -extern void adj_nbr_update_rewrite_internal (ip_adjacency_t *adj, - ip_lookup_next_t adj_next_index, - u32 complete_next_index, - u32 next_index, - u8 *rewrite); +extern void adj_nbr_update_rewrite_internal(ip_adjacency_t *adj, + ip_lookup_next_t adj_next_index, + u32 complete_next_index, + u32 next_index, + u8 *rewrite); +extern void adj_midchain_setup(adj_index_t adj_index, + adj_midchain_fixup_t fixup, + adj_flags_t flags); extern ip_adjacency_t * adj_alloc(fib_protocol_t proto); diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c index 4f678e43..755abfd4 100644 --- a/src/vnet/adj/adj_mcast.c +++ b/src/vnet/adj/adj_mcast.c @@ -13,7 +13,7 @@ * limitations under the License. */ -#include +#include #include #include #include @@ -129,6 +129,59 @@ adj_mcast_update_rewrite (adj_index_t adj_index, adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask); } +/** + * adj_mcast_midchain_update_rewrite + * + * Update the adjacency's rewrite string. A NULL string implies the + * rewrite is reset (i.e. when the ARP/ND entry is gone). + * NB: the adj being updated may be handling traffic in the DP. + */ +void +adj_mcast_midchain_update_rewrite (adj_index_t adj_index, + adj_midchain_fixup_t fixup, + adj_flags_t flags, + u8 *rewrite, + u8 offset, + u32 mask) +{ + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + + /* + * one time only update. since we don't support changing the tunnel + * src,dst, this is all we need. + */ + ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_MCAST); + /* + * tunnels can always provide a rewrite. + */ + ASSERT(NULL != rewrite); + + adj_midchain_setup(adj_index, fixup, flags); + + /* + * update the adj's rewrite string and build the arc + * from the rewrite node to the interface's TX node + */ + adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST_MIDCHAIN, + adj_get_mcast_node(adj->ia_nh_proto), + vnet_tx_node_index_for_sw_interface( + vnet_get_main(), + adj->rewrite_header.sw_if_index), + rewrite); + + /* + * set the fields corresponding to the mcast IP address rewrite + * The mask must be stored in network byte order, since the packet's + * IP address will also be in network order. 
+ */ + adj->rewrite_header.dst_mcast_offset = offset; + adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask); +} + void adj_mcast_remove (fib_protocol_t proto, u32 sw_if_index) @@ -260,6 +313,24 @@ adj_mcast_interface_delete (vnet_main_t * vnm, VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_mcast_interface_delete); +/** + * @brief Walk the multicast Adjacencies on a given interface + */ +void +adj_mcast_walk (u32 sw_if_index, + fib_protocol_t proto, + adj_walk_cb_t cb, + void *ctx) +{ + if (vec_len(adj_mcasts[proto]) > sw_if_index) + { + if (ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index]) + { + cb(adj_mcasts[proto][sw_if_index], ctx); + } + } +} + u8* format_adj_mcast (u8* s, va_list *ap) { @@ -269,6 +340,8 @@ format_adj_mcast (u8* s, va_list *ap) s = format(s, "%U-mcast: ", format_fib_protocol, adj->ia_nh_proto); + if (adj->rewrite_header.flags & VNET_REWRITE_HAS_FEATURES) + s = format(s, "[features] "); s = format (s, "%U", format_vnet_rewrite, &adj->rewrite_header, sizeof (adj->rewrite_data), 0); @@ -276,6 +349,28 @@ format_adj_mcast (u8* s, va_list *ap) return (s); } +u8* +format_adj_mcast_midchain (u8* s, va_list *ap) +{ + index_t index = va_arg(*ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(*ap, u32); + vnet_main_t * vnm = vnet_get_main(); + ip_adjacency_t * adj = adj_get(index); + + s = format(s, "%U-mcast-midchain: ", + format_fib_protocol, adj->ia_nh_proto); + s = format (s, "%U", + format_vnet_rewrite, + vnm->vlib_main, &adj->rewrite_header, + sizeof (adj->rewrite_data), 0); + s = format (s, "\n%Ustacked-on:\n%U%U", + format_white_space, indent, + format_white_space, indent+2, + format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2); + + return (s); +} + static void adj_dpo_lock (dpo_id_t *dpo) @@ -293,6 +388,11 @@ const static dpo_vft_t adj_mcast_dpo_vft = { .dv_unlock = adj_dpo_unlock, .dv_format = format_adj_mcast, }; +const static dpo_vft_t adj_mcast_midchain_dpo_vft = { + .dv_lock = adj_dpo_lock, + .dv_unlock = adj_dpo_unlock, + .dv_format = format_adj_mcast_midchain, +}; /** * @brief The per-protocol VLIB graph nodes that are assigned to a mcast @@ -319,6 +419,31 @@ const static char* const * const adj_mcast_nodes[DPO_PROTO_NUM] = [DPO_PROTO_MPLS] = NULL, }; +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a mcast + * object. + * + * this means that these graph nodes are ones from which a mcast is the + * parent object in the DPO-graph. + */ +const static char* const adj_mcast_midchain_ip4_nodes[] = +{ + "ip4-mcast-midchain", + NULL, +}; +const static char* const adj_mcast_midchain_ip6_nodes[] = +{ + "ip6-mcast-midchain", + NULL, +}; + +const static char* const * const adj_mcast_midchain_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = adj_mcast_midchain_ip4_nodes, + [DPO_PROTO_IP6] = adj_mcast_midchain_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + /** * @brief Return the size of the adj DB. 
* This is only for testing purposes so an efficient implementation is not needed @@ -349,5 +474,10 @@ adj_mcast_db_size (void) void adj_mcast_module_init (void) { - dpo_register(DPO_ADJACENCY_MCAST, &adj_mcast_dpo_vft, adj_mcast_nodes); + dpo_register(DPO_ADJACENCY_MCAST, + &adj_mcast_dpo_vft, + adj_mcast_nodes); + dpo_register(DPO_ADJACENCY_MCAST_MIDCHAIN, + &adj_mcast_midchain_dpo_vft, + adj_mcast_midchain_nodes); } diff --git a/src/vnet/adj/adj_mcast.h b/src/vnet/adj/adj_mcast.h index 40d44313..bfb0d6f6 100644 --- a/src/vnet/adj/adj_mcast.h +++ b/src/vnet/adj/adj_mcast.h @@ -26,6 +26,7 @@ #define __ADJ_MCAST_H__ #include +#include /** * @brief @@ -68,10 +69,36 @@ extern void adj_mcast_update_rewrite(adj_index_t adj_index, u8 offset, u32 mask); +/** + * @brief + * Update the rewrite string for an existing adjacency and + * convert the adjacency into a midchain + * + * @param + * The index of the adj to update + * + * @param + * The new rewrite + */ +extern void adj_mcast_midchain_update_rewrite(adj_index_t adj_index, + adj_midchain_fixup_t fixup, + adj_flags_t flags, + u8 *rewrite, + u8 offset, + u32 mask); +/** + * @brief Walk the multicast Adjacencies on a given interface + */ +extern void adj_mcast_walk (u32 sw_if_index, + fib_protocol_t adj_nh_proto, + adj_walk_cb_t cb, + void *ctx); + /** * @brief Format/display a mcast adjacency. */ extern u8* format_adj_mcast(u8* s, va_list *ap); +extern u8* format_adj_mcast_midchain(u8* s, va_list *ap); /** * @brief Get the sze of the mcast adj DB. Test purposes only. diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c index 5756de43..a93a1c3e 100644 --- a/src/vnet/adj/adj_midchain.c +++ b/src/vnet/adj/adj_midchain.c @@ -346,7 +346,7 @@ adj_get_midchain_node (vnet_link_t link) static u8 adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj) { - u8 arc = (u8) ~0; + u8 arc = (u8) ~0; switch (adj->ia_link) { case VNET_LINK_IP4: @@ -393,17 +393,14 @@ adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj) } /** - * adj_nbr_midchain_update_rewrite + * adj_midchain_setup * - * Update the adjacency's rewrite string. A NULL string implies the - * rewrite is reset (i.e. when ARP/ND etnry is gone). - * NB: the adj being updated may be handling traffic in the DP. + * Setup the adj as a mid-chain */ void -adj_nbr_midchain_update_rewrite (adj_index_t adj_index, - adj_midchain_fixup_t fixup, - adj_flags_t flags, - u8 *rewrite) +adj_midchain_setup (adj_index_t adj_index, + adj_midchain_fixup_t fixup, + adj_flags_t flags) { u32 feature_index, tx_node; ip_adjacency_t *adj; @@ -413,16 +410,6 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, adj = adj_get(adj_index); - /* - * one time only update. since we don't support chainging the tunnel - * src,dst, this is all we need. - */ - ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP); - /* - * tunnels can always provide a rewrite. - */ - ASSERT(NULL != rewrite); - adj->sub_type.midchain.fixup_func = fixup; adj->ia_flags |= flags; @@ -447,6 +434,38 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, dpo_stack_from_node(tx_node, &adj->sub_type.midchain.next_dpo, drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link))); +} + +/** + * adj_nbr_midchain_update_rewrite + * + * Update the adjacency's rewrite string. A NULL string implies the + * rewrite is reset (i.e. when the ARP/ND entry is gone). + * NB: the adj being updated may be handling traffic in the DP. 
+ */ +void +adj_nbr_midchain_update_rewrite (adj_index_t adj_index, + adj_midchain_fixup_t fixup, + adj_flags_t flags, + u8 *rewrite) +{ + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + + /* + * one time only update. since we don't support changing the tunnel + * src,dst, this is all we need. + */ + ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP); + /* + * tunnels can always provide a rewrite. + */ + ASSERT(NULL != rewrite); + + adj_midchain_setup(adj_index, fixup, flags); /* * update the rewirte with the workers paused. @@ -454,7 +473,7 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MIDCHAIN, adj_get_midchain_node(adj->ia_link), - tx_node, + adj_nbr_midchain_get_tx_node(adj), rewrite); } @@ -496,7 +515,8 @@ adj_nbr_midchain_stack (adj_index_t adj_index, adj = adj_get(adj_index); - ASSERT(IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index); + ASSERT((IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index) || + (IP_LOOKUP_NEXT_MCAST_MIDCHAIN == adj->lookup_next_index)); dpo_stack_from_node(adj_nbr_midchain_get_tx_node(adj), &adj->sub_type.midchain.next_dpo, diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c index ddacb030..3d450d1f 100644 --- a/src/vnet/adj/adj_nbr.c +++ b/src/vnet/adj/adj_nbr.c @@ -195,8 +195,6 @@ adj_nbr_alloc (fib_protocol_t nh_proto, adj->ia_link = link_type; adj->ia_nh_proto = nh_proto; adj->rewrite_header.sw_if_index = sw_if_index; - memset(&adj->sub_type.midchain.next_dpo, 0, - sizeof(adj->sub_type.midchain.next_dpo)); adj_nbr_evaluate_feature (adj_get_index(adj)); return (adj); diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index ea3ce093..ed869d1f 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -130,6 +130,9 @@ typedef struct /* Rewrite length */ u32 save_rewrite_length; + + /* MFIB RPF ID */ + u32 rpf_id; }; /* ICMP */ diff --git a/src/vnet/devices/ssvm/node.c b/src/vnet/devices/ssvm/node.c index 539b4161..b7a8db05 100644 --- a/src/vnet/devices/ssvm/node.c +++ b/src/vnet/devices/ssvm/node.c @@ -210,7 +210,7 @@ ssvm_eth_device_input (ssvm_eth_main_t * em, next0 = SSVM_ETH_INPUT_NEXT_IP4_INPUT; else if (type0 == ETHERNET_TYPE_IP6) next0 = SSVM_ETH_INPUT_NEXT_IP6_INPUT; - else if (type0 == ETHERNET_TYPE_MPLS_UNICAST) + else if (type0 == ETHERNET_TYPE_MPLS) next0 = SSVM_ETH_INPUT_NEXT_MPLS_INPUT; l3_offset0 = ((next0 == SSVM_ETH_INPUT_NEXT_IP4_INPUT || diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index 524cb095..de73154d 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -883,6 +883,7 @@ dhcp6_proxy_set_server (ip46_address_t *addr, mfib_table_entry_update(rx_fib_index, &all_dhcp_servers, MFIB_SOURCE_DHCP, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6); } diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c index d8e075a7..dfc2bd92 100644 --- a/src/vnet/dpo/dpo.c +++ b/src/vnet/dpo/dpo.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include /** * Array of char* names for the DPO types and protos @@ -182,6 +184,12 @@ dpo_set (dpo_id_t *dpo, case IP_LOOKUP_NEXT_MIDCHAIN: dpo->dpoi_type = DPO_ADJACENCY_MIDCHAIN; break; + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: + dpo->dpoi_type = DPO_ADJACENCY_MCAST_MIDCHAIN; + break; + case IP_LOOKUP_NEXT_MCAST: + dpo->dpoi_type = DPO_ADJACENCY_MCAST; + break; default: break; } @@ -453,6 +461,8 @@ dpo_module_init (vlib_main_t * vm) lookup_dpo_module_init(); 
ip_null_dpo_module_init(); replicate_module_init(); + interface_dpo_module_init(); + mpls_disp_dpo_module_init(); return (NULL); } diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h index 48b92d3d..5aa4e2d2 100644 --- a/src/vnet/dpo/dpo.h +++ b/src/vnet/dpo/dpo.h @@ -108,12 +108,15 @@ typedef enum dpo_type_t_ { DPO_ADJACENCY_MIDCHAIN, DPO_ADJACENCY_GLEAN, DPO_ADJACENCY_MCAST, + DPO_ADJACENCY_MCAST_MIDCHAIN, DPO_RECEIVE, DPO_LOOKUP, DPO_LISP_CP, DPO_CLASSIFY, DPO_MPLS_LABEL, + DPO_MPLS_DISPOSITION, DPO_MFIB_ENTRY, + DPO_INTERFACE, DPO_LAST, } __attribute__((packed)) dpo_type_t; @@ -129,6 +132,7 @@ typedef enum dpo_type_t_ { [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midcahin", \ [DPO_ADJACENCY_GLEAN] = "dpo-glean", \ [DPO_ADJACENCY_MCAST] = "dpo-adj-mcast", \ + [DPO_ADJACENCY_MCAST_MIDCHAIN] = "dpo-adj-mcast-midchain", \ [DPO_RECEIVE] = "dpo-receive", \ [DPO_LOOKUP] = "dpo-lookup", \ [DPO_LOAD_BALANCE] = "dpo-load-balance", \ @@ -136,7 +140,9 @@ typedef enum dpo_type_t_ { [DPO_LISP_CP] = "dpo-lisp-cp", \ [DPO_CLASSIFY] = "dpo-classify", \ [DPO_MPLS_LABEL] = "dpo-mpls-label", \ - [DPO_MFIB_ENTRY] = "dpo-mfib_entry" \ + [DPO_MPLS_DISPOSITION] = "dpo-mpls-disposition", \ + [DPO_MFIB_ENTRY] = "dpo-mfib_entry", \ + [DPO_INTERFACE] = "dpo-interface" \ } /** diff --git a/src/vnet/dpo/interface_dpo.c b/src/vnet/dpo/interface_dpo.c new file mode 100644 index 00000000..50ca756f --- /dev/null +++ b/src/vnet/dpo/interface_dpo.c @@ -0,0 +1,416 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +/* + * The 'DB' of interface DPOs. 
+ * There is only one per-interface per-protocol, so this is a per-interface + * vector + */ +static index_t *interface_dpo_db[DPO_PROTO_NUM]; + +static interface_dpo_t * +interface_dpo_alloc (void) +{ + interface_dpo_t *ido; + + pool_get(interface_dpo_pool, ido); + + return (ido); +} + +static inline interface_dpo_t * +interface_dpo_get_from_dpo (const dpo_id_t *dpo) +{ + ASSERT(DPO_INTERFACE == dpo->dpoi_type); + + return (interface_dpo_get(dpo->dpoi_index)); +} + +static inline index_t +interface_dpo_get_index (interface_dpo_t *ido) +{ + return (ido - interface_dpo_pool); +} + +static void +interface_dpo_lock (dpo_id_t *dpo) +{ + interface_dpo_t *ido; + + ido = interface_dpo_get_from_dpo(dpo); + ido->ido_locks++; +} + +static void +interface_dpo_unlock (dpo_id_t *dpo) +{ + interface_dpo_t *ido; + + ido = interface_dpo_get_from_dpo(dpo); + ido->ido_locks--; + + if (0 == ido->ido_locks) + { + interface_dpo_db[ido->ido_proto][ido->ido_sw_if_index] = + INDEX_INVALID; + pool_put(interface_dpo_pool, ido); + } +} + +/* + * interface_dpo_add_or_lock + * + * Add/create and lock a new, or lock an existing, interface DPO + * for the interface and protocol given + */ +void +interface_dpo_add_or_lock (dpo_proto_t proto, + u32 sw_if_index, + dpo_id_t *dpo) +{ + interface_dpo_t *ido; + + vec_validate_init_empty(interface_dpo_db[proto], + sw_if_index, + INDEX_INVALID); + + if (INDEX_INVALID == interface_dpo_db[proto][sw_if_index]) + { + ido = interface_dpo_alloc(); + + ido->ido_sw_if_index = sw_if_index; + ido->ido_proto = proto; + + interface_dpo_db[proto][sw_if_index] = + interface_dpo_get_index(ido); + } + else + { + ido = interface_dpo_get(interface_dpo_db[proto][sw_if_index]); + } + + dpo_set(dpo, DPO_INTERFACE, proto, interface_dpo_get_index(ido)); +} + + +static clib_error_t * +interface_dpo_interface_state_change (vnet_main_t * vnm, + u32 sw_if_index, + u32 flags) +{ + /* + */ + return (NULL); +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION( + interface_dpo_interface_state_change); + +/** + * @brief Registered callback for HW interface state changes + */ +static clib_error_t * +interface_dpo_hw_interface_state_change (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags) +{ + return (NULL); +} + +VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION( + interface_dpo_hw_interface_state_change); + +static clib_error_t * +interface_dpo_interface_delete (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + return (NULL); +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION( + interface_dpo_interface_delete); + +u8* +format_interface_dpo (u8* s, va_list *ap) +{ + index_t index = va_arg(*ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(*ap, u32); + vnet_main_t * vnm = vnet_get_main(); + interface_dpo_t *ido = interface_dpo_get(index); + + return (format(s, "%U-dpo: %U", + format_vnet_sw_interface_name, + vnm, + vnet_get_sw_interface(vnm, ido->ido_sw_if_index), + format_dpo_proto, ido->ido_proto)); +} + +static void +interface_dpo_mem_show (void) +{ + fib_show_memory_usage("Interface", + pool_elts(interface_dpo_pool), + pool_len(interface_dpo_pool), + sizeof(interface_dpo_t)); +} + + +const static dpo_vft_t interface_dpo_vft = { + .dv_lock = interface_dpo_lock, + .dv_unlock = interface_dpo_unlock, + .dv_format = format_interface_dpo, + .dv_mem_show = interface_dpo_mem_show, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to an interface + * object. + * + * this means that these graph nodes are ones from which an interface DPO is the + * parent object in the DPO-graph. 
+ */ +const static char* const interface_dpo_ip4_nodes[] = +{ + "interface-dpo-ip4", + NULL, +}; +const static char* const interface_dpo_ip6_nodes[] = +{ + "interface-dpo-ip6", + NULL, +}; + +const static char* const * const interface_dpo_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = interface_dpo_ip4_nodes, + [DPO_PROTO_IP6] = interface_dpo_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +void +interface_dpo_module_init (void) +{ + dpo_register(DPO_INTERFACE, + &interface_dpo_vft, + interface_dpo_nodes); +} + +/** + * @brief Interface DPO trace data + */ +typedef struct interface_dpo_trace_t_ +{ + u32 sw_if_index; +} interface_dpo_trace_t; + +typedef enum interface_dpo_next_t_ +{ + INTERFACE_DPO_DROP = 0, + INTERFACE_DPO_INPUT = 1, +} interface_dpo_next_t; + +always_inline uword +interface_dpo_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + u32 cpu_index = os_get_cpu_number(); + vnet_interface_main_t *im; + + im = &vnet_get_main ()->interface_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next > 2) + { + const interface_dpo_t *ido0, *ido1; + u32 bi0, idoi0, bi1, idoi1; + vlib_buffer_t *b0, *b1; + + bi0 = from[0]; + to_next[0] = bi0; + bi1 = from[1]; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + idoi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; + ido0 = interface_dpo_get(idoi0); + ido1 = interface_dpo_get(idoi1); + + vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index; + vnet_buffer(b1)->sw_if_index[VLIB_RX] = ido1->ido_sw_if_index; + + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + ido0->ido_sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b0)); + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + ido1->ido_sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b1)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + interface_dpo_trace_t *tr0; + + tr0 = vlib_add_trace (vm, node, b0, sizeof (*tr0)); + tr0->sw_if_index = ido0->ido_sw_if_index; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + interface_dpo_trace_t *tr1; + + tr1 = vlib_add_trace (vm, node, b1, sizeof (*tr1)); + tr1->sw_if_index = ido1->ido_sw_if_index; + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, + INTERFACE_DPO_INPUT, + INTERFACE_DPO_INPUT); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + const interface_dpo_t * ido0; + vlib_buffer_t * b0; + u32 bi0, idoi0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + idoi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + ido0 = interface_dpo_get(idoi0); + + /* Swap the RX interface of the packet to the one the + * interface DPO represents */ + vnet_buffer(b0)->sw_if_index[VLIB_RX] = ido0->ido_sw_if_index; + + /* Bump the interface's RX counters */ + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + 
ido0->ido_sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + interface_dpo_trace_t *tr; + + tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->sw_if_index = ido0->ido_sw_if_index; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, + INTERFACE_DPO_INPUT); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static u8 * +format_interface_dpo_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + interface_dpo_trace_t * t = va_arg (*args, interface_dpo_trace_t *); + uword indent = format_get_indent (s); + s = format (s, "%U sw_if_index:%d", + format_white_space, indent, + t->sw_if_index); + return s; +} + +static uword +interface_dpo_ip4 (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (interface_dpo_inline(vm, node, from_frame)); +} + +static uword +interface_dpo_ip6 (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (interface_dpo_inline(vm, node, from_frame)); +} + +VLIB_REGISTER_NODE (interface_dpo_ip4_node) = { + .function = interface_dpo_ip4, + .name = "interface-dpo-ip4", + .vector_size = sizeof (u32), + .format_trace = format_interface_dpo_trace, + + .n_next_nodes = 2, + .next_nodes = { + [INTERFACE_DPO_DROP] = "ip4-drop", + [INTERFACE_DPO_INPUT] = "ip4-input", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_ip4_node, + interface_dpo_ip4) + +VLIB_REGISTER_NODE (interface_dpo_ip6_node) = { + .function = interface_dpo_ip6, + .name = "interface-dpo-ip6", + .vector_size = sizeof (u32), + .format_trace = format_interface_dpo_trace, + + .n_next_nodes = 2, + .next_nodes = { + [INTERFACE_DPO_DROP] = "ip6-drop", + [INTERFACE_DPO_INPUT] = "ip6-input", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_ip6_node, + interface_dpo_ip6) + diff --git a/src/vnet/dpo/interface_dpo.h b/src/vnet/dpo/interface_dpo.h new file mode 100644 index 00000000..1538dfbb --- /dev/null +++ b/src/vnet/dpo/interface_dpo.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * The data-path object representing the reception (RX) of a packet on an + * interface, i.e. it's for-us + */ + +#ifndef __INTERFACE_DPO_H__ +#define __INTERFACE_DPO_H__ + +#include + +typedef struct interface_dpo_t_ +{ + /** + * The Software interface index that the packets will be given + * as the ingress/rx interface + */ + u32 ido_sw_if_index; + + /** + * next VLIB node. A '-input' node. + */ + u32 ido_next_node; + + /** + * DPO protocol that the packets will have as they 'ingress' + * on this interface + */ + dpo_proto_t ido_proto; + + /** + * number of locks. 
+ */ + u16 ido_locks; +} interface_dpo_t; + +extern void interface_dpo_add_or_lock (dpo_proto_t proto, + u32 sw_if_index, + dpo_id_t *dpo); + +extern void interface_dpo_module_init(void); + +/** + * @brief pool of all interface DPOs + */ +interface_dpo_t *interface_dpo_pool; + +static inline interface_dpo_t * +interface_dpo_get (index_t index) +{ + return (pool_elt_at_index(interface_dpo_pool, index)); +} + +#endif diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index 97ad0a44..e5b00a79 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -21,8 +21,12 @@ #include #include #include +#include +#include +#include static const char *const lookup_input_names[] = LOOKUP_INPUTS; +static const char *const lookup_cast_names[] = LOOKUP_CASTS; /** * @brief Enumeration of the lookup subtypes @@ -31,6 +35,7 @@ typedef enum lookup_sub_type_t_ { LOOKUP_SUB_TYPE_SRC, LOOKUP_SUB_TYPE_DST, + LOOKUP_SUB_TYPE_DST_MCAST, LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE, } lookup_sub_type_t; #define LOOKUP_SUB_TYPE_NUM (LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE+1) @@ -67,6 +72,7 @@ lookup_dpo_get_index (lookup_dpo_t *lkd) static void lookup_dpo_add_or_lock_i (fib_node_index_t fib_index, dpo_proto_t proto, + lookup_cast_t cast, lookup_input_t input, lookup_table_t table_config, dpo_id_t *dpo) @@ -79,6 +85,7 @@ lookup_dpo_add_or_lock_i (fib_node_index_t fib_index, lkd->lkd_proto = proto; lkd->lkd_input = input; lkd->lkd_table = table_config; + lkd->lkd_cast = cast; /* * use the input type to select the lookup sub-type @@ -100,6 +107,10 @@ lookup_dpo_add_or_lock_i (fib_node_index_t fib_index, type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST]; break; } + if (LOOKUP_MULTICAST == cast) + { + type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_MCAST]; + } } if (0 == type) @@ -115,20 +126,29 @@ lookup_dpo_add_or_lock_i (fib_node_index_t fib_index, void lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index, dpo_proto_t proto, + lookup_cast_t cast, lookup_input_t input, lookup_table_t table_config, dpo_id_t *dpo) { if (LOOKUP_TABLE_FROM_CONFIG == table_config) { - fib_table_lock(fib_index, dpo_proto_to_fib(proto)); + if (LOOKUP_UNICAST == cast) + { + fib_table_lock(fib_index, dpo_proto_to_fib(proto)); + } + else + { + mfib_table_lock(fib_index, dpo_proto_to_fib(proto)); + } } - lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo); + lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo); } void lookup_dpo_add_or_lock_w_table_id (u32 table_id, dpo_proto_t proto, + lookup_cast_t cast, lookup_input_t input, lookup_table_t table_config, dpo_id_t *dpo) @@ -137,13 +157,22 @@ lookup_dpo_add_or_lock_w_table_id (u32 table_id, if (LOOKUP_TABLE_FROM_CONFIG == table_config) { - fib_index = - fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), - table_id); + if (LOOKUP_UNICAST == cast) + { + fib_index = + fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), + table_id); + } + else + { + fib_index = + mfib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), + table_id); + } } ASSERT(FIB_NODE_INDEX_INVALID != fib_index); - lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo); + lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo); } u8* @@ -156,16 +185,29 @@ format_lookup_dpo (u8 *s, va_list *args) if (LOOKUP_TABLE_FROM_INPUT_INTERFACE == lkd->lkd_table) { - s = format(s, "%s lookup in interface's %U table", + s = format(s, "%s,%s lookup in interface's %U table", lookup_input_names[lkd->lkd_input], + 
lookup_cast_names[lkd->lkd_cast], format_dpo_proto, lkd->lkd_proto); } else { - s = format(s, "%s lookup in %U", - lookup_input_names[lkd->lkd_input], - format_fib_table_name, lkd->lkd_fib_index, - dpo_proto_to_fib(lkd->lkd_proto)); + if (LOOKUP_UNICAST == lkd->lkd_cast) + { + s = format(s, "%s,%s lookup in %U", + lookup_input_names[lkd->lkd_input], + lookup_cast_names[lkd->lkd_cast], + format_fib_table_name, lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } + else + { + s = format(s, "%s,%s lookup in %U", + lookup_input_names[lkd->lkd_input], + lookup_cast_names[lkd->lkd_cast], + format_mfib_table_name, lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } } return (s); } @@ -193,8 +235,16 @@ lookup_dpo_unlock (dpo_id_t *dpo) { if (LOOKUP_TABLE_FROM_CONFIG == lkd->lkd_table) { - fib_table_unlock(lkd->lkd_fib_index, - dpo_proto_to_fib(lkd->lkd_proto)); + if (LOOKUP_UNICAST == lkd->lkd_cast) + { + fib_table_unlock(lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } + else + { + mfib_table_unlock(lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } } pool_put(lookup_dpo_pool, lkd); } @@ -1069,6 +1119,123 @@ VLIB_REGISTER_NODE (lookup_mpls_dst_itf_node) = { }; VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_itf_node, lookup_mpls_dst_itf) +typedef enum lookup_ip_dst_mcast_next_t_ { + LOOKUP_IP_DST_MCAST_NEXT_RPF, + LOOKUP_IP_DST_MCAST_N_NEXT, +} mfib_forward_lookup_next_t; + +always_inline uword +lookup_dpo_ip_dst_mcast_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + int is_v4) +{ + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = LOOKUP_IP_DST_MCAST_NEXT_RPF; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* } */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, lkdi0, fib_index0, next0; + const lookup_dpo_t * lkd0; + fib_node_index_t mfei0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* dst lookup was done by mpls lookup */ + lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + lkd0 = lookup_dpo_get(lkdi0); + fib_index0 = lkd0->lkd_fib_index; + next0 = LOOKUP_IP_DST_MCAST_NEXT_RPF; + + if (is_v4) + { + ip4_header_t * ip0; + + ip0 = vlib_buffer_get_current (b0); + mfei0 = ip4_mfib_table_lookup(ip4_mfib_get(fib_index0), + &ip0->src_address, + &ip0->dst_address, + 64); + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->fib_index = fib_index0; + tr->lbi = mfei0; + tr->addr.ip4 = ip0->dst_address; + } + } + else + { + ip6_header_t * ip0; + + ip0 = vlib_buffer_get_current (b0); + mfei0 = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index0), + &ip0->src_address, + &ip0->dst_address); + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->fib_index = fib_index0; + tr->lbi = mfei0; + tr->addr.ip6 = ip0->dst_address; + } + } + + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = mfei0; + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +always_inline 
uword +lookup_ip4_dst_mcast (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip_dst_mcast_inline(vm, node, from_frame, 1)); +} + +VLIB_REGISTER_NODE (lookup_ip4_dst_mcast_node) = { + .function = lookup_ip4_dst_mcast, + .name = "lookup-ip4-dst-mcast", + .vector_size = sizeof (u32), + + .format_trace = format_lookup_trace, + .n_next_nodes = LOOKUP_IP_DST_MCAST_N_NEXT, + .next_nodes = { + [LOOKUP_IP_DST_MCAST_NEXT_RPF] = "ip4-mfib-forward-rpf", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_mcast_node, + lookup_ip4_dst_mcast) + static void lookup_dpo_mem_show (void) { @@ -1129,6 +1296,22 @@ const static char* const * const lookup_dst_nodes[DPO_PROTO_NUM] = [DPO_PROTO_MPLS] = lookup_dst_mpls_nodes, }; +const static char* const lookup_dst_mcast_ip4_nodes[] = +{ + "lookup-ip4-dst-mcast", + NULL, +}; +const static char* const lookup_dst_mcast_ip6_nodes[] = +{ + "lookup-ip6-dst-mcast", + NULL, +}; +const static char* const * const lookup_dst_mcast_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = lookup_dst_mcast_ip4_nodes, + [DPO_PROTO_IP6] = lookup_dst_mcast_ip6_nodes, +}; + const static char* const lookup_dst_from_interface_ip4_nodes[] = { "lookup-ip4-dst-itf", @@ -1168,6 +1351,8 @@ lookup_dpo_module_init (void) dpo_register_new_type(&lkd_vft, lookup_src_nodes); lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST] = dpo_register_new_type(&lkd_vft, lookup_dst_nodes); + lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_MCAST] = + dpo_register_new_type(&lkd_vft, lookup_dst_mcast_nodes); lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE] = dpo_register_new_type(&lkd_vft, lookup_dst_from_interface_nodes); } diff --git a/src/vnet/dpo/lookup_dpo.h b/src/vnet/dpo/lookup_dpo.h index ff283388..7dfd0385 100644 --- a/src/vnet/dpo/lookup_dpo.h +++ b/src/vnet/dpo/lookup_dpo.h @@ -46,6 +46,19 @@ typedef enum lookup_table_t_ { [LOOKUP_INPUT_DST_ADDR] = "table-configured", \ } +/** + * Switch between a unicast and a multicast FIB lookup for the packet + */ +typedef enum lookup_cast_t_ { + LOOKUP_UNICAST, + LOOKUP_MULTICAST, +} __attribute__ ((packed)) lookup_cast_t; + +#define LOOKUP_CASTS { \ + [LOOKUP_UNICAST] = "unicast", \ + [LOOKUP_MULTICAST] = "multicast", \ +} + /** * A representation of an MPLS label for imposition in the data-path */ @@ -73,6 +86,11 @@ typedef struct lookup_dpo_t */ lookup_table_t lkd_table; + /** + * Unicast or multicast FIB lookup + */ + lookup_cast_t lkd_cast; + /** * Number of locks */ @@ -81,11 +99,13 @@ extern void lookup_dpo_add_or_lock_w_fib_index(fib_node_index_t fib_index, dpo_proto_t proto, + lookup_cast_t cast, lookup_input_t input, lookup_table_t table, dpo_id_t *dpo); extern void lookup_dpo_add_or_lock_w_table_id(u32 table_id, dpo_proto_t proto, + lookup_cast_t cast, lookup_input_t input, lookup_table_t table, dpo_id_t *dpo); diff --git a/src/vnet/dpo/mpls_disposition.c b/src/vnet/dpo/mpls_disposition.c new file mode 100644 index 00000000..5dc33fcf --- /dev/null +++ b/src/vnet/dpo/mpls_disposition.c @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +/* + * pool of all MPLS Label DPOs + */ +mpls_disp_dpo_t *mpls_disp_dpo_pool; + +static mpls_disp_dpo_t * +mpls_disp_dpo_alloc (void) +{ + mpls_disp_dpo_t *mdd; + + pool_get_aligned(mpls_disp_dpo_pool, mdd, CLIB_CACHE_LINE_BYTES); + memset(mdd, 0, sizeof(*mdd)); + + dpo_reset(&mdd->mdd_dpo); + + return (mdd); +} + +static index_t +mpls_disp_dpo_get_index (mpls_disp_dpo_t *mdd) +{ + return (mdd - mpls_disp_dpo_pool); +} + +index_t +mpls_disp_dpo_create (dpo_proto_t payload_proto, + fib_rpf_id_t rpf_id, + const dpo_id_t *dpo) +{ + mpls_disp_dpo_t *mdd; + + mdd = mpls_disp_dpo_alloc(); + + mdd->mdd_payload_proto = payload_proto; + mdd->mdd_rpf_id = rpf_id; + + dpo_stack(DPO_MPLS_DISPOSITION, + mdd->mdd_payload_proto, + &mdd->mdd_dpo, + dpo); + + return (mpls_disp_dpo_get_index(mdd)); +} + +u8* +format_mpls_disp_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + u32 indent = va_arg (*args, u32); + mpls_disp_dpo_t *mdd; + + mdd = mpls_disp_dpo_get(index); + + s = format(s, "mpls-disposition:[%d]:[%U]", + index, + format_dpo_proto, mdd->mdd_payload_proto); + + s = format(s, "\n%U", format_white_space, indent); + s = format(s, "%U", format_dpo_id, &mdd->mdd_dpo, indent+2); + + return (s); +} + +static void +mpls_disp_dpo_lock (dpo_id_t *dpo) +{ + mpls_disp_dpo_t *mdd; + + mdd = mpls_disp_dpo_get(dpo->dpoi_index); + + mdd->mdd_locks++; +} + +static void +mpls_disp_dpo_unlock (dpo_id_t *dpo) +{ + mpls_disp_dpo_t *mdd; + + mdd = mpls_disp_dpo_get(dpo->dpoi_index); + + mdd->mdd_locks--; + + if (0 == mdd->mdd_locks) + { + dpo_reset(&mdd->mdd_dpo); + pool_put(mpls_disp_dpo_pool, mdd); + } +} + +/** + * @brief A struct to hold tracing information for the MPLS label disposition + * node. + */ +typedef struct mpls_label_disposition_trace_t_ +{ + index_t mdd; +} mpls_label_disposition_trace_t; + +always_inline uword +mpls_label_disposition_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + u8 payload_is_ip4, + u8 payload_is_ip6) +{ + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + mpls_disp_dpo_t *mdd0, *mdd1; + u32 bi0, mddi0, bi1, mddi1; + vlib_buffer_t * b0, *b1; + u32 next0, next1; + + bi0 = to_next[0] = from[0]; + bi1 = to_next[1] = from[1]; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (ip6_header_t), STORE); + CLIB_PREFETCH (p3->data, sizeof (ip6_header_t), STORE); + } + + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* dst lookup was done by ip4 lookup */ + mddi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + mddi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; + mdd0 = mpls_disp_dpo_get(mddi0); + mdd1 = mpls_disp_dpo_get(mddi1); + + if (payload_is_ip4) + { + /* + * decrement the TTL on ingress to the LSP + */ + } + else if (payload_is_ip6) + { + /* + * decrement the TTL on ingress to the LSP + */ + } + + next0 = mdd0->mdd_dpo.dpoi_next_node; + next1 = mdd1->mdd_dpo.dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mdd0->mdd_dpo.dpoi_index; + vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mdd1->mdd_dpo.dpoi_index; + vnet_buffer(b0)->ip.rpf_id = mdd0->mdd_rpf_id; + vnet_buffer(b1)->ip.rpf_id = mdd1->mdd_rpf_id; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_disposition_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + + tr->mdd = mddi0; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_disposition_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + tr->mdd = mddi1; + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + mpls_disp_dpo_t *mdd0; + vlib_buffer_t * b0; + u32 bi0, mddi0; + u32 next0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* dst lookup was done by ip4 lookup */ + mddi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + mdd0 = mpls_disp_dpo_get(mddi0); + + if (payload_is_ip4) + { + /* + * decrement the TTL on ingress to the LSP + */ + } + else if (payload_is_ip6) + { + /* + * decrement the TTL on ingress to the LSP + */ + } + else + { + } + + next0 = mdd0->mdd_dpo.dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mdd0->mdd_dpo.dpoi_index; + vnet_buffer(b0)->ip.rpf_id = mdd0->mdd_rpf_id; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_disposition_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->mdd = mddi0; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static u8 * +format_mpls_label_disposition_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + CLIB_UNUSED (mpls_label_disposition_trace_t * t); + + t = va_arg (*args, mpls_label_disposition_trace_t *); + + s = format(s, "disp:%d", t->mdd); + return (s); +} + +static uword +ip4_mpls_label_disposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_label_disposition_inline(vm, node, frame, 1, 0)); +} + +VLIB_REGISTER_NODE (ip4_mpls_label_disposition_node) = { + .function = ip4_mpls_label_disposition, + .name = "ip4-mpls-label-disposition", + .vector_size = sizeof (u32), + + .format_trace = 
format_mpls_label_disposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "ip4-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_disposition_node, + ip4_mpls_label_disposition) + +static uword +ip6_mpls_label_disposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_label_disposition_inline(vm, node, frame, 0, 1)); +} + +VLIB_REGISTER_NODE (ip6_mpls_label_disposition_node) = { + .function = ip6_mpls_label_disposition, + .name = "ip6-mpls-label-disposition", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_label_disposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "ip6-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_disposition_node, + ip6_mpls_label_disposition) + +static void +mpls_disp_dpo_mem_show (void) +{ + fib_show_memory_usage("MPLS disposition", + pool_elts(mpls_disp_dpo_pool), + pool_len(mpls_disp_dpo_pool), + sizeof(mpls_disp_dpo_t)); +} + +const static dpo_vft_t mdd_vft = { + .dv_lock = mpls_disp_dpo_lock, + .dv_unlock = mpls_disp_dpo_unlock, + .dv_format = format_mpls_disp_dpo, + .dv_mem_show = mpls_disp_dpo_mem_show, +}; + +const static char* const mpls_label_disp_ip4_nodes[] = +{ + "ip4-mpls-label-disposition", + NULL, +}; +const static char* const mpls_label_disp_ip6_nodes[] = +{ + "ip6-mpls-label-disposition", + NULL, +}; +const static char* const * const mpls_label_disp_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = mpls_label_disp_ip4_nodes, + [DPO_PROTO_IP6] = mpls_label_disp_ip6_nodes, +}; + + +void +mpls_disp_dpo_module_init (void) +{ + dpo_register(DPO_MPLS_DISPOSITION, &mdd_vft, mpls_label_disp_nodes); +} diff --git a/src/vnet/dpo/mpls_disposition.h b/src/vnet/dpo/mpls_disposition.h new file mode 100644 index 00000000..9c015083 --- /dev/null +++ b/src/vnet/dpo/mpls_disposition.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MPLS_DISP_DPO_H__ +#define __MPLS_DISP_DPO_H__ + +#include +#include +#include +#include + +/** + * A representation of the disposition of an MPLS label in the data-path + */ +typedef struct mpls_disp_dpo_t +{ + /** + * Next DPO in the graph + */ + dpo_id_t mdd_dpo; + + /** + * The protocol of the payload/packets that are being encapped + */ + dpo_proto_t mdd_payload_proto; + + /** + * RPF-ID (if this is an mcast disposition) + */ + fib_rpf_id_t mdd_rpf_id; + + /** + * Number of locks/users of the label + */ + u16 mdd_locks; +} mpls_disp_dpo_t; + +/** + * @brief Assert that the MPLS disposition object is less than a cache line in size. + * Should this get any bigger then we will need to reconsider how many labels + * can be pushed in one object. + */ +_Static_assert((sizeof(mpls_disp_dpo_t) <= CLIB_CACHE_LINE_BYTES), + "MPLS Disposition DPO is larger than one cache line."); + +/** + * @brief Create an MPLS disposition object + * + * @param payload_proto The protocol of the payload packets that will + * be imposed with this label header. 
+ * @param dpo The parent of the created MPLS label object + */ +extern index_t mpls_disp_dpo_create(dpo_proto_t payload_proto, + fib_rpf_id_t rpf_id, + const dpo_id_t *dpo); + +extern u8* format_mpls_disp_dpo(u8 *s, va_list *args); + + +/* + * Encapsulation violation for fast data-path access + */ +extern mpls_disp_dpo_t *mpls_disp_dpo_pool; + +static inline mpls_disp_dpo_t * +mpls_disp_dpo_get (index_t index) +{ + return (pool_elt_at_index(mpls_disp_dpo_pool, index)); +} + +extern void mpls_disp_dpo_module_init(void); + +#endif diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index be9b2850..4d84b900 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -562,7 +562,7 @@ VLIB_REGISTER_NODE (mpls_label_imposition_node) = { .format_trace = format_mpls_label_imposition_trace, .n_next_nodes = 1, .next_nodes = { - [0] = "error-drop", + [0] = "mpls-drop", } }; VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node, @@ -584,7 +584,7 @@ VLIB_REGISTER_NODE (ip4_mpls_label_imposition_node) = { .format_trace = format_mpls_label_imposition_trace, .n_next_nodes = 1, .next_nodes = { - [0] = "error-drop", + [0] = "ip4-drop", } }; VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_imposition_node, @@ -606,7 +606,7 @@ VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = { .format_trace = format_mpls_label_imposition_trace, .n_next_nodes = 1, .next_nodes = { - [0] = "error-drop", + [0] = "ip6-drop", } }; VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_imposition_node, diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c index e25ceae9..9fdb9a05 100644 --- a/src/vnet/dpo/replicate_dpo.c +++ b/src/vnet/dpo/replicate_dpo.c @@ -17,6 +17,7 @@ #include #include #include +#include #undef REP_DEBUG @@ -106,6 +107,7 @@ replicate_format (index_t repi, dpo_id_t *buckets; u32 i; + repi &= ~MPLS_IS_REPLICATE; rep = replicate_get(repi); vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to); buckets = replicate_get_buckets(rep); @@ -187,6 +189,7 @@ replicate_set_bucket (index_t repi, replicate_t *rep; dpo_id_t *buckets; + repi &= ~MPLS_IS_REPLICATE; rep = replicate_get(repi); buckets = replicate_get_buckets(rep); @@ -199,11 +202,13 @@ int replicate_is_drop (const dpo_id_t *dpo) { replicate_t *rep; + index_t repi; if (DPO_REPLICATE != dpo->dpoi_type) return (0); - rep = replicate_get(dpo->dpoi_index); + repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE; + rep = replicate_get(repi); if (1 == rep->rep_n_buckets) { @@ -218,6 +223,7 @@ replicate_get_bucket (index_t repi, { replicate_t *rep; + repi &= ~MPLS_IS_REPLICATE; rep = replicate_get(repi); return (replicate_get_bucket_i(rep, bucket)); @@ -288,9 +294,11 @@ replicate_multipath_update (const dpo_id_t *dpo, dpo_id_t *tmp_dpo; u32 ii, n_buckets; replicate_t *rep; + index_t repi; ASSERT(DPO_REPLICATE == dpo->dpoi_type); - rep = replicate_get(dpo->dpoi_index); + repi = dpo->dpoi_index & ~MPLS_IS_REPLICATE; + rep = replicate_get(repi); nhs = replicate_multipath_next_hop_fixup(next_hops, rep->rep_proto); n_buckets = vec_len(nhs); @@ -718,7 +726,7 @@ format_replicate_trace (u8 * s, va_list * args) s = format (s, "replicate: %d via %U", t->rep_index, - format_dpo_id, &t->dpo); + format_dpo_id, &t->dpo, 0); return s; } @@ -731,7 +739,7 @@ ip4_replicate (vlib_main_t * vm, } /** - * @brief + * @brief IP4 replication node */ VLIB_REGISTER_NODE (ip4_replicate_node) = { .function = ip4_replicate, @@ -744,7 +752,7 @@ VLIB_REGISTER_NODE (ip4_replicate_node) = { .format_trace = format_replicate_trace, 
.n_next_nodes = 1, .next_nodes = { - [0] = "error-drop", + [0] = "ip4-drop", }, }; @@ -757,7 +765,7 @@ ip6_replicate (vlib_main_t * vm, } /** - * @brief + * @brief IPv6 replication node */ VLIB_REGISTER_NODE (ip6_replicate_node) = { .function = ip6_replicate, @@ -770,7 +778,33 @@ VLIB_REGISTER_NODE (ip6_replicate_node) = { .format_trace = format_replicate_trace, .n_next_nodes = 1, .next_nodes = { - [0] = "error-drop", + [0] = "ip6-drop", + }, +}; + +static uword +mpls_replicate (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (replicate_inline (vm, node, frame)); +} + +/** + * @brief MPLS replication node + */ +VLIB_REGISTER_NODE (mpls_replicate_node) = { + .function = mpls_replicate, + .name = "mpls-replicate", + .vector_size = sizeof (u32), + + .n_errors = ARRAY_LEN(replicate_dpo_error_strings), + .error_strings = replicate_dpo_error_strings, + + .format_trace = format_replicate_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "mpls-drop", }, }; diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h index 77273015..7383184a 100644 --- a/src/vnet/dpo/replicate_dpo.h +++ b/src/vnet/dpo/replicate_dpo.h @@ -25,6 +25,7 @@ #include #include #include +#include /** * replicate main @@ -119,6 +120,7 @@ extern replicate_t *replicate_pool; static inline replicate_t* replicate_get (index_t repi) { + repi &= ~MPLS_IS_REPLICATE; return (pool_elt_at_index(replicate_pool, repi)); } diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index c74a097e..dd509193 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -507,6 +507,7 @@ arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) case IP_LOOKUP_NEXT_PUNT: case IP_LOOKUP_NEXT_LOCAL: case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: case IP_LOOKUP_NEXT_MIDCHAIN: case IP_LOOKUP_NEXT_ICMP_ERROR: case IP_LOOKUP_N_NEXT: diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index 335e3f9f..9ac30bc6 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -115,7 +115,7 @@ ethernet_build_rewrite (vnet_main_t * vnm, #define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break _(IP4, IP4); _(IP6, IP6); - _(MPLS, MPLS_UNICAST); + _(MPLS, MPLS); _(ARP, ARP); #undef _ default: diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index f7787ed2..5305012f 100755 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -249,7 +249,7 @@ determine_next_node (ethernet_main_t * em, { *next0 = em->l3_next.input_next_ip6; } - else if (type0 == ETHERNET_TYPE_MPLS_UNICAST) + else if (type0 == ETHERNET_TYPE_MPLS) { *next0 = em->l3_next.input_next_mpls; @@ -1252,7 +1252,7 @@ next_by_ethertype_register (next_by_ethertype_t * l3_next, { l3_next->input_next_ip6 = next_index; } - else if (ethertype == ETHERNET_TYPE_MPLS_UNICAST) + else if (ethertype == ETHERNET_TYPE_MPLS) { l3_next->input_next_mpls = next_index; } diff --git a/src/vnet/ethernet/types.def b/src/vnet/ethernet/types.def index 643f3152..7dab8ee1 100644 --- a/src/vnet/ethernet/types.def +++ b/src/vnet/ethernet/types.def @@ -85,8 +85,8 @@ ethernet_type (0x876D, SECURE_DATA) ethernet_type (0x8808, MAC_CONTROL) ethernet_type (0x8809, SLOW_PROTOCOLS) ethernet_type (0x880B, PPP) -ethernet_type (0x8847, MPLS_UNICAST) -ethernet_type (0x8848, MPLS_MULTICAST) +ethernet_type (0x8847, MPLS) +ethernet_type (0x8848, MPLS_UPSTREAM_ASSIGNED) ethernet_type (0x8863, PPPOE_DISCOVERY) ethernet_type (0x8864, PPPOE_SESSION) ethernet_type (0x886D, INTEL_ANS) 
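The replicate_dpo.c hunks above and the fib_entry.c hunk below rely on a single flag bit, MPLS_IS_REPLICATE, folded into the DPO index: it records whether an entry's fe_lb holds a load-balance or a replicate object, every accessor masks it off before indexing its pool, and fib_entry_contribute_forwarding() strips it so the special index never escapes to clients. The following is a minimal, self-contained sketch of that index-flag technique; the names and the choice of the top bit are illustrative assumptions, not VPP's actual definitions.

/* sketch: distinguishing two object pools with one bit of the index */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t index_t;

/* assumed flag bit, standing in for MPLS_IS_REPLICATE */
#define IS_REPLICATE (1u << 31)

static const char *
kind (index_t dpoi_index)
{
    /* the bit says which pool the index refers to */
    return (dpoi_index & IS_REPLICATE) ? "replicate" : "load-balance";
}

static index_t
pool_slot (index_t dpoi_index)
{
    /* every accessor masks the flag off before indexing its pool,
     * mirroring the 'repi &= ~MPLS_IS_REPLICATE' in replicate_get() */
    return (dpoi_index & ~IS_REPLICATE);
}

int
main (void)
{
    index_t lb = 7;                 /* a plain load-balance index */
    index_t rep = 7 | IS_REPLICATE; /* slot 7 of the replicate pool */

    printf ("0x%08x -> %s, slot %u\n",
            (unsigned) lb, kind (lb), (unsigned) pool_slot (lb));
    printf ("0x%08x -> %s, slot %u\n",
            (unsigned) rep, kind (rep), (unsigned) pool_slot (rep));

    /* the flag must never leak to code that indexes the wrong pool */
    assert (pool_slot (rep) == 7);
    return (0);
}

The cost of the trick is one less usable index bit and the duty to mask consistently, which is why the masking recurs in replicate_format(), replicate_set_bucket(), replicate_is_drop(), replicate_get_bucket(), replicate_multipath_update() and replicate_get() above.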
diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h index f8275317..10d0cb58 100644 --- a/src/vnet/fib/fib_api.h +++ b/src/vnet/fib/fib_api.h @@ -24,6 +24,7 @@ add_del_route_check (fib_protocol_t table_proto, fib_protocol_t next_hop_table_proto, u32 next_hop_table_id, u8 create_missing_tables, + u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index); int @@ -33,10 +34,13 @@ add_del_route_t_handler (u8 is_multipath, u8 is_unreach, u8 is_prohibit, u8 is_local, + u8 is_multicast, u8 is_classify, u32 classify_table_index, u8 is_resolve_host, u8 is_resolve_attached, + u8 is_interface_rx, + u8 is_rpf_id, u32 fib_index, const fib_prefix_t * prefix, u8 next_hop_proto_is_ip4, diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index dac1fce9..6f811aa1 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -75,13 +75,7 @@ fib_entry_get_default_chain_type (const fib_entry_t *fib_entry) return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); case FIB_PROTOCOL_MPLS: if (MPLS_EOS == fib_entry->fe_prefix.fp_eos) - /* - * If the entry being asked is a eos-MPLS label entry, - * then use the payload-protocol field, that we stashed there - * for just this purpose - */ - return (fib_forw_chain_type_from_dpo_proto( - fib_entry->fe_prefix.fp_payload_proto)); + return (FIB_FORW_CHAIN_TYPE_MPLS_EOS); else return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); } @@ -370,6 +364,35 @@ fib_entry_contribute_urpf (fib_node_index_t entry_index, return (fib_path_list_contribute_urpf(fib_entry->fe_parent, urpf)); } +/* + * If the client requests a chain for multicast forwarding then swap + * the chain type to one that can provide such transport. + */ +static fib_forward_chain_type_t +fib_entry_chain_type_mcast_to_ucast (fib_forward_chain_type_t fct) +{ + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_MCAST_IP4: + case FIB_FORW_CHAIN_TYPE_MCAST_IP6: + /* + * we can only transport IP multicast packets if there is an + * LSP. + */ + fct = FIB_FORW_CHAIN_TYPE_MPLS_EOS; + break; + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + case FIB_FORW_CHAIN_TYPE_ETHERNET: + case FIB_FORW_CHAIN_TYPE_NSH: + break; + } + + return (fct); +} + /* * fib_entry_contribute_forwarding * @@ -385,6 +408,11 @@ fib_entry_contribute_forwarding (fib_node_index_t fib_entry_index, fib_entry = fib_entry_get(fib_entry_index); + /* + * mfib children ask for mcast chains. fix these to the appropriate ucast types. + */ + fct = fib_entry_chain_type_mcast_to_ucast(fct); + if (fct == fib_entry_get_default_chain_type(fib_entry)) { dpo_copy(dpo, &fib_entry->fe_lb); @@ -414,6 +442,11 @@ fib_entry_contribute_forwarding (fib_node_index_t fib_entry_index, dpo_copy(dpo, &fed->fd_dpo); } + /* + * don't allow the special index indicating replicate.vs.load-balance + * to escape to the clients + */ + dpo->dpoi_index &= ~MPLS_IS_REPLICATE; } const dpo_id_t * diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index a3f75e60..b17a0b64 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -192,6 +192,11 @@ typedef enum fib_entry_attribute_t_ { * The prefix/address is local to this device */ FIB_ENTRY_ATTRIBUTE_LOCAL, + /** + * The prefix/address is a multicast prefix. + * This applies only to MPLS. IP multicast is handled by mfib + */ + FIB_ENTRY_ATTRIBUTE_MULTICAST, /** * The prefix/address exempted from loose uRPF check * To be used with caution @@ -200,7 +205,7 @@ /** * Marker. 
add new entries before this one. */ - FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT, + FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_MULTICAST, } fib_entry_attribute_t; /** @@ -215,7 +220,8 @@ typedef enum fib_entry_attribute_t_ { [FIB_ENTRY_ATTRIBUTE_DROP] = "drop", \ [FIB_ENTRY_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ [FIB_ENTRY_ATTRIBUTE_LOCAL] = "local", \ - [FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT] = "uRPF-exempt" \ + [FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT] = "uRPF-exempt", \ + [FIB_ENTRY_ATTRIBUTE_MULTICAST] = "multicast", \ } #define FOR_EACH_FIB_ATTRIBUTE(_item) \ @@ -232,6 +238,7 @@ typedef enum fib_entry_flag_t_ { FIB_ENTRY_FLAG_LOCAL = (1 << FIB_ENTRY_ATTRIBUTE_LOCAL), FIB_ENTRY_FLAG_IMPORT = (1 << FIB_ENTRY_ATTRIBUTE_IMPORT), FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT = (1 << FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT), + FIB_ENTRY_FLAG_MULTICAST = (1 << FIB_ENTRY_ATTRIBUTE_MULTICAST), } __attribute__((packed)) fib_entry_flag_t; /** @@ -396,7 +403,7 @@ typedef struct fib_entry_t_ { * paint the header straight on without the need to check the packet * type to derive the EOS bit value. */ - dpo_id_t fe_lb; // [FIB_FORW_CHAIN_MPLS_NUM]; + dpo_id_t fe_lb; /** * Vector of source infos. * Most entries will only have 1 source. So we optimise for memory usage, diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c index aa1d5a24..a700282e 100644 --- a/src/vnet/fib/fib_entry_src.c +++ b/src/vnet/fib/fib_entry_src.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -229,8 +230,6 @@ fib_forward_chain_type_t fib_entry_chain_type_fixup (const fib_entry_t *entry, fib_forward_chain_type_t fct) { - ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS == fct); - /* * The EOS chain is a tricky since one cannot know the adjacency * to link to without knowing what the packets payload protocol @@ -238,6 +237,11 @@ fib_entry_chain_type_fixup (const fib_entry_t *entry, */ fib_forward_chain_type_t dfct; + if (FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct) + { + return (fct); + } + dfct = fib_entry_get_default_chain_type(entry); if (FIB_FORW_CHAIN_TYPE_MPLS_EOS == dfct) @@ -303,7 +307,12 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index, * found a matching extension. stack it to obtain the forwarding * info for this path. 
*/ - ctx->next_hops = fib_path_ext_stack(path_ext, ctx->fib_entry, ctx->fct, ctx->next_hops); + ctx->next_hops = + fib_path_ext_stack(path_ext, + ctx->fct, + fib_entry_chain_type_fixup(ctx->fib_entry, + ctx->fct), + ctx->next_hops); } else { @@ -355,6 +364,9 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index, fib_entry_chain_type_fixup(ctx->fib_entry, ctx->fct), &nh->path_dpo); + fib_path_stack_mpls_disp(path_index, + ctx->fib_entry->fe_prefix.fp_payload_proto, + &nh->path_dpo); break; } @@ -424,50 +436,70 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry, /* * first time create */ - flow_hash_config_t fhc; - - fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, - dpo_proto_to_fib(lb_proto)); - dpo_set(dpo_lb, - DPO_LOAD_BALANCE, - lb_proto, - load_balance_create(0, lb_proto, fhc)); + if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_MULTICAST) + { + dpo_set(dpo_lb, + DPO_REPLICATE, + lb_proto, + MPLS_IS_REPLICATE | replicate_create(0, lb_proto)); + } + else + { + flow_hash_config_t fhc; + + fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, + dpo_proto_to_fib(lb_proto)); + dpo_set(dpo_lb, + DPO_LOAD_BALANCE, + lb_proto, + load_balance_create(0, lb_proto, fhc)); + } } - load_balance_multipath_update(dpo_lb, - ctx.next_hops, - fib_entry_calc_lb_flags(&ctx)); - vec_free(ctx.next_hops); - - /* - * if this entry is sourced by the uRPF-exempt source then we - * append the always present local0 interface (index 0) to the - * uRPF list so it is not empty. that way packets pass the loose check. - */ - index_t ui = fib_path_list_get_urpf(esrc->fes_pl); - - if ((fib_entry_is_sourced(fib_entry_get_index(fib_entry), - FIB_SOURCE_URPF_EXEMPT) || - (esrc->fes_entry_flags & FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT))&& - (0 == fib_urpf_check_size(ui))) + if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_MULTICAST) { - /* - * The uRPF list we get from the path-list is shared by all - * other users of the list, but the uRPF exemption applies - * only to this prefix. So we need our own list. - */ - ui = fib_urpf_list_alloc_and_lock(); - fib_urpf_list_append(ui, 0); - fib_urpf_list_bake(ui); - load_balance_set_urpf(dpo_lb->dpoi_index, ui); - fib_urpf_list_unlock(ui); + /* + * MPLS multicast + */ + replicate_multipath_update(dpo_lb, ctx.next_hops); } else { - load_balance_set_urpf(dpo_lb->dpoi_index, ui); + load_balance_multipath_update(dpo_lb, + ctx.next_hops, + fib_entry_calc_lb_flags(&ctx)); + vec_free(ctx.next_hops); + + /* + * if this entry is sourced by the uRPF-exempt source then we + * append the always present local0 interface (index 0) to the + * uRPF list so it is not empty. that way packets pass the loose check. + */ + index_t ui = fib_path_list_get_urpf(esrc->fes_pl); + + if ((fib_entry_is_sourced(fib_entry_get_index(fib_entry), + FIB_SOURCE_URPF_EXEMPT) || + (esrc->fes_entry_flags & FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT))&& + (0 == fib_urpf_check_size(ui))) + { + /* + * The uRPF list we get from the path-list is shared by all + * other users of the list, but the uRPF exemption applies + * only to this prefix. So we need our own list. 
+ */ + ui = fib_urpf_list_alloc_and_lock(); + fib_urpf_list_append(ui, 0); + fib_urpf_list_bake(ui); + load_balance_set_urpf(dpo_lb->dpoi_index, ui); + fib_urpf_list_unlock(ui); + } + else + { + load_balance_set_urpf(dpo_lb->dpoi_index, ui); + } + load_balance_set_fib_entry_flags(dpo_lb->dpoi_index, + fib_entry_get_flags_i(fib_entry)); } - load_balance_set_fib_entry_flags(dpo_lb->dpoi_index, - fib_entry_get_flags_i(fib_entry)); } void @@ -887,21 +919,6 @@ fib_entry_src_action_remove (fib_entry_t *fib_entry, return (sflags); } -static inline int -fib_route_recurses_via_self (const fib_prefix_t *prefix, - const fib_route_path_t *rpath) -{ - /* - * not all zeros next hop && - * is recursive path && - * nexthop is same as the route's address - */ - return ((!ip46_address_is_zero(&rpath->frp_addr)) && - (~0 == rpath->frp_sw_if_index) && - (0 == ip46_address_cmp(&rpath->frp_addr, &prefix->fp_addr))); - -} - /* * fib_route_attached_cross_table * @@ -962,14 +979,14 @@ fib_entry_src_flags_2_path_list_flags (fib_entry_flag_t eflags) { plf |= FIB_PATH_LIST_FLAG_DROP; } - if (eflags & FIB_ENTRY_FLAG_LOCAL) - { - plf |= FIB_PATH_LIST_FLAG_LOCAL; - } if (eflags & FIB_ENTRY_FLAG_EXCLUSIVE) { plf |= FIB_PATH_LIST_FLAG_EXCLUSIVE; } + if (eflags & FIB_ENTRY_FLAG_LOCAL) + { + plf |= FIB_PATH_LIST_FLAG_LOCAL; + } return (plf); } @@ -980,25 +997,6 @@ fib_entry_flags_update (const fib_entry_t *fib_entry, fib_path_list_flags_t *pl_flags, fib_entry_src_t *esrc) { - /* - * don't allow the addition of a recursive looped path for prefix - * via itself. - */ - if (fib_route_recurses_via_self(&fib_entry->fe_prefix, rpath)) - { - /* - * force the install of a drop path-list. - * we want the entry to have some path-list, mainly so - * the dodgy path can be rmeoved when the source stops playing - * silly buggers. - */ - *pl_flags |= FIB_PATH_LIST_FLAG_DROP; - } - else - { - *pl_flags &= ~FIB_PATH_LIST_FLAG_DROP; - } - if ((esrc->fes_src == FIB_SOURCE_API) || (esrc->fes_src == FIB_SOURCE_CLI)) { diff --git a/src/vnet/fib/fib_internal.h b/src/vnet/fib/fib_internal.h index 2d980bcc..8abc0e07 100644 --- a/src/vnet/fib/fib_internal.h +++ b/src/vnet/fib/fib_internal.h @@ -25,6 +25,7 @@ #undef FIB_DEBUG extern void fib_prefix_from_mpls_label(mpls_label_t label, + mpls_eos_bit_t eos, fib_prefix_t *prf); extern int fib_route_path_cmp(const fib_route_path_t *rpath1, diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index 6b202a97..f81f4170 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -66,6 +68,10 @@ typedef enum fib_path_type_t_ { * deag. Link to a lookup adj in the next table */ FIB_PATH_TYPE_DEAG, + /** + * interface receive. + */ + FIB_PATH_TYPE_INTF_RX, /** * receive. it's for-us. */ @@ -88,6 +94,7 @@ typedef enum fib_path_type_t_ { [FIB_PATH_TYPE_SPECIAL] = "special", \ [FIB_PATH_TYPE_EXCLUSIVE] = "exclusive", \ [FIB_PATH_TYPE_DEAG] = "deag", \ + [FIB_PATH_TYPE_INTF_RX] = "intf-rx", \ [FIB_PATH_TYPE_RECEIVE] = "receive", \ } @@ -220,10 +227,16 @@ typedef struct fib_path_t_ { * The next-hop */ ip46_address_t fp_ip; - /** - * The local label to resolve through. - */ - mpls_label_t fp_local_label; + struct { + /** + * The local label to resolve through. + */ + mpls_label_t fp_local_label; + /** + * The EOS bit of the resolving label + */ + mpls_eos_bit_t fp_eos; + }; } fp_nh; /** * The FIB table index in which to find the next-hop. 
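With fp_eos added to the recursive next-hop above, a recursive MPLS path now resolves through a (local-label, EOS) pair rather than a label alone. MPLS prefixes throughout this patch use fp_len = 21, i.e. 20 bits of label plus the EOS bit; a sketch of a key packing consistent with that (the patch's real helper is mpls_fib_entry_mk_key(), whose exact layout is assumed here, not shown in this diff):

    #include <stdint.h>

    /* sketch: pack a 20-bit MPLS label and the EOS bit into a 21-bit key */
    static inline uint32_t
    mpls_key_mk (uint32_t label, uint32_t eos)
    {
        return (((label & 0xfffffu) << 1) | (eos & 0x1u));
    }

Keying on the pair is what lets {label, EOS} and {label, non-EOS} be distinct FIB entries, for example an EOS entry backed by a replicate while the non-EOS entry remains a load-balance.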
@@ -254,6 +267,10 @@ typedef struct fib_path_t_ { * The FIB index in which to perform the next lookup */ fib_node_index_t fp_tbl_id; + /** + * The RPF-ID to tag the packets with + */ + fib_rpf_id_t fp_rpf_id; } deag; struct { } special; @@ -273,6 +290,12 @@ typedef struct fib_path_t_ { */ ip46_address_t fp_addr; } receive; + struct { + /** + * The interface on which the packets will be input. + */ + u32 fp_interface; + } intf_rx; }; STRUCT_MARK(path_hash_end); @@ -444,9 +467,11 @@ format_fib_path (u8 * s, va_list * args) case FIB_PATH_TYPE_RECURSIVE: if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) { - s = format (s, "via %U", + s = format (s, "via %U %U", format_mpls_unicast_label, - path->recursive.fp_nh.fp_local_label); + path->recursive.fp_nh.fp_local_label, + format_mpls_eos_bit, + path->recursive.fp_nh.fp_eos); } else { @@ -465,6 +490,7 @@ format_fib_path (u8 * s, va_list * args) break; case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_INTF_RX: case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_DEAG: case FIB_PATH_TYPE_EXCLUSIVE: @@ -736,6 +762,7 @@ fib_path_unresolve (fib_path_t *path) break; case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_INTF_RX: case FIB_PATH_TYPE_DEAG: /* * these hold only the path's DPO, which is reset below. @@ -754,16 +781,24 @@ fib_path_unresolve (fib_path_t *path) } static fib_forward_chain_type_t -fib_path_proto_to_chain_type (fib_protocol_t proto) +fib_path_to_chain_type (const fib_path_t *path) { - switch (proto) + switch (path->fp_nh_proto) { case FIB_PROTOCOL_IP4: return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); case FIB_PROTOCOL_IP6: return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); case FIB_PROTOCOL_MPLS: - return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + if (FIB_PATH_TYPE_RECURSIVE == path->fp_type && + MPLS_EOS == path->recursive.fp_nh.fp_eos) + { + return (FIB_FORW_CHAIN_TYPE_MPLS_EOS); + } + else + { + return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + } } return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); } @@ -793,7 +828,7 @@ fib_path_back_walk_notify (fib_node_t *node, */ fib_path_recursive_adj_update( path, - fib_path_proto_to_chain_type(path->fp_nh_proto), + fib_path_to_chain_type(path), &path->fp_dpo); } if ((FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) || @@ -931,6 +966,8 @@ FIXME comment path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP; } break; + case FIB_PATH_TYPE_INTF_RX: + ASSERT(0); case FIB_PATH_TYPE_DEAG: /* * FIXME When VRF delete is allowed this will need a poke.
@@ -986,6 +1023,14 @@ fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath) cfg_flags |= FIB_PATH_CFG_FLAG_LOCAL; if (rpath->frp_flags & FIB_ROUTE_PATH_ATTACHED) cfg_flags |= FIB_PATH_CFG_FLAG_ATTACHED; + if (rpath->frp_flags & FIB_ROUTE_PATH_INTF_RX) + cfg_flags |= FIB_PATH_CFG_FLAG_INTF_RX; + if (rpath->frp_flags & FIB_ROUTE_PATH_RPF_ID) + cfg_flags |= FIB_PATH_CFG_FLAG_RPF_ID; + if (rpath->frp_flags & FIB_ROUTE_PATH_EXCLUSIVE) + cfg_flags |= FIB_PATH_CFG_FLAG_EXCLUSIVE; + if (rpath->frp_flags & FIB_ROUTE_PATH_DROP) + cfg_flags |= FIB_PATH_CFG_FLAG_DROP; return (cfg_flags); } @@ -998,8 +1043,6 @@ fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath) */ fib_node_index_t fib_path_create (fib_node_index_t pl_index, - fib_protocol_t nh_proto, - fib_path_cfg_flags_t flags, const fib_route_path_t *rpath) { fib_path_t *path; @@ -1012,7 +1055,7 @@ fib_path_create (fib_node_index_t pl_index, dpo_reset(&path->fp_dpo); path->fp_pl_index = pl_index; - path->fp_nh_proto = nh_proto; + path->fp_nh_proto = rpath->frp_proto; path->fp_via_fib = FIB_NODE_INDEX_INVALID; path->fp_weight = rpath->frp_weight; if (0 == path->fp_weight) @@ -1023,8 +1066,7 @@ fib_path_create (fib_node_index_t pl_index, */ path->fp_weight = 1; } - path->fp_cfg_flags = flags; - path->fp_cfg_flags |= fib_path_route_flags_to_cfg_flags(rpath); + path->fp_cfg_flags = fib_path_route_flags_to_cfg_flags(rpath); /* * deduce the path's type from the parameters and save what is needed. @@ -1035,6 +1077,17 @@ fib_path_create (fib_node_index_t pl_index, path->receive.fp_interface = rpath->frp_sw_if_index; path->receive.fp_addr = rpath->frp_addr; } + else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_INTF_RX) + { + path->fp_type = FIB_PATH_TYPE_INTF_RX; + path->intf_rx.fp_interface = rpath->frp_sw_if_index; + } + else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RPF_ID) + { + path->fp_type = FIB_PATH_TYPE_DEAG; + path->deag.fp_tbl_id = rpath->frp_fib_index; + path->deag.fp_rpf_id = rpath->frp_rpf_id; + } else if (~0 != rpath->frp_sw_if_index) { if (ip46_address_is_zero(&rpath->frp_addr)) @@ -1069,6 +1122,7 @@ fib_path_create (fib_node_index_t pl_index, if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) { path->recursive.fp_nh.fp_local_label = rpath->frp_local_label; + path->recursive.fp_nh.fp_eos = rpath->frp_eos; } else { @@ -1238,17 +1292,13 @@ fib_path_cmp_i (const fib_path_t *path1, res = ip46_address_cmp(&path1->attached_next_hop.fp_nh, &path2->attached_next_hop.fp_nh); if (0 == res) { - res = vnet_sw_interface_compare( - vnet_get_main(), - path1->attached_next_hop.fp_interface, - path2->attached_next_hop.fp_interface); + res = (path1->attached_next_hop.fp_interface - + path2->attached_next_hop.fp_interface); } break; case FIB_PATH_TYPE_ATTACHED: - res = vnet_sw_interface_compare( - vnet_get_main(), - path1->attached.fp_interface, - path2->attached.fp_interface); + res = (path1->attached.fp_interface - + path2->attached.fp_interface); break; case FIB_PATH_TYPE_RECURSIVE: res = ip46_address_cmp(&path1->recursive.fp_nh, &path2->recursive.fp_nh); break; case FIB_PATH_TYPE_DEAG: res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id); + if (0 == res) + { + res = (path1->deag.fp_rpf_id - path2->deag.fp_rpf_id); + } + break; + case FIB_PATH_TYPE_INTF_RX: + res = (path1->intf_rx.fp_interface - path2->intf_rx.fp_interface); break; case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_RECEIVE: @@ -1336,22 +1393,22 @@ fib_path_cmp_w_route_path (fib_node_index_t path_index, &rpath->frp_addr); if (0 == res) {
- res = vnet_sw_interface_compare( - vnet_get_main(), - path->attached_next_hop.fp_interface, - rpath->frp_sw_if_index); + res = (path->attached_next_hop.fp_interface - + rpath->frp_sw_if_index); } break; case FIB_PATH_TYPE_ATTACHED: - res = vnet_sw_interface_compare( - vnet_get_main(), - path->attached.fp_interface, - rpath->frp_sw_if_index); + res = (path->attached.fp_interface - rpath->frp_sw_if_index); break; case FIB_PATH_TYPE_RECURSIVE: if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) { res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label; + + if (res == 0) + { + res = path->recursive.fp_nh.fp_eos - rpath->frp_eos; + } } else { @@ -1364,9 +1421,16 @@ fib_path_cmp_w_route_path (fib_node_index_t path_index, res = (path->recursive.fp_tbl_id - rpath->frp_fib_index); } break; + case FIB_PATH_TYPE_INTF_RX: + res = (path->intf_rx.fp_interface - rpath->frp_sw_if_index); + break; case FIB_PATH_TYPE_DEAG: res = (path->deag.fp_tbl_id - rpath->frp_fib_index); - break; + if (0 == res) + { + res = (path->deag.fp_rpf_id - rpath->frp_rpf_id); + } + break; case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_RECEIVE: case FIB_PATH_TYPE_EXCLUSIVE: @@ -1465,6 +1529,7 @@ fib_path_recursive_loop_detect (fib_node_index_t path_index, case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_DEAG: case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_INTF_RX: case FIB_PATH_TYPE_EXCLUSIVE: /* * these path types cannot be part of a loop, since they are the leaves @@ -1563,7 +1628,9 @@ fib_path_resolve (fib_node_index_t path_index) if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) { - fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, &pfx); + fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, + path->recursive.fp_nh.fp_eos, + &pfx); } else { @@ -1592,7 +1659,7 @@ fib_path_resolve (fib_node_index_t path_index) */ fib_path_recursive_adj_update( path, - fib_path_proto_to_chain_type(path->fp_nh_proto), + fib_path_to_chain_type(path), &path->fp_dpo); break; @@ -1605,16 +1672,25 @@ fib_path_resolve (fib_node_index_t path_index) drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); break; case FIB_PATH_TYPE_DEAG: + { /* * Resolve via a lookup DPO. * FIXME. control plane should add routes with a table ID */ - lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id, - fib_proto_to_dpo(path->fp_nh_proto), - LOOKUP_INPUT_DST_ADDR, - LOOKUP_TABLE_FROM_CONFIG, - &path->fp_dpo); + lookup_cast_t cast; + + cast = (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RPF_ID ? + LOOKUP_MULTICAST : + LOOKUP_UNICAST); + + lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id, + fib_proto_to_dpo(path->fp_nh_proto), + cast, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_CONFIG, + &path->fp_dpo); break; + } case FIB_PATH_TYPE_RECEIVE: /* * Resolve via a receive DPO. @@ -1624,6 +1700,15 @@ fib_path_resolve (fib_node_index_t path_index) &path->receive.fp_addr, &path->fp_dpo); break; + case FIB_PATH_TYPE_INTF_RX: { + /* + * Resolve via an interface receive DPO.
+ */ + interface_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto), + path->intf_rx.fp_interface, + &path->fp_dpo); + break; + } case FIB_PATH_TYPE_EXCLUSIVE: /* * Resolve via the user provided DPO @@ -1652,6 +1737,7 @@ fib_path_get_resolving_interface (fib_node_index_t path_index) return (path->receive.fp_interface); case FIB_PATH_TYPE_RECURSIVE: return (fib_entry_get_resolving_interface(path->fp_via_fib)); + case FIB_PATH_TYPE_INTF_RX: case FIB_PATH_TYPE_SPECIAL: case FIB_PATH_TYPE_DEAG: case FIB_PATH_TYPE_EXCLUSIVE: @@ -1743,6 +1829,7 @@ fib_path_contribute_urpf (fib_node_index_t path_index, case FIB_PATH_TYPE_DEAG: case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_INTF_RX: /* * these path types don't link to an adj */ @@ -1750,6 +1837,44 @@ fib_path_contribute_urpf (fib_node_index_t path_index, } } +void +fib_path_stack_mpls_disp (fib_node_index_t path_index, + dpo_proto_t payload_proto, + dpo_id_t *dpo) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(path); + + switch (path->fp_type) + { + case FIB_PATH_TYPE_DEAG: + { + dpo_id_t tmp = DPO_INVALID; + + dpo_copy(&tmp, dpo); + dpo_set(dpo, + DPO_MPLS_DISPOSITION, + payload_proto, + mpls_disp_dpo_create(payload_proto, + path->deag.fp_rpf_id, + &tmp)); + dpo_reset(&tmp); + break; + } + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_ATTACHED: + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + case FIB_PATH_TYPE_RECURSIVE: + case FIB_PATH_TYPE_INTF_RX: + case FIB_PATH_TYPE_EXCLUSIVE: + case FIB_PATH_TYPE_SPECIAL: + break; + } +} + void fib_path_contribute_forwarding (fib_node_index_t path_index, fib_forward_chain_type_t fct, @@ -1769,7 +1894,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, * This then represents the path's 'native' protocol; IP. * For all others will need to go find something else. 
*/ - if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct) + if (fib_path_to_chain_type(path) == fct) { dpo_copy(dpo, &path->fp_dpo); } @@ -1813,10 +1938,10 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: - fib_path_recursive_adj_update(path, fct, dpo); - break; case FIB_FORW_CHAIN_TYPE_MCAST_IP4: case FIB_FORW_CHAIN_TYPE_MCAST_IP6: + fib_path_recursive_adj_update(path, fct, dpo); + break; case FIB_FORW_CHAIN_TYPE_ETHERNET: case FIB_FORW_CHAIN_TYPE_NSH: ASSERT(0); @@ -1829,13 +1954,14 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID, DPO_PROTO_MPLS, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_CONFIG, dpo); break; + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: - case FIB_FORW_CHAIN_TYPE_MPLS_EOS: dpo_copy(dpo, &path->fp_dpo); break; case FIB_FORW_CHAIN_TYPE_MCAST_IP4: @@ -1870,7 +1996,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, ai = adj_mcast_add_or_lock(path->fp_nh_proto, fib_forw_chain_type_to_link_type(fct), path->attached.fp_interface); - dpo_set(dpo, DPO_ADJACENCY_MCAST, + dpo_set(dpo, DPO_ADJACENCY, fib_forw_chain_type_to_dpo_proto(fct), ai); adj_unlock(ai); @@ -1878,6 +2004,14 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, break; } break; + case FIB_PATH_TYPE_INTF_RX: + /* + * Create the adj needed for sending IP multicast traffic + */ + interface_dpo_add_or_lock(fib_forw_chain_type_to_dpo_proto(fct), + path->intf_rx.fp_interface, + dpo); + break; case FIB_PATH_TYPE_RECEIVE: case FIB_PATH_TYPE_SPECIAL: dpo_copy(dpo, &path->fp_dpo); diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h index 14efc1ab..334be6f5 100644 --- a/src/vnet/fib/fib_path.h +++ b/src/vnet/fib/fib_path.h @@ -69,6 +69,14 @@ typedef enum fib_path_cfg_attribute_t_ { /** - * The path is a for-us path + * The path is an interface receive + */ + FIB_PATH_CFG_ATTRIBUTE_INTF_RX, + /** + * The path is a deag with rpf-id + */ + FIB_PATH_CFG_ATTRIBUTE_RPF_ID, + /** + * The path is a for-us path */ FIB_PATH_CFG_ATTRIBUTE_LOCAL, /** * Marker. Add new types before this one, then update it.
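The two attributes above are requested through the new fib_route_path_t flags. A usage sketch of how a route producer would ask for each path type (field names follow this patch; the table index, RPF-ID value and interface index are illustrative, and frp_sw_if_index/frp_rpf_id share a union, so only the relevant member is set in each case):

    /* a deag path: look up in table 0 and tag packets with RPF-ID 5 */
    fib_route_path_t deag_rpath = {
        .frp_proto = FIB_PROTOCOL_IP4,
        .frp_fib_index = 0,
        .frp_rpf_id = 5,
        .frp_flags = FIB_ROUTE_PATH_RPF_ID,
    };

    /* an interface-rx path: copies appear to arrive on sw_if_index 1 */
    fib_route_path_t intf_rx_rpath = {
        .frp_proto = FIB_PROTOCOL_IP4,
        .frp_sw_if_index = 1,
        .frp_flags = FIB_ROUTE_PATH_INTF_RX,
    };

fib_path_create() turns the first into FIB_PATH_TYPE_DEAG carrying an fp_rpf_id, and the second into FIB_PATH_TYPE_INTF_RX resolved via an interface DPO, as the fib_path.c hunks above show.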
@@ -88,6 +96,8 @@ typedef enum fib_path_cfg_attribute_t_ { [FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED] = "resolve-attached", \ [FIB_PATH_CFG_ATTRIBUTE_LOCAL] = "local", \ [FIB_PATH_CFG_ATTRIBUTE_ATTACHED] = "attached", \ + [FIB_PATH_CFG_ATTRIBUTE_INTF_RX] = "interface-rx", \ + [FIB_PATH_CFG_ATTRIBUTE_RPF_ID] = "rpf-id", \ } #define FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(_item) \ @@ -106,6 +116,8 @@ typedef enum fib_path_cfg_flags_t_ { FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED), FIB_PATH_CFG_FLAG_LOCAL = (1 << FIB_PATH_CFG_ATTRIBUTE_LOCAL), FIB_PATH_CFG_FLAG_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_ATTACHED), + FIB_PATH_CFG_FLAG_INTF_RX = (1 << FIB_PATH_CFG_ATTRIBUTE_INTF_RX), + FIB_PATH_CFG_FLAG_RPF_ID = (1 << FIB_PATH_CFG_ATTRIBUTE_RPF_ID), } __attribute__ ((packed)) fib_path_cfg_flags_t; @@ -117,8 +129,6 @@ extern u8 *fib_path_adj_format(fib_node_index_t pi, extern u8 * format_fib_path(u8 * s, va_list * args); extern fib_node_index_t fib_path_create(fib_node_index_t pl_index, - fib_protocol_t nh_proto, - fib_path_cfg_flags_t flags, const fib_route_path_t *path); extern fib_node_index_t fib_path_create_special(fib_node_index_t pl_index, fib_protocol_t nh_proto, @@ -145,6 +155,9 @@ extern load_balance_path_t * fib_path_append_nh_for_multipath_hash( fib_node_index_t path_index, fib_forward_chain_type_t fct, load_balance_path_t *hash_key); +extern void fib_path_stack_mpls_disp(fib_node_index_t path_index, + dpo_proto_t payload_proto, + dpo_id_t *dpo); extern void fib_path_contribute_forwarding(fib_node_index_t path_index, fib_forward_chain_type_t type, dpo_id_t *dpo); diff --git a/src/vnet/fib/fib_path_ext.c b/src/vnet/fib/fib_path_ext.c index f75b5626..08293bcf 100644 --- a/src/vnet/fib/fib_path_ext.c +++ b/src/vnet/fib/fib_path_ext.c @@ -103,8 +103,8 @@ fib_path_ext_is_imp_null (fib_path_ext_t *path_ext) load_balance_path_t * fib_path_ext_stack (fib_path_ext_t *path_ext, - const fib_entry_t *entry, fib_forward_chain_type_t child_fct, + fib_forward_chain_type_t imp_null_fct, load_balance_path_t *nhs) { fib_forward_chain_type_t parent_fct; @@ -129,7 +129,7 @@ fib_path_ext_stack (fib_path_ext_t *path_ext, */ if (fib_path_ext_is_imp_null(path_ext)) { - parent_fct = fib_entry_chain_type_fixup(entry, child_fct); + parent_fct = imp_null_fct; } else { diff --git a/src/vnet/fib/fib_path_ext.h b/src/vnet/fib/fib_path_ext.h index cf8f8df0..d617700d 100644 --- a/src/vnet/fib/fib_path_ext.h +++ b/src/vnet/fib/fib_path_ext.h @@ -18,6 +18,7 @@ #include #include +#include /** * A path extension is a per-entry addition to the forwarding information @@ -61,8 +62,8 @@ extern void fib_path_ext_resolve(fib_path_ext_t *path_ext, fib_node_index_t path_list_index); extern load_balance_path_t *fib_path_ext_stack(fib_path_ext_t *path_ext, - const struct fib_entry_t_ *entry, fib_forward_chain_type_t fct, + fib_forward_chain_type_t imp_null_fct, load_balance_path_t *nhs); #endif diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c index b9a391b3..ea6565dd 100644 --- a/src/vnet/fib/fib_path_list.c +++ b/src/vnet/fib/fib_path_list.c @@ -40,13 +40,6 @@ typedef struct fib_path_list_t_ { */ fib_path_list_flags_t fpl_flags; - /** - * The next-hop protocol for the paths in this path list. - * Note that fixing the proto here means we don't support a mix of - * v4 and v6 paths. ho hum. - */ - fib_protocol_t fpl_nh_proto; - /** * Vector of paths indicies for all configured paths. * For shareable path-lists this list MUST not change. 
@@ -57,6 +50,11 @@ typedef struct fib_path_list_t_ { * the RPF list calculated for this path list */ fib_node_index_t fpl_urpf; + + /** + * Hash table of paths. valid only with INDEXED flag + */ + uword *fpl_db; } fib_path_list_t; /* @@ -131,7 +129,6 @@ format_fib_path_list (u8 * s, va_list * args) s = format (s, " index:%u", fib_path_list_get_index(path_list)); s = format (s, " locks:%u", path_list->fpl_node.fn_locks); - s = format (s, " proto:%U", format_fib_protocol, path_list->fpl_nh_proto); if (FIB_PATH_LIST_FLAG_NONE != path_list->fpl_flags) { @@ -155,26 +152,6 @@ format_fib_path_list (u8 * s, va_list * args) return (s); } -u8 * -fib_path_list_adjs_format (fib_node_index_t path_list_index, - u32 indent, - u8 * s) -{ - fib_path_list_t *path_list; - u32 i; - - path_list = fib_path_list_get(path_list_index); - - vec_foreach_index (i, path_list->fpl_paths) - { - s = fib_path_adj_format(path_list->fpl_paths[i], - indent, s); - } - - return (s); -} - - u8 * fib_path_list_format (fib_node_index_t path_list_index, u8 * s) @@ -648,27 +625,6 @@ fib_path_list_is_looped (fib_node_index_t path_list_index) return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_LOOPED); } -static fib_path_cfg_flags_t -fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf) -{ - fib_path_cfg_flags_t pf = FIB_PATH_CFG_FLAG_NONE; - - if (plf & FIB_PATH_LIST_FLAG_LOCAL) - { - pf |= FIB_PATH_CFG_FLAG_LOCAL; - } - if (plf & FIB_PATH_LIST_FLAG_DROP) - { - pf |= FIB_PATH_CFG_FLAG_DROP; - } - if (plf & FIB_PATH_LIST_FLAG_EXCLUSIVE) - { - pf |= FIB_PATH_CFG_FLAG_EXCLUSIVE; - } - - return (pf); -} - static fib_path_list_flags_t fib_path_list_flags_fixup (fib_path_list_flags_t flags) { @@ -695,18 +651,15 @@ fib_path_list_create (fib_path_list_flags_t flags, flags = fib_path_list_flags_fixup(flags); path_list = fib_path_list_alloc(&path_list_index); path_list->fpl_flags = flags; - /* - * we'll assume for now all paths are the same next-hop protocol - */ - path_list->fpl_nh_proto = rpaths[0].frp_proto; - vec_foreach_index(i, rpaths) + if (NULL != rpaths) { - vec_add1(path_list->fpl_paths, - fib_path_create(path_list_index, - path_list->fpl_nh_proto, - fib_path_list_flags_2_path_flags(flags), - &rpaths[i])); + vec_foreach_index(i, rpaths) + { + vec_add1(path_list->fpl_paths, + fib_path_create(path_list_index, + &rpaths[i])); + } } /* @@ -748,6 +701,27 @@ fib_path_list_create (fib_path_list_flags_t flags, return (path_list_index); } +static fib_path_cfg_flags_t +fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf) +{ + fib_path_cfg_flags_t pf = FIB_PATH_CFG_FLAG_NONE; + + if (plf & FIB_PATH_LIST_FLAG_DROP) + { + pf |= FIB_PATH_CFG_FLAG_DROP; + } + if (plf & FIB_PATH_LIST_FLAG_EXCLUSIVE) + { + pf |= FIB_PATH_CFG_FLAG_EXCLUSIVE; + } + if (plf & FIB_PATH_LIST_FLAG_LOCAL) + { + pf |= FIB_PATH_CFG_FLAG_LOCAL; + } + + return (pf); +} + fib_node_index_t fib_path_list_create_special (fib_protocol_t nh_proto, fib_path_list_flags_t flags, @@ -758,11 +732,10 @@ fib_path_list_create_special (fib_protocol_t nh_proto, path_list = fib_path_list_alloc(&path_list_index); path_list->fpl_flags = flags; - path_list->fpl_nh_proto = nh_proto; path_index = fib_path_create_special(path_list_index, - path_list->fpl_nh_proto, + nh_proto, fib_path_list_flags_2_path_flags(flags), dpo); vec_add1(path_list->fpl_paths, path_index); @@ -775,6 +748,30 @@ fib_path_list_create_special (fib_protocol_t nh_proto, return (path_list_index); } +/* + * return the index into the path-list's vector of paths of the matching path.
+ * ~0 if not found + */ +u32 +fib_path_list_find_rpath (fib_node_index_t path_list_index, + const fib_route_path_t *rpath) +{ + fib_path_list_t *path_list; + u32 ii; + + path_list = fib_path_list_get(path_list_index); + + vec_foreach_index (ii, path_list->fpl_paths) + { + if (!fib_path_cmp_w_route_path(path_list->fpl_paths[ii], rpath)) + { + return (ii); + } + } + return (~0); +} + + /* * fib_path_list_copy_and_path_add * @@ -782,13 +779,62 @@ fib_path_list_create_special (fib_protocol_t nh_proto, * The path-list returned could either have been newly created, or * can be a shared path-list from the data-base. */ +fib_node_index_t +fib_path_list_path_add (fib_node_index_t path_list_index, + const fib_route_path_t *rpaths) +{ + fib_node_index_t new_path_index, *orig_path_index; + fib_path_list_t *path_list; + + /* + * the path-list is edited in place; no new list is allocated here + */ + path_list = fib_path_list_get(path_list_index); + + ASSERT(1 == vec_len(rpaths)); + ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED)); + + FIB_PATH_LIST_DBG(path_list, "path-add"); + + new_path_index = fib_path_create(path_list_index, + rpaths); + + vec_foreach (orig_path_index, path_list->fpl_paths) + { + /* + * don't add duplicate paths + */ + if (0 == fib_path_cmp(new_path_index, *orig_path_index)) + { + return (*orig_path_index); + } + } + + /* + * Add the new path - no sort, no sharing, no key.. + */ + vec_add1(path_list->fpl_paths, new_path_index); + + FIB_PATH_LIST_DBG(path_list, "path-added"); + + /* + * the list is not shared, so resolve the new path and use it directly + */ + fib_path_resolve(new_path_index); + + return (new_path_index); +} + fib_node_index_t fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index, - fib_path_list_flags_t flags, - const fib_route_path_t *rpaths) + fib_path_list_flags_t flags, + const fib_route_path_t *rpaths) { fib_node_index_t path_index, new_path_index, *orig_path_index; fib_path_list_t *path_list, *orig_path_list; + fib_node_index_t exist_path_list_index; fib_node_index_t path_list_index; fib_node_index_t pi; @@ -806,13 +852,11 @@ fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index, flags = fib_path_list_flags_fixup(flags); path_list->fpl_flags = flags; - path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto; + vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths)); pi = 0; new_path_index = fib_path_create(path_list_index, - path_list->fpl_nh_proto, - fib_path_list_flags_2_path_flags(flags), rpaths); vec_foreach (orig_path_index, orig_path_list->fpl_paths) @@ -845,46 +889,79 @@ fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index, FIB_PATH_LIST_DBG(path_list, "path-added"); /* - * If a shared path list is requested, consult the DB for a match + * check for a matching path-list in the DB. + * If we find one then we can return the existing one and destroy the + * new one just created.
- */ - exist_path_list_index = fib_path_list_db_find(path_list); - if (FIB_NODE_INDEX_INVALID != exist_path_list_index) - { - fib_path_list_destroy(path_list); - path_list_index = exist_path_list_index; - } - else - { - /* - * if there was not a matching path-list, then this - * new one will need inserting into the DB and resolving. - */ - fib_path_list_db_insert(path_list_index); - - path_list = fib_path_list_resolve(path_list); - } + */ + exist_path_list_index = fib_path_list_db_find(path_list); + if (FIB_NODE_INDEX_INVALID != exist_path_list_index) { - /* - * no shared path list requested. resolve and use the one - * just created. - */ - path_list = fib_path_list_resolve(path_list); + fib_path_list_destroy(path_list); - path_list_index = exist_path_list_index; } else { + /* + * if there was not a matching path-list, then this + * new one will need inserting into the DB and resolving. + */ + fib_path_list_db_insert(path_list_index); + + path_list = fib_path_list_resolve(path_list); } return (path_list_index); } +/* + * fib_path_list_path_remove + */ +fib_node_index_t +fib_path_list_path_remove (fib_node_index_t path_list_index, + const fib_route_path_t *rpaths) +{ + fib_node_index_t match_path_index, tmp_path_index; + fib_path_list_t *path_list; + fib_node_index_t pi; + + path_list = fib_path_list_get(path_list_index); + + ASSERT(1 == vec_len(rpaths)); + ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED)); + + FIB_PATH_LIST_DBG(path_list, "path-remove"); + + /* + * create a representation of the path to be removed, so it + * can be used as a comparison object during the search. + */ + tmp_path_index = fib_path_create(path_list_index, + rpaths); + match_path_index = FIB_NODE_INDEX_INVALID; + + vec_foreach_index (pi, path_list->fpl_paths) + { + if (0 == fib_path_cmp(tmp_path_index, + path_list->fpl_paths[pi])) + { + /* + * match - remove it + */ + match_path_index = path_list->fpl_paths[pi]; + fib_path_destroy(match_path_index); + vec_del1(path_list->fpl_paths, pi); + } + } + + /* + * done with the temporary now + */ + fib_path_destroy(tmp_path_index); + + return (match_path_index); +} + /* * fib_path_list_copy_and_path_remove * @@ -911,7 +988,6 @@ fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index, FIB_PATH_LIST_DBG(orig_path_list, "copy-remove"); path_list->fpl_flags = flags; - path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto; /* * allocate as many paths as we might need in one go, rather than * using vec_add to do a few at a time. @@ -927,8 +1003,6 @@ fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index, * can be used as a comparison object during the copy. */ tmp_path_index = fib_path_create(path_list_index, - path_list->fpl_nh_proto, - fib_path_list_flags_2_path_flags(flags), rpaths); vec_foreach (orig_path_index, orig_path_list->fpl_paths) diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h index b4971add..9d246211 100644 --- a/src/vnet/fib/fib_path_list.h +++ b/src/vnet/fib/fib_path_list.h @@ -38,6 +38,11 @@ typedef enum fib_path_list_attribute_t_ { * be searched for each route update. */ FIB_PATH_LIST_ATTRIBUTE_SHARED = FIB_PATH_LIST_ATTRIBUTE_FIRST, + /** + * Indexed means the path-list keeps a hash table of all paths for + * fast lookup. The lookup result is the fib_node_index of the path. + */ + FIB_PATH_LIST_ATTRIBUTE_INDEXED, /** * explicit drop path-list. Used when the entry source needs to * force a drop, despite the fact the path info is present.
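Unlike the copy-and-modify pairs, fib_path_list_path_add() and fib_path_list_path_remove() above edit a non-shared path-list in place; both assert exactly one rpath and that FIB_PATH_LIST_FLAG_SHARED is not set. A usage sketch under those assumptions, with pl and rpaths supplied by the caller:

    /* rpaths: a 1-element vector describing the path to add/remove */
    fib_node_index_t path_index;

    /* appends and resolves the path if no equal path is already present */
    path_index = fib_path_list_path_add (pl, rpaths);

    /* later: drop the matching path from the same (non-shared) list */
    path_index = fib_path_list_path_remove (pl, rpaths);

In-place edits avoid the alloc/copy/DB-lookup cycle of the shared-list API, which suits per-entry path-lists such as those behind the new INDEXED attribute below.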
@@ -73,6 +78,7 @@ typedef enum fib_path_list_attribute_t_ { typedef enum fib_path_list_flags_t_ { FIB_PATH_LIST_FLAG_NONE = 0, FIB_PATH_LIST_FLAG_SHARED = (1 << FIB_PATH_LIST_ATTRIBUTE_SHARED), + FIB_PATH_LIST_FLAG_INDEXED = (1 << FIB_PATH_LIST_ATTRIBUTE_INDEXED), FIB_PATH_LIST_FLAG_DROP = (1 << FIB_PATH_LIST_ATTRIBUTE_DROP), FIB_PATH_LIST_FLAG_LOCAL = (1 << FIB_PATH_LIST_ATTRIBUTE_LOCAL), FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE), @@ -83,10 +89,11 @@ typedef enum fib_path_list_flags_t_ { #define FIB_PATH_LIST_ATTRIBUTES { \ [FIB_PATH_LIST_ATTRIBUTE_SHARED] = "shared", \ + [FIB_PATH_LIST_ATTRIBUTE_INDEXED] = "indexed", \ [FIB_PATH_LIST_ATTRIBUTE_RESOLVED] = "resolved", \ [FIB_PATH_LIST_ATTRIBUTE_DROP] = "drop", \ [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ - [FIB_PATH_LIST_ATTRIBUTE_LOCAL] = "local", \ + [FIB_PATH_LIST_ATTRIBUTE_LOCAL] = "local", \ [FIB_PATH_LIST_ATTRIBUTE_LOOPED] = "looped", \ [FIB_PATH_LIST_ATTRIBUTE_NO_URPF] = "no-uRPF", \ } @@ -110,6 +117,13 @@ extern fib_node_index_t fib_path_list_copy_and_path_remove( fib_node_index_t pl_index, fib_path_list_flags_t flags, const fib_route_path_t *path); +extern fib_node_index_t fib_path_list_path_add ( + fib_node_index_t path_list_index, + const fib_route_path_t *rpaths); +extern fib_node_index_t fib_path_list_path_remove ( + fib_node_index_t path_list_index, + const fib_route_path_t *rpaths); + extern u32 fib_path_list_get_n_paths(fib_node_index_t pl_index); extern void fib_path_list_contribute_forwarding(fib_node_index_t path_list_index, @@ -137,11 +151,11 @@ extern int fib_path_list_is_looped(fib_node_index_t path_list_index); extern fib_protocol_t fib_path_list_get_proto(fib_node_index_t path_list_index); extern u8 * fib_path_list_format(fib_node_index_t pl_index, u8 * s); -extern u8 * fib_path_list_adjs_format(fib_node_index_t pl_index, - u32 indent, - u8 * s); extern index_t fib_path_list_lb_map_add_or_lock(fib_node_index_t pl_index, const fib_node_index_t *pis); +extern u32 fib_path_list_find_rpath (fib_node_index_t path_list_index, + const fib_route_path_t *rpath); + /** * A callback function type for walking a path-list's paths */ diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 6c3162e7..b31f35e3 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -475,8 +475,21 @@ fib_table_entry_special_remove (u32 fib_index, */ static void fib_table_route_path_fixup (const fib_prefix_t *prefix, + fib_entry_flag_t eflags, fib_route_path_t *path) { + /* + * not all zeros next hop && + * is recursive path && + * nexthop is same as the route's address + */ + if ((!ip46_address_is_zero(&path->frp_addr)) && + (~0 == path->frp_sw_if_index) && + (0 == ip46_address_cmp(&path->frp_addr, &prefix->fp_addr))) + { + /* Prefix recurses via itself */ + path->frp_flags |= FIB_ROUTE_PATH_DROP; + } if (fib_prefix_is_host(prefix) && ip46_address_is_zero(&path->frp_addr) && path->frp_sw_if_index != ~0) @@ -484,7 +497,19 @@ fib_table_route_path_fixup (const fib_prefix_t *prefix, path->frp_addr = prefix->fp_addr; path->frp_flags |= FIB_ROUTE_PATH_ATTACHED; } -} + if (eflags & FIB_ENTRY_FLAG_DROP) + { + path->frp_flags |= FIB_ROUTE_PATH_DROP; + } + if (eflags & FIB_ENTRY_FLAG_LOCAL) + { + path->frp_flags |= FIB_ROUTE_PATH_LOCAL; + } + if (eflags & FIB_ENTRY_FLAG_EXCLUSIVE) + { + path->frp_flags |= FIB_ROUTE_PATH_EXCLUSIVE; + } +} fib_node_index_t fib_table_entry_path_add (u32 fib_index, @@ -536,7 +561,7 @@ fib_table_entry_path_add2 (u32 fib_index, for (ii = 0; ii < vec_len(rpath);
ii++) { - fib_table_route_path_fixup(prefix, &rpath[ii]); + fib_table_route_path_fixup(prefix, flags, &rpath[ii]); } if (FIB_NODE_INDEX_INVALID == fib_entry_index) @@ -583,11 +608,6 @@ fib_table_entry_path_remove2 (u32 fib_index, fib_table = fib_table_get(fib_index, prefix->fp_proto); fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); - for (ii = 0; ii < vec_len(rpath); ii++) - { - fib_table_route_path_fixup(prefix, &rpath[ii]); - } - if (FIB_NODE_INDEX_INVALID == fib_entry_index) { /* @@ -605,6 +625,15 @@ fib_table_entry_path_remove2 (u32 fib_index, fib_entry_lock(fib_entry_index); was_sourced = fib_entry_is_sourced(fib_entry_index, source); + for (ii = 0; ii < vec_len(rpath); ii++) + { + fib_table_route_path_fixup( + prefix, + fib_entry_get_flags_for_source(fib_entry_index, + source), + &rpath[ii]); + } + src_flag = fib_entry_path_remove(fib_entry_index, source, rpath); if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag)) @@ -661,7 +690,6 @@ fib_table_entry_path_remove (u32 fib_index, }; fib_route_path_t *paths = NULL; - fib_table_route_path_fixup(prefix, &path); vec_add1(paths, path); fib_table_entry_path_remove2(fib_index, prefix, source, paths); @@ -692,7 +720,7 @@ fib_table_entry_update (u32 fib_index, for (ii = 0; ii < vec_len(paths); ii++) { - fib_table_route_path_fixup(prefix, &paths[ii]); + fib_table_route_path_fixup(prefix, flags, &paths[ii]); } /* * sort the paths provided by the control plane. this means @@ -750,7 +778,6 @@ fib_table_entry_update_one_path (u32 fib_index, }; fib_route_path_t *paths = NULL; - fib_table_route_path_fixup(prefix, &path); vec_add1(paths, path); fib_entry_index = diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 3c9b8a38..e4a8a70e 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include @@ -271,6 +273,7 @@ typedef enum fib_test_lb_bucket_type_t_ { FT_LB_O_LB, FT_LB_SPECIAL, FT_LB_ADJ, + FT_LB_INTF, } fib_test_lb_bucket_type_t; typedef struct fib_test_lb_bucket_t_ { @@ -315,6 +318,31 @@ typedef struct fib_test_lb_bucket_t_ { }; } fib_test_lb_bucket_t; +typedef enum fib_test_rep_bucket_type_t_ { + FT_REP_LABEL_O_ADJ, + FT_REP_DISP_MFIB_LOOKUP, + FT_REP_INTF, +} fib_test_rep_bucket_type_t; + +typedef struct fib_test_rep_bucket_t_ { + fib_test_rep_bucket_type_t type; + + union + { + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + adj_index_t adj; + } label_o_adj; + struct + { + adj_index_t adj; + } adj; + }; +} fib_test_rep_bucket_t; + #define FIB_TEST_LB(_cond, _comment, _args...) 
\ { \ if (!FIB_TEST_I(_cond, _comment, ##_args)) { \ @@ -322,7 +350,83 @@ typedef struct fib_test_lb_bucket_t_ { } \ } -static int +int +fib_test_validate_rep_v (const replicate_t *rep, + u16 n_buckets, + va_list ap) +{ + const fib_test_rep_bucket_t *exp; + const dpo_id_t *dpo; + int bucket; + + FIB_TEST_LB((n_buckets == rep->rep_n_buckets), + "n_buckets = %d", rep->rep_n_buckets); + + for (bucket = 0; bucket < n_buckets; bucket++) + { + exp = va_arg(ap, fib_test_rep_bucket_t*); + + dpo = replicate_get_bucket_i(rep, bucket); + + switch (exp->type) + { + case FT_REP_LABEL_O_ADJ: + { + const mpls_label_dpo_t *mld; + mpls_label_t hdr; + FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + + mld = mpls_label_dpo_get(dpo->dpoi_index); + hdr = clib_net_to_host_u32(mld->mld_hdr[0].label_exp_s_ttl); + + FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) == + exp->label_o_adj.label), + "bucket %d stacks on label %d", + bucket, + exp->label_o_adj.label); + + FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) == + exp->label_o_adj.eos), + "bucket %d stacks on label %d %U", + bucket, + exp->label_o_adj.label, + format_mpls_eos_bit, exp->label_o_adj.eos); + + FIB_TEST_LB((DPO_ADJACENCY_INCOMPLETE == mld->mld_dpo.dpoi_type), + "bucket %d label stacks on %U", + bucket, + format_dpo_type, mld->mld_dpo.dpoi_type); + + FIB_TEST_LB((exp->label_o_adj.adj == mld->mld_dpo.dpoi_index), + "bucket %d label stacks on adj %d", + bucket, + exp->label_o_adj.adj); + } + break; + case FT_REP_INTF: + FIB_TEST_LB((DPO_INTERFACE == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + + FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index), + "bucket %d stacks on adj %d", + bucket, + exp->adj.adj); + break; + case FT_REP_DISP_MFIB_LOOKUP: +// ASSERT(0); + break; + } + } + + return (!0); +} + +int fib_test_validate_lb_v (const load_balance_t *lb, u16 n_buckets, va_list ap) @@ -484,6 +588,16 @@ fib_test_validate_lb_v (const load_balance_t *lb, bucket, exp->adj.adj); break; + case FT_LB_INTF: + FIB_TEST_I((DPO_INTERFACE == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index), + "bucket %d stacks on adj %d", + bucket, + exp->adj.adj); + break; case FT_LB_O_LB: FIB_TEST_I((DPO_LOAD_BALANCE == dpo->dpoi_type), "bucket %d stacks on %U", @@ -509,14 +623,13 @@ fib_test_validate_lb_v (const load_balance_t *lb, return (!0); } -static int +int fib_test_validate_entry (fib_node_index_t fei, fib_forward_chain_type_t fct, u16 n_buckets, ...) 
{ dpo_id_t dpo = DPO_INVALID; - const load_balance_t *lb; fib_prefix_t pfx; index_t fw_lbi; u32 fib_index; @@ -529,47 +642,59 @@ fib_test_validate_entry (fib_node_index_t fei, fib_index = fib_entry_get_fib_index(fei); fib_entry_contribute_forwarding(fei, fct, &dpo); - FIB_TEST_LB((DPO_LOAD_BALANCE == dpo.dpoi_type), - "Entry links to %U", - format_dpo_type, dpo.dpoi_type); - lb = load_balance_get(dpo.dpoi_index); - - res = fib_test_validate_lb_v(lb, n_buckets, ap); + if (DPO_REPLICATE == dpo.dpoi_type) + { + const replicate_t *rep; - /* - * ensure that the LB contributed by the entry is the - * same as the LB in the forwarding tables - */ - if (fct == fib_entry_get_default_chain_type(fib_entry_get(fei))) + rep = replicate_get(dpo.dpoi_index); + res = fib_test_validate_rep_v(rep, n_buckets, ap); + } + else { - switch (pfx.fp_proto) - { - case FIB_PROTOCOL_IP4: - fw_lbi = ip4_fib_forwarding_lookup(fib_index, &pfx.fp_addr.ip4); - break; - case FIB_PROTOCOL_IP6: - fw_lbi = ip6_fib_table_fwding_lookup(&ip6_main, fib_index, &pfx.fp_addr.ip6); - break; - case FIB_PROTOCOL_MPLS: - { - mpls_unicast_header_t hdr = { - .label_exp_s_ttl = 0, - }; + const load_balance_t *lb; + + FIB_TEST_LB((DPO_LOAD_BALANCE == dpo.dpoi_type), + "Entry links to %U", + format_dpo_type, dpo.dpoi_type); - vnet_mpls_uc_set_label(&hdr.label_exp_s_ttl, pfx.fp_label); - vnet_mpls_uc_set_s(&hdr.label_exp_s_ttl, pfx.fp_eos); - hdr.label_exp_s_ttl = clib_host_to_net_u32(hdr.label_exp_s_ttl); + lb = load_balance_get(dpo.dpoi_index); + res = fib_test_validate_lb_v(lb, n_buckets, ap); - fw_lbi = mpls_fib_table_forwarding_lookup(fib_index, &hdr); + /* + * ensure that the LB contributed by the entry is the + * same as the LB in the forwarding tables + */ + if (fct == fib_entry_get_default_chain_type(fib_entry_get(fei))) + { + switch (pfx.fp_proto) + { + case FIB_PROTOCOL_IP4: + fw_lbi = ip4_fib_forwarding_lookup(fib_index, &pfx.fp_addr.ip4); + break; + case FIB_PROTOCOL_IP6: + fw_lbi = ip6_fib_table_fwding_lookup(&ip6_main, fib_index, &pfx.fp_addr.ip6); break; + case FIB_PROTOCOL_MPLS: + { + mpls_unicast_header_t hdr = { + .label_exp_s_ttl = 0, + }; + + vnet_mpls_uc_set_label(&hdr.label_exp_s_ttl, pfx.fp_label); + vnet_mpls_uc_set_s(&hdr.label_exp_s_ttl, pfx.fp_eos); + hdr.label_exp_s_ttl = clib_host_to_net_u32(hdr.label_exp_s_ttl); + + fw_lbi = mpls_fib_table_forwarding_lookup(fib_index, &hdr); + break; + } + default: + fw_lbi = 0; } - default: - fw_lbi = 0; + FIB_TEST_LB((fw_lbi == dpo.dpoi_index), + "Contributed LB = FW LB: %U\n %U", + format_load_balance, fw_lbi, 0, + format_load_balance, dpo.dpoi_index, 0); } - FIB_TEST_LB((fw_lbi == dpo.dpoi_index), - "Contributed LB = FW LB: %U\n %U", - format_load_balance, fw_lbi, 0, - format_load_balance, dpo.dpoi_index, 0); } dpo_reset(&dpo); @@ -1289,6 +1414,7 @@ fib_test_v4 (void) lookup_dpo_add_or_lock_w_fib_index(fib_index, DPO_PROTO_IP4, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_CONFIG, &ex_dpo); @@ -2605,7 +2731,6 @@ fib_test_v4 (void) NULL, FIB_ROUTE_PATH_FLAG_NONE); - fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); dpo1 = fib_entry_contribute_ip_forwarding(fei); @@ -7493,6 +7618,7 @@ lfib_test (void) fib_route_path_t *rpaths = NULL, rpath = { .frp_proto = FIB_PROTOCOL_MPLS, .frp_local_label = 1200, + .frp_eos = MPLS_NON_EOS, .frp_sw_if_index = ~0, // recursive .frp_fib_index = 0, // Default MPLS fib .frp_weight = 1, @@ -7607,6 +7733,146 @@ lfib_test (void) dpo_reset(&ip_1200); + /* + * An rx-interface route.
+ * like the tail of an mcast LSP + */ + dpo_id_t idpo = DPO_INVALID; + + interface_dpo_add_or_lock(DPO_PROTO_IP4, + tm->hw[0]->sw_if_index, + &idpo); + + fib_prefix_t pfx_2500 = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 2500, + .fp_eos = MPLS_EOS, + .fp_payload_proto = DPO_PROTO_IP4, + }; + fib_test_lb_bucket_t rx_intf_0 = { + .type = FT_LB_INTF, + .adj = { + .adj = idpo.dpoi_index, + }, + }; + + lfe = fib_table_entry_update_one_path(fib_index, + &pfx_2500, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 0, + NULL, + FIB_ROUTE_PATH_INTF_RX); + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 1, + &rx_intf_0), + "2500 rx-interface 0"); + fib_table_entry_delete(fib_index, &pfx_2500, FIB_SOURCE_API); + + /* + * An MPLS multicast entry + */ + fib_prefix_t pfx_3500 = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 3500, + .fp_eos = MPLS_EOS, + .fp_payload_proto = DPO_PROTO_IP4, + }; + fib_test_rep_bucket_t mc_0 = { + .type = FT_REP_LABEL_O_ADJ, + .label_o_adj = { + .adj = ai_mpls_10_10_10_1, + .label = 3300, + .eos = MPLS_EOS, + }, + }; + fib_test_rep_bucket_t mc_intf_0 = { + .type = FT_REP_INTF, + .adj = { + .adj = idpo.dpoi_index, + }, + }; + mpls_label_t *l3300 = NULL; + vec_add1(l3300, 3300); + + lfe = fib_table_entry_update_one_path(lfib_index, + &pfx_3500, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_MULTICAST, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + l3300, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 1, + &mc_0), + "3500 via replicate over 10.10.10.1"); + + /* + * MPLS Bud-node. Add a replication via an interface-receive path + */ + lfe = fib_table_entry_path_add(lfib_index, + &pfx_3500, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_MULTICAST, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 0, + NULL, + FIB_ROUTE_PATH_INTF_RX); + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 2, + &mc_0, + &mc_intf_0), + "3500 via replicate over 10.10.10.1 and interface-rx"); + + /* + * Add a replication via an interface-free for-us path + */ + fib_test_rep_bucket_t mc_disp = { + .type = FT_REP_DISP_MFIB_LOOKUP, + .adj = { + .adj = idpo.dpoi_index, + }, + }; + lfe = fib_table_entry_path_add(lfib_index, + &pfx_3500, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_MULTICAST, + FIB_PROTOCOL_IP4, + NULL, + 5, // rpf-id + 0, // default table + 0, + NULL, + FIB_ROUTE_PATH_RPF_ID); + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 3, + &mc_0, + &mc_disp, + &mc_intf_0), + "3500 via replicate over 10.10.10.1 and interface-rx"); + + + + fib_table_entry_delete(fib_index, &pfx_3500, FIB_SOURCE_API); + dpo_reset(&idpo); + /* * cleanup */ @@ -7617,6 +7883,9 @@ lfib_test (void) FIB_TEST(lb_count == pool_elts(load_balance_pool), "Load-balance resources freed %d of %d", lb_count, pool_elts(load_balance_pool)); + FIB_TEST(0 == pool_elts(interface_dpo_pool), + "interface_dpo resources freed %d of %d", + 0, pool_elts(interface_dpo_pool)); return (0); } diff --git a/src/vnet/fib/fib_test.h b/src/vnet/fib/fib_test.h new file mode 100644 index 00000000..b98680bf --- /dev/null +++ b/src/vnet/fib/fib_test.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_TEST_H__ +#define __FIB_TEST_H__ + +#include + +typedef enum fib_test_lb_bucket_type_t_ { + FT_LB_LABEL_O_ADJ, + FT_LB_LABEL_STACK_O_ADJ, + FT_LB_LABEL_O_LB, + FT_LB_O_LB, + FT_LB_SPECIAL, + FT_LB_ADJ, + FT_LB_INTF, +} fib_test_lb_bucket_type_t; + +typedef struct fib_test_lb_bucket_t_ { + fib_test_lb_bucket_type_t type; + + union + { + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + adj_index_t adj; + } label_o_adj; + struct + { + mpls_eos_bit_t eos; + mpls_label_t label_stack[8]; + u8 label_stack_size; + u8 ttl; + adj_index_t adj; + } label_stack_o_adj; + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + index_t lb; + } label_o_lb; + struct + { + index_t adj; + } adj; + struct + { + index_t lb; + } lb; + struct + { + index_t adj; + } special; + }; +} fib_test_lb_bucket_t; + +typedef enum fib_test_rep_bucket_type_t_ { + FT_REP_LABEL_O_ADJ, + FT_REP_INTF, +} fib_test_rep_bucket_type_t; + +typedef struct fib_test_rep_bucket_t_ { + fib_test_rep_bucket_type_t type; + + union + { + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + adj_index_t adj; + } label_o_adj; + struct + { + adj_index_t adj; + } adj; + }; +} fib_test_rep_bucket_t; + + +extern int fib_test_validate_rep_v(const replicate_t *rep, + u16 n_buckets, + va_list ap); + +extern int fib_test_validate_lb_v(const load_balance_t *lb, + u16 n_buckets, + va_list ap); + +extern int fib_test_validate_entry(fib_node_index_t fei, + fib_forward_chain_type_t fct, + u16 n_buckets, + ...); + +#endif diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c index 2837a59d..8165f3eb 100644 --- a/src/vnet/fib/fib_types.c +++ b/src/vnet/fib/fib_types.c @@ -66,12 +66,13 @@ fib_prefix_from_ip46_addr (const ip46_address_t *addr, void fib_prefix_from_mpls_label (mpls_label_t label, + mpls_eos_bit_t eos, fib_prefix_t *pfx) { pfx->fp_proto = FIB_PROTOCOL_MPLS; pfx->fp_len = 21; pfx->fp_label = label; - pfx->fp_eos = MPLS_NON_EOS; + pfx->fp_eos = eos; } int @@ -194,17 +195,7 @@ fib_route_path_cmp (const fib_route_path_t *rpath1, if (0 != res) return (res); - if (~0 != rpath1->frp_sw_if_index && - ~0 != rpath2->frp_sw_if_index) - { - res = vnet_sw_interface_compare(vnet_get_main(), - rpath1->frp_sw_if_index, - rpath2->frp_sw_if_index); - } - else - { - res = rpath1->frp_sw_if_index - rpath2->frp_sw_if_index; - } + res = (rpath1->frp_sw_if_index - rpath2->frp_sw_if_index); if (0 != res) return (res); diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h index 1c5299a9..4cb73e8a 100644 --- a/src/vnet/fib/fib_types.h +++ b/src/vnet/fib/fib_types.h @@ -286,8 +286,36 @@ typedef enum fib_route_path_flags_t_ * Attached path */ FIB_ROUTE_PATH_ATTACHED = (1 << 3), + /** + * A Drop path - resolve the path on the drop DPO + */ + FIB_ROUTE_PATH_DROP = (1 << 4), + /** + * Don't resolve the path, use the DPO the client provides + */ + FIB_ROUTE_PATH_EXCLUSIVE = (1 << 5), + /** + * A path that result in received traffic being recieved/recirculated + * so that it appears to have arrived on the new interface + */ + FIB_ROUTE_PATH_INTF_RX = (1 << 6), + /** + * A local 
path with a RPF-ID => multicast traffic + */ + FIB_ROUTE_PATH_RPF_ID = (1 << 7), } fib_route_path_flags_t; +/** + * An RPF-ID is numerical value that is used RPF validate. An entry + * has-a RPF-ID, when a packet egress from (e.g. an LSP) it gains an + * RPF-ID, these two are compared for the RPF check. + * This replaces the interfce based chack (since the LSP has no associated + * interface. + */ +typedef u32 fib_rpf_id_t; + +#define MFIB_RPF_ID_NONE (0) + /** * @brief * A representation of a path as described by a route producer. @@ -321,17 +349,29 @@ typedef struct fib_route_path_t_ { */ ip46_address_t frp_addr; - /** - * The MPLS local Label to reursively resolve through. - * This is valid when the path type is MPLS. - */ - mpls_label_t frp_local_label; + struct { + /** + * The MPLS local Label to reursively resolve through. + * This is valid when the path type is MPLS. + */ + mpls_label_t frp_local_label; + /** + * EOS bit for the resolving label + */ + mpls_eos_bit_t frp_eos; + }; + }; + union { + /** + * The interface. + * Will be invalid for recursive paths. + */ + u32 frp_sw_if_index; + /** + * The RPF-ID + */ + fib_rpf_id_t frp_rpf_id; }; - /** - * The interface. - * Will be invalid for recursive paths. - */ - u32 frp_sw_if_index; /** * The FIB index to lookup the nexthop * Only valid for recursive paths. diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c index 4b2b76ea..19f9f3c1 100644 --- a/src/vnet/fib/mpls_fib.c +++ b/src/vnet/fib/mpls_fib.c @@ -165,6 +165,7 @@ mpls_fib_create_with_table_id (u32 table_id) lookup_dpo_add_or_lock_w_fib_index(0, // unused DPO_PROTO_IP4, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_INPUT_INTERFACE, &dpo); @@ -179,6 +180,7 @@ mpls_fib_create_with_table_id (u32 table_id) lookup_dpo_add_or_lock_w_fib_index(0, //unsued DPO_PROTO_MPLS, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_INPUT_INTERFACE, &dpo); @@ -197,6 +199,7 @@ mpls_fib_create_with_table_id (u32 table_id) lookup_dpo_add_or_lock_w_fib_index(0, //unused DPO_PROTO_IP6, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_INPUT_INTERFACE, &dpo); @@ -210,6 +213,7 @@ mpls_fib_create_with_table_id (u32 table_id) prefix.fp_eos = MPLS_NON_EOS; lookup_dpo_add_or_lock_w_fib_index(0, // unsued DPO_PROTO_MPLS, + LOOKUP_UNICAST, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_INPUT_INTERFACE, &dpo); @@ -320,8 +324,15 @@ mpls_fib_forwarding_table_update (mpls_fib_t *mf, { mpls_label_t key; - ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type); - + ASSERT((DPO_LOAD_BALANCE == dpo->dpoi_type) || + (DPO_REPLICATE == dpo->dpoi_type)); + if (CLIB_DEBUG > 0) + { + if (DPO_REPLICATE == dpo->dpoi_type) + ASSERT(dpo->dpoi_index & MPLS_IS_REPLICATE); + if (DPO_LOAD_BALANCE == dpo->dpoi_type) + ASSERT(!(dpo->dpoi_index & MPLS_IS_REPLICATE)); + } key = mpls_fib_entry_mk_key(label, eos); mf->mf_lbs[key] = dpo->dpoi_index; diff --git a/src/vnet/handoff.h b/src/vnet/handoff.h index 815206a9..04ba8bfb 100644 --- a/src/vnet/handoff.h +++ b/src/vnet/handoff.h @@ -150,7 +150,7 @@ eth_get_sym_key (ethernet_header_t * h0) ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1] ^ ip->protocol); } - else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)) + else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)) { hash_key = mpls_get_key ((mpls_unicast_header_t *) (h0 + 1)); } @@ -179,8 +179,7 @@ eth_get_sym_key (ethernet_header_t * h0) ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1] ^ ip->protocol); } - else if (outer->type == - clib_host_to_net_u16 
(ETHERNET_TYPE_MPLS_UNICAST)) + else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)) { hash_key = mpls_get_key ((mpls_unicast_header_t *) (outer + 1)); } @@ -210,7 +209,7 @@ eth_get_key (ethernet_header_t * h0) { hash_key = ipv6_get_key ((ip6_header_t *) (h0 + 1)); } - else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)) + else if (h0->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)) { hash_key = mpls_get_key ((mpls_unicast_header_t *) (h0 + 1)); } @@ -230,8 +229,7 @@ eth_get_key (ethernet_header_t * h0) { hash_key = ipv6_get_key ((ip6_header_t *) (outer + 1)); } - else if (outer->type == - clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)) + else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)) { hash_key = mpls_get_key ((mpls_unicast_header_t *) (outer + 1)); } diff --git a/src/vnet/interface.c b/src/vnet/interface.c index 2a1e70e8..45417b2f 100644 --- a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -1360,7 +1360,7 @@ vnet_link_to_l3_proto (vnet_link_t link) case VNET_LINK_IP6: return (VNET_L3_PACKET_TYPE_IP6); case VNET_LINK_MPLS: - return (VNET_L3_PACKET_TYPE_MPLS_UNICAST); + return (VNET_L3_PACKET_TYPE_MPLS); case VNET_LINK_ARP: return (VNET_L3_PACKET_TYPE_ARP); case VNET_LINK_ETHERNET: diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index 5c2df32c..6af1714f 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -478,6 +478,7 @@ define ip_mroute_add_del u32 table_id; u32 entry_flags; u32 itf_flags; + u32 rpf_id; u16 grp_address_length; u8 create_vrf_if_needed; u8 is_add; @@ -518,6 +519,8 @@ manual_endian manual_print define ip_mfib_details { u32 context; u32 table_id; + u32 entry_flags; + u32 rpf_id; u8 address_length; u8 grp_address[4]; u8 src_address[4]; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index fdfe7f63..9fdf9b3c 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -2752,6 +2752,16 @@ ip4_rewrite_mcast (vlib_main_t * vm, return ip4_rewrite_inline (vm, node, frame, 0, 0, 1); } +static uword +ip4_mcast_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip4_rewrite_inline (vm, node, frame, 1, 1, 1); + else + return ip4_rewrite_inline (vm, node, frame, 0, 1, 1); +} + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_rewrite_node) = { .function = ip4_rewrite, @@ -2778,6 +2788,16 @@ VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = { }; VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast) +VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = { + .function = ip4_mcast_midchain, + .name = "ip4-mcast-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_rewrite_trace, + .sibling_of = "ip4-rewrite", +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain) + VLIB_REGISTER_NODE (ip4_midchain_node) = { .function = ip4_midchain, .name = "ip4-midchain", diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index c2fc4f87..a369f79f 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -2246,6 +2246,16 @@ ip6_midchain (vlib_main_t * vm, return ip6_rewrite_inline (vm, node, frame, 0, 1, 0); } +static uword +ip6_mcast_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip6_rewrite_inline (vm, node, frame, 1, 1, 1); + else + return ip6_rewrite_inline (vm, node, frame, 0, 1, 1); +} + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_midchain_node) = { @@ -2290,6 
+2300,19 @@ VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) = VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast); +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_mcast_midchain_node, static) = +{ + .function = ip6_mcast_midchain, + .name = "ip6-mcast-midchain", + .vector_size = sizeof (u32), + .format_trace = format_ip6_rewrite_trace, + .sibling_of = "ip6-rewrite", +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_mcast_midchain_node, ip6_mcast_midchain); + /* * Hop-by-Hop handling */ diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 2af546df..58b997aa 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -557,6 +557,7 @@ ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) case IP_LOOKUP_NEXT_PUNT: case IP_LOOKUP_NEXT_LOCAL: case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: case IP_LOOKUP_NEXT_MIDCHAIN: case IP_LOOKUP_NEXT_ICMP_ERROR: case IP_LOOKUP_N_NEXT: diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index b9f1782b..9c9cb4a4 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -438,17 +438,20 @@ vl_api_ip6_fib_dump_t_handler (vl_api_ip6_fib_dump_t * mp) } static void -send_ip_mfib_details (vpe_api_main_t * am, - unix_shared_memory_queue_t * q, - u32 table_id, - mfib_prefix_t * pfx, - fib_route_path_encode_t * api_rpaths, u32 context) +send_ip_mfib_details (unix_shared_memory_queue_t * q, + u32 context, u32 table_id, fib_node_index_t mfei) { + fib_route_path_encode_t *api_rpath, *api_rpaths = NULL; vl_api_ip_mfib_details_t *mp; - fib_route_path_encode_t *api_rpath; + mfib_entry_t *mfib_entry; vl_api_fib_path_t *fp; + mfib_prefix_t pfx; int path_count; + mfib_entry = mfib_entry_get (mfei); + mfib_entry_get_prefix (mfei, &pfx); + mfib_entry_encode (mfei, &api_rpaths); + path_count = vec_len (api_rpaths); mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp)); if (!mp) @@ -457,12 +460,14 @@ send_ip_mfib_details (vpe_api_main_t * am, mp->_vl_msg_id = ntohs (VL_API_IP_FIB_DETAILS); mp->context = context; + mp->rpf_id = mfib_entry->mfe_rpf_id; + mp->entry_flags = mfib_entry->mfe_flags; mp->table_id = htonl (table_id); - mp->address_length = pfx->fp_len; - memcpy (mp->grp_address, &pfx->fp_grp_addr.ip4, - sizeof (pfx->fp_grp_addr.ip4)); - memcpy (mp->src_address, &pfx->fp_src_addr.ip4, - sizeof (pfx->fp_src_addr.ip4)); + mp->address_length = pfx.fp_len; + memcpy (mp->grp_address, &pfx.fp_grp_addr.ip4, + sizeof (pfx.fp_grp_addr.ip4)); + memcpy (mp->src_address, &pfx.fp_src_addr.ip4, + sizeof (pfx.fp_src_addr.ip4)); mp->count = htonl (path_count); fp = mp->path; @@ -475,6 +480,7 @@ send_ip_mfib_details (vpe_api_main_t * am, copy_fib_next_hop (api_rpath, fp); fp++; } + vec_free (api_rpaths); vl_msg_api_send_shmem (q, (u8 *) & mp); } @@ -497,13 +503,10 @@ vl_api_ip_mfib_table_dump_walk (fib_node_index_t fei, void *arg) static void vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp) { - vpe_api_main_t *am = &vpe_api_main; unix_shared_memory_queue_t *q; ip4_main_t *im = &ip4_main; mfib_table_t *mfib_table; fib_node_index_t *mfeip; - mfib_prefix_t pfx; - fib_route_path_encode_t *api_rpaths = NULL; vl_api_ip_mfib_dump_ctc_t ctx = { .entries = NULL, }; @@ -524,21 +527,16 @@ vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp) vec_foreach (mfeip, ctx.entries) { - mfib_entry_get_prefix (*mfeip, &pfx); - mfib_entry_encode (*mfeip, &api_rpaths); - send_ip_mfib_details (am, q, + send_ip_mfib_details (q, mp->context, mfib_table->mft_table_id, - &pfx, 
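
/*
 * Standalone sketch (not the VPP node itself) of the dispatch pattern
 * the new ip4/ip6 mcast-midchain nodes use: one always-inline worker
 * parameterised by constant flags (do_counters, is_midchain, is_mcast),
 * with the counters variant selected at runtime.
 */
#include <stdio.h>

static int counters_enabled; /* stand-in for adj_are_counters_enabled() */

static inline int
rewrite_inline (int do_counters, int is_midchain, int is_mcast)
{
  /* a real node processes a frame; here we just encode the flags */
  return (do_counters << 2) | (is_midchain << 1) | is_mcast;
}

static int
mcast_midchain (void)
{
  if (counters_enabled)
    return rewrite_inline (1, 1, 1);
  else
    return rewrite_inline (0, 1, 1);
}

int
main (void)
{
  counters_enabled = 0;
  printf ("%d\n", mcast_midchain ()); /* 3: midchain + mcast, no counters */
  return 0;
}
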
api_rpaths, - mp->context); + *mfeip); } - vec_reset_length (api_rpaths); vec_reset_length (ctx.entries); })); /* *INDENT-ON* */ vec_free (ctx.entries); - vec_free (api_rpaths); } static void @@ -705,10 +703,13 @@ add_del_route_t_handler (u8 is_multipath, u8 is_unreach, u8 is_prohibit, u8 is_local, + u8 is_multicast, u8 is_classify, u32 classify_table_index, u8 is_resolve_host, u8 is_resolve_attached, + u8 is_interface_rx, + u8 is_rpf_id, u32 fib_index, const fib_prefix_t * prefix, u8 next_hop_proto_is_ip4, @@ -731,16 +732,24 @@ add_del_route_t_handler (u8 is_multipath, .frp_label_stack = next_hop_out_label_stack, }; fib_route_path_t *paths = NULL; + fib_entry_flag_t entry_flags = FIB_ENTRY_FLAG_NONE; if (MPLS_LABEL_INVALID != next_hop_via_label) { path.frp_proto = FIB_PROTOCOL_MPLS; path.frp_local_label = next_hop_via_label; + path.frp_eos = MPLS_NON_EOS; } if (is_resolve_host) path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST; if (is_resolve_attached) path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED; + if (is_interface_rx) + path_flags |= FIB_ROUTE_PATH_INTF_RX; + if (is_rpf_id) + path_flags |= FIB_ROUTE_PATH_RPF_ID; + if (is_multicast) + entry_flags |= FIB_ENTRY_FLAG_MULTICAST; path.frp_flags = path_flags; @@ -754,8 +763,7 @@ add_del_route_t_handler (u8 is_multipath, if (is_add) fib_table_entry_path_add2 (fib_index, prefix, - FIB_SOURCE_API, - FIB_ENTRY_FLAG_NONE, paths); + FIB_SOURCE_API, entry_flags, paths); else fib_table_entry_path_remove2 (fib_index, prefix, FIB_SOURCE_API, paths); @@ -826,8 +834,7 @@ add_del_route_t_handler (u8 is_multipath, { vec_add1 (paths, path); fib_table_entry_update (fib_index, - prefix, - FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, paths); + prefix, FIB_SOURCE_API, entry_flags, paths); vec_free (paths); } else @@ -847,7 +854,7 @@ add_del_route_check (fib_protocol_t table_proto, fib_protocol_t next_hop_table_proto, u32 next_hop_table_id, u8 create_missing_tables, - u32 * fib_index, u32 * next_hop_fib_index) + u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index) { vnet_main_t *vnm = vnet_get_main (); @@ -866,7 +873,7 @@ add_del_route_check (fib_protocol_t table_proto, } } - if (~0 != ntohl (next_hop_sw_if_index)) + if (!is_rpf_id && ~0 != ntohl (next_hop_sw_if_index)) { if (pool_is_free_index (vnm->interface_main.sw_interfaces, ntohl (next_hop_sw_if_index))) @@ -876,16 +883,27 @@ add_del_route_check (fib_protocol_t table_proto, } else { - *next_hop_fib_index = fib_table_find (next_hop_table_proto, - ntohl (next_hop_table_id)); + if (is_rpf_id) + *next_hop_fib_index = mfib_table_find (next_hop_table_proto, + ntohl (next_hop_table_id)); + else + *next_hop_fib_index = fib_table_find (next_hop_table_proto, + ntohl (next_hop_table_id)); if (~0 == *next_hop_fib_index) { if (create_missing_tables) { - *next_hop_fib_index = - fib_table_find_or_create_and_lock (next_hop_table_proto, - ntohl (next_hop_table_id)); + if (is_rpf_id) + *next_hop_fib_index = + mfib_table_find_or_create_and_lock (next_hop_table_proto, + ntohl + (next_hop_table_id)); + else + *next_hop_fib_index = + fib_table_find_or_create_and_lock (next_hop_table_proto, + ntohl + (next_hop_table_id)); } else { @@ -910,7 +928,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->next_hop_sw_if_index, FIB_PROTOCOL_IP4, mp->next_hop_table_id, - mp->create_vrf_if_needed, + mp->create_vrf_if_needed, 0, &fib_index, &next_hop_fib_index); if (0 != rv) @@ -943,11 +961,11 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->is_drop, mp->is_unreach, mp->is_prohibit, - mp->is_local, + 
mp->is_local, 0, mp->is_classify, mp->classify_table_index, mp->is_resolve_host, - mp->is_resolve_attached, + mp->is_resolve_attached, 0, 0, fib_index, &pfx, 1, &nh, ntohl (mp->next_hop_sw_if_index), @@ -969,7 +987,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->next_hop_sw_if_index, FIB_PROTOCOL_IP6, mp->next_hop_table_id, - mp->create_vrf_if_needed, + mp->create_vrf_if_needed, 0, &fib_index, &next_hop_fib_index); if (0 != rv) @@ -1002,11 +1020,11 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->is_drop, mp->is_unreach, mp->is_prohibit, - mp->is_local, + mp->is_local, 0, mp->is_classify, mp->classify_table_index, mp->is_resolve_host, - mp->is_resolve_attached, + mp->is_resolve_attached, 0, 0, fib_index, &pfx, 0, &nh, ntohl (mp->next_hop_sw_if_index), next_hop_fib_index, @@ -1075,6 +1093,7 @@ mroute_add_del_handler (u8 is_add, u32 fib_index, const mfib_prefix_t * prefix, u32 entry_flags, + fib_rpf_id_t rpf_id, u32 next_hop_sw_if_index, u32 itf_flags) { stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ ); @@ -1091,7 +1110,7 @@ mroute_add_del_handler (u8 is_add, if (!is_local && ~0 == next_hop_sw_if_index) { mfib_table_entry_update (fib_index, prefix, - MFIB_SOURCE_API, entry_flags); + MFIB_SOURCE_API, rpf_id, entry_flags); } else { @@ -1152,6 +1171,7 @@ api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp) mp->is_local, fib_index, &pfx, ntohl (mp->entry_flags), + ntohl (mp->rpf_id), ntohl (mp->next_hop_sw_if_index), ntohl (mp->itf_flags))); } diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index ec9a1f97..597de06b 100755 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -450,6 +450,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, unformat_mpls_unicast_label, &rpath.frp_local_label)) { rpath.frp_weight = 1; + rpath.frp_eos = MPLS_NON_EOS; rpath.frp_proto = FIB_PROTOCOL_MPLS; rpath.frp_sw_if_index = ~0; vec_add1 (rpaths, rpath); @@ -923,7 +924,7 @@ vnet_ip_mroute_cmd (vlib_main_t * vm, else if (eflags) { mfib_table_entry_update (fib_index, &pfx, MFIB_SOURCE_CLI, - eflags); + MFIB_RPF_ID_NONE, eflags); } else { diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c index efa724e0..d2954e96 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c +++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c @@ -88,6 +88,7 @@ ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix) (ip_prefix_version (dst_prefix) == IP6 ? DPO_PROTO_IP6 : DPO_PROTO_IP4), + LOOKUP_UNICAST, LOOKUP_INPUT_SRC_ADDR, LOOKUP_TABLE_FROM_CONFIG, &src_lkup_dpo); diff --git a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c index 164cafa1..3ed7cba7 100644 --- a/src/vnet/mfib/ip4_mfib.c +++ b/src/vnet/mfib/ip4_mfib.c @@ -72,6 +72,7 @@ ip4_create_mfib_with_table_id (u32 table_id) mfib_table_entry_update(mfib_table->mft_index, &prefix, MFIB_SOURCE_DEFAULT_ROUTE, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_DROP); } diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c index 991b91c6..116fee22 100644 --- a/src/vnet/mfib/ip6_mfib.c +++ b/src/vnet/mfib/ip6_mfib.c @@ -195,6 +195,7 @@ ip6_create_mfib_with_table_id (u32 table_id) mfib_table_entry_update(mfib_table->mft_index, &all_zeros, MFIB_SOURCE_DEFAULT_ROUTE, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_DROP); /* diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c index 1aa8e086..847f25e7 100644 --- a/src/vnet/mfib/mfib_entry.c +++ b/src/vnet/mfib/mfib_entry.c @@ -48,6 +48,15 @@ #define MFIB_ENTRY_DBG(_e, _fmt, _args...) 
#endif +/** + * MFIB extensions to each path + */ +typedef struct mfib_path_ext_t_ +{ + mfib_itf_flags_t mfpe_flags; + fib_node_index_t mfpe_path; +} mfib_path_ext_t; + /** * The source of an MFIB entry */ @@ -58,22 +67,39 @@ typedef struct mfib_entry_src_t_ */ mfib_source_t mfes_src; + /** + * Route flags + */ + mfib_entry_flags_t mfes_flags; + /** * The path-list of forwarding interfaces */ fib_node_index_t mfes_pl; /** - * Route flags + * RPF-ID */ - mfib_entry_flags_t mfes_flags; + fib_rpf_id_t mfes_rpf_id; + + /** + * Hash table of path extensions + */ + mfib_path_ext_t *mfes_exts; /** - * The hash table of all interfaces + * The hash table of all interfaces. + * This is forwarding time information derived from the paths + * and their extensions. */ mfib_itf_t *mfes_itfs; } mfib_entry_src_t; +/** + * Pool of path extensions + */ +static mfib_path_ext_t *mfib_path_ext_pool; + /** * String names for each source */ @@ -123,6 +149,24 @@ format_mfib_entry_dpo (u8 * s, va_list * args) MFIB_ENTRY_FORMAT_BRIEF)); } +static inline mfib_path_ext_t * +mfib_entry_path_ext_get (index_t mi) +{ + return (pool_elt_at_index(mfib_path_ext_pool, mi)); +} + +static u8 * +format_mfib_entry_path_ext (u8 * s, va_list * args) +{ + mfib_path_ext_t *path_ext; + index_t mpi = va_arg(*args, index_t); + + path_ext = mfib_entry_path_ext_get(mpi); + return (format(s, "path:%d flags:%U", + path_ext->mfpe_path, + format_mfib_itf_flags, path_ext->mfpe_flags)); +} + u8 * format_mfib_entry (u8 * s, va_list * args) { @@ -141,6 +185,8 @@ format_mfib_entry (u8 * s, va_list * args) if (level >= MFIB_ENTRY_FORMAT_DETAIL) { + fib_node_index_t path_index, mpi; + s = format (s, "\n"); s = format (s, " fib:%d", mfib_entry->mfe_fib_index); s = format (s, " index:%d", mfib_entry_get_index(mfib_entry)); @@ -153,6 +199,14 @@ format_mfib_entry (u8 * s, va_list * args) { s = fib_path_list_format(msrc->mfes_pl, s); } + s = format (s, " Extensions:\n", + mfib_source_names[msrc->mfes_src]); + hash_foreach(path_index, mpi, msrc->mfes_exts, + ({ + s = format(s, " %U\n", format_mfib_entry_path_ext, mpi); + })); + s = format (s, " Interface-Forwarding:\n", + mfib_source_names[msrc->mfes_src]); hash_foreach(sw_if_index, mfi, msrc->mfes_itfs, ({ s = format(s, " %U\n", format_mfib_itf, mfi); @@ -165,7 +219,7 @@ format_mfib_entry (u8 * s, va_list * args) ({ s = format(s, "\n %U", format_mfib_itf, mfi); })); - + s = format(s, "\n RPF-ID:%d", mfib_entry->mfe_rpf_id); s = format(s, "\n %U-chain\n %U", format_fib_forw_chain_type, mfib_entry_get_default_chain_type(mfib_entry), @@ -314,13 +368,6 @@ mfib_entry_src_remove (mfib_entry_t *mfib_entry, } } -static int -mfib_entry_src_n_itfs (const mfib_entry_src_t *msrc) -{ - return (hash_elts(msrc->mfes_itfs)); -} - - static void mfib_entry_last_lock_gone (fib_node_t *node) { @@ -338,7 +385,6 @@ mfib_entry_last_lock_gone (fib_node_t *node) mfib_entry_src_flush(msrc); } - fib_path_list_unlock(mfib_entry->mfe_parent); vec_free(mfib_entry->mfe_srcs); fib_node_deinit(&mfib_entry->mfe_node); @@ -417,10 +463,9 @@ mfib_entry_alloc (u32 fib_index, mfib_entry->mfe_flags = 0; mfib_entry->mfe_fib_index = fib_index; mfib_entry->mfe_prefix = *prefix; - mfib_entry->mfe_parent = FIB_NODE_INDEX_INVALID; - mfib_entry->mfe_sibling = FIB_NODE_INDEX_INVALID; mfib_entry->mfe_srcs = NULL; mfib_entry->mfe_itfs = NULL; + mfib_entry->mfe_rpf_id = MFIB_RPF_ID_NONE; dpo_reset(&mfib_entry->mfe_rep); @@ -431,10 +476,57 @@ mfib_entry_alloc (u32 fib_index, return (mfib_entry); } +static inline mfib_path_ext_t * +mfib_entry_path_ext_find 
(mfib_path_ext_t *exts, + fib_node_index_t path_index) +{ + uword *p; + + p = hash_get(exts, path_index); + + if (NULL != p) + { + return (mfib_entry_path_ext_get(p[0])); + } + + return (NULL); +} + +static mfib_path_ext_t* +mfib_path_ext_add (mfib_entry_src_t *msrc, + fib_node_index_t path_index, + mfib_itf_flags_t mfi_flags) +{ + mfib_path_ext_t *path_ext; + + pool_get(mfib_path_ext_pool, path_ext); + + path_ext->mfpe_flags = mfi_flags; + path_ext->mfpe_path = path_index; + + hash_set(msrc->mfes_exts, path_index, + path_ext - mfib_path_ext_pool); + + return (path_ext); +} + +static void +mfib_path_ext_remove (mfib_entry_src_t *msrc, + fib_node_index_t path_index) +{ + mfib_path_ext_t *path_ext; + + path_ext = mfib_entry_path_ext_find(msrc->mfes_exts, path_index); + + hash_unset(msrc->mfes_exts, path_index); + pool_put(mfib_path_ext_pool, path_ext); +} + typedef struct mfib_entry_collect_forwarding_ctx_t_ { load_balance_path_t * next_hops; fib_forward_chain_type_t fct; + mfib_entry_src_t *msrc; } mfib_entry_collect_forwarding_ctx_t; static int @@ -455,6 +547,20 @@ mfib_entry_src_collect_forwarding (fib_node_index_t pl_index, return (!0); } + /* + * If the path is not forwarding, don't use it + */ + mfib_path_ext_t *path_ext; + + path_ext = mfib_entry_path_ext_find(ctx->msrc->mfes_exts, + path_index); + + if (NULL != path_ext && + !(path_ext->mfpe_flags & MFIB_ITF_FLAG_FORWARD)) + { + return (!0); + } + switch (ctx->fct) { case FIB_FORW_CHAIN_TYPE_MCAST_IP4: @@ -483,46 +589,61 @@ mfib_entry_src_collect_forwarding (fib_node_index_t pl_index, } static void -mfib_entry_stack (mfib_entry_t *mfib_entry) +mfib_entry_stack (mfib_entry_t *mfib_entry, + mfib_entry_src_t *msrc) { dpo_proto_t dp; dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry)); - if (FIB_NODE_INDEX_INVALID != mfib_entry->mfe_parent) + if (NULL != msrc && + FIB_NODE_INDEX_INVALID != msrc->mfes_pl) { mfib_entry_collect_forwarding_ctx_t ctx = { .next_hops = NULL, .fct = mfib_entry_get_default_chain_type(mfib_entry), + .msrc = msrc, }; - fib_path_list_walk(mfib_entry->mfe_parent, + fib_path_list_walk(msrc->mfes_pl, mfib_entry_src_collect_forwarding, &ctx); if (!(MFIB_ENTRY_FLAG_EXCLUSIVE & mfib_entry->mfe_flags)) { - /* - * each path contirbutes a next-hop. form a replicate - * from those choices. - */ - if (!dpo_id_is_valid(&mfib_entry->mfe_rep) || - dpo_is_drop(&mfib_entry->mfe_rep)) + if (NULL == ctx.next_hops) { - dpo_id_t tmp_dpo = DPO_INVALID; - - dpo_set(&tmp_dpo, - DPO_REPLICATE, dp, - replicate_create(0, dp)); - + /* + * no next-hops, stack directly on the drop + */ + dpo_stack(DPO_MFIB_ENTRY, dp, + &mfib_entry->mfe_rep, - &tmp_dpo); + drop_dpo_get(dp)); + } + else + { + /* + * each path contributes a next-hop. form a replicate + * from those choices. 
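
/*
 * Sketch of the path-extension bookkeeping introduced here: extensions
 * live in a pool, and a per-source map takes a path index to its pool
 * slot. Plain arrays stand in for VPP's pool/hash, and path indices are
 * assumed to be < MAX_PATHS for the sketch.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_PATHS 16

typedef struct { uint32_t path; uint32_t flags; int used; } path_ext_t;

static path_ext_t pool[MAX_PATHS]; /* "pool" of extensions */
static int map[MAX_PATHS];         /* "hash": path -> pool slot */

static path_ext_t *
ext_find (uint32_t path)
{
  return (map[path] < 0 ? NULL : &pool[map[path]]);
}

static path_ext_t *
ext_add (uint32_t path, uint32_t flags)
{
  for (int s = 0; s < MAX_PATHS; s++)
    if (!pool[s].used)
      {
        pool[s] = (path_ext_t){ path, flags, 1 };
        map[path] = s;
        return &pool[s];
      }
  return NULL;
}

static void
ext_remove (uint32_t path)
{
  path_ext_t *e = ext_find (path);
  if (e) { e->used = 0; map[path] = -1; }
}

int
main (void)
{
  for (int i = 0; i < MAX_PATHS; i++) map[i] = -1;
  ext_add (3, 0x1 /* FORWARD */);
  printf ("flags:%x\n", ext_find (3)->flags);  /* flags:1 */
  ext_remove (3);
  printf ("found:%d\n", ext_find (3) != NULL); /* found:0 */
  return 0;
}
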
+ */ + if (!dpo_id_is_valid(&mfib_entry->mfe_rep) || + dpo_is_drop(&mfib_entry->mfe_rep)) + { + dpo_id_t tmp_dpo = DPO_INVALID; + + dpo_set(&tmp_dpo, + DPO_REPLICATE, dp, + replicate_create(0, dp)); + + dpo_stack(DPO_MFIB_ENTRY, dp, + &mfib_entry->mfe_rep, + &tmp_dpo); + + dpo_reset(&tmp_dpo); + } + replicate_multipath_update(&mfib_entry->mfe_rep, + ctx.next_hops); } - replicate_multipath_update(&mfib_entry->mfe_rep, - ctx.next_hops); } else { @@ -548,11 +669,11 @@ mfib_entry_stack (mfib_entry_t *mfib_entry) } } -static void -mfib_entry_forwarding_path_add (mfib_entry_src_t *msrc, - const fib_route_path_t *rpath) +static fib_node_index_t +mfib_entry_src_path_add (mfib_entry_src_t *msrc, + const fib_route_path_t *rpath) { - fib_node_index_t old_pl_index; + fib_node_index_t path_index; fib_route_path_t *rpaths; ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags)); @@ -563,32 +684,26 @@ mfib_entry_forwarding_path_add (mfib_entry_src_t *msrc, rpaths = NULL; vec_add1(rpaths, rpath[0]); - old_pl_index = msrc->mfes_pl; - if (FIB_NODE_INDEX_INVALID == msrc->mfes_pl) { - msrc->mfes_pl = - fib_path_list_create(FIB_PATH_LIST_FLAG_NO_URPF, - rpaths); - } - else - { - msrc->mfes_pl = - fib_path_list_copy_and_path_add(msrc->mfes_pl, - FIB_PATH_LIST_FLAG_NO_URPF, - rpaths); + /* A non-shared path-list */ + msrc->mfes_pl = fib_path_list_create(FIB_PATH_LIST_FLAG_NO_URPF, + NULL); + fib_path_list_lock(msrc->mfes_pl); } - fib_path_list_lock(msrc->mfes_pl); - fib_path_list_unlock(old_pl_index); + + path_index = fib_path_list_path_add(msrc->mfes_pl, rpaths); vec_free(rpaths); + + return (path_index); } -static int -mfib_entry_forwarding_path_remove (mfib_entry_src_t *msrc, - const fib_route_path_t *rpath) +static fib_node_index_t +mfib_entry_src_path_remove (mfib_entry_src_t *msrc, + const fib_route_path_t *rpath) { - fib_node_index_t old_pl_index; + fib_node_index_t path_index; fib_route_path_t *rpaths; ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags)); @@ -599,56 +714,31 @@ mfib_entry_forwarding_path_remove (mfib_entry_src_t *msrc, rpaths = NULL; vec_add1(rpaths, rpath[0]); - old_pl_index = msrc->mfes_pl; - - msrc->mfes_pl = - fib_path_list_copy_and_path_remove(msrc->mfes_pl, - FIB_PATH_LIST_FLAG_NONE, - rpaths); - - fib_path_list_lock(msrc->mfes_pl); - fib_path_list_unlock(old_pl_index); + path_index = fib_path_list_path_remove(msrc->mfes_pl, rpaths); vec_free(rpaths); - return (FIB_NODE_INDEX_INVALID != msrc->mfes_pl); + return (path_index); } static void mfib_entry_recalculate_forwarding (mfib_entry_t *mfib_entry) { - fib_node_index_t old_pl_index; mfib_entry_src_t *bsrc; - old_pl_index = mfib_entry->mfe_parent; - /* * copy the forwarding data from the bast source */ bsrc = mfib_entry_get_best_src(mfib_entry); - if (NULL == bsrc) - { - mfib_entry->mfe_parent = FIB_NODE_INDEX_INVALID; - } - else + if (NULL != bsrc) { - mfib_entry->mfe_parent = bsrc->mfes_pl; mfib_entry->mfe_flags = bsrc->mfes_flags; mfib_entry->mfe_itfs = bsrc->mfes_itfs; + mfib_entry->mfe_rpf_id = bsrc->mfes_rpf_id; } - /* - * re-stack the entry on the best forwarding info. 
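
/*
 * Sketch of the stacking decision in mfib_entry_stack() above: if the
 * walk collected no forwarding next-hops the entry stacks directly on
 * the drop DPO, otherwise a replicate DPO is (re)used and updated with
 * the collected set. Strings stand in for the DPO objects.
 */
#include <stdio.h>

static const char *
stack_on (int n_next_hops)
{
  if (0 == n_next_hops)
    return "drop-dpo";      /* dpo_stack(..., drop_dpo_get(dp)) */
  return "replicate-dpo";   /* replicate_multipath_update(...) */
}

int
main (void)
{
  printf ("%s\n", stack_on (0)); /* drop-dpo */
  printf ("%s\n", stack_on (2)); /* replicate-dpo */
  return 0;
}
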
- */ - if (old_pl_index != mfib_entry->mfe_parent || - FIB_NODE_INDEX_INVALID == old_pl_index) - { - mfib_entry_stack(mfib_entry); - - fib_path_list_lock(mfib_entry->mfe_parent); - fib_path_list_unlock(old_pl_index); - } + mfib_entry_stack(mfib_entry, bsrc); } @@ -656,6 +746,7 @@ fib_node_index_t mfib_entry_create (u32 fib_index, mfib_source_t source, const mfib_prefix_t *prefix, + fib_rpf_id_t rpf_id, mfib_entry_flags_t entry_flags) { fib_node_index_t mfib_entry_index; @@ -666,6 +757,7 @@ mfib_entry_create (u32 fib_index, &mfib_entry_index); msrc = mfib_entry_src_find_or_create(mfib_entry, source); msrc->mfes_flags = entry_flags; + msrc->mfes_rpf_id = rpf_id; mfib_entry_recalculate_forwarding(mfib_entry); @@ -682,13 +774,14 @@ static int mfib_entry_src_ok_for_delete (const mfib_entry_src_t *msrc) { return ((MFIB_ENTRY_FLAG_NONE == msrc->mfes_flags && - 0 == mfib_entry_src_n_itfs(msrc))); + 0 == fib_path_list_get_n_paths(msrc->mfes_pl))); } int mfib_entry_update (fib_node_index_t mfib_entry_index, mfib_source_t source, mfib_entry_flags_t entry_flags, + fib_rpf_id_t rpf_id, index_t repi) { mfib_entry_t *mfib_entry; @@ -697,6 +790,7 @@ mfib_entry_update (fib_node_index_t mfib_entry_index, mfib_entry = mfib_entry_get(mfib_entry_index); msrc = mfib_entry_src_find_or_create(mfib_entry, source); msrc->mfes_flags = entry_flags; + msrc->mfes_rpf_id = rpf_id; if (INDEX_INVALID != repi) { @@ -768,55 +862,79 @@ mfib_entry_path_update (fib_node_index_t mfib_entry_index, const fib_route_path_t *rpath, mfib_itf_flags_t itf_flags) { + fib_node_index_t path_index; + mfib_path_ext_t *path_ext; + mfib_itf_flags_t old, new; mfib_entry_t *mfib_entry; mfib_entry_src_t *msrc; - mfib_itf_t *mfib_itf; mfib_entry = mfib_entry_get(mfib_entry_index); ASSERT(NULL != mfib_entry); msrc = mfib_entry_src_find_or_create(mfib_entry, source); /* - * search for the interface in the current set + * add the path to the path-list. If it's a duplicate we'll get + * back the original path. + */ + path_index = mfib_entry_src_path_add(msrc, rpath); + + /* + * find the path extension for that path */ - mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs, - rpath[0].frp_sw_if_index); + path_ext = mfib_entry_path_ext_find(msrc->mfes_exts, path_index); - if (NULL == mfib_itf) + if (NULL == path_ext) { - /* - * this is a path we do not yet have. If it is forwarding then we - * add it to the replication set - */ - if (itf_flags & MFIB_ITF_FLAG_FORWARD) - { - mfib_entry_forwarding_path_add(msrc, rpath); - } - /* - * construct a new ITF for this entry's list - */ - mfib_entry_itf_add(msrc, - rpath[0].frp_sw_if_index, - mfib_itf_create(rpath[0].frp_sw_if_index, - itf_flags)); + old = MFIB_ITF_FLAG_NONE; + path_ext = mfib_path_ext_add(msrc, path_index, itf_flags); } else { - int was_forwarding = !!(mfib_itf->mfi_flags & MFIB_ITF_FLAG_FORWARD); - int is_forwarding = !!(itf_flags & MFIB_ITF_FLAG_FORWARD); + old = path_ext->mfpe_flags; + path_ext->mfpe_flags = itf_flags; + } - if (!was_forwarding && is_forwarding) - { - mfib_entry_forwarding_path_add(msrc, rpath); - } - else if (was_forwarding && !is_forwarding) + /* + * Has the path changed its contribution to the input interface set. + * Which only paths with interfaces can do... 
+ */ + if (~0 != rpath[0].frp_sw_if_index) + { + mfib_itf_t *mfib_itf; + + new = itf_flags; + + if (old != new) { - mfib_entry_forwarding_path_remove(msrc, rpath); + if (MFIB_ITF_FLAG_NONE == new) + { + /* + * no more interface flags on this path, remove + * from the data-plane set + */ + mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index); + } + else if (MFIB_ITF_FLAG_NONE == old) + { + /* + * This interface is now contributing + */ + mfib_entry_itf_add(msrc, + rpath[0].frp_sw_if_index, + mfib_itf_create(rpath[0].frp_sw_if_index, + itf_flags)); + } + else + { + /* + * change of flag contributions + */ + mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs, + rpath[0].frp_sw_if_index); + /* Seen by packets inflight */ + mfib_itf->mfi_flags = new; + } } - /* - * packets in flight see these updates. - */ - mfib_itf->mfi_flags = itf_flags; } mfib_entry_recalculate_forwarding(mfib_entry); @@ -833,9 +951,9 @@ mfib_entry_path_remove (fib_node_index_t mfib_entry_index, mfib_source_t source, const fib_route_path_t *rpath) { + fib_node_index_t path_index; mfib_entry_t *mfib_entry; mfib_entry_src_t *msrc; - mfib_itf_t *mfib_itf; mfib_entry = mfib_entry_get(mfib_entry_index); ASSERT(NULL != mfib_entry); @@ -850,33 +968,23 @@ mfib_entry_path_remove (fib_node_index_t mfib_entry_index, } /* - * search for the interface in the current set + * remove the path from the path-list. If it's not there we'll get + * back invalid */ - mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs, - rpath[0].frp_sw_if_index); + path_index = mfib_entry_src_path_remove(msrc, rpath); - if (NULL == mfib_itf) + if (FIB_NODE_INDEX_INVALID != path_index) { /* - * removing a path that does not exist + * don't need the extension, nor the interface anymore */ - return (mfib_entry_ok_for_delete(mfib_entry)); - } - - /* - * we have this path. If it is forwarding then we - * remove it to the replication set - */ - if (mfib_itf->mfi_flags & MFIB_ITF_FLAG_FORWARD) - { - mfib_entry_forwarding_path_remove(msrc, rpath); + mfib_path_ext_remove(msrc, path_index); + if (~0 != rpath[0].frp_sw_if_index) + { + mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index); + } } - /* - * remove the interface/path from this entry's list - */ - mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index); - if (mfib_entry_src_ok_for_delete(msrc)) { /* @@ -1057,11 +1165,14 @@ mfib_entry_encode (fib_node_index_t mfib_entry_index, fib_route_path_encode_t **api_rpaths) { mfib_entry_t *mfib_entry; + mfib_entry_src_t *bsrc; mfib_entry = mfib_entry_get(mfib_entry_index); - if (FIB_NODE_INDEX_INVALID != mfib_entry->mfe_parent) + bsrc = mfib_entry_get_best_src(mfib_entry); + + if (FIB_NODE_INDEX_INVALID != bsrc->mfes_pl) { - fib_path_list_walk(mfib_entry->mfe_parent, + fib_path_list_walk(bsrc->mfes_pl, fib_path_encode, api_rpaths); } diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h index dc8f49aa..4f62b18e 100644 --- a/src/vnet/mfib/mfib_entry.h +++ b/src/vnet/mfib/mfib_entry.h @@ -42,17 +42,6 @@ typedef struct mfib_entry_t_ { * The index of the FIB table this entry is in */ u32 mfe_fib_index; - /** - * the path-list for which this entry is a child. This is also the path-list - * that is contributing forwarding for this entry. - */ - fib_node_index_t mfe_parent; - /** - * index of this entry in the parent's child list. - * This is set when this entry is added as a child, but can also - * be changed by the parent as it manages its list. 
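
/*
 * Sketch of the interface-set transitions driven by a path's old and
 * new itf flags in mfib_entry_path_update(): NONE -> flags adds the
 * interface, flags -> NONE removes it, and any other change updates the
 * flags in place (visible to packets in flight). Standalone model.
 */
#include <stdio.h>

typedef enum { ITF_NOOP, ITF_ADD, ITF_REMOVE, ITF_UPDATE } itf_action_t;

static itf_action_t
itf_transition (unsigned old_flags, unsigned new_flags)
{
  if (old_flags == new_flags) return ITF_NOOP;
  if (0 == new_flags)         return ITF_REMOVE;
  if (0 == old_flags)         return ITF_ADD;
  return ITF_UPDATE;
}

int
main (void)
{
  printf ("%d %d %d\n",
          itf_transition (0, 2),  /* 1: ADD */
          itf_transition (2, 0),  /* 2: REMOVE */
          itf_transition (2, 3)); /* 3: UPDATE */
  return 0;
}
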
- */ - u32 mfe_sibling; /** * A vector of sources contributing forwarding @@ -65,7 +54,7 @@ typedef struct mfib_entry_t_ { CLIB_CACHE_LINE_ALIGN_MARK(cacheline1); /** - * The Replicate DPO used for forwarding. + * The DPO used for forwarding; replicate, drop, etc.. */ dpo_id_t mfe_rep; @@ -74,6 +63,11 @@ typedef struct mfib_entry_t_ { */ mfib_entry_flags_t mfe_flags; + /** + * RPF-ID used when the packets ingress not from an interface + */ + fib_rpf_id_t mfe_rpf_id; + /** * A hash table of interfaces */ @@ -90,11 +84,13 @@ extern u8 *format_mfib_entry(u8 * s, va_list * args); extern fib_node_index_t mfib_entry_create(u32 fib_index, mfib_source_t source, const mfib_prefix_t *prefix, + fib_rpf_id_t rpf_id, mfib_entry_flags_t entry_flags); extern int mfib_entry_update(fib_node_index_t fib_entry_index, mfib_source_t source, mfib_entry_flags_t entry_flags, + fib_rpf_id_t rpf_id, index_t rep_dpo); extern void mfib_entry_path_update(fib_node_index_t fib_entry_index, diff --git a/src/vnet/mfib/mfib_forward.c b/src/vnet/mfib/mfib_forward.c index 5fe0a57c..3d8f4f98 100644 --- a/src/vnet/mfib/mfib_forward.c +++ b/src/vnet/mfib/mfib_forward.c @@ -380,13 +380,27 @@ mfib_forward_rpf (vlib_main_t * vm, * for the case of throughput traffic that is not replicated * to the host stack nor sets local flags */ - if (PREDICT_TRUE(NULL != mfi0)) + + /* + * If the mfib entry has a configured RPF-ID check that + * in preference to an interface based RPF + */ + if (MFIB_RPF_ID_NONE != mfe0->mfe_rpf_id) { - iflags0 = mfi0->mfi_flags; + iflags0 = (mfe0->mfe_rpf_id == vnet_buffer(b0)->ip.rpf_id ? + MFIB_ITF_FLAG_ACCEPT : + MFIB_ITF_FLAG_NONE); } else { - iflags0 = MFIB_ITF_FLAG_NONE; + if (PREDICT_TRUE(NULL != mfi0)) + { + iflags0 = mfi0->mfi_flags; + } + else + { + iflags0 = MFIB_ITF_FLAG_NONE; + } } eflags0 = mfe0->mfe_flags; @@ -436,17 +450,16 @@ mfib_forward_rpf (vlib_main_t * vm, { mfib_forward_rpf_trace_t *t0; - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); t0->entry_index = mfei0; + t0->itf_flags = iflags0; if (NULL == mfi0) { t0->sw_if_index = ~0; - t0->itf_flags = MFIB_ITF_FLAG_NONE; } else { t0->sw_if_index = mfi0->mfi_sw_if_index; - t0->itf_flags = mfi0->mfi_flags; } } vlib_validate_buffer_enqueue_x1 (vm, node, next, @@ -478,7 +491,7 @@ VLIB_REGISTER_NODE (ip4_mfib_forward_rpf_node, static) = { .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT, .next_nodes = { - [MFIB_FORWARD_RPF_NEXT_DROP] = "error-drop", + [MFIB_FORWARD_RPF_NEXT_DROP] = "ip4-drop", }, }; @@ -503,7 +516,7 @@ VLIB_REGISTER_NODE (ip6_mfib_forward_rpf_node, static) = { .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT, .next_nodes = { - [MFIB_FORWARD_RPF_NEXT_DROP] = "error-drop", + [MFIB_FORWARD_RPF_NEXT_DROP] = "ip6-drop", }, }; diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c index 3b4bd985..7ffe8941 100644 --- a/src/vnet/mfib/mfib_table.c +++ b/src/vnet/mfib/mfib_table.c @@ -165,6 +165,7 @@ fib_node_index_t mfib_table_entry_update (u32 fib_index, const mfib_prefix_t *prefix, mfib_source_t source, + fib_rpf_id_t rpf_id, mfib_entry_flags_t entry_flags) { fib_node_index_t mfib_entry_index; @@ -181,7 +182,8 @@ mfib_table_entry_update (u32 fib_index, * update to a non-existing entry with non-zero flags */ mfib_entry_index = mfib_entry_create(fib_index, source, - prefix, entry_flags); + prefix, rpf_id, + entry_flags); mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index); } @@ -198,6 +200,7 @@ mfib_table_entry_update (u32 fib_index, if (mfib_entry_update(mfib_entry_index, source, 
entry_flags, + rpf_id, INDEX_INVALID)) { /* @@ -230,6 +233,7 @@ mfib_table_entry_path_update (u32 fib_index, mfib_entry_index = mfib_entry_create(fib_index, source, prefix, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_NONE); mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index); @@ -304,6 +308,7 @@ mfib_table_entry_special_add (u32 fib_index, mfib_entry_index = mfib_entry_create(fib_index, source, prefix, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_NONE); mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index); @@ -311,6 +316,7 @@ mfib_table_entry_special_add (u32 fib_index, mfib_entry_update(mfib_entry_index, source, (MFIB_ENTRY_FLAG_EXCLUSIVE | entry_flags), + MFIB_RPF_ID_NONE, rep_dpo); return (mfib_entry_index); diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h index 95239f7c..83aa04ef 100644 --- a/src/vnet/mfib/mfib_table.h +++ b/src/vnet/mfib/mfib_table.h @@ -122,6 +122,7 @@ extern fib_node_index_t mfib_table_lookup_exact_match(u32 fib_index, extern fib_node_index_t mfib_table_entry_update(u32 fib_index, const mfib_prefix_t *prefix, mfib_source_t source, + fib_rpf_id_t rpf_id, mfib_entry_flags_t flags); /** diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c index 36a303e8..7c92ae99 100644 --- a/src/vnet/mfib/mfib_test.c +++ b/src/vnet/mfib/mfib_test.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include @@ -201,8 +203,8 @@ mfib_test_validate_rep_v (const replicate_t *rep, if (DPO_RECEIVE != dt) { MFIB_TEST_REP((ai == dpo->dpoi_index), - "bucket %d stacks on %U", - bucket, + "bucket %d [exp:%d] stacks on %U", + bucket, ai, format_dpo_id, dpo, 0); } } @@ -734,6 +736,7 @@ mfib_test_i (fib_protocol_t PROTO, mfib_table_entry_update(fib_index, pfx_s_g, MFIB_SOURCE_API, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_SIGNAL); MFIB_TEST(mfib_test_entry(mfei, MFIB_ENTRY_FLAG_SIGNAL, @@ -824,6 +827,7 @@ mfib_test_i (fib_protocol_t PROTO, mfib_table_entry_update(fib_index, pfx_s_g, MFIB_SOURCE_API, + MFIB_RPF_ID_NONE, (MFIB_ENTRY_FLAG_SIGNAL | MFIB_ENTRY_FLAG_CONNECTED)); MFIB_TEST(mfib_test_entry(mfei, @@ -965,6 +969,7 @@ mfib_test_i (fib_protocol_t PROTO, mfib_table_entry_update(fib_index, pfx_s_g, MFIB_SOURCE_API, + MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_NONE); mfei = mfib_table_lookup_exact_match(fib_index, pfx_s_g); @@ -1073,6 +1078,117 @@ mfib_test_i (fib_protocol_t PROTO, MFIB_SOURCE_SRv6); dpo_reset(&td); + /* + * A Multicast LSP. 
This is an mLDP head-end + */ + fib_node_index_t ai_mpls_10_10_10_1, lfei; + ip46_address_t nh_10_10_10_1 = { + .ip4 = { + .as_u32 = clib_host_to_net_u32(0x0a0a0a01), + }, + }; + ai_mpls_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_MPLS, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index); + + fib_prefix_t pfx_3500 = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 3500, + .fp_eos = MPLS_EOS, + .fp_payload_proto = DPO_PROTO_IP4, + }; + fib_test_rep_bucket_t mc_0 = { + .type = FT_REP_LABEL_O_ADJ, + .label_o_adj = { + .adj = ai_mpls_10_10_10_1, + .label = 3300, + .eos = MPLS_EOS, + }, + }; + mpls_label_t *l3300 = NULL; + vec_add1(l3300, 3300); + + /* + * MPLS enable an interface so we get the MPLS table created + */ + mpls_sw_interface_enable_disable(&mpls_main, + tm->hw[0]->sw_if_index, + 1); + + lfei = fib_table_entry_update_one_path(0, // default MPLS Table + &pfx_3500, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_MULTICAST, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + l3300, + FIB_ROUTE_PATH_FLAG_NONE); + MFIB_TEST(fib_test_validate_entry(lfei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 1, + &mc_0), + "3500 via replicate over 10.10.10.1"); + + /* + * An (S,G) that resolves via the mLDP head-end + */ + fib_route_path_t path_via_mldp = { + .frp_proto = FIB_PROTOCOL_MPLS, + .frp_local_label = pfx_3500.fp_label, + .frp_eos = MPLS_EOS, + .frp_sw_if_index = 0xffffffff, + .frp_fib_index = 0, + .frp_weight = 1, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + }; + dpo_id_t mldp_dpo = DPO_INVALID; + + fib_entry_contribute_forwarding(lfei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + &mldp_dpo); + + mfei = mfib_table_entry_path_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_mldp, + MFIB_ITF_FLAG_FORWARD); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 1, + DPO_REPLICATE, mldp_dpo.dpoi_index), + "%U over-mLDP replicate OK", + format_mfib_prefix, pfx_s_g); + + /* + * add a for-us path. 
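
/*
 * Sketch of the varargs validation idiom the tests lean on (cf. the
 * fib_test_validate_entry/_v declarations added in fib_test.h): a
 * fixed-arg wrapper forwards to a va_list walker that consumes one
 * expected-bucket pointer per bucket. The bucket type is a mock, not
 * VPP's.
 */
#include <stdarg.h>
#include <stdio.h>

typedef struct { int type; } exp_bucket_t;

static int
validate_v (int n_buckets, const int *actual, va_list ap)
{
  for (int i = 0; i < n_buckets; i++)
    {
      const exp_bucket_t *exp = va_arg (ap, const exp_bucket_t *);
      if (exp->type != actual[i])
        return 0;
    }
  return 1;
}

static int
validate (int n_buckets, const int *actual, ...)
{
  va_list ap;
  int ok;

  va_start (ap, actual);
  ok = validate_v (n_buckets, actual, ap);
  va_end (ap);
  return ok;
}

int
main (void)
{
  exp_bucket_t mc_0 = { .type = 7 };
  int actual[] = { 7 };
  printf ("%d\n", validate (1, actual, &mc_0)); /* 1: matches */
  return 0;
}
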
this tests two types of non-attached paths on one entry + */ + mfei = mfib_table_entry_path_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_for_us, + MFIB_ITF_FLAG_FORWARD); + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 2, + DPO_REPLICATE, mldp_dpo.dpoi_index, + DPO_RECEIVE, 0), + "%U mLDP+for-us replicate OK", + format_mfib_prefix, pfx_s_g); + + mfib_table_entry_delete(fib_index, + pfx_s_g, + MFIB_SOURCE_API); + fib_table_entry_delete(0, + &pfx_3500, + FIB_SOURCE_API); + dpo_reset(&mldp_dpo); + /* * Unlock the table - it's the last lock so should be gone thereafter */ @@ -1086,6 +1202,13 @@ mfib_test_i (fib_protocol_t PROTO, adj_unlock(ai_2); adj_unlock(ai_3); + /* + * MPLS disable the interface + */ + mpls_sw_interface_enable_disable(&mpls_main, + tm->hw[0]->sw_if_index, + 0); + /* * test we've leaked no resources */ diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api index 2e3bfaf5..a1e1270a 100644 --- a/src/vnet/mpls/mpls.api +++ b/src/vnet/mpls/mpls.api @@ -55,6 +55,7 @@ define mpls_ip_bind_unbind_reply @param context - sender context, to match reply w/ request @param mt_is_add - Is this a route add or delete @param mt_sw_if_index - The SW interface index of the tunnel to delete + @param mt_is_multicast - Is the tunnel's underlying LSP multicast @param mt_next_hop_proto_is_ip4 - The next-hop is IPV4 @param mt_next_hop_weight - The weight, for UCMP @param mt_next_hop[16] - the nextop address @@ -70,6 +71,7 @@ define mpls_tunnel_add_del u32 mt_sw_if_index; u8 mt_is_add; u8 mt_l2_only; + u8 mt_is_multicast; u8 mt_next_hop_proto_is_ip4; u8 mt_next_hop_weight; u8 mt_next_hop[16]; @@ -102,30 +104,43 @@ define mpls_tunnel_dump i32 tunnel_index; }; -/** \brief mpls eth tunnel operational state response - @param tunnel_index - eth tunnel identifier - @param intfc_address - interface ipv4 addr - @param mask_width - interface ipv4 addr mask - @param hw_if_index - interface id - @param l2_only - - @param tunnel_dst_mac - - @param tx_sw_if_index - - @param encap_index - reference to mpls label table - @param nlabels - number of resolved labels - @param labels - resolved labels +/** \brief FIB path + @param sw_if_index - index of the interface + @param weight - The weight, for UCMP + @param is_local - local if non-zero, else remote + @param is_drop - Drop the packet + @param is_unreach - Drop the packet and rate limit send ICMP unreachable + @param is_prohibit - Drop the packet and rate limit send ICMP prohibited + @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2 + @param next_hop[16] - the next hop address + + WARNING: this type is replicated, pending cleanup completion + +*/ +typeonly manual_print manual_endian define fib_path2 +{ + u32 sw_if_index; + u32 weight; + u8 is_local; + u8 is_drop; + u8 is_unreach; + u8 is_prohibit; + u8 afi; + u8 next_hop[16]; + u32 labels[16]; +}; + +/** \brief mpls tunnel details */ -define mpls_tunnel_details +manual_endian manual_print define mpls_tunnel_details { u32 context; - u32 tunnel_index; - u8 mt_l2_only; u8 mt_sw_if_index; - u8 mt_next_hop_proto_is_ip4; - u8 mt_next_hop[16]; - u32 mt_next_hop_sw_if_index; - u32 mt_next_hop_table_id; - u32 mt_next_hop_n_labels; - u32 mt_next_hop_out_labels[mt_next_hop_n_labels]; + u8 mt_tunnel_index; + u8 mt_l2_only; + u8 mt_is_multicast; + u32 mt_count; + vl_api_fib_path2_t mt_paths[mt_count]; }; /** \brief MPLS Route Add / del route @@ -140,10 +155,14 @@ define mpls_tunnel_details create them @param mr_is_add - Is this a route add or delete @param mr_is_classify - Is 
this route result a classify + @param mr_is_multicast - Is this a multicast route @param mr_is_multipath - Is this route update a multipath - i.e. is this a path addition to an existing route @param mr_is_resolve_host - Recurse resolution constraint via a host prefix @param mr_is_resolve_attached - Recurse resolution constraint via attached prefix + @param mr_is_interface_rx - Interface Receive path + @param mr_is_rpf_id - RPF-ID Receive path. The next-hop interface + is used as the RPF-ID @param mr_next_hop_proto_is_ip4 - The next-hop is IPV4 @param mr_next_hop_weight - The weight, for UCMP @param mr_next_hop[16] - the nextop address @@ -164,9 +183,12 @@ define mpls_route_add_del u8 mr_create_table_if_needed; u8 mr_is_add; u8 mr_is_classify; + u8 mr_is_multicast; u8 mr_is_multipath; u8 mr_is_resolve_host; u8 mr_is_resolve_attached; + u8 mr_is_interface_rx; + u8 mr_is_rpf_id; u8 mr_next_hop_proto_is_ip4; u8 mr_next_hop_weight; u8 mr_next_hop[16]; @@ -187,31 +209,6 @@ define mpls_route_add_del_reply i32 retval; }; -/** \brief FIB path - @param sw_if_index - index of the interface - @param weight - The weight, for UCMP - @param is_local - local if non-zero, else remote - @param is_drop - Drop the packet - @param is_unreach - Drop the packet and rate limit send ICMP unreachable - @param is_prohibit - Drop the packet and rate limit send ICMP prohibited - @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2 - @param next_hop[16] - the next hop address - - WARNING: this type is replicated, pending cleanup completion - -*/ -typeonly manual_print manual_endian define fib_path2 -{ - u32 sw_if_index; - u32 weight; - u8 is_local; - u8 is_drop; - u8 is_unreach; - u8 is_prohibit; - u8 afi; - u8 next_hop[16]; -}; - /** \brief Dump MPLS fib table @param client_index - opaque cookie to identify the sender */ diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 482577b1..451b15cf 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -286,7 +286,15 @@ vnet_mpls_local_label (vlib_main_t * vm, rpath.frp_proto = FIB_PROTOCOL_IP4; vec_add1(rpaths, rpath); } - + else if (unformat (line_input, "rx-ip4 %U", + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U %U", unformat_ip6_address, &rpath.frp_addr.ip6, @@ -512,10 +520,3 @@ mpls_init (vlib_main_t * vm) } VLIB_INIT_FUNCTION (mpls_init); - -mpls_main_t * mpls_get_main (vlib_main_t * vm) -{ - vlib_call_init_function (vm, mpls_init); - return &mpls_main; -} - diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index f1aef6c9..6bfc491d 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -163,6 +164,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, dpo_proto_to_fib (pfx.fp_payload_proto), mp->mr_next_hop_table_id, mp->mr_create_table_if_needed, + mp->mr_is_rpf_id, &fib_index, &next_hop_fib_index); if (0 != rv) @@ -192,10 +194,13 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, 0, // mp->is_unreach, 0, // mp->is_prohibit, 0, // mp->is_local, + mp->mr_is_multicast, mp->mr_is_classify, mp->mr_classify_table_index, mp->mr_is_resolve_host, mp->mr_is_resolve_attached, + mp->mr_is_interface_rx, + mp->mr_is_rpf_id, fib_index, &pfx, mp->mr_next_hop_proto_is_ip4, &nh, ntohl (mp->mr_next_hop_sw_if_index), @@ -229,46 +234,54 @@ 
vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) int rv = 0; u32 tunnel_sw_if_index; int ii; + fib_route_path_t rpath, *rpaths = NULL; + + memset (&rpath, 0, sizeof (rpath)); stats_dslock_with_hint (1 /* release hint */ , 5 /* tag */ ); - if (mp->mt_is_add) + if (mp->mt_next_hop_proto_is_ip4) { - fib_route_path_t rpath, *rpaths = NULL; - mpls_label_t *label_stack = NULL; - - memset (&rpath, 0, sizeof (rpath)); - - if (mp->mt_next_hop_proto_is_ip4) - { - rpath.frp_proto = FIB_PROTOCOL_IP4; - clib_memcpy (&rpath.frp_addr.ip4, - mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); - } - else - { - rpath.frp_proto = FIB_PROTOCOL_IP6; - clib_memcpy (&rpath.frp_addr.ip6, - mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); - } - rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index); + rpath.frp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&rpath.frp_addr.ip4, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); + } + else + { + rpath.frp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&rpath.frp_addr.ip6, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); + } + rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index); + rpath.frp_weight = 1; + if (mp->mt_is_add) + { for (ii = 0; ii < mp->mt_next_hop_n_out_labels; ii++) - vec_add1 (label_stack, ntohl (mp->mt_next_hop_out_label_stack[ii])); + vec_add1 (rpath.frp_label_stack, + ntohl (mp->mt_next_hop_out_label_stack[ii])); + } - vec_add1 (rpaths, rpath); + vec_add1 (rpaths, rpath); - vnet_mpls_tunnel_add (rpaths, label_stack, - mp->mt_l2_only, &tunnel_sw_if_index); - vec_free (rpaths); - vec_free (label_stack); + tunnel_sw_if_index = ntohl (mp->mt_sw_if_index); + + if (mp->mt_is_add) + { + if (~0 == tunnel_sw_if_index) + tunnel_sw_if_index = vnet_mpls_tunnel_create (mp->mt_l2_only, + mp->mt_is_multicast); + vnet_mpls_tunnel_path_add (tunnel_sw_if_index, rpaths); } else { tunnel_sw_if_index = ntohl (mp->mt_sw_if_index); - vnet_mpls_tunnel_del (tunnel_sw_if_index); + if (!vnet_mpls_tunnel_path_remove (tunnel_sw_if_index, rpaths)) + vnet_mpls_tunnel_del (tunnel_sw_if_index); } + vec_free (rpaths); + stats_dsunlock (); /* *INDENT-OFF* */ @@ -289,10 +302,12 @@ typedef struct mpls_tunnel_send_walk_ctx_t_ static void send_mpls_tunnel_entry (u32 mti, void *arg) { + fib_route_path_encode_t *api_rpaths, *api_rpath; mpls_tunnel_send_walk_ctx_t *ctx; vl_api_mpls_tunnel_details_t *mp; const mpls_tunnel_t *mt; - u32 nlabels; + vl_api_fib_path2_t *fp; + u32 n; ctx = arg; @@ -300,18 +315,34 @@ send_mpls_tunnel_entry (u32 mti, void *arg) return; mt = mpls_tunnel_get (mti); - nlabels = vec_len (mt->mt_label_stack); + n = fib_path_list_get_n_paths (mt->mt_path_list); + + mp = vl_msg_api_alloc (sizeof (*mp) + n * sizeof (vl_api_fib_path2_t)); + memset (mp, 0, sizeof (*mp) + n * sizeof (vl_api_fib_path2_t)); - mp = vl_msg_api_alloc (sizeof (*mp) + nlabels * sizeof (u32)); - memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_MPLS_TUNNEL_DETAILS); mp->context = ctx->context; - mp->tunnel_index = ntohl (mti); - memcpy (mp->mt_next_hop_out_labels, - mt->mt_label_stack, nlabels * sizeof (u32)); + mp->mt_tunnel_index = ntohl (mti); + mp->mt_count = ntohl (n); + + fib_path_list_walk (mt->mt_path_list, fib_path_encode, &api_rpaths); + + fp = mp->mt_paths; + vec_foreach (api_rpath, api_rpaths) + { + memset (fp, 0, sizeof (*fp)); + + fp->weight = htonl (api_rpath->rpath.frp_weight); + fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index); + copy_fib_next_hop (api_rpath, fp); + fp++; + } // FIXME + // memcpy (mp->mt_next_hop_out_labels, + // 
mt->mt_label_stack, nlabels * sizeof (u32)); + vl_msg_api_send_shmem (ctx->q, (u8 *) & mp); } diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c index 1b9bdd05..86ad8bba 100644 --- a/src/vnet/mpls/mpls_input.c +++ b/src/vnet/mpls/mpls_input.c @@ -291,7 +291,7 @@ mpls_setup_nodes (vlib_main_t * vm) rt->last_outer_fib_index = 0; rt->mpls_main = &mpls_main; - ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST, + ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS, mpls_input_node.index); } diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c index ace6a70f..3c6be7e8 100644 --- a/src/vnet/mpls/mpls_lookup.c +++ b/src/vnet/mpls/mpls_lookup.c @@ -20,8 +20,17 @@ #include #include #include +#include -vlib_node_registration_t mpls_lookup_node; +/** + * Static MPLS VLIB forwarding node + */ +static vlib_node_registration_t mpls_lookup_node; + +/** + * The arc/edge from the MPLS lookup node to the MPLS replicate node + */ +static u32 mpls_lookup_to_replicate_edge; typedef struct { u32 next_index; @@ -156,81 +165,123 @@ mpls_lookup (vlib_main_t * vm, lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2); lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3); - lb0 = load_balance_get(lbi0); - lb1 = load_balance_get(lbi1); - lb2 = load_balance_get(lbi2); - lb3 = load_balance_get(lbi3); - hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0; hash_c2 = vnet_buffer(b2)->ip.flow_hash = 0; hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0; - if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + if (MPLS_IS_REPLICATE & lbi0) { - hash_c0 = vnet_buffer (b0)->ip.flow_hash = - mpls_compute_flow_hash(h0, lb0->lb_hash_config); + next0 = mpls_lookup_to_replicate_edge; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + (lbi0 & ~MPLS_IS_REPLICATE); } - if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) + else { - hash_c1 = vnet_buffer (b1)->ip.flow_hash = - mpls_compute_flow_hash(h1, lb1->lb_hash_config); + lb0 = load_balance_get(lbi0); + + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + } + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + next0 = dpo0->dpoi_next_node; + + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); } - if (PREDICT_FALSE(lb2->lb_n_buckets > 1)) + if (MPLS_IS_REPLICATE & lbi1) { - hash_c2 = vnet_buffer (b2)->ip.flow_hash = - mpls_compute_flow_hash(h2, lb2->lb_hash_config); + next1 = mpls_lookup_to_replicate_edge; + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = + (lbi1 & ~MPLS_IS_REPLICATE); } - if (PREDICT_FALSE(lb3->lb_n_buckets > 1)) + else { - hash_c3 = vnet_buffer (b3)->ip.flow_hash = - mpls_compute_flow_hash(h3, lb3->lb_hash_config); - } - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - ASSERT (lb1->lb_n_buckets > 0); - ASSERT (is_pow2 (lb1->lb_n_buckets)); - ASSERT (lb2->lb_n_buckets > 0); - ASSERT (is_pow2 (lb2->lb_n_buckets)); - ASSERT (lb3->lb_n_buckets > 0); - ASSERT (is_pow2 (lb3->lb_n_buckets)); - - dpo0 = load_balance_get_bucket_i(lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); - dpo1 = load_balance_get_bucket_i(lb1, - (hash_c1 & - (lb1->lb_n_buckets_minus_1))); - dpo2 = load_balance_get_bucket_i(lb2, - (hash_c2 & - (lb2->lb_n_buckets_minus_1))); - dpo3 = 
load_balance_get_bucket_i(lb3, - (hash_c3 & - (lb3->lb_n_buckets_minus_1))); + lb1 = load_balance_get(lbi1); - next0 = dpo0->dpoi_next_node; - next1 = dpo1->dpoi_next_node; - next2 = dpo2->dpoi_next_node; - next3 = dpo3->dpoi_next_node; + if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) + { + hash_c1 = vnet_buffer (b1)->ip.flow_hash = + mpls_compute_flow_hash(h1, lb1->lb_hash_config); + } + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + dpo1 = load_balance_get_bucket_i(lb1, + (hash_c1 & + (lb1->lb_n_buckets_minus_1))); + next1 = dpo1->dpoi_next_node; + + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi1, 1, + vlib_buffer_length_in_chain (vm, b1)); + } + if (MPLS_IS_REPLICATE & lbi2) + { + next2 = mpls_lookup_to_replicate_edge; + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = + (lbi2 & ~MPLS_IS_REPLICATE); + } + else + { + lb2 = load_balance_get(lbi2); - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; - vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; - vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; + if (PREDICT_FALSE(lb2->lb_n_buckets > 1)) + { + hash_c2 = vnet_buffer (b2)->ip.flow_hash = + mpls_compute_flow_hash(h2, lb2->lb_hash_config); + } + ASSERT (lb2->lb_n_buckets > 0); + ASSERT (is_pow2 (lb2->lb_n_buckets)); + dpo2 = load_balance_get_bucket_i(lb2, + (hash_c2 & + (lb2->lb_n_buckets_minus_1))); + next2 = dpo2->dpoi_next_node; + + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi2, 1, + vlib_buffer_length_in_chain (vm, b2)); + } + if (MPLS_IS_REPLICATE & lbi3) + { + next3 = mpls_lookup_to_replicate_edge; + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = + (lbi3 & ~MPLS_IS_REPLICATE); + } + else + { + lb3 = load_balance_get(lbi3); - vlib_increment_combined_counter - (cm, thread_index, lbi0, 1, - vlib_buffer_length_in_chain (vm, b0)); - vlib_increment_combined_counter - (cm, thread_index, lbi1, 1, - vlib_buffer_length_in_chain (vm, b1)); - vlib_increment_combined_counter - (cm, thread_index, lbi2, 1, - vlib_buffer_length_in_chain (vm, b2)); - vlib_increment_combined_counter - (cm, thread_index, lbi3, 1, - vlib_buffer_length_in_chain (vm, b3)); + if (PREDICT_FALSE(lb3->lb_n_buckets > 1)) + { + hash_c3 = vnet_buffer (b3)->ip.flow_hash = + mpls_compute_flow_hash(h3, lb3->lb_hash_config); + } + ASSERT (lb3->lb_n_buckets > 0); + ASSERT (is_pow2 (lb3->lb_n_buckets)); + dpo3 = load_balance_get_bucket_i(lb3, + (hash_c3 & + (lb3->lb_n_buckets_minus_1))); + next3 = dpo3->dpoi_next_node; + + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi3, 1, + vlib_buffer_length_in_chain (vm, b3)); + } /* * before we pop the label copy th values we need to maintain. 
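
/*
 * Sketch of the index tagging the reworked mpls-lookup relies on: the
 * lfib result is a single u32 that is either a load-balance index or,
 * when the MPLS_IS_REPLICATE bit is set, a replicate index. The exact
 * bit position used here is an assumption for the sketch.
 */
#include <stdint.h>
#include <stdio.h>

#define MPLS_IS_REPLICATE (1u << 31)

static const char *
dispatch (uint32_t lbi, uint32_t *index)
{
  if (lbi & MPLS_IS_REPLICATE)
    {
      *index = lbi & ~MPLS_IS_REPLICATE; /* adj index for mpls-replicate */
      return "mpls-replicate";
    }
  *index = lbi;                          /* hashed into the lb's buckets */
  return "load-balance";
}

int
main (void)
{
  uint32_t idx;
  printf ("%s %u\n", dispatch (42, &idx), idx);                     /* load-balance 42 */
  printf ("%s %u\n", dispatch (42 | MPLS_IS_REPLICATE, &idx), idx); /* mpls-replicate 42 */
  return 0;
}
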
@@ -331,31 +382,41 @@ mpls_lookup (vlib_main_t * vm, vnet_buffer(b0)->sw_if_index[VLIB_RX]); lbi0 = mpls_fib_table_forwarding_lookup(lfib_index0, h0); - lb0 = load_balance_get(lbi0); - hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; + + if (MPLS_IS_REPLICATE & lbi0) { - hash_c0 = vnet_buffer (b0)->ip.flow_hash = - mpls_compute_flow_hash(h0, lb0->lb_hash_config); + next0 = mpls_lookup_to_replicate_edge; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + (lbi0 & ~MPLS_IS_REPLICATE); } + else + { + lb0 = load_balance_get(lbi0); - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + } - dpo0 = load_balance_get_bucket_i(lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); - next0 = dpo0->dpoi_next_node; - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); - vlib_increment_combined_counter - (cm, thread_index, lbi0, 1, - vlib_buffer_length_in_chain (vm, b0)); + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + } /* - * before we pop the label copy th values we need to maintain. + * before we pop the label, copy the values we need to maintain. * The label header is in network byte order. * last byte is the TTL. * bits 2 to 4 inclusive are the EXP bits @@ -398,7 +459,7 @@ static char * mpls_error_strings[] = { #undef mpls_error }; -VLIB_REGISTER_NODE (mpls_lookup_node) = { +VLIB_REGISTER_NODE (mpls_lookup_node, static) = { .function = mpls_lookup, .name = "mpls-lookup", /* Takes a vector of packets. */ @@ -621,3 +682,22 @@ VLIB_REGISTER_NODE (mpls_load_balance_node) = { }; VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance) + + +static clib_error_t * +mpls_lookup_init (vlib_main_t * vm) +{ + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, mpls_init))) + return error; + + mpls_lookup_to_replicate_edge = + vlib_node_add_named_next(vm, + mpls_lookup_node.index, + "mpls-replicate"); + + return (NULL); +} + +VLIB_INIT_FUNCTION (mpls_lookup_init); diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c index ac6fdcdf..1254dd9d 100644 --- a/src/vnet/mpls/mpls_tunnel.c +++ b/src/vnet/mpls/mpls_tunnel.c @@ -18,9 +18,12 @@ #include #include #include +#include #include #include #include +#include +#include /** * @brief pool of tunnel instances @@ -37,6 +40,11 @@ static u32 * mpls_tunnel_free_hw_if_indices; */ static u32 *mpls_tunnel_db; +/** + * @brief MPLS tunnel flags strings + */ +static const char *mpls_tunnel_attribute_names[] = MPLS_TUNNEL_ATTRIBUTES; + /** * @brief Get a tunnel object from a SW interface index */ @@ -44,103 +52,178 @@ static mpls_tunnel_t* mpls_tunnel_get_from_sw_if_index (u32 sw_if_index) { if ((vec_len(mpls_tunnel_db) < sw_if_index) || - (~0 == mpls_tunnel_db[sw_if_index])) - return (NULL); + (~0 == mpls_tunnel_db[sw_if_index])) + return (NULL); return (pool_elt_at_index(mpls_tunnel_pool, - mpls_tunnel_db[sw_if_index])); + mpls_tunnel_db[sw_if_index])); } /** - * @brief Return true if the label stack is imp-null only + * @brief Build a rewrite string for the MPLS tunnel. 
*/ -static fib_forward_chain_type_t -mpls_tunnel_get_fwd_chain_type (const mpls_tunnel_t *mt) +static u8* +mpls_tunnel_build_rewrite_i (void) { - if ((1 == vec_len(mt->mt_label_stack)) && - (mt->mt_label_stack[0] == MPLS_IETF_IMPLICIT_NULL_LABEL)) - { - /* - * the only label in the label stack is implicit null - * we need to build an IP chain. - */ - if (FIB_PROTOCOL_IP4 == fib_path_list_get_proto(mt->mt_path_list)) - { - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); - } - else - { - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); - } - } - else - { - return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); - } + /* + * passing the adj code a NULL rewrite means 'I don't have one because + * the other end is unresolved'. That's not the case here. For the mpls + * tunnel there are just no bytes of encap to apply in the adj. We'll impose + * the label stack once we choose a path. So return a zero length rewrite. + */ + u8 *rewrite = NULL; + + vec_validate(rewrite, 0); + vec_reset_length(rewrite); + + return (rewrite); } /** * @brief Build a rewrite string for the MPLS tunnel. - * - * We have choices here; - * 1 - have an Adjacency with a zero length string and stack it on - * MPLS label objects - * 2 - put the label header rewrites in the adjacency string. - * - * We choose 2 since it results in fewer graph nodes in the egress path */ static u8* mpls_tunnel_build_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - vnet_link_t link_type, - const void *dst_address) + u32 sw_if_index, + vnet_link_t link_type, + const void *dst_address) { - mpls_unicast_header_t *muh; - mpls_tunnel_t *mt; - u8 *rewrite; - u32 mti, ii; + return (mpls_tunnel_build_rewrite_i()); +} - rewrite = NULL; - mti = mpls_tunnel_db[sw_if_index]; - mt = pool_elt_at_index(mpls_tunnel_pool, mti); +typedef struct mpls_tunnel_collect_forwarding_ctx_t_ +{ + load_balance_path_t * next_hops; + const mpls_tunnel_t *mt; + fib_forward_chain_type_t fct; +} mpls_tunnel_collect_forwarding_ctx_t; + +static int +mpls_tunnel_collect_forwarding (fib_node_index_t pl_index, + fib_node_index_t path_index, + void *arg) +{ + mpls_tunnel_collect_forwarding_ctx_t *ctx; + fib_path_ext_t *path_ext; + int have_path_ext; + + ctx = arg; /* - * The vector must be allocated as u8 so the length is correct + * if the path is not resolved, don't include it. */ - ASSERT(0 < vec_len(mt->mt_label_stack)); - vec_validate(rewrite, (sizeof(*muh) * vec_len(mt->mt_label_stack)) - 1); - ASSERT(rewrite); - muh = (mpls_unicast_header_t *)rewrite; + if (!fib_path_is_resolved(path_index)) + { + return (!0); + } /* - * The last (inner most) label in the stack may be EOS, all the rest Non-EOS + * get the matching path-extension for the path being visited.
*/ - for (ii = 0; ii < vec_len(mt->mt_label_stack)-1; ii++) + have_path_ext = 0; + vec_foreach(path_ext, ctx->mt->mt_path_exts) { - vnet_mpls_uc_set_label(&muh[ii].label_exp_s_ttl, mt->mt_label_stack[ii]); - vnet_mpls_uc_set_ttl(&muh[ii].label_exp_s_ttl, 255); - vnet_mpls_uc_set_exp(&muh[ii].label_exp_s_ttl, 0); - vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_NON_EOS); - muh[ii].label_exp_s_ttl = clib_host_to_net_u32(muh[ii].label_exp_s_ttl); + if (path_ext->fpe_path_index == path_index) + { + have_path_ext = 1; + break; + } } - vnet_mpls_uc_set_label(&muh[ii].label_exp_s_ttl, mt->mt_label_stack[ii]); - vnet_mpls_uc_set_ttl(&muh[ii].label_exp_s_ttl, 255); - vnet_mpls_uc_set_exp(&muh[ii].label_exp_s_ttl, 0); - - if ((VNET_LINK_MPLS == link_type) && - (mt->mt_label_stack[ii] != MPLS_IETF_IMPLICIT_NULL_LABEL)) + if (have_path_ext) { - vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_NON_EOS); + /* + * found a matching extension. stack it to obtain the forwarding + * info for this path. + */ + ctx->next_hops = fib_path_ext_stack(path_ext, + ctx->fct, + ctx->fct, + ctx->next_hops); } else + ASSERT(0); + /* + * else + * There should be a path-extension associated with each path + */ + + return (!0); +} + +static void +mpls_tunnel_mk_lb (mpls_tunnel_t *mt, + vnet_link_t linkt, + fib_forward_chain_type_t fct, + dpo_id_t *dpo_lb) +{ + dpo_proto_t lb_proto; + + /* + * If the entry has path extensions then we construct a load-balance + * by stacking the extensions on the forwarding chains of the paths. + * Otherwise we use the load-balance of the path-list + */ + mpls_tunnel_collect_forwarding_ctx_t ctx = { + .mt = mt, + .next_hops = NULL, + .fct = fct, + }; + + /* + * As an optimisation we allocate the vector of next-hops to be sized + * equal to the maximum number of paths we will need, which is also the + * most likely number we will need, since in most cases the paths are 'up'. + */ + vec_validate(ctx.next_hops, fib_path_list_get_n_paths(mt->mt_path_list)); + vec_reset_length(ctx.next_hops); + + lb_proto = vnet_link_to_dpo_proto(linkt); + + fib_path_list_walk(mt->mt_path_list, + mpls_tunnel_collect_forwarding, + &ctx); + + if (!dpo_id_is_valid(dpo_lb)) { - vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_EOS); + /* + * first time create + */ + if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST) + { + dpo_set(dpo_lb, + DPO_REPLICATE, + lb_proto, + replicate_create(0, lb_proto)); + } + else + { + flow_hash_config_t fhc; + + fhc = 0; // FIXME + /* fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, */ + /* dpo_proto_to_fib(lb_proto)); */ + dpo_set(dpo_lb, + DPO_LOAD_BALANCE, + lb_proto, + load_balance_create(0, lb_proto, fhc)); + } } - muh[ii].label_exp_s_ttl = clib_host_to_net_u32(muh[ii].label_exp_s_ttl); - - return (rewrite); + if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST) + { + /* + * MPLS multicast + */ + replicate_multipath_update(dpo_lb, ctx.next_hops); + } + else + { + load_balance_multipath_update(dpo_lb, + ctx.next_hops, + LOAD_BALANCE_FLAG_NONE); + vec_free(ctx.next_hops); + } } /** @@ -161,45 +244,47 @@ mpls_tunnel_stack (adj_index_t ai) mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); if (NULL == mt) - return; + return; /* - * find the adjacency that is contributed by the FIB path-list - * that this tunnel resovles via, and use it as the next adj - * in the midchain + * while we're stacking the adj, remove the tunnel from the child list + * of the path list.
this breaks a circular dependency of walk updates + * where the create of adjacencies in the children can lead to walks + * that get back here. */ - if (vnet_hw_interface_get_flags(vnet_get_main(), - mt->mt_hw_if_index) & - VNET_HW_INTERFACE_FLAG_LINK_UP) - { - dpo_id_t dpo = DPO_INVALID; + fib_path_list_lock(mt->mt_path_list); - fib_path_list_contribute_forwarding(mt->mt_path_list, - mpls_tunnel_get_fwd_chain_type(mt), - &dpo); - - if (DPO_LOAD_BALANCE == dpo.dpoi_type) - { - /* - * we don't support multiple paths, so no need to load-balance. - * pull the first and only choice and stack directly on that. - */ - load_balance_t *lb; - - lb = load_balance_get (dpo.dpoi_index); + fib_path_list_child_remove(mt->mt_path_list, + mt->mt_sibling_index); - ASSERT(1 == lb->lb_n_buckets); + /* + * Construct the DPO (load-balance or replicate) that we can stack + * the tunnel's midchain on + */ + if (vnet_hw_interface_get_flags(vnet_get_main(), + mt->mt_hw_if_index) & + VNET_HW_INTERFACE_FLAG_LINK_UP) + { + dpo_id_t dpo = DPO_INVALID; - dpo_copy(&dpo, load_balance_get_bucket_i (lb, 0)); - } + mpls_tunnel_mk_lb(mt, + adj->ia_link, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + &dpo); - adj_nbr_midchain_stack(ai, &dpo); - dpo_reset(&dpo); + adj_nbr_midchain_stack(ai, &dpo); + dpo_reset(&dpo); } else { - adj_nbr_midchain_unstack(ai); + adj_nbr_midchain_unstack(ai); } + + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mt - mpls_tunnel_pool); + + fib_path_list_unlock(mt->mt_path_list); } /** @@ -207,7 +292,7 @@ mpls_tunnel_stack (adj_index_t ai) */ static adj_walk_rc_t mpls_adj_walk_cb (adj_index_t ai, - void *ctx) + void *ctx) { mpls_tunnel_stack(ai); @@ -224,17 +309,17 @@ mpls_tunnel_restack (mpls_tunnel_t *mt) */ FOR_EACH_FIB_PROTOCOL(proto) { - adj_nbr_walk(mt->mt_sw_if_index, - proto, - mpls_adj_walk_cb, - NULL); + adj_nbr_walk(mt->mt_sw_if_index, + proto, + mpls_adj_walk_cb, + NULL); } } static clib_error_t * mpls_tunnel_admin_up_down (vnet_main_t * vnm, - u32 hw_if_index, - u32 flags) + u32 hw_if_index, + u32 flags) { vnet_hw_interface_t * hi; mpls_tunnel_t *mt; @@ -244,13 +329,13 @@ mpls_tunnel_admin_up_down (vnet_main_t * vnm, mt = mpls_tunnel_get_from_sw_if_index(hi->sw_if_index); if (NULL == mt) - return (NULL); + return (NULL); if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) - vnet_hw_interface_set_flags (vnm, hw_if_index, - VNET_HW_INTERFACE_FLAG_LINK_UP); + vnet_hw_interface_set_flags (vnm, hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); else - vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */); + vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */); mpls_tunnel_restack(mt); @@ -263,22 +348,58 @@ mpls_tunnel_admin_up_down (vnet_main_t * vnm, */ static void mpls_tunnel_fixup (vlib_main_t *vm, - ip_adjacency_t *adj, - vlib_buffer_t *b0) + ip_adjacency_t *adj, + vlib_buffer_t *b0) { + /* + * A no-op w.r.t. the header, but reset the 'have we pushed any + * MPLS labels onto the packet' flag.
That way when we enter the + * tunnel we'll get a TTL set to 255 + */ + vnet_buffer(b0)->mpls.first = 0; } static void mpls_tunnel_update_adj (vnet_main_t * vnm, - u32 sw_if_index, - adj_index_t ai) + u32 sw_if_index, + adj_index_t ai) { - adj_nbr_midchain_update_rewrite( - ai, mpls_tunnel_fixup, - ADJ_FLAG_NONE, - mpls_tunnel_build_rewrite(vnm, sw_if_index, - adj_get_link_type(ai), - NULL)); + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != ai); + + adj = adj_get(ai); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + case IP_LOOKUP_NEXT_GLEAN: + adj_nbr_midchain_update_rewrite(ai, mpls_tunnel_fixup, + ADJ_FLAG_NONE, + mpls_tunnel_build_rewrite_i()); + break; + case IP_LOOKUP_NEXT_MCAST: + /* + * Construct a partial rewrite from the known ethernet mcast dest MAC + * There's no MAC fixup, so the last 2 parameters are 0 + */ + adj_mcast_midchain_update_rewrite(ai, mpls_tunnel_fixup, + ADJ_FLAG_NONE, + mpls_tunnel_build_rewrite_i(), + 0, 0); + break; + + case IP_LOOKUP_NEXT_DROP: + case IP_LOOKUP_NEXT_PUNT: + case IP_LOOKUP_NEXT_LOCAL: + case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_MIDCHAIN: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: + case IP_LOOKUP_NEXT_ICMP_ERROR: + case IP_LOOKUP_N_NEXT: + ASSERT (0); + break; + } mpls_tunnel_stack(ai); } @@ -312,7 +433,7 @@ typedef struct mpls_tunnel_trace_t_ static u8 * format_mpls_tunnel_tx_trace (u8 * s, - va_list * args) + va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); @@ -327,8 +448,8 @@ format_mpls_tunnel_tx_trace (u8 * s, */ static uword mpls_tunnel_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) + vlib_node_runtime_t * node, + vlib_frame_t * frame) { u32 next_index; u32 * from, * to_next, n_left_from, n_left_to_next; @@ -355,32 +476,32 @@ mpls_tunnel_tx (vlib_main_t * vm, * FIXME DUAL LOOP */ while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t * b0; - u32 bi0; + { + vlib_buffer_t * b0; + u32 bi0; - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; - b0 = vlib_get_buffer(vm, bi0); + b0 = vlib_get_buffer(vm, bi0); - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj; - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - mpls_tunnel_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->tunnel_id = rd->dev_instance; - } + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_tunnel_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = rd->dev_instance; + } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, mt->mt_l2_tx_arc); - } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, mt->mt_l2_tx_arc); + } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } @@ -417,13 +538,13 @@ mpls_tunnel_get (u32 mti) */ void mpls_tunnel_walk (mpls_tunnel_walk_cb_t cb, - void *ctx) + void *ctx) { u32 mti; pool_foreach_index(mti, mpls_tunnel_pool, ({ - cb(mti, ctx); + cb(mti, ctx); })); } @@ -435,25 +556,22 @@ vnet_mpls_tunnel_del (u32 sw_if_index) mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); if (NULL == mt) - return; - - fib_path_list_child_remove(mt->mt_path_list, - mt->mt_sibling_index); - if (ADJ_INDEX_INVALID != 
mt->mt_l2_adj) - adj_unlock(mt->mt_l2_adj); + return; - vec_free(mt->mt_label_stack); + if (FIB_NODE_INDEX_INVALID != mt->mt_path_list) + fib_path_list_child_remove(mt->mt_path_list, + mt->mt_sibling_index); + if (ADJ_INDEX_INVALID != mt->mt_l2_adj) + adj_unlock(mt->mt_l2_adj); vec_add1 (mpls_tunnel_free_hw_if_indices, mt->mt_hw_if_index); pool_put(mpls_tunnel_pool, mt); mpls_tunnel_db[sw_if_index] = ~0; } -void -vnet_mpls_tunnel_add (fib_route_path_t *rpaths, - mpls_label_t *label_stack, - u8 l2_only, - u32 *sw_if_index) +u32 +vnet_mpls_tunnel_create (u8 l2_only, + u8 is_multicast) { vnet_hw_interface_t * hi; mpls_tunnel_t *mt; @@ -466,28 +584,33 @@ vnet_mpls_tunnel_add (fib_route_path_t *rpaths, mti = mt - mpls_tunnel_pool; fib_node_init(&mt->mt_node, FIB_NODE_TYPE_MPLS_TUNNEL); mt->mt_l2_adj = ADJ_INDEX_INVALID; + mt->mt_path_list = FIB_NODE_INDEX_INVALID; + mt->mt_sibling_index = FIB_NODE_INDEX_INVALID; + + if (is_multicast) + mt->mt_flags |= MPLS_TUNNEL_FLAG_MCAST; /* * Create a new, or re-use an old, tunnel HW interface */ if (vec_len (mpls_tunnel_free_hw_if_indices) > 0) { - mt->mt_hw_if_index = - mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1]; - _vec_len (mpls_tunnel_free_hw_if_indices) -= 1; - hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index); - hi->hw_instance = mti; - hi->dev_instance = mti; + mt->mt_hw_if_index = + mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1]; + _vec_len (mpls_tunnel_free_hw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index); + hi->hw_instance = mti; + hi->dev_instance = mti; } - else + else { - mt->mt_hw_if_index = vnet_register_interface( - vnm, - mpls_tunnel_class.index, - mti, - mpls_tunnel_hw_interface_class.index, - mti); - hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index); + mt->mt_hw_if_index = vnet_register_interface( + vnm, + mpls_tunnel_class.index, + mti, + mpls_tunnel_hw_interface_class.index, + mti); + hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index); } /* @@ -497,43 +620,218 @@ vnet_mpls_tunnel_add (fib_route_path_t *rpaths, vec_validate_init_empty(mpls_tunnel_db, mt->mt_sw_if_index, ~0); mpls_tunnel_db[mt->mt_sw_if_index] = mti; + if (l2_only) + { + mt->mt_l2_adj = + adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list), + VNET_LINK_ETHERNET, + &zero_addr, + mt->mt_sw_if_index); + + mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(), + hi->tx_node_index, + "adj-l2-midchain"); + } + + return (mt->mt_sw_if_index); +} + +/* + * mpls_tunnel_path_ext_add + * + * append a path extension to the entry's list + */ +static void +mpls_tunnel_path_ext_append (mpls_tunnel_t *mt, + const fib_route_path_t *rpath) +{ + if (NULL != rpath->frp_label_stack) + { + fib_path_ext_t *path_ext; + + vec_add2(mt->mt_path_exts, path_ext, 1); + + fib_path_ext_init(path_ext, mt->mt_path_list, rpath); + } +} + +/* + * mpls_tunnel_path_ext_insert + * + * insert, sorted, a path extension to the entry's list. + * It's not strictly necessary to sort the path extensions, since each + * extension has the path index to which it resolves. However, by being + * sorted the load-balance produced has a deterministic order, not an order + * based on the sequence of extension additions. This is a considerable benefit.
+ */ +static void +mpls_tunnel_path_ext_insert (mpls_tunnel_t *mt, + const fib_route_path_t *rpath) +{ + if (0 == vec_len(mt->mt_path_exts)) + return (mpls_tunnel_path_ext_append(mt, rpath)); + + if (NULL != rpath->frp_label_stack) + { + fib_path_ext_t path_ext; + int i = 0; + + fib_path_ext_init(&path_ext, mt->mt_path_list, rpath); + + while (i < vec_len(mt->mt_path_exts) && + (fib_path_ext_cmp(&mt->mt_path_exts[i], rpath) < 0)) + { + i++; + } + + vec_insert_elts(mt->mt_path_exts, &path_ext, 1, i); + } +} + +void +vnet_mpls_tunnel_path_add (u32 sw_if_index, + fib_route_path_t *rpaths) +{ + mpls_tunnel_t *mt; + u32 mti; + + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); + + if (NULL == mt) + return; + + mti = mt - mpls_tunnel_pool; + /* * construct a path-list from the path provided */ - mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths); - mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, - FIB_NODE_TYPE_MPLS_TUNNEL, - mti); + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) + { + mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths); + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + } + else + { + fib_node_index_t old_pl_index; + fib_path_ext_t *path_ext; + + old_pl_index = mt->mt_path_list; + + mt->mt_path_list = + fib_path_list_copy_and_path_add(old_pl_index, + FIB_PATH_LIST_FLAG_SHARED, + rpaths); + + fib_path_list_child_remove(old_pl_index, + mt->mt_sibling_index); + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + /* + * re-resolve all the path-extensions with the new path-list + */ + vec_foreach(path_ext, mt->mt_path_exts) + { + fib_path_ext_resolve(path_ext, mt->mt_path_list); + } + } + mpls_tunnel_path_ext_insert(mt, rpaths); + mpls_tunnel_restack(mt); +} + +int +vnet_mpls_tunnel_path_remove (u32 sw_if_index, + fib_route_path_t *rpaths) +{ + mpls_tunnel_t *mt; + u32 mti; - mt->mt_label_stack = vec_dup(label_stack); + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); - if (l2_only) + if (NULL == mt) + return (0); + + mti = mt - mpls_tunnel_pool; + + /* + * construct a path-list from the path provided + */ + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) { - mt->mt_l2_adj = - adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list), - VNET_LINK_ETHERNET, - &zero_addr, - mt->mt_sw_if_index); - - mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(), - hi->tx_node_index, - "adj-l2-midchain"); + /* can't remove a path if we have none */ + return (0); } - - *sw_if_index = mt->mt_sw_if_index; + else + { + fib_node_index_t old_pl_index; + fib_path_ext_t *path_ext; + + old_pl_index = mt->mt_path_list; + + mt->mt_path_list = + fib_path_list_copy_and_path_remove(old_pl_index, + FIB_PATH_LIST_FLAG_SHARED, + rpaths); + + fib_path_list_child_remove(old_pl_index, + mt->mt_sibling_index); + + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) + { + /* no paths left */ + return (0); + } + else + { + mt->mt_sibling_index = + fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + } + /* + * find the matching path extension and remove it + */ + vec_foreach(path_ext, mt->mt_path_exts) + { + if (!fib_path_ext_cmp(path_ext, rpaths)) + { + /* + * delete the element moving the remaining elements down 1 position. + * This preserves the sorted order.
+ */ + vec_free(path_ext->fpe_label_stack); + vec_delete(mt->mt_path_exts, 1, + (path_ext - mt->mt_path_exts)); + break; + } + } + /* + * re-resolve all the path-extensions with the new path-list + */ + vec_foreach(path_ext, mt->mt_path_exts) + { + fib_path_ext_resolve(path_ext, mt->mt_path_list); + } + + mpls_tunnel_restack(mt); + } + + return (fib_path_list_get_n_paths(mt->mt_path_list)); } + static clib_error_t * vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, + vlib_cli_command_t * cmd) { unformat_input_t _line_input, * line_input = &_line_input; vnet_main_t * vnm = vnet_get_main(); - u8 is_del = 0; - u8 l2_only = 0; + u8 is_del = 0, l2_only = 0, is_multicast =0; fib_route_path_t rpath, *rpaths = NULL; - mpls_label_t out_label = MPLS_LABEL_INVALID, *labels = NULL; + mpls_label_t out_label = MPLS_LABEL_INVALID; u32 sw_if_index; clib_error_t *error = NULL; @@ -541,87 +839,89 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, /* Get a line of input. */ if (! unformat_user (input, unformat_line_input, line_input)) - return 0; + return 0; while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "del %U", - unformat_vnet_sw_interface, vnm, - &sw_if_index)) - is_del = 1; - else if (unformat (line_input, "add")) - is_del = 0; - else if (unformat (line_input, "out-label %U", - unformat_mpls_unicast_label, &out_label)) - { - vec_add1(labels, out_label); - } - else if (unformat (line_input, "via %U %U", - unformat_ip4_address, - &rpath.frp_addr.ip4, - unformat_vnet_sw_interface, vnm, - &rpath.frp_sw_if_index)) - { - rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; - } - - else if (unformat (line_input, "via %U %U", - unformat_ip6_address, - &rpath.frp_addr.ip6, - unformat_vnet_sw_interface, vnm, - &rpath.frp_sw_if_index)) - { - rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP6; - } - else if (unformat (line_input, "via %U", - unformat_ip6_address, - &rpath.frp_addr.ip6)) - { - rpath.frp_fib_index = 0; - rpath.frp_weight = 1; - rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; - } - else if (unformat (line_input, "via %U", - unformat_ip4_address, - &rpath.frp_addr.ip4)) - { - rpath.frp_fib_index = 0; - rpath.frp_weight = 1; - rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; - } - else if (unformat (line_input, "l2-only")) - l2_only = 1; - else - { - error = clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); - goto done; - } + if (unformat (line_input, "del %U", + unformat_vnet_sw_interface, vnm, + &sw_if_index)) + is_del = 1; + else if (unformat (line_input, "add")) + is_del = 0; + else if (unformat (line_input, "out-label %U", + unformat_mpls_unicast_label, &out_label)) + { + vec_add1(rpath.frp_label_stack, out_label); + } + else if (unformat (line_input, "via %U %U", + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP4; + } + + else if (unformat (line_input, "via %U %U", + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP6; + } + else if (unformat (line_input, "via %U", + unformat_ip6_address, + &rpath.frp_addr.ip6)) + { + rpath.frp_fib_index = 0; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP6; + } 
+ else if (unformat (line_input, "via %U", + unformat_ip4_address, + &rpath.frp_addr.ip4)) + { + rpath.frp_fib_index = 0; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP4; + } + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else if (unformat (line_input, "multicast")) + is_multicast = 1; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } if (is_del) { - vnet_mpls_tunnel_del(sw_if_index); + vnet_mpls_tunnel_del(sw_if_index); } else { - if (0 == vec_len(labels)) - { - error = clib_error_return (0, "No Output Labels '%U'", - format_unformat_error, line_input); - goto done; - } - - vec_add1(rpaths, rpath); - vnet_mpls_tunnel_add(rpaths, labels, l2_only, &sw_if_index); + if (0 == vec_len(rpath.frp_label_stack)) + { + error = clib_error_return (0, "No Output Labels '%U'", + format_unformat_error, line_input); + goto done; + } + + vec_add1(rpaths, rpath); + sw_if_index = vnet_mpls_tunnel_create(l2_only, is_multicast); + vnet_mpls_tunnel_path_add(sw_if_index, rpaths); } done: - vec_free(labels); vec_free(rpaths); unformat_free (line_input); @@ -638,7 +938,7 @@ done: ?*/ VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = { .path = "mpls tunnel", - .short_help = + .short_help = "mpls tunnel via [addr] [interface] [out-labels]", .function = vnet_create_mpls_tunnel_command_fn, }; @@ -647,19 +947,28 @@ static u8 * format_mpls_tunnel (u8 * s, va_list * args) { mpls_tunnel_t *mt = va_arg (*args, mpls_tunnel_t *); - int ii; + mpls_tunnel_attribute_t attr; + fib_path_ext_t *path_ext; s = format(s, "mpls_tunnel%d: sw_if_index:%d hw_if_index:%d", - mt - mpls_tunnel_pool, - mt->mt_sw_if_index, - mt->mt_hw_if_index); - s = format(s, "\n label-stack:\n "); - for (ii = 0; ii < vec_len(mt->mt_label_stack); ii++) - { - s = format(s, "%d, ", mt->mt_label_stack[ii]); + mt - mpls_tunnel_pool, + mt->mt_sw_if_index, + mt->mt_hw_if_index); + if (MPLS_TUNNEL_FLAG_NONE != mt->mt_flags) { + s = format(s, " \n flags:"); + FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(attr) { + if ((1 << attr) & mt->mt_flags) { + s = format (s, "%s,", mpls_tunnel_attribute_names[attr]); + } + } } s = format(s, "\n via:\n"); s = fib_path_list_format(mt->mt_path_list, s); + s = format(s, " Extensions:"); + vec_foreach(path_ext, mt->mt_path_exts) + { + s = format(s, "\n %U", format_fib_path_ext, path_ext); + } s = format(s, "\n"); return (s); @@ -667,42 +976,42 @@ format_mpls_tunnel (u8 * s, va_list * args) static clib_error_t * show_mpls_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, + vlib_cli_command_t * cmd) { mpls_tunnel_t * mt; u32 mti = ~0; if (pool_elts (mpls_tunnel_pool) == 0) - vlib_cli_output (vm, "No MPLS tunnels configured..."); + vlib_cli_output (vm, "No MPLS tunnels configured..."); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "%d", &mti)) - ; - else - break; + if (unformat (input, "%d", &mti)) + ; + else + break; } if (~0 == mti) { - pool_foreach (mt, mpls_tunnel_pool, - ({ - vlib_cli_output (vm, "[@%d] %U", - mt - mpls_tunnel_pool, - format_mpls_tunnel, mt); - })); + pool_foreach (mt, mpls_tunnel_pool, + ({ + vlib_cli_output (vm, "[@%d] %U", + mt - mpls_tunnel_pool, + format_mpls_tunnel, mt); + })); } else { - if (pool_is_free_index(mpls_tunnel_pool, mti)) - return clib_error_return (0, "Not atunnel index %d", mti); + if (pool_is_free_index(mpls_tunnel_pool, mti)) + return clib_error_return (0, "Not a tunnel index
%d", mti); - mt = pool_elt_at_index(mpls_tunnel_pool, mti); + mt = pool_elt_at_index(mpls_tunnel_pool, mti); - vlib_cli_output (vm, "[@%d] %U", - mt - mpls_tunnel_pool, - format_mpls_tunnel, mt); + vlib_cli_output (vm, "[@%d] %U", + mt - mpls_tunnel_pool, + format_mpls_tunnel, mt); } return 0; @@ -715,7 +1024,7 @@ show_mpls_tunnel_command_fn (vlib_main_t * vm, * @cliexstart{sh mpls tunnel 2} * [@2] mpls_tunnel2: sw_if_index:5 hw_if_index:5 * label-stack: - * 3, + * 3, * via: * index:26 locks:1 proto:ipv4 uPRF-list:26 len:1 itfs:[2, ] * index:26 pl-index:26 ipv4 weight=1 attached-nexthop: oper-flags:resolved, @@ -743,7 +1052,7 @@ mpls_tunnel_from_fib_node (fib_node_t *node) */ static fib_node_back_walk_rc_t mpls_tunnel_back_walk (fib_node_t *node, - fib_node_back_walk_ctx_t *ctx) + fib_node_back_walk_ctx_t *ctx) { mpls_tunnel_restack(mpls_tunnel_from_fib_node(node)); diff --git a/src/vnet/mpls/mpls_tunnel.h b/src/vnet/mpls/mpls_tunnel.h index ee56c0fc..0b55d0db 100644 --- a/src/vnet/mpls/mpls_tunnel.h +++ b/src/vnet/mpls/mpls_tunnel.h @@ -17,6 +17,31 @@ #define __MPLS_TUNNEL_H__ #include +#include + +typedef enum mpls_tunnel_attribute_t_ +{ + MPLS_TUNNEL_ATTRIBUTE_FIRST = 0, + /** + * @brief The tunnel has an underlying multicast LSP + */ + MPLS_TUNNEL_ATTRIBUTE_MCAST = MPLS_TUNNEL_ATTRIBUTE_FIRST, + MPLS_TUNNEL_ATTRIBUTE_LAST = MPLS_TUNNEL_ATTRIBUTE_MCAST, +} mpls_tunnel_attribute_t; + +#define MPLS_TUNNEL_ATTRIBUTES { \ + [MPLS_TUNNEL_ATTRIBUTE_MCAST] = "multicast", \ +} +#define FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(_item) \ + for (_item = MPLS_TUNNEL_ATTRIBUTE_FIRST; \ + _item < MPLS_TUNNEL_ATTRIBUTE_LAST; \ + _item++) + +typedef enum mpls_tunnel_flag_t_ { + MPLS_TUNNEL_FLAG_NONE = 0, + MPLS_TUNNEL_FLAG_MCAST = (1 << MPLS_TUNNEL_ATTRIBUTE_MCAST), +} __attribute__ ((packed)) mpls_tunnel_flags_t; + /** * @brief A uni-directional MPLS tunnel @@ -28,6 +53,11 @@ typedef struct mpls_tunnel_t_ */ fib_node_t mt_node; + /** + * @brief Tunnel flags + */ + mpls_tunnel_flags_t mt_flags; + /** * @brief If the tunnel is an L2 tunnel, this is the link type ETHERNET * adjacency @@ -50,9 +80,9 @@ typedef struct mpls_tunnel_t_ u32 mt_sibling_index; /** - * @brief The Label stack to apply to egress packets + * A vector of path extensions o hold the label stack for each path */ - mpls_label_t *mt_label_stack; + fib_path_ext_t *mt_path_exts; /** * @brief Flag to indicate the tunnel is only for L2 traffic, that is @@ -74,12 +104,27 @@ typedef struct mpls_tunnel_t_ /** * @brief Create a new MPLS tunnel + * @return the SW Interface index of the newly created tuneel */ -extern void vnet_mpls_tunnel_add (fib_route_path_t *rpath, - mpls_label_t *label_stack, - u8 l2_only, - u32 *sw_if_index); +extern u32 vnet_mpls_tunnel_create (u8 l2_only, + u8 is_multicast); +/** + * @brief Add a path to an MPLS tunnel + */ +extern void vnet_mpls_tunnel_path_add (u32 sw_if_index, + fib_route_path_t *rpath); + +/** + * @brief remove a path from a tunnel. + * @return the number of remaining paths. 0 implies the tunnel can be deleted + */ +extern int vnet_mpls_tunnel_path_remove (u32 sw_if_index, + fib_route_path_t *rpath); + +/** + * @brief Delete an MPLS tunnel + */ extern void vnet_mpls_tunnel_del (u32 sw_if_index); extern const mpls_tunnel_t *mpls_tunnel_get(u32 index); diff --git a/src/vnet/mpls/mpls_types.h b/src/vnet/mpls/mpls_types.h index d7c629df..b1075cdd 100644 --- a/src/vnet/mpls/mpls_types.h +++ b/src/vnet/mpls/mpls_types.h @@ -1,3 +1,17 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef __MPLS_TYPES_H__ #define __MPLS_TYPES_H__ @@ -36,4 +50,10 @@ (((_lbl) > MPLS_IETF_MIN_UNRES_LABEL) && \ ((_lbl) <= MPLS_IETF_MAX_UNRES_LABEL)) +/** + * The top bit of the index, which is the result of the MPLS lookup + * is used to determine if the DPO is a load-balance or a replicate + */ +#define MPLS_IS_REPLICATE 0x80000000 + #endif diff --git a/src/vnet/srp/interface.c b/src/vnet/srp/interface.c index d427cc3c..44e2b0d6 100644 --- a/src/vnet/srp/interface.c +++ b/src/vnet/srp/interface.c @@ -58,7 +58,7 @@ srp_build_rewrite (vnet_main_t * vnm, #define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break _ (IP4, IP4); _ (IP6, IP6); - _ (MPLS, MPLS_UNICAST); + _ (MPLS, MPLS); _ (ARP, ARP); #undef _ default: diff --git a/test/test_ip_mcast.py b/test/test_ip_mcast.py index 36d597a7..c1397d70 100644 --- a/test/test_ip_mcast.py +++ b/test/test_ip_mcast.py @@ -622,6 +622,7 @@ class TestIPMcast(VppTestCase): (MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT | MRouteItfFlags.MFIB_ITF_FLAG_NEGATE_SIGNAL)) + self.vapi.cli("clear trace") tx = self._mcast_connected_send_stream("232.1.1.1") signals = self.vapi.mfib_signal_dump() diff --git a/test/test_mpls.py b/test/test_mpls.py index fc832644..700b7091 100644 --- a/test/test_mpls.py +++ b/test/test_mpls.py @@ -5,7 +5,9 @@ import socket from framework import VppTestCase, VppTestRunner from vpp_ip_route import VppIpRoute, VppRoutePath, VppMplsRoute, \ - VppMplsIpBind + VppMplsIpBind, VppIpMRoute, VppMRoutePath, \ + MRouteItfFlags, MRouteEntryFlags +from vpp_mpls_tunnel_interface import VppMPLSTunnelInterface from scapy.packet import Raw from scapy.layers.l2 import Ether @@ -21,7 +23,7 @@ class TestMPLS(VppTestCase): super(TestMPLS, self).setUp() # create 2 pg interfaces - self.create_pg_interfaces(range(2)) + self.create_pg_interfaces(range(4)) # setup both interfaces # assign them different tables. 
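Before the test changes below, it is worth summarising the new tunnel programming contract from the mpls_tunnel.c/.h hunks above: the single vnet_mpls_tunnel_add() call becomes a create/path-add/path-remove/delete lifecycle. A hedged sketch of a caller, assuming rpaths is a one-element vector of fib_route_path_t built elsewhere with its out-labels in frp_label_stack (error handling elided; the function name is hypothetical):

static void
mpls_tunnel_usage_sketch (fib_route_path_t * rpaths)
{
  u32 sw_if_index;

  /* create the (multicast) tunnel interface first... */
  sw_if_index = vnet_mpls_tunnel_create (0 /* l2_only */,
                                         1 /* is_multicast */);

  /* ...then add paths one call at a time */
  vnet_mpls_tunnel_path_add (sw_if_index, rpaths);

  /* path-remove returns the number of remaining paths;
   * 0 means the tunnel no longer forwards and may be deleted */
  if (0 == vnet_mpls_tunnel_path_remove (sw_if_index, rpaths))
    vnet_mpls_tunnel_del (sw_if_index);
}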
@@ -53,10 +55,12 @@ class TestMPLS(VppTestCase): mpls_labels, mpls_ttl=255, ping=0, - ip_itf=None): + ip_itf=None, + dst_ip=None, + n=257): self.reset_packet_infos() pkts = [] - for i in range(0, 257): + for i in range(0, n): info = self.create_packet_info(src_if, src_if) payload = self.info_to_payload(info) p = Ether(dst=src_if.local_mac, src=src_if.remote_mac) @@ -67,9 +71,14 @@ class TestMPLS(VppTestCase): else: p = p / MPLS(label=mpls_labels[ii], ttl=mpls_ttl, s=0) if not ping: - p = (p / IP(src=src_if.local_ip4, dst=src_if.remote_ip4) / - UDP(sport=1234, dport=1234) / - Raw(payload)) + if not dst_ip: + p = (p / IP(src=src_if.local_ip4, dst=src_if.remote_ip4) / + UDP(sport=1234, dport=1234) / + Raw(payload)) + else: + p = (p / IP(src=src_if.local_ip4, dst=dst_ip) / + UDP(sport=1234, dport=1234) / + Raw(payload)) else: p = (p / IP(src=ip_itf.remote_ip4, dst=ip_itf.local_ip4) / @@ -254,6 +263,13 @@ class TestMPLS(VppTestCase): except: raise + def send_and_assert_no_replies(self, intf, pkts, remark): + intf.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + for i in self.pg_interfaces: + i.assert_nothing_captured(remark=remark) + def test_swap(self): """ MPLS label swap tests """ @@ -278,7 +294,7 @@ class TestMPLS(VppTestCase): self.pg_start() rx = self.pg0.get_capture() - self.verify_capture_labelled_ip4(self.pg0, rx, tx, [33]) + self.verify_capture_labelled(self.pg0, rx, tx, [33]) # # A simple MPLS xconnect - non-eos label in label out @@ -358,7 +374,7 @@ class TestMPLS(VppTestCase): self.pg_start() rx = self.pg0.get_capture() - self.verify_capture_labelled_ip4(self.pg0, rx, tx, [33, 44, 45]) + self.verify_capture_labelled(self.pg0, rx, tx, [33, 44, 45], num=2) # # A recursive non-EOS x-connect, which resolves through another @@ -576,25 +592,19 @@ class TestMPLS(VppTestCase): # # Create a tunnel with a single out label # - nh_addr = socket.inet_pton(socket.AF_INET, self.pg0.remote_ip4) - - reply = self.vapi.mpls_tunnel_add_del( - 0xffffffff, # don't know the if index yet - 1, # IPv4 next-hop - nh_addr, - self.pg0.sw_if_index, - 0, # next-hop-table-id - 1, # next-hop-weight - 2, # num-out-labels, - [44, 46]) - self.vapi.sw_interface_set_flags(reply.sw_if_index, admin_up_down=1) + mpls_tun = VppMPLSTunnelInterface(self, + [VppRoutePath(self.pg0.remote_ip4, + self.pg0.sw_if_index, + labels=[44, 46])]) + mpls_tun.add_vpp_config() + mpls_tun.admin_up() # # add an unlabelled route through the new tunnel # route_10_0_0_3 = VppIpRoute(self, "10.0.0.3", 32, [VppRoutePath("0.0.0.0", - reply.sw_if_index)]) + mpls_tun._sw_if_index)]) route_10_0_0_3.add_vpp_config() self.vapi.cli("clear trace") @@ -738,6 +748,229 @@ class TestMPLS(VppTestCase): route_35_eos.remove_vpp_config() route_34_eos.remove_vpp_config() + def test_interface_rx(self): + """ MPLS Interface Receive """ + + # + # Add a non-recursive route that will forward the traffic + # post-interface-rx + # + route_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32, + table_id=1, + paths=[VppRoutePath(self.pg1.remote_ip4, + self.pg1.sw_if_index)]) + route_10_0_0_1.add_vpp_config() + + # + # An interface receive label that maps traffic to RX on interface + # pg1 + # by injecting the packet in on pg0, which is in table 0 + # doing an interface-rx on pg1 and matching a route in table 1 + # if the packet egresses, then we must have swapped to pg1 + # so as to have matched the route in table 1 + # + route_34_eos = VppMplsRoute(self, 34, 1, + [VppRoutePath("0.0.0.0", + self.pg1.sw_if_index, + is_interface_rx=1)]) + 
route_34_eos.add_vpp_config() + + # + # ping an interface in the default table + # PG0 is in the default table + # + self.vapi.cli("clear trace") + tx = self.create_stream_labelled_ip4(self.pg0, [34], n=257, + dst_ip="10.0.0.1") + self.pg0.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(257) + self.verify_capture_ip4(self.pg1, rx, tx) + + def test_mcast_mid_point(self): + """ MPLS Multicast Mid Point """ + + # + # Add a non-recursive route that will forward the traffic + # post-interface-rx + # + route_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32, + table_id=1, + paths=[VppRoutePath(self.pg1.remote_ip4, + self.pg1.sw_if_index)]) + route_10_0_0_1.add_vpp_config() + + # + # Add a mcast entry that replicate to pg2 and pg3 + # and replicate to an interface-rx (like a bud node would) + # + route_3400_eos = VppMplsRoute(self, 3400, 1, + [VppRoutePath(self.pg2.remote_ip4, + self.pg2.sw_if_index, + labels=[3401]), + VppRoutePath(self.pg3.remote_ip4, + self.pg3.sw_if_index, + labels=[3402]), + VppRoutePath("0.0.0.0", + self.pg1.sw_if_index, + is_interface_rx=1)], + is_multicast=1) + route_3400_eos.add_vpp_config() + + # + # ping an interface in the default table + # PG0 is in the default table + # + self.vapi.cli("clear trace") + tx = self.create_stream_labelled_ip4(self.pg0, [3400], n=257, + dst_ip="10.0.0.1") + self.pg0.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(257) + self.verify_capture_ip4(self.pg1, rx, tx) + + rx = self.pg2.get_capture(257) + self.verify_capture_labelled(self.pg2, rx, tx, [3401]) + rx = self.pg3.get_capture(257) + self.verify_capture_labelled(self.pg3, rx, tx, [3402]) + + def test_mcast_head(self): + """ MPLS Multicast Head-end """ + + # + # Create a multicast tunnel with two replications + # + mpls_tun = VppMPLSTunnelInterface(self, + [VppRoutePath(self.pg2.remote_ip4, + self.pg2.sw_if_index, + labels=[42]), + VppRoutePath(self.pg3.remote_ip4, + self.pg3.sw_if_index, + labels=[43])], + is_multicast=1) + mpls_tun.add_vpp_config() + mpls_tun.admin_up() + + # + # add an unlabelled route through the new tunnel + # + route_10_0_0_3 = VppIpRoute(self, "10.0.0.3", 32, + [VppRoutePath("0.0.0.0", + mpls_tun._sw_if_index)]) + route_10_0_0_3.add_vpp_config() + + self.vapi.cli("clear trace") + tx = self.create_stream_ip4(self.pg0, "10.0.0.3") + self.pg0.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg2.get_capture(257) + self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [42]) + rx = self.pg3.get_capture(257) + self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [43]) + + # + # Add an IP multicast route via the tunnel + # A (*,G).
+ # one accepting interface, pg0, 1 forwarding interface via the tunnel + # + route_232_1_1_1 = VppIpMRoute( + self, + "0.0.0.0", + "232.1.1.1", 32, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + [VppMRoutePath(self.pg0.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT), + VppMRoutePath(mpls_tun._sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)]) + route_232_1_1_1.add_vpp_config() + + self.vapi.cli("clear trace") + tx = self.create_stream_ip4(self.pg0, "232.1.1.1") + self.pg0.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg2.get_capture(257) + self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [42]) + rx = self.pg3.get_capture(257) + self.verify_capture_tunneled_ip4(self.pg0, rx, tx, [43]) + + def test_mcast_tail(self): + """ MPLS Multicast Tail """ + + # + # Add a multicast route that will forward the traffic + # post-disposition + # + route_232_1_1_1 = VppIpMRoute( + self, + "0.0.0.0", + "232.1.1.1", 32, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + table_id=1, + paths=[VppMRoutePath(self.pg1.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)]) + route_232_1_1_1.add_vpp_config() + + # + # An interface receive label that maps traffic to RX on interface + # pg1 + # by injecting the packet in on pg0, which is in table 0 + # doing an rpf-id and matching a route in table 1 + # if the packet egresses, then we must have matched the route in + # table 1 + # + route_34_eos = VppMplsRoute(self, 34, 1, + [VppRoutePath("0.0.0.0", + self.pg1.sw_if_index, + nh_table_id=1, + rpf_id=55)], + is_multicast=1) + + route_34_eos.add_vpp_config() + + # + # Drop due to interface lookup miss + # + self.vapi.cli("clear trace") + tx = self.create_stream_labelled_ip4(self.pg0, [34], + dst_ip="232.1.1.1", n=1) + self.send_and_assert_no_replies(self.pg0, tx, "RPF-ID drop none") + + # + # set the RPF-ID of the entry to match the input packet's + # + route_232_1_1_1.update_rpf_id(55) + + self.vapi.cli("clear trace") + tx = self.create_stream_labelled_ip4(self.pg0, [34], + dst_ip="232.1.1.1", n=257) + self.pg0.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(257) + self.verify_capture_ip4(self.pg1, rx, tx) + + # + # set the RPF-ID of the entry to not match the input packet's + # + route_232_1_1_1.update_rpf_id(56) + tx = self.create_stream_labelled_ip4(self.pg0, [34], + dst_ip="232.1.1.1") + self.send_and_assert_no_replies(self.pg0, tx, "RPF-ID drop 56") + class TestMPLSDisabled(VppTestCase): """ MPLS disabled """ diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py index faf5f801..d6146f28 100644 --- a/test/vpp_ip_route.py +++ b/test/vpp_ip_route.py @@ -55,15 +55,24 @@ class VppRoutePath(object): nh_table_id=0, labels=[], nh_via_label=MPLS_LABEL_INVALID, - is_ip6=0): + is_ip6=0, + rpf_id=0, + is_interface_rx=0): self.nh_itf = nh_sw_if_index self.nh_table_id = nh_table_id self.nh_via_label = nh_via_label self.nh_labels = labels + self.weight = 1 + self.rpf_id = rpf_id if is_ip6: self.nh_addr = inet_pton(AF_INET6, nh_addr) else: self.nh_addr = inet_pton(AF_INET, nh_addr) + self.is_interface_rx = is_interface_rx + self.is_rpf_id = 0 + if rpf_id != 0: + self.is_rpf_id = 1 + self.nh_itf = rpf_id class VppMRoutePath(VppRoutePath): @@ -176,13 +185,15 @@ class VppIpMRoute(VppObject): """ def __init__(self, test, src_addr, grp_addr, - grp_addr_len, e_flags, paths, table_id=0, is_ip6=0): + grp_addr_len, e_flags, paths, table_id=0, + rpf_id=0, is_ip6=0): self._test = test self.paths = paths self.grp_addr_len =
grp_addr_len self.table_id = table_id self.e_flags = e_flags self.is_ip6 = is_ip6 + self.rpf_id = rpf_id if is_ip6: self.grp_addr = inet_pton(AF_INET6, grp_addr) @@ -199,6 +210,7 @@ class VppIpMRoute(VppObject): self.e_flags, path.nh_itf, path.nh_i_flags, + rpf_id=self.rpf_id, table_id=self.table_id, is_ipv6=self.is_ip6) self._test.registry.register(self, self._test.logger) @@ -226,6 +238,18 @@ class VppIpMRoute(VppObject): table_id=self.table_id, is_ipv6=self.is_ip6) + def update_rpf_id(self, rpf_id): + self.rpf_id = rpf_id + self._test.vapi.ip_mroute_add_del(self.src_addr, + self.grp_addr, + self.grp_addr_len, + self.e_flags, + 0xffffffff, + 0, + rpf_id=self.rpf_id, + table_id=self.table_id, + is_ipv6=self.is_ip6) + def update_path_flags(self, itf, flags): for path in self.paths: if path.nh_itf == itf: @@ -342,14 +366,17 @@ class VppMplsRoute(VppObject): MPLS Route/LSP """ - def __init__(self, test, local_label, eos_bit, paths, table_id=0): + def __init__(self, test, local_label, eos_bit, paths, table_id=0, + is_multicast=0): self._test = test self.paths = paths self.local_label = local_label self.eos_bit = eos_bit self.table_id = table_id + self.is_multicast = is_multicast def add_vpp_config(self): + is_multipath = len(self.paths) > 1 for path in self.paths: self._test.vapi.mpls_route_add_del( self.local_label, @@ -357,7 +384,11 @@ class VppMplsRoute(VppObject): 1, path.nh_addr, path.nh_itf, + is_multicast=self.is_multicast, + is_multipath=is_multipath, table_id=self.table_id, + is_interface_rx=path.is_interface_rx, + is_rpf_id=path.is_rpf_id, next_hop_out_label_stack=path.nh_labels, next_hop_n_out_labels=len( path.nh_labels), @@ -372,6 +403,7 @@ class VppMplsRoute(VppObject): 1, path.nh_addr, path.nh_itf, + is_rpf_id=path.is_rpf_id, table_id=self.table_id, is_add=0) diff --git a/test/vpp_mpls_tunnel_interface.py b/test/vpp_mpls_tunnel_interface.py new file mode 100644 index 00000000..f2001574 --- /dev/null +++ b/test/vpp_mpls_tunnel_interface.py @@ -0,0 +1,46 @@ + +from vpp_interface import VppInterface +from vpp_ip_route import VppRoutePath +import socket + + +class VppMPLSTunnelInterface(VppInterface): + """ + VPP MPLS Tunnel interface + """ + + def __init__(self, test, paths, is_multicast=0): + """ Create MPLS Tunnel interface """ + self._sw_if_index = 0 + super(VppMPLSTunnelInterface, self).__init__(test) + self._test = test + self.t_paths = paths + self.is_multicast = is_multicast + + def add_vpp_config(self): + self._sw_if_index = 0xffffffff + for path in self.t_paths: + reply = self.test.vapi.mpls_tunnel_add_del( + self._sw_if_index, + 1, # IPv4 next-hop + path.nh_addr, + path.nh_itf, + path.nh_table_id, + path.weight, + next_hop_out_label_stack=path.nh_labels, + next_hop_n_out_labels=len(path.nh_labels), + is_multicast=self.is_multicast) + self._sw_if_index = reply.sw_if_index + + def remove_vpp_config(self): + for path in self.t_paths: + reply = self.test.vapi.mpls_tunnel_add_del( + self.sw_if_index, + 1, # IPv4 next-hop + path.nh_addr, + path.nh_itf, + path.nh_table_id, + path.weight, + next_hop_out_label_stack=path.nh_labels, + next_hop_n_out_labels=len(path.nh_labels), + is_add=0) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index e8025dff..ceb684b7 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -849,6 +849,9 @@ class VppPapiProvider(object): create_vrf_if_needed=0, is_resolve_host=0, is_resolve_attached=0, + is_interface_rx=0, + is_rpf_id=0, + is_multicast=0, is_add=1, is_drop=0, is_multipath=0, @@ -872,6 +875,7 @@ class 
VppPapiProvider(object): :param is_local: (Default value = 0) :param is_classify: (Default value = 0) :param is_multipath: (Default value = 0) + :param is_multicast: (Default value = 0) :param is_resolve_host: (Default value = 0) :param is_resolve_attached: (Default value = 0) :param not_last: (Default value = 0) @@ -889,8 +893,11 @@ class VppPapiProvider(object): 'mr_is_add': is_add, 'mr_is_classify': is_classify, 'mr_is_multipath': is_multipath, + 'mr_is_multicast': is_multicast, 'mr_is_resolve_host': is_resolve_host, 'mr_is_resolve_attached': is_resolve_attached, + 'mr_is_interface_rx': is_interface_rx, + 'mr_is_rpf_id': is_rpf_id, 'mr_next_hop_proto_is_ip4': next_hop_proto_is_ip4, 'mr_next_hop_weight': next_hop_weight, 'mr_next_hop': next_hop_address, @@ -936,7 +943,8 @@ class VppPapiProvider(object): next_hop_via_label=MPLS_LABEL_INVALID, create_vrf_if_needed=0, is_add=1, - l2_only=0): + l2_only=0, + is_multicast=0): """ :param dst_address_length: @@ -956,8 +964,8 @@ class VppPapiProvider(object): :param is_multipath: (Default value = 0) :param is_resolve_host: (Default value = 0) :param is_resolve_attached: (Default value = 0) - :param not_last: (Default value = 0) :param next_hop_weight: (Default value = 1) + :param is_multicast: (Default value = 0) """ return self.api( @@ -965,6 +973,7 @@ class VppPapiProvider(object): {'mt_sw_if_index': tun_sw_if_index, 'mt_is_add': is_add, 'mt_l2_only': l2_only, + 'mt_is_multicast': is_multicast, 'mt_next_hop_proto_is_ip4': next_hop_proto_is_ip4, 'mt_next_hop_weight': next_hop_weight, 'mt_next_hop': next_hop_address, @@ -1469,6 +1478,7 @@ class VppPapiProvider(object): e_flags, next_hop_sw_if_index, i_flags, + rpf_id=0, table_id=0, create_vrf_if_needed=0, is_add=1, @@ -1481,6 +1491,8 @@ class VppPapiProvider(object): {'next_hop_sw_if_index': next_hop_sw_if_index, 'entry_flags': e_flags, 'itf_flags': i_flags, + 'table_id': table_id, + 'rpf_id': rpf_id, 'create_vrf_if_needed': create_vrf_if_needed, 'is_add': is_add, 'is_ipv6': is_ipv6, -- cgit 1.2.3-korg From 153646e89c3be70c68348bdd497f8edd2b212a9c Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 5 Apr 2017 18:15:45 +0200 Subject: Common device-input interrupt infra Change-Id: I23b588eb56a3f5690158449a1f9bc8053cd3d251 Signed-off-by: Damjan Marion --- src/vlib/node_funcs.h | 14 ++++ src/vnet/devices/af_packet/af_packet.c | 7 +- src/vnet/devices/af_packet/node.c | 2 +- src/vnet/devices/devices.c | 135 +++++++++++++++++++++++++++++---- src/vnet/devices/devices.h | 60 ++++++++++++--- src/vnet/interface.c | 2 + src/vnet/interface.h | 3 + 7 files changed, 194 insertions(+), 29 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index 54e36874..4d7cc192 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -177,6 +177,20 @@ vlib_node_set_state (vlib_main_t * vm, u32 node_index, r->state = new_state; } +/** \brief Get node dispatch state. 
+ @param vm vlib_main_t pointer, varies by thread + @param node_index index of the node + @return state for node, see vlib_node_state_t +*/ +always_inline vlib_node_state_t +vlib_node_get_state (vlib_main_t * vm, u32 node_index) +{ + vlib_node_main_t *nm = &vm->node_main; + vlib_node_t *n; + n = vec_elt (nm->nodes, node_index); + return n->state; +} + always_inline void vlib_node_set_interrupt_pending (vlib_main_t * vm, u32 node_index) { diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 20285107..7464d4e6 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -270,9 +270,12 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index); apif->sw_if_index = sw->sw_if_index; - vnet_set_device_input_node (apif->hw_if_index, af_packet_input_node.index); - vnet_device_input_assign_thread (apif->hw_if_index, 0, /* queue */ + vnet_set_device_input_node (vnm, apif->hw_if_index, + af_packet_input_node.index); + vnet_device_input_assign_thread (vnm, apif->hw_if_index, 0, /* queue */ ~0 /* any cpu */ ); + vnet_device_input_set_mode (vnm, apif->hw_if_index, 0, + VNET_DEVICE_INPUT_MODE_INTERRUPT); vnet_hw_interface_set_flags (vnm, apif->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index 76980102..d3af41b5 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -251,7 +251,7 @@ af_packet_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_device_input_runtime_t *rt = (void *) node->runtime_data; vnet_device_and_queue_t *dq; - vec_foreach (dq, rt->devices_and_queues) + foreach_device_and_queue (dq, rt->devices_and_queues) { af_packet_if_t *apif; apif = vec_elt_at_index (apm->interfaces, dq->dev_instance); diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index 5e5e812c..c8a95087 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -102,11 +102,26 @@ vnet_device_queue_sort (void *a1, void *a2) return 0; } +static void +vnet_device_queue_update (vnet_main_t * vnm, vnet_device_input_runtime_t * rt) +{ + vnet_device_and_queue_t *dq; + vnet_hw_interface_t *hw; + + vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort); + + vec_foreach (dq, rt->devices_and_queues) + { + hw = vnet_get_hw_interface (vnm, dq->hw_if_index); + vec_validate (hw->dq_runtime_index_by_queue, dq->queue_id); + hw->dq_runtime_index_by_queue[dq->queue_id] = dq - rt->devices_and_queues; + } +} + void -vnet_device_input_assign_thread (u32 hw_if_index, +vnet_device_input_assign_thread (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id, uword thread_index) { - vnet_main_t *vnm = vnet_get_main (); vnet_device_main_t *vdm = &vnet_device_main; vlib_main_t *vm; vnet_device_input_runtime_t *rt; @@ -135,16 +150,17 @@ vnet_device_input_assign_thread (u32 hw_if_index, dq->dev_instance = hw->dev_instance; dq->queue_id = queue_id; - vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort); + vnet_device_queue_update (vnm, rt); vec_validate (hw->input_node_thread_index_by_queue, queue_id); hw->input_node_thread_index_by_queue[queue_id] = thread_index; + vlib_node_set_state (vm, hw->input_node_index, rt->enabled_node_state); } -static int -vnet_device_input_unassign_thread (u32 hw_if_index, u16 queue_id, - uword thread_index) +int +vnet_device_input_unassign_thread (vnet_main_t * vnm, u32 
hw_if_index, + u16 queue_id, uword thread_index) { - vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); vnet_device_input_runtime_t *rt; vnet_device_and_queue_t *dq; @@ -161,9 +177,9 @@ vnet_device_input_unassign_thread (u32 hw_if_index, u16 queue_id, if (old_thread_index == thread_index) return 0; - rt = - vlib_node_get_runtime_data (vlib_mains[old_thread_index], - hw->input_node_index); + vm = vlib_mains[old_thread_index]; + + rt = vlib_node_get_runtime_data (vm, hw->input_node_index); vec_foreach (dq, rt->devices_and_queues) if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id) @@ -175,11 +191,89 @@ vnet_device_input_unassign_thread (u32 hw_if_index, u16 queue_id, return VNET_API_ERROR_INVALID_INTERFACE; deleted: - vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort); + + vnet_device_queue_update (vnm, rt); + + if (vec_len (rt->devices_and_queues) == 0) + vlib_node_set_state (vm, hw->input_node_index, VLIB_NODE_STATE_DISABLED); + + return 0; +} + + +int +vnet_device_input_set_mode (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id, + vnet_device_input_mode_t mode) +{ + vlib_main_t *vm; + uword thread_index; + vnet_device_and_queue_t *dq; + vlib_node_state_t enabled_node_state; + ASSERT (mode < VNET_DEVICE_INPUT_N_MODES); + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + vnet_device_input_runtime_t *rt; + int is_polling = 0; + + if (hw->input_node_thread_index_by_queue == 0) + return VNET_API_ERROR_INVALID_INTERFACE; + + thread_index = hw->input_node_thread_index_by_queue[queue_id]; + vm = vlib_mains[thread_index]; + + rt = vlib_node_get_runtime_data (vm, hw->input_node_index); + + vec_foreach (dq, rt->devices_and_queues) + { + if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id) + dq->mode = mode; + if (dq->mode == VNET_DEVICE_INPUT_MODE_POLLING) + is_polling = 1; + } + + if (is_polling) + enabled_node_state = VLIB_NODE_STATE_POLLING; + else + enabled_node_state = VLIB_NODE_STATE_INTERRUPT; + + if (rt->enabled_node_state != enabled_node_state) + { + rt->enabled_node_state = enabled_node_state; + if (vlib_node_get_state (vm, hw->input_node_index) != + VLIB_NODE_STATE_DISABLED) + vlib_node_set_state (vm, hw->input_node_index, enabled_node_state); + } return 0; } +int +vnet_device_input_get_mode (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id, + vnet_device_input_mode_t * mode) +{ + vlib_main_t *vm; + uword thread_index; + vnet_device_and_queue_t *dq; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + vnet_device_input_runtime_t *rt; + + if (hw->input_node_thread_index_by_queue == 0) + return VNET_API_ERROR_INVALID_INTERFACE; + + thread_index = hw->input_node_thread_index_by_queue[queue_id]; + vm = vlib_mains[thread_index]; + + rt = vlib_node_get_runtime_data (vm, hw->input_node_index); + + vec_foreach (dq, rt->devices_and_queues) + if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id) + { + *mode = dq->mode; + return 0; + } + + return VNET_API_ERROR_INVALID_INTERFACE; +} + static clib_error_t * show_device_placement_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -203,9 +297,11 @@ show_device_placement_fn (vlib_main_t * vm, unformat_input_t * input, vec_foreach (dq, rt->devices_and_queues) { - s = format (s, " %U queue %u\n", + s = format (s, " %U queue %u (%s)\n", format_vnet_sw_if_index_name, vnm, dq->hw_if_index, - dq->queue_id); + dq->queue_id, + dq->mode == VNET_DEVICE_INPUT_MODE_POLLING ? 
+ "polling" : "interrupt"); } })); if (vec_len (s) > 0) @@ -238,6 +334,7 @@ set_device_placement (vlib_main_t * vm, unformat_input_t * input, unformat_input_t _line_input, *line_input = &_line_input; vnet_main_t *vnm = vnet_get_main (); vnet_device_main_t *vdm = &vnet_device_main; + vnet_device_input_mode_t mode; u32 hw_if_index = (u32) ~ 0; u32 queue_id = (u32) 0; u32 thread_index = (u32) ~ 0; @@ -275,13 +372,19 @@ set_device_placement (vlib_main_t * vm, unformat_input_t * input, return clib_error_return (0, "please specify valid worker thread or main"); - rv = - vnet_device_input_unassign_thread (hw_if_index, queue_id, thread_index); + rv = vnet_device_input_get_mode (vnm, hw_if_index, queue_id, &mode); + + if (rv) + return clib_error_return (0, "not found"); + + rv = vnet_device_input_unassign_thread (vnm, hw_if_index, queue_id, + thread_index); if (rv) return clib_error_return (0, "not found"); - vnet_device_input_assign_thread (hw_if_index, queue_id, thread_index); + vnet_device_input_assign_thread (vnm, hw_if_index, queue_id, thread_index); + vnet_device_input_set_mode (vnm, hw_if_index, queue_id, mode); return 0; } diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index 966f8302..baf03b7c 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -55,16 +55,26 @@ typedef struct uword next_worker_thread_index; } vnet_device_main_t; +typedef enum +{ + VNET_DEVICE_INPUT_MODE_POLLING = 0, + VNET_DEVICE_INPUT_MODE_INTERRUPT, + VNET_DEVICE_INPUT_N_MODES, +} vnet_device_input_mode_t; + typedef struct { u32 hw_if_index; u32 dev_instance; u16 queue_id; + vnet_device_input_mode_t mode; + uword interrupt_pending; } vnet_device_and_queue_t; typedef struct { vnet_device_and_queue_t *devices_and_queues; + vlib_node_state_t enabled_node_state; } vnet_device_input_runtime_t; extern vnet_device_main_t vnet_device_main; @@ -72,15 +82,22 @@ extern vlib_node_registration_t device_input_node; extern const u32 device_input_next_node_advance[]; static inline void -vnet_set_device_input_node (u32 hw_if_index, u32 node_index) +vnet_set_device_input_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) { - vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); hw->input_node_index = node_index; } -void vnet_device_input_assign_thread (u32 hw_if_index, u16 queue_id, - uword thread_index); +void vnet_device_input_assign_thread (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id, uword thread_index); +int vnet_device_input_unassign_thread (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id, uword thread_index); +int vnet_device_input_set_mode (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id, vnet_device_input_mode_t mode); +int vnet_device_input_get_mode (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id, + vnet_device_input_mode_t * mode); static inline u64 vnet_get_aggregate_rx_packets (void) @@ -111,18 +128,41 @@ vnet_get_device_and_queue (vlib_main_t * vm, vlib_node_runtime_t * node) return rt->devices_and_queues; } +static_always_inline uword +vnet_get_device_input_thread_index (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id) +{ + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + ASSERT (queue_id < vec_len (hw->input_node_thread_index_by_queue)); + return hw->input_node_thread_index_by_queue[queue_id]; +} + static_always_inline void vnet_device_input_set_interrupt_pending (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id) { - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, 
hw_if_index); - - ASSERT (queue_id < vec_len (hw->input_node_thread_index_by_queue)); - u32 thread_index = hw->input_node_thread_index_by_queue[queue_id]; - vlib_node_set_interrupt_pending (vlib_mains[thread_index], - hw->input_node_index); + vlib_main_t *vm; + vnet_hw_interface_t *hw; + vnet_device_input_runtime_t *rt; + vnet_device_and_queue_t *dq; + uword idx; + + hw = vnet_get_hw_interface (vnm, hw_if_index); + idx = vnet_get_device_input_thread_index (vnm, hw_if_index, queue_id); + vm = vlib_mains[idx]; + rt = vlib_node_get_runtime_data (vm, hw->input_node_index); + idx = hw->dq_runtime_index_by_queue[queue_id]; + dq = vec_elt_at_index (rt->devices_and_queues, idx); + dq->interrupt_pending = 1; + + vlib_node_set_interrupt_pending (vm, hw->input_node_index); } +#define foreach_device_and_queue(var,vec) \ + for (var = (vec); var < vec_end (vec); var++) \ + if (clib_smp_swap (&((var)->interrupt_pending), 0) || \ + var->mode == VNET_DEVICE_INPUT_MODE_POLLING) + #endif /* included_vnet_vnet_device_h */ /* diff --git a/src/vnet/interface.c b/src/vnet/interface.c index 45417b2f..24f216f6 100644 --- a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -919,6 +919,8 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index) hash_unset_mem (im->hw_interface_by_name, hw->name); vec_free (hw->name); + vec_free (hw->input_node_thread_index_by_queue); + vec_free (hw->dq_runtime_index_by_queue); pool_put (im->hw_interfaces, hw); } diff --git a/src/vnet/interface.h b/src/vnet/interface.h index 08f08b10..9c223040 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -470,6 +470,9 @@ typedef struct vnet_hw_interface_t /* input node cpu index by queue */ u32 *input_node_thread_index_by_queue; + /* device input device_and_queue runtime index */ + uword *dq_runtime_index_by_queue; + } vnet_hw_interface_t; extern vnet_device_class_t vnet_local_interface_device_class; -- cgit 1.2.3-korg From e4dcba801ae7abeea7d8a9f5d0784a540a3c2bd8 Mon Sep 17 00:00:00 2001 From: Steven Date: Tue, 4 Apr 2017 16:56:54 -0700 Subject: vhost: interrupt mode enhancements - Add cpu index to the vring structure for quick lookup - Reduce the code that needs to be protected by vlib_worker_thread_barrier_sync - Set minimum timer no less than 1 ms Change-Id: Iafef4bf6879a8efb350abf4e0f517e38f7ff7a8b Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 59 +++++++++++++++--------------------- src/vnet/devices/virtio/vhost-user.h | 1 + 2 files changed, 26 insertions(+), 34 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 5e720f65..cce9705e 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -404,6 +404,7 @@ vhost_user_rx_thread_placement () thread_index = vui_workers[i]; i++; vhc = &vum->cpus[thread_index]; + txvq->interrupt_thread_index = thread_index; iaq.qid = qid; iaq.vhost_iface_index = vui - vum->vhost_user_interfaces; @@ -534,45 +535,33 @@ vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq) vhost_user_main_t *vum = &vhost_user_main; vhost_cpu_t *vhc; u32 thread_index; - vhost_iface_and_queue_t *vhiq; vlib_main_t *vm; - u32 ifq2; - u8 done = 0; + u32 ifq2, qid; + vhost_user_vring_t *txvq; + + qid = ifq & 0xff; + if ((qid % 2) == 0) + /* Only care about the odd number virtqueue which is TX */ + return; if (vhost_user_intf_ready (vui)) { - vec_foreach (vhc, vum->cpus) - { - if (vhc->operation_mode == VHOST_USER_POLLING_MODE) - continue; - - vec_foreach (vhiq, 
vhc->rx_queues) + txvq = &vui->vrings[qid]; + thread_index = txvq->interrupt_thread_index; + vhc = &vum->cpus[thread_index]; + if (vhc->operation_mode == VHOST_USER_INTERRUPT_MODE) { + vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main; /* - * Match the interface and the virtqueue number + * Convert virtqueue number in the lower byte to vring + * queue index for the input node process. Top bytes contain + * the interface, lower byte contains the queue index. */ - if ((vhiq->vhost_iface_index == (ifq >> 8)) && - (VHOST_VRING_IDX_TX (vhiq->qid) == (ifq & 0xff))) - { - thread_index = vhc - vum->cpus; - vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main; - /* - * Convert RX virtqueue number in the lower byte to vring - * queue index for the input node process. Top bytes contain - * the interface, lower byte contains the queue index. - */ - ifq2 = ((ifq >> 8) << 8) | vhiq->qid; - vhc->pending_input_bitmap = - clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1); - vlib_node_set_interrupt_pending (vm, - vhost_user_input_node.index); - done = 1; - break; - } + ifq2 = ((ifq >> 8) << 8) | qid / 2; + vhc->pending_input_bitmap = + clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1); + vlib_node_set_interrupt_pending (vm, vhost_user_input_node.index); } - if (done) - break; - } } } @@ -605,15 +594,14 @@ vhost_user_kickfd_read_ready (unix_file_t * uf) n = read (uf->file_descriptor, ((char *) &buff), 8); DBG_SOCK ("if %d KICK queue %d", uf->private_data >> 8, qid); - - vlib_worker_thread_barrier_sync (vlib_get_main ()); if (!vui->vrings[qid].started || (vhost_user_intf_ready (vui) != vui->is_up)) { + vlib_worker_thread_barrier_sync (vlib_get_main ()); vui->vrings[qid].started = 1; vhost_user_update_iface_state (vui); + vlib_worker_thread_barrier_release (vlib_get_main ()); } - vlib_worker_thread_barrier_release (vlib_get_main ()); vhost_user_set_interrupt_pending (vui, uf->private_data); return 0; @@ -2814,6 +2802,9 @@ vhost_user_send_interrupt_process (vlib_main_t * vm, clib_warning ("BUG: unhandled event type %d", event_type); break; } + /* No less than 1 millisecond */ + if (timeout < 1e-3) + timeout = 1e-3; } return 0; } diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index 67f18b8e..80f58a20 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -210,6 +210,7 @@ typedef struct u32 callfd_idx; u32 kickfd_idx; u64 log_guest_addr; + u32 interrupt_thread_index; } vhost_user_vring_t; #define VHOST_USER_POLLING_MODE 0 -- cgit 1.2.3-korg From 67e06070493607ccff463f6a2c812b76c7a43f1c Mon Sep 17 00:00:00 2001 From: Steven Date: Tue, 11 Apr 2017 12:24:47 -0700 Subject: Devices: set interface placement does not remove the old interface placement The command set interface placement is supposed to remove the existing interface placement and add a new one based on the given options. A quick test of the CLI shows that the old interface placement continues to exist, while the new one is also added. The bug is in vnet_device_input_unassign_thread, which checks whether the old thread index is the same as the passed thread index and skips the deletion if they are equal. The fix is to remove this check, which is not supposed to be there.
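For illustration, a minimal sketch of the pre-fix logic (paraphrased from the devices.c hunk below; not verbatim VPP source). When the requested thread matched the queue's current thread, the early return skipped the removal, and the subsequent vnet_device_input_assign_thread call then added a second device-and-queue entry for the same queue:

    /* sketch of vnet_device_input_unassign_thread () before this fix */
    old_thread_index = hw->input_node_thread_index_by_queue[queue_id];
    if (old_thread_index == thread_index)
      return 0;   /* early return: the old placement was never removed */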
Change-Id: Ib055721fad47513949a03b3cb6dc292bd19fd1e8 Signed-off-by: Steven --- src/vnet/devices/devices.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index c8a95087..2f55adcb 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -174,9 +174,6 @@ vnet_device_input_unassign_thread (vnet_main_t * vnm, u32 hw_if_index, old_thread_index = hw->input_node_thread_index_by_queue[queue_id]; - if (old_thread_index == thread_index) - return 0; - vm = vlib_mains[old_thread_index]; rt = vlib_node_get_runtime_data (vm, hw->input_node_index); -- cgit 1.2.3-korg From 53129423a6f4e43b39f7547424fbaea99e56f7e2 Mon Sep 17 00:00:00 2001 From: Steven Date: Fri, 21 Apr 2017 13:31:50 -0700 Subject: vhost: remove socket linked file when deleting vhost interface - Unlink the file created for the socket when deleting a vhost interface if we are in server mode. - Remove all vhost interfaces when the VPP process is exiting. Change-Id: Id9b676cd027bbd67b473bbd01901d1ecc4d8e6cb Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index cce9705e..4f4f038a 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -1354,15 +1354,6 @@ vhost_user_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (vhost_user_init); -static clib_error_t * -vhost_user_exit (vlib_main_t * vm) -{ - /* TODO cleanup */ - return 0; -} - -VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit); - static u8 * format_vhost_trace (u8 * s, va_list * va) { @@ -2553,6 +2544,7 @@ vhost_user_term_if (vhost_user_intf_t * vui) vui->unix_server_index); unix_file_del (&unix_main, uf); vui->unix_server_index = ~0; + unlink (vui->sock_filename); } } @@ -2590,6 +2582,23 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) return rv; } +static clib_error_t * +vhost_user_exit (vlib_main_t * vm) +{ + vnet_main_t *vnm = vnet_get_main (); + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui; + + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + vhost_user_delete_if (vnm, vm, vui->sw_if_index); + }); + /* *INDENT-ON* */ + return 0; +} + +VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit); + /** * Open server unix socket on specified sock_filename. 
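 *
 * As a recap of the hunks above (a paraphrase, assuming vnm, vm and vum
 * are set up as in the vhost_user_exit () hunk): process exit now walks
 * the interface pool and deletes every remaining vhost interface,
 *
 *   pool_foreach (vui, vum->vhost_user_interfaces, {
 *     vhost_user_delete_if (vnm, vm, vui->sw_if_index);
 *   });
 *
 * and tearing an interface down runs vhost_user_term_if (), which with
 * this patch also calls unlink (vui->sock_filename), so socket files
 * created here in server mode no longer linger on disk.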
*/ -- cgit 1.2.3-korg From 11b8dbf78af49d270a0e72abe7dea73eec30d85f Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 24 Apr 2017 10:46:54 -0400 Subject: "autoreply" flag: autogenerate standard xxx_reply_t messages Change-Id: I72298aaae7d172082ece3a8edea4217c11b28d79 Signed-off-by: Dave Barach --- src/examples/sample-plugin/sample/sample.api | 10 +- src/plugins/acl/acl.api | 60 +--- src/plugins/dpdk/api/dpdk.api | 35 +- src/plugins/flowperpkt/flowperpkt.api | 23 +- .../export-vxlan-gpe/vxlan_gpe_ioam_export.api | 10 +- src/plugins/ioam/export/ioam_export.api | 10 +- src/plugins/ioam/ip6/ioam_cache.api | 10 +- src/plugins/ioam/lib-pot/pot.api | 34 +- src/plugins/ioam/lib-trace/trace.api | 26 +- src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api | 82 +---- src/plugins/lb/lb.api | 21 +- src/plugins/memif/memif.api | 12 +- src/plugins/snat/snat.api | 88 +---- src/tools/vppapigen/gram.y | 3 +- src/tools/vppapigen/lex.c | 57 +++- src/tools/vppapigen/lex.h | 1 + src/tools/vppapigen/node.c | 5 + src/tools/vppapigen/node.h | 2 + src/vlibmemory/memclnt.api | 7 +- src/vlibmemory/memory_vlib.c | 8 +- src/vnet/bfd/bfd.api | 132 +------- src/vnet/classify/classify.api | 37 +-- src/vnet/cop/cop.api | 28 +- src/vnet/devices/af_packet/af_packet.api | 12 +- src/vnet/devices/netmap/netmap.api | 24 +- src/vnet/devices/virtio/vhost_user.api | 24 +- src/vnet/dhcp/dhcp.api | 38 +-- src/vnet/flow/flow.api | 32 +- src/vnet/interface.api | 108 +----- src/vnet/ip/ip.api | 108 +----- src/vnet/ipsec/ipsec.api | 224 ++----------- src/vnet/l2/l2.api | 96 +----- src/vnet/l2tp/l2tp.api | 28 +- src/vnet/lisp-cp/lisp.api | 164 +-------- src/vnet/lisp-cp/one.api | 185 +---------- src/vnet/lisp-gpe/lisp_gpe.api | 48 +-- src/vnet/map/map.api | 22 +- src/vnet/mpls/mpls.api | 26 +- src/vnet/session/session.api | 68 +--- src/vnet/span/span.api | 10 +- src/vnet/sr/sr.api | 60 +--- src/vnet/unix/tap.api | 12 +- src/vnet/vxlan/vxlan.api | 12 +- src/vpp/api/vpe.api | 367 ++------------------- 44 files changed, 271 insertions(+), 2098 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/examples/sample-plugin/sample/sample.api b/src/examples/sample-plugin/sample/sample.api index f99cdb38..d565c0b1 100644 --- a/src/examples/sample-plugin/sample/sample.api +++ b/src/examples/sample-plugin/sample/sample.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define sample_macswap_enable_disable { +autoreply define sample_macswap_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -29,11 +29,3 @@ define sample_macswap_enable_disable { /* Interface handle */ u32 sw_if_index; }; - -define sample_macswap_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api index d981338d..3b334113 100644 --- a/src/plugins/acl/acl.api +++ b/src/plugins/acl/acl.api @@ -161,24 +161,13 @@ define acl_add_replace_reply @param acl_index - ACL index to delete */ -manual_print define acl_del +autoreply manual_print define acl_del { u32 client_index; u32 context; u32 acl_index; }; -/** \brief Reply to delete the ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define acl_del_reply -{ - u32 context; - i32 retval; -}; - /* acl_interface_add_del(_reply) to be deprecated in lieu of acl_interface_set_acl_list */ /** \brief Use acl_interface_set_acl_list instead Append/remove an ACL index to/from the list 
of ACLs checked for an interface @@ -190,7 +179,7 @@ define acl_del_reply @param acl_index - index of ACL for the operation */ -manual_print define acl_interface_add_del +autoreply manual_print define acl_interface_add_del { u32 client_index; u32 context; @@ -204,17 +193,6 @@ manual_print define acl_interface_add_del u32 acl_index; }; -/** \brief Reply to alter the ACL list - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define acl_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set the vector of input/output ACLs checked for an interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -224,7 +202,7 @@ define acl_interface_add_del_reply @param acls - vector of ACL indices */ -manual_print define acl_interface_set_acl_list +autoreply manual_print define acl_interface_set_acl_list { u32 client_index; u32 context; @@ -239,12 +217,6 @@ manual_print define acl_interface_set_acl_list @param retval 0 - no error */ -define acl_interface_set_acl_list_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump the specific ACL contents or all of the ACLs' contents @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -341,24 +313,13 @@ define macip_acl_add_reply @param acl_index - MACIP ACL index to delete */ -manual_print define macip_acl_del +autoreply manual_print define macip_acl_del { u32 client_index; u32 context; u32 acl_index; }; -/** \brief Reply to delete the MACIP ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define macip_acl_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add or delete a MACIP ACL to/from interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -367,7 +328,7 @@ define macip_acl_del_reply @param acl_index - MACIP ACL index */ -manual_print define macip_acl_interface_add_del +autoreply manual_print define macip_acl_interface_add_del { u32 client_index; u32 context; @@ -377,17 +338,6 @@ manual_print define macip_acl_interface_add_del u32 acl_index; }; -/** \brief Reply to apply/unapply the MACIP ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define macip_acl_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump one or all defined MACIP ACLs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/dpdk/api/dpdk.api b/src/plugins/dpdk/api/dpdk.api index 21215d45..d43f8a36 100644 --- a/src/plugins/dpdk/api/dpdk.api +++ b/src/plugins/dpdk/api/dpdk.api @@ -21,7 +21,7 @@ @param pipe - pipe ID within its subport @param profile - pipe profile ID */ -define sw_interface_set_dpdk_hqos_pipe { +autoreply define sw_interface_set_dpdk_hqos_pipe { u32 client_index; u32 context; u32 sw_if_index; @@ -30,15 +30,6 @@ define sw_interface_set_dpdk_hqos_pipe { u32 profile; }; -/** \brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; -}; - /** \brief DPDK interface HQoS subport parameters set request @param client_index - opaque cookie to identify the sender @param context - sender 
context, to match reply w/ request @@ -49,7 +40,7 @@ define sw_interface_set_dpdk_hqos_pipe_reply { @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) @param tc_period - enforcement period for rates (measured in milliseconds) */ -define sw_interface_set_dpdk_hqos_subport { +autoreply define sw_interface_set_dpdk_hqos_subport { u32 client_index; u32 context; u32 sw_if_index; @@ -60,15 +51,6 @@ define sw_interface_set_dpdk_hqos_subport { u32 tc_period; }; -/** \brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; -}; - /** \brief DPDK interface HQoS tctbl entry set request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -77,7 +59,7 @@ define sw_interface_set_dpdk_hqos_subport_reply { @param tc - traffic class (0 .. 3) @param queue - traffic class queue (0 .. 3) */ -define sw_interface_set_dpdk_hqos_tctbl { +autoreply define sw_interface_set_dpdk_hqos_tctbl { u32 client_index; u32 context; u32 sw_if_index; @@ -86,18 +68,9 @@ define sw_interface_set_dpdk_hqos_tctbl { u32 queue; }; -/** \brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/plugins/flowperpkt/flowperpkt.api b/src/plugins/flowperpkt/flowperpkt.api index 1cf62c54..3ff92dca 100644 --- a/src/plugins/flowperpkt/flowperpkt.api +++ b/src/plugins/flowperpkt/flowperpkt.api @@ -12,7 +12,7 @@ @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param sw_if_index - index of the interface */ -manual_print define flowperpkt_tx_interface_add_del +autoreply manual_print define flowperpkt_tx_interface_add_del { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -28,20 +28,7 @@ manual_print define flowperpkt_tx_interface_add_del u32 sw_if_index; }; -/** \brief Reply to enable/disable per-packet IPFIX recording messages - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define flowperpkt_tx_interface_add_del_reply -{ - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; - -define flowperpkt_params +autoreply define flowperpkt_params { u32 client_index; u32 context; @@ -51,9 +38,3 @@ define flowperpkt_params u32 active_timer; /* ~0 is off, 0 is default */ u32 passive_timer; /* ~0 is off, 0 is default */ }; - -define flowperpkt_params_reply -{ - u32 context; - i32 retval; -}; diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api index 7b17c3f7..caa97e6e 100644 --- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define vxlan_gpe_ioam_export_enable_disable { +autoreply define vxlan_gpe_ioam_export_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -32,11 +32,3 @@ define vxlan_gpe_ioam_export_enable_disable { /* Src ip address */ }; - -define 
vxlan_gpe_ioam_export_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; \ No newline at end of file diff --git a/src/plugins/ioam/export/ioam_export.api b/src/plugins/ioam/export/ioam_export.api index f22d9fc8..bb830561 100644 --- a/src/plugins/ioam/export/ioam_export.api +++ b/src/plugins/ioam/export/ioam_export.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define ioam_export_ip6_enable_disable { +autoreply define ioam_export_ip6_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -32,11 +32,3 @@ define ioam_export_ip6_enable_disable { /* Src ip address */ }; - -define ioam_export_ip6_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/ioam/ip6/ioam_cache.api b/src/plugins/ioam/ip6/ioam_cache.api index de50d57d..dd9c0186 100644 --- a/src/plugins/ioam/ip6/ioam_cache.api +++ b/src/plugins/ioam/ip6/ioam_cache.api @@ -16,7 +16,7 @@ /* API to control ioam caching */ -define ioam_cache_ip6_enable_disable { +autoreply define ioam_cache_ip6_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -27,11 +27,3 @@ define ioam_cache_ip6_enable_disable { u8 is_disable; }; - -define ioam_cache_ip6_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/ioam/lib-pot/pot.api b/src/plugins/ioam/lib-pot/pot.api index fa2fc126..c377cde0 100644 --- a/src/plugins/ioam/lib-pot/pot.api +++ b/src/plugins/ioam/lib-pot/pot.api @@ -27,7 +27,7 @@ @param list_name_len - length of the name of this profile list @param list_name - name of this profile list */ -define pot_profile_add { +autoreply define pot_profile_add { u32 client_index; u32 context; u8 id; @@ -42,22 +42,12 @@ define pot_profile_add { u8 list_name[0]; }; -/** \brief Proof of Transit profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_add_reply { - u32 context; - i32 retval; -}; - - /** \brief Proof of Transit(POT): Activate POT profile in the list @param id - id of the profile @param list_name_len - length of the name of this profile list @param list_name - name of this profile list */ -define pot_profile_activate { +autoreply define pot_profile_activate { u32 client_index; u32 context; u8 id; @@ -65,37 +55,19 @@ define pot_profile_activate { u8 list_name[0]; }; -/** \brief Proof of Transit profile activate response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_activate_reply { - u32 context; - i32 retval; -}; - /** \brief Delete POT Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param list_name_len - length of the name of the profile list @param list_name - name of profile list to delete */ -define pot_profile_del { +autoreply define pot_profile_del { u32 client_index; u32 context; u8 list_name_len; u8 list_name[0]; }; -/** \brief Proof of Transit profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_del_reply { - u32 context; - i32 retval; -}; - /** \brief Show POT Profiles @param client_index - opaque cookie to 
identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/ioam/lib-trace/trace.api b/src/plugins/ioam/lib-trace/trace.api index cb958325..2f45c6e2 100644 --- a/src/plugins/ioam/lib-trace/trace.api +++ b/src/plugins/ioam/lib-trace/trace.api @@ -22,7 +22,7 @@ @param trace_tsp- Timestamp resolution @param app_data - Application specific opaque */ -define trace_profile_add { +autoreply define trace_profile_add { u32 client_index; u32 context; u8 trace_type; @@ -32,37 +32,15 @@ define trace_profile_add { u32 app_data; }; -/** \brief Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define trace_profile_add_reply { - u32 context; - i32 retval; -}; - - - /** \brief Delete trace Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define trace_profile_del { +autoreply define trace_profile_del { u32 client_index; u32 context; }; -/** \brief Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define trace_profile_del_reply { - u32 context; - i32 retval; -}; - - - /** \brief Show trace Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api index 056529a4..a6761f07 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api @@ -24,7 +24,7 @@ @param trace_enable - iOAM Trace enabled or not flag */ -define vxlan_gpe_ioam_enable { +autoreply define vxlan_gpe_ioam_enable { u32 client_index; u32 context; u16 id; @@ -33,38 +33,18 @@ define vxlan_gpe_ioam_enable { u8 trace_enable; }; -/** \brief iOAM Over VxLAN-GPE - Set iOAM transport for VXLAN-GPE reply - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define vxlan_gpe_ioam_enable_reply { - u32 context; - i32 retval; -}; - - /** \brief iOAM for VxLAN-GPE disable @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param id - profile id */ -define vxlan_gpe_ioam_disable +autoreply define vxlan_gpe_ioam_disable { u32 client_index; u32 context; u16 id; }; -/** \brief vxlan_gpe_ioam disable response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define vxlan_gpe_ioam_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Enable iOAM for a VNI (VXLAN-GPE) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -73,7 +53,7 @@ define vxlan_gpe_ioam_disable_reply @param remote - IPv4/6 Address of the remote VTEP */ -define vxlan_gpe_ioam_vni_enable { +autoreply define vxlan_gpe_ioam_vni_enable { u32 client_index; u32 context; u32 vni; @@ -82,18 +62,6 @@ define vxlan_gpe_ioam_vni_enable { u8 is_ipv6; }; -/** \brief Reply to enable iOAM for a VNI (VXLAN-GPE) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_vni_enable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - /** \brief Disable iOAM for a VNI (VXLAN-GPE) 
@param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -102,7 +70,7 @@ define vxlan_gpe_ioam_vni_enable_reply { @param remote - IPv4/6 Address of the remote VTEP */ -define vxlan_gpe_ioam_vni_disable { +autoreply define vxlan_gpe_ioam_vni_disable { u32 client_index; u32 context; u32 vni; @@ -111,19 +79,6 @@ define vxlan_gpe_ioam_vni_disable { u8 is_ipv6; }; -/** \brief Reply to disable iOAM for a VNI (VXLAN-GPE) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_vni_disable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - - /** \brief Enable iOAM for a VXLAN-GPE transit @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -131,7 +86,7 @@ define vxlan_gpe_ioam_vni_disable_reply { @param outer_fib_index- FIB index */ -define vxlan_gpe_ioam_transit_enable { +autoreply define vxlan_gpe_ioam_transit_enable { u32 client_index; u32 context; u32 outer_fib_index; @@ -139,18 +94,6 @@ define vxlan_gpe_ioam_transit_enable { u8 is_ipv6; }; -/** \brief Reply to enable iOAM for VXLAN-GPE transit - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_transit_enable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - /** \brief Disable iOAM for VXLAN-GPE transit @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -158,7 +101,7 @@ define vxlan_gpe_ioam_transit_enable_reply { @param outer_fib_index- FIB index */ -define vxlan_gpe_ioam_transit_disable { +autoreply define vxlan_gpe_ioam_transit_disable { u32 client_index; u32 context; u32 outer_fib_index; @@ -166,16 +109,3 @@ define vxlan_gpe_ioam_transit_disable { u8 is_ipv6; }; -/** \brief Reply to disable iOAM for VXLAN-GPE transit - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_transit_disable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - - diff --git a/src/plugins/lb/lb.api b/src/plugins/lb/lb.api index 39ee3c8f..32cc669b 100644 --- a/src/plugins/lb/lb.api +++ b/src/plugins/lb/lb.api @@ -8,7 +8,7 @@ @param flow_timeout - Time in seconds after which, if no packet is received for a given flow, the flow is removed from the established flow table. */ -define lb_conf +autoreply define lb_conf { u32 client_index; u32 context; @@ -18,11 +18,6 @@ define lb_conf u32 flow_timeout; }; -define lb_conf_reply { - u32 context; - i32 retval; -}; - /** \brief Add a virtual address (or prefix) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -33,7 +28,7 @@ define lb_conf_reply { for this VIP (must be power of 2). @param is_del - The VIP should be removed. 
*/ -define lb_add_del_vip { +autoreply define lb_add_del_vip { u32 client_index; u32 context; u8 ip_prefix[16]; @@ -43,11 +38,6 @@ define lb_add_del_vip { u8 is_del; }; -define lb_add_del_vip_reply { - u32 context; - i32 retval; -}; - /** \brief Add an application server for a given VIP @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -56,7 +46,7 @@ define lb_add_del_vip_reply { @param as_address - The application server address (IPv4 in lower order 32 bits). @param is_del - The AS should be removed. */ -define lb_add_del_as { +autoreply define lb_add_del_as { u32 client_index; u32 context; u8 vip_ip_prefix[16]; @@ -64,8 +54,3 @@ define lb_add_del_as { u8 as_address[16]; u8 is_del; }; - -define lb_add_del_as_reply { - u32 context; - i32 retval; -}; diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api index 6f946421..95e016c3 100644 --- a/src/plugins/memif/memif.api +++ b/src/plugins/memif/memif.api @@ -57,7 +57,7 @@ define memif_create_reply @param context - sender context, to match reply w/ request @param sw_if_index - software index of the interface to delete */ -define memif_delete +autoreply define memif_delete { u32 client_index; u32 context; @@ -65,16 +65,6 @@ define memif_delete u32 sw_if_index; }; -/** \brief Delete host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define memif_delete_reply -{ - u32 context; - i32 retval; -}; - /** \brief Memory interface details structure @param context - sender context, to match reply w/ request (memif_dump) @param sw_if_index - index of the interface diff --git a/src/plugins/snat/snat.api b/src/plugins/snat/snat.api index 9689f5f9..573b6753 100644 --- a/src/plugins/snat/snat.api +++ b/src/plugins/snat/snat.api @@ -29,7 +29,7 @@ @param vrf_id - VRF id of tenant, ~0 means independent of VRF @param is_add - 1 if add, 0 if delete */ -define snat_add_address_range { +autoreply define snat_add_address_range { u32 client_index; u32 context; u8 is_ip4; @@ -39,15 +39,6 @@ define snat_add_address_range { u8 is_add; }; -/** \brief Add S-NAT address range reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_address_range_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT addresses @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -77,7 +68,7 @@ define snat_address_details { @param is_inside - 1 if inside, 0 if outside @param sw_if_index - software index of the interface */ -define snat_interface_add_del_feature { +autoreply define snat_interface_add_del_feature { u32 client_index; u32 context; u8 is_add; @@ -85,15 +76,6 @@ define snat_interface_add_del_feature { u32 sw_if_index; }; -/** \brief Enable/disable S-NAT feature on the interface reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_interface_add_del_feature_reply { - u32 context; - i32 retval; -}; - /** \brief Dump interfaces with S-NAT feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -130,7 +112,7 @@ define snat_interface_details { used) @param vfr_id - VRF ID */ -define snat_add_static_mapping { +autoreply define snat_add_static_mapping { u32 client_index; u32 context; u8 is_add; @@ -145,15 +127,6 @@ define snat_add_static_mapping { u32 
vrf_id; }; -/** \brief Add/delete S-NAT static mapping reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_static_mapping_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT static mappings @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -257,21 +230,12 @@ define snat_show_config_reply @param context - sender context, to match reply w/ request @param worker_mask - S-NAT workers mask */ -define snat_set_workers { +autoreply define snat_set_workers { u32 client_index; u32 context; u64 worker_mask; }; -/** \brief Set S-NAT workers reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_set_workers_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT workers @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -300,7 +264,7 @@ define snat_worker_details { @param is_add - 1 if add, 0 if delete @param sw_if_index - software index of the interface */ -define snat_add_del_interface_addr { +autoreply define snat_add_del_interface_addr { u32 client_index; u32 context; u8 is_add; @@ -308,15 +272,6 @@ define snat_add_del_interface_addr { u32 sw_if_index; }; -/** \brief Add/delete S-NAT pool address from specific interfce reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_del_interface_addr_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT pool addresses interfaces @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -342,7 +297,7 @@ define snat_interface_addr_details { @param src_port - source port number @param enable - 1 if enable, 0 if disable */ -define snat_ipfix_enable_disable { +autoreply define snat_ipfix_enable_disable { u32 client_index; u32 context; u32 domain_id; @@ -350,15 +305,6 @@ define snat_ipfix_enable_disable { u8 enable; }; -/** \brief Enable/disable S-NAT IPFIX logging reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_ipfix_enable_disable_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT users @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -437,7 +383,7 @@ define snat_user_session_details { @param out_addr - outside IP address @param out_addr - outside IP address prefix length */ -define snat_add_det_map { +autoreply define snat_add_det_map { u32 client_index; u32 context; u8 is_add; @@ -449,15 +395,6 @@ define snat_add_det_map { u8 out_plen; }; -/** \brief Add/delete S-NAT deterministic mapping reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_det_map_reply { - u32 context; - i32 retval; -}; - /** \brief Get outside address and port range from inside address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -556,7 +493,7 @@ define snat_det_map_details { @param tcp_transitory - TCP transitory timeout (default 240sec) @param icmp - ICMP timeout (default 60sec) */ -define snat_det_set_timeouts { +autoreply define snat_det_set_timeouts { u32 client_index; u32 context; u32 udp; @@ -565,15 +502,6 @@ define snat_det_set_timeouts { u32 icmp; }; -/** \brief Set values of 
timeouts for deterministic NAT reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_det_set_timeouts_reply { - u32 context; - i32 retval; -}; - /** \brief Get values of timeouts for deterministic NAT (seconds) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/tools/vppapigen/gram.y b/src/tools/vppapigen/gram.y index de26af8d..9cea6023 100644 --- a/src/tools/vppapigen/gram.y +++ b/src/tools/vppapigen/gram.y @@ -38,7 +38,7 @@ void generate (YYSTYPE); %token NAME RPAR LPAR SEMI LBRACK RBRACK NUMBER PRIMTYPE BARF %token TPACKED DEFINE LCURLY RCURLY STRING UNION %token HELPER_STRING COMMA -%token NOVERSION MANUAL_PRINT MANUAL_ENDIAN TYPEONLY DONT_TRACE +%token NOVERSION MANUAL_PRINT MANUAL_ENDIAN TYPEONLY DONT_TRACE AUTOREPLY %% @@ -64,6 +64,7 @@ flag: | MANUAL_ENDIAN {$$ = $1;} | DONT_TRACE {$$ = $1;} | TYPEONLY {$$ = $1;} + | AUTOREPLY {$$ = $1;} ; defn: DEFINE NAME LCURLY defbody RCURLY SEMI diff --git a/src/tools/vppapigen/lex.c b/src/tools/vppapigen/lex.c index 733942ad..e6358143 100644 --- a/src/tools/vppapigen/lex.c +++ b/src/tools/vppapigen/lex.c @@ -27,6 +27,9 @@ #include "lex.h" #include "node.h" #include "tools/vppapigen/gram.h" +#include +#include +#include FILE *ifp, *ofp, *pythonfp, *jsonfp; char *vlib_app_name = "vpp"; @@ -38,6 +41,9 @@ int current_filename_allocated; unsigned long input_crc; unsigned long message_crc; int yydebug; +char *push_input_fifo; +char saved_ungetc_char; +char have_ungetc_char; /* * lexer variable definitions @@ -469,9 +475,50 @@ static char namebuf [MAXNAME]; static inline char getc_char (FILE *ifp) { + char rv; + + if (have_ungetc_char) { + have_ungetc_char = 0; + return saved_ungetc_char; + } + + if (clib_fifo_elts (push_input_fifo)) { + clib_fifo_sub1(push_input_fifo, rv); + return (rv & 0x7f); + } return ((char)(getc(ifp) & 0x7f)); } +u32 fe (char *fifo) +{ + return clib_fifo_elts (fifo); +} + +static inline void +ungetc_char (char c, FILE *ifp) +{ + saved_ungetc_char = c; + have_ungetc_char = 1; +} + +void autoreply (void *np_arg) +{ + static u8 *s; + node_t *np = (node_t *)np_arg; + int i; + + vec_reset_length (s); + + s = format (0, " define %s_reply\n", (char *)(np->data[0])); + s = format (s, "{\n"); + s = format (s, " u32 context;\n"); + s = format (s, " i32 retval;\n"); + s = format (s, "};\n"); + + for (i = 0; i < vec_len (s); i++) + clib_fifo_add1 (push_input_fifo, s[i]); +} + /* * yylex (well, yylex_1: The real yylex below does crc-hackery) */ @@ -595,7 +642,7 @@ static int yylex_1 (void) return (EOF); if (!isalnum (c) && c != '_') { - ungetc (c, ifp); + ungetc_char (c, ifp); namebuf [nameidx] = 0; the_lexer_state = START_STATE; return (name_check (namebuf, &yylval)); @@ -616,7 +663,7 @@ static int yylex_1 (void) return (EOF); if (!isdigit (c)) { - ungetc (c, ifp); + ungetc_char (c, ifp); namebuf [nameidx] = 0; the_lexer_state = START_STATE; yylval = (void *) atol(namebuf); @@ -889,6 +936,7 @@ int yylex (void) case MANUAL_ENDIAN: code = 276; break; case TYPEONLY: code = 278; break; case DONT_TRACE: code = 279; break; + case AUTOREPLY: code = 280; break; case EOF: code = ~0; break; /* hysterical compatibility */ @@ -929,6 +977,7 @@ static struct keytab { } keytab [] = /* Keep the table sorted, binary search used below! 
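 *
 * As an aside, a minimal sketch of what the autoreply machinery above
 * produces (derived from the format () calls in autoreply (), assuming
 * np->data[0] holds the message name): for an input declaration
 * "autoreply define foo { ... };" the lexer splices this synthesized
 * text into its input fifo right after the define,
 *
 *    define foo_reply
 *   {
 *    u32 context;
 *    i32 retval;
 *   };
 *
 * which is exactly the boilerplate reply message that the .api diffs in
 * this commit delete by hand.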
*/ { + {"autoreply", NODE_AUTOREPLY}, {"define", NODE_DEFINE}, {"dont_trace", NODE_DONT_TRACE}, {"f64", NODE_F64}, @@ -1005,6 +1054,10 @@ static int name_check (const char *s, YYSTYPE *token_value) *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE; return(DONT_TRACE); + case NODE_AUTOREPLY: + *token_value = (YYSTYPE) NODE_FLAG_AUTOREPLY; + return(AUTOREPLY); + case NODE_NOVERSION: return(NOVERSION); diff --git a/src/tools/vppapigen/lex.h b/src/tools/vppapigen/lex.h index a0fdc735..275cf685 100644 --- a/src/tools/vppapigen/lex.h +++ b/src/tools/vppapigen/lex.h @@ -24,6 +24,7 @@ extern int yylex (void); extern void yyerror (char *); extern int yyparse (void); +extern void autoreply (void *); #ifndef YYSTYPE #define YYSTYPE void * diff --git a/src/tools/vppapigen/node.c b/src/tools/vppapigen/node.c index 359ac9c9..9f234037 100644 --- a/src/tools/vppapigen/node.c +++ b/src/tools/vppapigen/node.c @@ -1050,6 +1050,11 @@ YYSTYPE set_flags(YYSTYPE a1, YYSTYPE a2) flags = (int)(uword) a1; np->flags |= flags; + + /* Generate a foo_reply_t right here */ + if (flags & NODE_FLAG_AUTOREPLY) + autoreply(np); + return (a2); } /* diff --git a/src/tools/vppapigen/node.h b/src/tools/vppapigen/node.h index 297d6036..65bd5d10 100644 --- a/src/tools/vppapigen/node.h +++ b/src/tools/vppapigen/node.h @@ -53,6 +53,7 @@ enum node_subclass { /* WARNING: indices must match the vft... */ NODE_MANUAL_PRINT, NODE_MANUAL_ENDIAN, NODE_DONT_TRACE, + NODE_AUTOREPLY, }; enum passid { @@ -84,6 +85,7 @@ typedef struct node_ { #define NODE_FLAG_MANUAL_ENDIAN (1<<1) #define NODE_FLAG_TYPEONLY (1<<3) #define NODE_FLAG_DONT_TRACE (1<<4) +#define NODE_FLAG_AUTOREPLY (1<<5) typedef struct node_vft_ { void (*print)(struct node_ *); diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api index c38b483c..32e51407 100644 --- a/src/vlibmemory/memclnt.api +++ b/src/vlibmemory/memclnt.api @@ -72,7 +72,7 @@ define memclnt_read_timeout { /* * RPC */ -define rpc_call { +autoreply define rpc_call { u32 client_index; u32 context; u64 function; @@ -82,11 +82,6 @@ define rpc_call { u8 data[0]; }; -define rpc_reply { - i32 retval; - u32 context; -}; - /* * Lookup message-ID base by name */ diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 7a536ee8..43574dea 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -1275,7 +1275,7 @@ VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) = { static void vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) { - vl_api_rpc_reply_t *rmp; + vl_api_rpc_call_reply_t *rmp; int (*fp) (void *); i32 rv = 0; vlib_main_t *vm = vlib_get_main (); @@ -1305,7 +1305,7 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) if (q) { rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_RPC_REPLY); + rmp->_vl_msg_id = ntohs (VL_API_RPC_CALL_REPLY); rmp->context = mp->context; rmp->retval = rv; vl_msg_api_send_shmem (q, (u8 *) & rmp); @@ -1318,7 +1318,7 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) } static void -vl_api_rpc_reply_t_handler (vl_api_rpc_reply_t * mp) +vl_api_rpc_call_reply_t_handler (vl_api_rpc_call_reply_t * mp) { clib_warning ("unimplemented"); } @@ -1415,7 +1415,7 @@ vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t * mp) #define foreach_rpc_api_msg \ _(RPC_CALL,rpc_call) \ -_(RPC_REPLY,rpc_reply) +_(RPC_CALL_REPLY,rpc_call_reply) #define foreach_plugin_trace_msg \ _(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids) diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index 
2cdcfad3..7bcaa4c3 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -18,43 +18,23 @@ @param context - sender context, to match reply w/ request @param sw_if_index - interface to use as echo source */ -define bfd_udp_set_echo_source +autoreply define bfd_udp_set_echo_source { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Set BFD feature response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_set_echo_source_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete BFD echo source @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define bfd_udp_del_echo_source +autoreply define bfd_udp_del_echo_source { u32 client_index; u32 context; }; -/** \brief Delete BFD echo source response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_del_echo_source_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -69,7 +49,7 @@ define bfd_udp_del_echo_source_reply @param bfd_key_id - key id sent out in BFD packets (if is_authenticated) @param conf_key_id - id of already configured key (if is_authenticated) */ -define bfd_udp_add +autoreply define bfd_udp_add { u32 client_index; u32 context; @@ -85,16 +65,6 @@ define bfd_udp_add u32 conf_key_id; }; -/** \brief Add UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_add_reply -{ - u32 context; - i32 retval; -}; - /** \brief Modify UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -106,7 +76,7 @@ define bfd_udp_add_reply @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param detect_mult - detect multiplier (# of packets missed before connection goes down) */ -define bfd_udp_mod +autoreply define bfd_udp_mod { u32 client_index; u32 context; @@ -119,16 +89,6 @@ define bfd_udp_mod u8 detect_mult; }; -/** \brief Modify UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_mod_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -137,7 +97,7 @@ define bfd_udp_mod_reply @param peer_addr - peer address @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 */ -define bfd_udp_del +autoreply define bfd_udp_del { u32 client_index; u32 context; @@ -147,16 +107,6 @@ define bfd_udp_del u8 is_ipv6; }; -/** \brief Delete UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get all BFD sessions @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -206,7 +156,7 @@ define bfd_udp_session_details @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param admin_up_down - set the admin state, 1 = up, 0 = down */ -define 
bfd_udp_session_set_flags +autoreply define bfd_udp_session_set_flags { u32 client_index; u32 context; @@ -217,23 +167,13 @@ define bfd_udp_session_set_flags u8 admin_up_down; }; -/** \brief Reply to bfd_udp_session_set_flags - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define bfd_udp_session_set_flags_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for BFD events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid */ -define want_bfd_events +autoreply define want_bfd_events { u32 client_index; u32 context; @@ -241,16 +181,6 @@ define want_bfd_events u32 pid; }; -/** \brief Reply for BFD events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_bfd_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - add/replace key to configuration @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -259,7 +189,7 @@ define want_bfd_events_reply @param auth_type - authentication type (RFC 5880/4.1/Auth Type) @param key - key data */ -define bfd_auth_set_key +autoreply define bfd_auth_set_key { u32 client_index; u32 context; @@ -269,16 +199,6 @@ define bfd_auth_set_key u8 key[20]; }; -/** \brief BFD UDP - add/replace key reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_auth_set_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - delete key from configuration @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -286,23 +206,13 @@ define bfd_auth_set_key_reply @param key_len - length of key (must be non-zero) @param key - key data */ -define bfd_auth_del_key +autoreply define bfd_auth_del_key { u32 client_index; u32 context; u32 conf_key_id; }; -/** \brief BFD UDP - delete key reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_auth_del_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get a list of configured authentication keys @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -338,7 +248,7 @@ define bfd_auth_keys_details @param bfd_key_id - key id sent out in BFD packets @param conf_key_id - id of already configured key */ -define bfd_udp_auth_activate +autoreply define bfd_udp_auth_activate { u32 client_index; u32 context; @@ -351,16 +261,6 @@ define bfd_udp_auth_activate u32 conf_key_id; }; -/** \brief BFD UDP - activate/change authentication reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_udp_auth_activate_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - deactivate authentication @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -370,7 +270,7 @@ define bfd_udp_auth_activate_reply @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param is_delayed - change is applied once peer applies the change (on first received non-authenticated packet) */ -define bfd_udp_auth_deactivate +autoreply define 
bfd_udp_auth_deactivate { u32 client_index; u32 context; @@ -381,16 +281,6 @@ define bfd_udp_auth_deactivate u8 is_delayed; }; -/** \brief BFD UDP - deactivate authentication reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_udp_auth_deactivate_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/classify/classify.api b/src/vnet/classify/classify.api index 51ebd6c8..cacb9bed 100644 --- a/src/vnet/classify/classify.api +++ b/src/vnet/classify/classify.api @@ -92,7 +92,7 @@ define classify_add_del_table_reply VRF id if action is 1 or 2. @param match[] - for add, match value for session, required */ -define classify_add_del_session +autoreply define classify_add_del_session { u32 client_index; u32 context; @@ -106,16 +106,6 @@ define classify_add_del_session u8 match[0]; }; -/** \brief Classify add / del session response - @param context - sender context, to match reply w/ request - @param retval - return code for the add/del session request -*/ -define classify_add_del_session_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset policer classify interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -127,7 +117,7 @@ define classify_add_del_session_reply Note: User is recommeneded to use just one valid table_index per call. (ip4_table_index, ip6_table_index, or l2_table_index) */ -define policer_classify_set_interface +autoreply define policer_classify_set_interface { u32 client_index; u32 context; @@ -138,16 +128,6 @@ define policer_classify_set_interface u8 is_add; }; -/** \brief Set/unset policer classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define policer_classify_set_interface_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get list of policer classify interfaces and tables @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -308,7 +288,7 @@ define classify_session_details Note: User is recommeneded to use just one valid table_index per call. 
(ip4_table_index, ip6_table_index, or l2_table_index) */ -define flow_classify_set_interface { +autoreply define flow_classify_set_interface { u32 client_index; u32 context; u32 sw_if_index; @@ -317,15 +297,6 @@ define flow_classify_set_interface { u8 is_add; }; -/** \brief Set/unset flow classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define flow_classify_set_interface_reply { - u32 context; - i32 retval; -}; - /** \brief Get list of flow classify interfaces and tables @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -353,4 +324,4 @@ define flow_classify_details { * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/cop/cop.api b/src/vnet/cop/cop.api index b34dae80..69316001 100644 --- a/src/vnet/cop/cop.api +++ b/src/vnet/cop/cop.api @@ -20,7 +20,7 @@ @param enable_disable - 1 => enable, 0 => disable */ -define cop_interface_enable_disable +autoreply define cop_interface_enable_disable { u32 client_index; u32 context; @@ -28,17 +28,6 @@ define cop_interface_enable_disable u8 enable_disable; }; -/** \brief cop: interface enable/disable junk filtration reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define cop_interface_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief cop: enable/disable whitelist filtration features on an interface Note: the supplied fib_id must match in order to remove the feature! @@ -51,7 +40,7 @@ define cop_interface_enable_disable_reply @param default_cop - 1 => enable non-ip4, non-ip6 filtration 0=> disable it */ -define cop_whitelist_enable_disable +autoreply define cop_whitelist_enable_disable { u32 client_index; u32 context; @@ -62,17 +51,6 @@ define cop_whitelist_enable_disable u8 default_cop; }; -/** \brief cop: interface enable/disable junk filtration reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define cop_whitelist_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief get_node_graph - get a copy of the vpp node graph including the current set of graph arcs. 
@@ -85,4 +63,4 @@ define cop_whitelist_enable_disable_reply * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api index 9fb2a207..8d40ad60 100644 --- a/src/vnet/devices/af_packet/af_packet.api +++ b/src/vnet/devices/af_packet/af_packet.api @@ -46,7 +46,7 @@ define af_packet_create_reply @param context - sender context, to match reply w/ request @param host_if_name - interface name */ -define af_packet_delete +autoreply define af_packet_delete { u32 client_index; u32 context; @@ -54,16 +54,6 @@ define af_packet_delete u8 host_if_name[64]; }; -/** \brief Delete host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define af_packet_delete_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/devices/netmap/netmap.api b/src/vnet/devices/netmap/netmap.api index 377ccffd..8dc698b9 100644 --- a/src/vnet/devices/netmap/netmap.api +++ b/src/vnet/devices/netmap/netmap.api @@ -22,7 +22,7 @@ @param is_pipe - is pipe @param is_master - 0=slave, 1=master */ -define netmap_create +autoreply define netmap_create { u32 client_index; u32 context; @@ -34,22 +34,12 @@ define netmap_create u8 is_master; }; -/** \brief Create netmap response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define netmap_create_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete netmap @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param netmap_if_name - interface name */ -define netmap_delete +autoreply define netmap_delete { u32 client_index; u32 context; @@ -57,16 +47,6 @@ define netmap_delete u8 netmap_if_name[64]; }; -/** \brief Delete netmap response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define netmap_delete_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api index 4f604e45..df7ce7ab 100644 --- a/src/vnet/devices/virtio/vhost_user.api +++ b/src/vnet/devices/virtio/vhost_user.api @@ -53,7 +53,7 @@ define create_vhost_user_if_reply @param sock_filename - unix socket filename, used to speak with frontend @param operation_mode - polling=0, interrupt=1, or adaptive=2 */ -define modify_vhost_user_if +autoreply define modify_vhost_user_if { u32 client_index; u32 context; @@ -65,36 +65,16 @@ define modify_vhost_user_if u8 operation_mode; }; -/** \brief vhost-user interface modify response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define modify_vhost_user_if_reply -{ - u32 context; - i32 retval; -}; - /** \brief vhost-user interface delete request @param client_index - opaque cookie to identify the sender */ -define delete_vhost_user_if +autoreply define delete_vhost_user_if { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief vhost-user interface delete response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_vhost_user_if_reply -{ - u32 context; - i32 retval; -}; - /** \brief Vhost-user interface details structure (fix this) @param sw_if_index - index of the interface @param 
interface_name - name of interface diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api index 2db85a79..eb0b070d 100644 --- a/src/vnet/dhcp/dhcp.api +++ b/src/vnet/dhcp/dhcp.api @@ -24,7 +24,7 @@ @param dhcp_server[] - server address @param dhcp_src_address[] - */ -define dhcp_proxy_config +autoreply define dhcp_proxy_config { u32 client_index; u32 context; @@ -36,16 +36,6 @@ define dhcp_proxy_config u8 dhcp_src_address[16]; }; -/** \brief DHCP Proxy config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief DHCP Proxy set / unset vss request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -55,7 +45,7 @@ define dhcp_proxy_config_reply @param is_ipv6 - ip6 if non-zero, else ip4 @param is_add - set vss if non-zero, else delete */ -define dhcp_proxy_set_vss +autoreply define dhcp_proxy_set_vss { u32 client_index; u32 context; @@ -66,16 +56,6 @@ define dhcp_proxy_set_vss u8 is_add; }; -/** \brief DHCP proxy set / unset vss response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_set_vss_reply -{ - u32 context; - i32 retval; -}; - /** \brief DHCP Client config add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -86,7 +66,7 @@ define dhcp_proxy_set_vss_reply via dhcp_compl_event API message if non-zero @param pid - sender's pid */ -define dhcp_client_config +autoreply define dhcp_client_config { u32 client_index; u32 context; @@ -97,16 +77,6 @@ define dhcp_client_config u32 pid; }; -/** \brief DHCP Client config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_client_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about a DHCP completion event @param client_index - opaque cookie to identify the sender @param pid - client pid registered to receive notification @@ -162,4 +132,4 @@ manual_endian manual_print define dhcp_proxy_details * Local Variables: * eval: (c-set-style "gnu") * End: - */ \ No newline at end of file + */ diff --git a/src/vnet/flow/flow.api b/src/vnet/flow/flow.api index 0e0f99bf..1c5e8c5c 100644 --- a/src/vnet/flow/flow.api +++ b/src/vnet/flow/flow.api @@ -24,7 +24,7 @@ @param template_interval - number of seconds after which to resend template @param udp_checksum - UDP checksum calculation enable flag */ -define set_ipfix_exporter +autoreply define set_ipfix_exporter { u32 client_index; u32 context; @@ -37,15 +37,6 @@ define set_ipfix_exporter u8 udp_checksum; }; -/** \brief Reply to IPFIX exporter configure request - @param context - sender context which was passed in the request -*/ -define set_ipfix_exporter_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPFIX exporter dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -84,22 +75,13 @@ define ipfix_exporter_details @param domain_id - domain ID reported in IPFIX messages for classify stream @param src_port - source port of UDP session for classify stream */ -define set_ipfix_classify_stream { +autoreply define set_ipfix_classify_stream { u32 client_index; u32 context; u32 domain_id; u16 src_port; }; -/** \brief IPFIX classify stream 
configure response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define set_ipfix_classify_stream_reply { - u32 context; - i32 retval; -}; - /** \brief IPFIX classify stream dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -127,7 +109,7 @@ define ipfix_classify_stream_details { @param ip_version - version of IP used in the classifier table @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified */ -define ipfix_classify_table_add_del { +autoreply define ipfix_classify_table_add_del { u32 client_index; u32 context; u32 table_id; @@ -136,14 +118,6 @@ define ipfix_classify_table_add_del { u8 is_add; }; -/** \brief IPFIX add classifier table response - @param context - sender context which was passed in the request -*/ -define ipfix_classify_table_add_del_reply { - u32 context; - i32 retval; -}; - /** \brief IPFIX classify tables dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/interface.api b/src/vnet/interface.api index 85fd73fb..9df63f18 100644 --- a/src/vnet/interface.api +++ b/src/vnet/interface.api @@ -6,7 +6,7 @@ @param link_up_down - Oper state sent on change event, not used in config. @param deleted - interface was deleted */ -define sw_interface_set_flags +autoreply define sw_interface_set_flags { u32 client_index; u32 context; @@ -17,23 +17,13 @@ define sw_interface_set_flags u8 deleted; }; -/** \brief Reply to sw_interface_set_flags - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_set_flags_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set interface MTU @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - index of the interface to set MTU on @param mtu - MTU */ -define sw_interface_set_mtu +autoreply define sw_interface_set_mtu { u32 client_index; u32 context; @@ -41,23 +31,13 @@ define sw_interface_set_mtu u16 mtu; }; -/** \brief Reply to sw_interface_set_mtu - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_set_mtu_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for interface events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid */ -define want_interface_events +autoreply define want_interface_events { u32 client_index; u32 context; @@ -65,16 +45,6 @@ define want_interface_events u32 pid; }; -/** \brief Reply for interface events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_interface_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface details structure (fix this) @param sw_if_index - index of the interface @param sup_sw_if_index - index of parent interface if any, else same as sw_if_index @@ -184,7 +154,7 @@ define sw_interface_dump @param address_length - address length in bytes, 4 for ip4, 16 for ip6 @param address - array of address bytes */ -define sw_interface_add_del_address +autoreply define 
sw_interface_add_del_address { u32 client_index; u32 context; @@ -196,16 +166,6 @@ define sw_interface_add_del_address u8 address[16]; }; -/** \brief Reply to sw_interface_add_del_address - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_add_del_address_reply -{ - u32 context; - i32 retval; -}; - /** \brief Associate the specified interface with a fib table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -213,7 +173,7 @@ define sw_interface_add_del_address_reply @param is_ipv6 - if non-zero ipv6, else ipv4 @param vrf_id - fib table/vrd id to associate the interface with */ -define sw_interface_set_table +autoreply define sw_interface_set_table { u32 client_index; u32 context; @@ -222,16 +182,6 @@ define sw_interface_set_table u32 vrf_id; }; -/** \brief Reply to sw_interface_set_table - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_set_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get VRF id assigned to interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -280,7 +230,7 @@ define vnet_interface_counters @param unnumbered_sw_if_index - interface which will use the address @param is_add - if non-zero set the association, else unset it */ -define sw_interface_set_unnumbered +autoreply define sw_interface_set_unnumbered { u32 client_index; u32 context; @@ -289,38 +239,18 @@ define sw_interface_set_unnumbered u8 is_add; }; -/** \brief Set unnumbered interface add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_unnumbered_reply -{ - u32 context; - i32 retval; -}; - /** \brief Clear interface statistics @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - index of the interface to clear statistics */ -define sw_interface_clear_stats +autoreply define sw_interface_clear_stats { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Reply to sw_interface_clear_stats - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_clear_stats_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set / clear software interface tag @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -328,7 +258,7 @@ define sw_interface_clear_stats_reply @param add_del - 1 = add, 0 = delete @param tag - an ascii tag */ -define sw_interface_tag_add_del +autoreply define sw_interface_tag_add_del { u32 client_index; u32 context; @@ -337,23 +267,13 @@ define sw_interface_tag_add_del u8 tag[64]; }; -/** \brief Reply to set / clear software interface tag - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define sw_interface_tag_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set an interface's MAC address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - the interface whose MAC will be set @param mac_addr - the new MAC address */ -define sw_interface_set_mac_address +autoreply define 
sw_interface_set_mac_address { u32 client_index; u32 context; @@ -361,16 +281,6 @@ define sw_interface_set_mac_address u8 mac_address[6]; }; -/** \brief Reply to setting an interface MAC address request - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define sw_interface_set_mac_address_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index 6af1714f..7097a130 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -136,7 +136,7 @@ define ip_neighbor_details { @param mac_address - l2 address of the neighbor @param dst_address - ip4 or ip6 address of the neighbor */ -define ip_neighbor_add_del +autoreply define ip_neighbor_add_del { u32 client_index; u32 context; @@ -150,16 +150,6 @@ define ip_neighbor_add_del u8 dst_address[16]; }; -/** \brief Reply for IP Neighbor add / delete request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_neighbor_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set the ip flow hash config for a fib request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -172,7 +162,7 @@ define ip_neighbor_add_del_reply @param proto -if non-zero include proto in flow hash @param reverse - if non-zero include reverse in flow hash */ -define set_ip_flow_hash +autoreply define set_ip_flow_hash { u32 client_index; u32 context; @@ -186,16 +176,6 @@ define set_ip_flow_hash u8 reverse; }; -/** \brief Set the ip flow hash config for a fib response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define set_ip_flow_hash_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 router advertisement config request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -213,7 +193,7 @@ define set_ip_flow_hash_reply @param initial_count - @param initial_interval - */ -define sw_interface_ip6nd_ra_config +autoreply define sw_interface_ip6nd_ra_config { u32 client_index; u32 context; @@ -233,16 +213,6 @@ define sw_interface_ip6nd_ra_config u32 initial_interval; }; -/** \brief IPv6 router advertisement config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6nd_ra_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 router advertisement prefix config request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -272,7 +242,7 @@ define sw_interface_ip6nd_ra_config_reply preferred [ADDRCONF]. A value of all one bits (0xffffffff) represents infinity. 
*/ -define sw_interface_ip6nd_ra_prefix +autoreply define sw_interface_ip6nd_ra_prefix { u32 client_index; u32 context; @@ -289,16 +259,6 @@ define sw_interface_ip6nd_ra_prefix u32 pref_lifetime; }; -/** \brief IPv6 router advertisement prefix config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6nd_ra_prefix_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 ND proxy config @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -306,7 +266,7 @@ define sw_interface_ip6nd_ra_prefix_reply @param address - The address of the host for which to proxy for @param is_add - Adding or deleting */ -define ip6nd_proxy_add_del +autoreply define ip6nd_proxy_add_del { u32 client_index; u32 context; @@ -315,16 +275,6 @@ define ip6nd_proxy_add_del u8 address[16]; }; -/** \brief IPv6 ND proxy response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define ip6nd_proxy_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 ND proxy details returned after request @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -355,7 +305,7 @@ define ip6nd_proxy_dump @param sw_if_index - interface used to reach neighbor @param enable - if non-zero enable ip6 on interface, else disable */ -define sw_interface_ip6_enable_disable +autoreply define sw_interface_ip6_enable_disable { u32 client_index; u32 context; @@ -363,23 +313,13 @@ define sw_interface_ip6_enable_disable u8 enable; /* set to true if enable */ }; -/** \brief IPv6 interface enable / disable response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 set link local address on interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface to set link local on @param address[] - the new link local address */ -define sw_interface_ip6_set_link_local_address +autoreply define sw_interface_ip6_set_link_local_address { u32 client_index; u32 context; @@ -387,16 +327,6 @@ define sw_interface_ip6_set_link_local_address u8 address[16]; }; -/** \brief IPv6 set link local address on interface response - @param context - sender context, to match reply w/ request - @param retval - error code for the request -*/ -define sw_interface_ip6_set_link_local_address_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add / del route request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -422,7 +352,7 @@ define sw_interface_ip6_set_link_local_address_reply @param next_hop_out_label_stack - the next-hop output label stack, outer most first @param next_hop_via_label - The next-hop is a resolved via a local label */ -define ip_add_del_route +autoreply define ip_add_del_route { u32 client_index; u32 context; @@ -452,16 +382,6 @@ define ip_add_del_route u32 next_hop_out_label_stack[next_hop_n_out_labels]; }; -/** \brief Reply for add / del route request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_add_del_route_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add / del route request 
@param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +390,7 @@ define ip_add_del_route_reply FIXME */ -define ip_mroute_add_del +autoreply define ip_mroute_add_del { u32 client_index; u32 context; @@ -488,16 +408,6 @@ define ip_mroute_add_del u8 src_address[16]; }; -/** \brief Reply for add / del mroute request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_mroute_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump IP multicast fib table @param client_index - opaque cookie to identify the sender */ diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api index ef090f84..203c5272 100644 --- a/src/vnet/ipsec/ipsec.api +++ b/src/vnet/ipsec/ipsec.api @@ -20,7 +20,7 @@ @param spd_id - SPD instance id (control plane allocated) */ -define ipsec_spd_add_del +autoreply define ipsec_spd_add_del { u32 client_index; u32 context; @@ -28,17 +28,6 @@ define ipsec_spd_add_del u32 spd_id; }; -/** \brief Reply for IPsec: Add/delete Security Policy Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_spd_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete SPD from interface @param client_index - opaque cookie to identify the sender @@ -49,7 +38,7 @@ define ipsec_spd_add_del_reply */ -define ipsec_interface_add_del_spd +autoreply define ipsec_interface_add_del_spd { u32 client_index; u32 context; @@ -59,17 +48,6 @@ define ipsec_interface_add_del_spd u32 spd_id; }; -/** \brief Reply for IPsec: Add/delete SPD from interface - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_interface_add_del_spd_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete Security Policy Database entry See RFC 4301, 4.4.1.1 on how to match packet to selectors @@ -95,7 +73,7 @@ define ipsec_interface_add_del_spd_reply */ -define ipsec_spd_add_del_entry +autoreply define ipsec_spd_add_del_entry { u32 client_index; u32 context; @@ -125,17 +103,6 @@ define ipsec_spd_add_del_entry u32 sa_id; }; -/** \brief Reply for IPsec: Add/delete Security Policy Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_spd_add_del_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete Security Association Database entry @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -167,7 +134,7 @@ define ipsec_spd_add_del_entry_reply IPsec tunnel address copy mode (to support GDOI) */ -define ipsec_sad_add_del_entry +autoreply define ipsec_sad_add_del_entry { u32 client_index; u32 context; @@ -195,17 +162,6 @@ define ipsec_sad_add_del_entry u8 tunnel_dst_address[16]; }; -/** \brief Reply for IPsec: Add/delete Security Association Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_sad_add_del_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Update Security Association keys @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -219,7 +175,7 @@ define ipsec_sad_add_del_entry_reply @param integrity_key - integrity keying material */ -define ipsec_sa_set_key +autoreply 
define ipsec_sa_set_key { u32 client_index; u32 context; @@ -233,17 +189,6 @@ define ipsec_sa_set_key u8 integrity_key[128]; }; -/** \brief Reply for IPsec: Update Security Association keys - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_sa_set_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Add/delete profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -251,7 +196,7 @@ define ipsec_sa_set_key_reply @param name - IKEv2 profile name @param is_add - Add IKEv2 profile if non-zero, else delete */ -define ikev2_profile_add_del +autoreply define ikev2_profile_add_del { u32 client_index; u32 context; @@ -260,16 +205,6 @@ define ikev2_profile_add_del u8 is_add; }; -/** \brief Reply for IKEv2: Add/delete profile - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile authentication method @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -280,7 +215,7 @@ define ikev2_profile_add_del_reply @param data_len - Authentication data length @param data - Authentication data (for rsa-sig cert file path) */ -define ikev2_profile_set_auth +autoreply define ikev2_profile_set_auth { u32 client_index; u32 context; @@ -292,16 +227,6 @@ define ikev2_profile_set_auth u8 data[0]; }; -/** \brief Reply for IKEv2: Set IKEv2 profile authentication method - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_auth_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile local/remote identification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -312,7 +237,7 @@ define ikev2_profile_set_auth_reply @param data_len - Identification data length @param data - Identification data */ -define ikev2_profile_set_id +autoreply define ikev2_profile_set_id { u32 client_index; u32 context; @@ -324,16 +249,6 @@ define ikev2_profile_set_id u8 data[0]; }; -/** \brief Reply for IKEv2: - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_id_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile traffic selector parameters @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -346,7 +261,7 @@ define ikev2_profile_set_id_reply @param start_addr - The smallest address included in traffic selector @param end_addr - The largest address included in traffic selector */ -define ikev2_profile_set_ts +autoreply define ikev2_profile_set_ts { u32 client_index; u32 context; @@ -360,23 +275,13 @@ define ikev2_profile_set_ts u32 end_addr; }; -/** \brief Reply for IKEv2: Set IKEv2 profile traffic selector parameters - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_ts_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 local RSA private key @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param key_file - Key file absolute path */ -define ikev2_set_local_key 
+autoreply define ikev2_set_local_key { u32 client_index; u32 context; @@ -384,16 +289,6 @@ define ikev2_set_local_key u8 key_file[256]; }; -/** \brief Reply for IKEv2: Set IKEv2 local key - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_local_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 responder interface and IP address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -402,7 +297,7 @@ define ikev2_set_local_key_reply @param sw_if_index - interface index @param address - interface address */ -define ikev2_set_responder +autoreply define ikev2_set_responder { u32 client_index; u32 context; @@ -412,17 +307,6 @@ define ikev2_set_responder u8 address[4]; }; -/** \brief Reply for IKEv2: Set IKEv2 responder interface and IP address - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_responder_reply -{ - u32 context; - i32 retval; -}; - - /** \brief IKEv2: Set IKEv2 IKE transforms in SA_INIT proposal (RFC 7296) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -434,7 +318,7 @@ define ikev2_set_responder_reply @param dh_group - Diffie-Hellman group */ -define ikev2_set_ike_transforms +autoreply define ikev2_set_ike_transforms { u32 client_index; u32 context; @@ -446,16 +330,6 @@ define ikev2_set_ike_transforms u32 dh_group; }; -/** \brief Reply for IKEv2: Set IKEv2 IKE transforms - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_ike_transforms_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 ESP transforms in SA_INIT proposal (RFC 7296) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -467,7 +341,7 @@ define ikev2_set_ike_transforms_reply @param dh_group - Diffie-Hellman group */ -define ikev2_set_esp_transforms +autoreply define ikev2_set_esp_transforms { u32 client_index; u32 context; @@ -479,16 +353,6 @@ define ikev2_set_esp_transforms u32 dh_group; }; -/** \brief Reply for IKEv2: Set IKEv2 ESP transforms - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_esp_transforms_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set Child SA lifetime, limited by time and/or data @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -500,7 +364,7 @@ define ikev2_set_esp_transforms_reply @param lifetime_maxdata - SA maximum life time in bytes (0 to disable) */ -define ikev2_set_sa_lifetime +autoreply define ikev2_set_sa_lifetime { u32 client_index; u32 context; @@ -512,16 +376,6 @@ define ikev2_set_sa_lifetime u64 lifetime_maxdata; }; -/** \brief Reply for IKEv2: Set Child SA lifetime - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_sa_lifetime_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the SA_INIT exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -529,7 +383,7 @@ define ikev2_set_sa_lifetime_reply @param name - IKEv2 profile name */ -define ikev2_initiate_sa_init +autoreply define ikev2_initiate_sa_init { 
u32 client_index; u32 context; @@ -537,16 +391,6 @@ define ikev2_initiate_sa_init u8 name[64]; }; -/** \brief Reply for IKEv2: Initiate the SA_INIT exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_sa_init_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the delete IKE SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -554,7 +398,7 @@ define ikev2_initiate_sa_init_reply @param ispi - IKE SA initiator SPI */ -define ikev2_initiate_del_ike_sa +autoreply define ikev2_initiate_del_ike_sa { u32 client_index; u32 context; @@ -562,16 +406,6 @@ define ikev2_initiate_del_ike_sa u64 ispi; }; -/** \brief Reply for IKEv2: Initiate the delete IKE SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_del_ike_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the delete Child SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -579,7 +413,7 @@ define ikev2_initiate_del_ike_sa_reply @param ispi - Child SA initiator SPI */ -define ikev2_initiate_del_child_sa +autoreply define ikev2_initiate_del_child_sa { u32 client_index; u32 context; @@ -587,16 +421,6 @@ define ikev2_initiate_del_child_sa u32 ispi; }; -/** \brief Reply for IKEv2: Initiate the delete Child SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_del_child_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the rekey Child SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -604,7 +428,7 @@ define ikev2_initiate_del_child_sa_reply @param ispi - Child SA initiator SPI */ -define ikev2_initiate_rekey_child_sa +autoreply define ikev2_initiate_rekey_child_sa { u32 client_index; u32 context; @@ -612,16 +436,6 @@ define ikev2_initiate_rekey_child_sa u32 ispi; }; -/** \brief Reply for IKEv2: Initiate the rekey Child SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_rekey_child_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump ipsec policy database data @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -682,4 +496,4 @@ define ipsec_spd_details { * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index c23eebec..db42d635 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -70,66 +70,36 @@ define l2_fib_table_dump @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define l2_fib_clear_table +autoreply define l2_fib_clear_table { u32 client_index; u32 context; }; -/** \brief L2 fib clear table response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_fib_clear_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB flush bridge domain entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the entry's bridge 
domain id */ -define l2fib_flush_bd +autoreply define l2fib_flush_bd { u32 client_index; u32 context; u32 bd_id; }; -/** \brief L2 FIB flush bridge domain entries response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2fib_flush_bd_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB flush interface entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the entry's bridge domain id */ -define l2fib_flush_int +autoreply define l2fib_flush_int { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief L2 FIB flush interface entries response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2fib_flush_int_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB add entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -140,7 +110,7 @@ define l2fib_flush_int_reply @param static_mac - @param filter_mac - */ -define l2fib_add_del +autoreply define l2fib_add_del { u32 client_index; u32 context; @@ -153,16 +123,6 @@ define l2fib_add_del u8 bvi_mac; }; -/** \brief L2 FIB add entry response - @param context - sender context, to match reply w/ request - @param retval - return code for the add l2fib entry request -*/ -define l2fib_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set L2 flags request !!! TODO - need more info, feature bits in l2_input.h @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -196,7 +156,7 @@ define l2_flags_reply @param bd_id - the bridge domain to create @param mac_age - mac aging time in min, 0 for disabled */ -define bridge_domain_set_mac_age +autoreply define bridge_domain_set_mac_age { u32 client_index; u32 context; @@ -204,16 +164,6 @@ define bridge_domain_set_mac_age u8 mac_age; }; -/** \brief Set bridge domain response - @param context - sender context, to match reply w/ request - @param retval - return code for the set l2 bits request -*/ -define bridge_domain_set_mac_age_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 bridge domain add or delete request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -226,7 +176,7 @@ define bridge_domain_set_mac_age_reply @param mac_age - mac aging time in min, 0 for disabled @param is_add - add or delete flag */ -define bridge_domain_add_del +autoreply define bridge_domain_add_del { u32 client_index; u32 context; @@ -240,16 +190,6 @@ define bridge_domain_add_del u8 is_add; }; -/** \brief L2 bridge domain add or delete response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request -*/ -define bridge_domain_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 bridge domain request operational state details @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -337,7 +277,7 @@ define bridge_flags_reply @param tag1 - Needed for any push or translate vtr op @param tag2 - Needed for any push 2 or translate x-2 vtr ops */ -define l2_interface_vlan_tag_rewrite +autoreply define l2_interface_vlan_tag_rewrite { u32 client_index; u32 context; @@ -348,16 +288,6 @@ define l2_interface_vlan_tag_rewrite u32 
tag2; // second pushed tag }; -/** \brief L2 interface vlan tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_vlan_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface pbb tag rewrite configure request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -370,7 +300,7 @@ define l2_interface_vlan_tag_rewrite_reply @param b_vlanid - B-tag vlanid, needed for any push or translate qinq vtr op @param i_sid - I-tag service id, needed for any push or translate qinq vtr op */ -define l2_interface_pbb_tag_rewrite +autoreply define l2_interface_pbb_tag_rewrite { u32 client_index; u32 context; @@ -383,16 +313,6 @@ define l2_interface_pbb_tag_rewrite u32 i_sid; }; -/** \brief L2 interface pbb tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_pbb_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/l2tp/l2tp.api b/src/vnet/l2tp/l2tp.api index 5a5a5a48..4587a807 100644 --- a/src/vnet/l2tp/l2tp.api +++ b/src/vnet/l2tp/l2tp.api @@ -52,7 +52,7 @@ define l2tpv3_create_tunnel_reply u32 sw_if_index; }; -define l2tpv3_set_tunnel_cookies +autoreply define l2tpv3_set_tunnel_cookies { u32 client_index; u32 context; @@ -61,16 +61,6 @@ define l2tpv3_set_tunnel_cookies u64 new_remote_cookie; }; -/** \brief L2TP tunnel set cookies response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2tpv3_set_tunnel_cookies_reply -{ - u32 context; - i32 retval; -}; - define sw_if_l2tpv3_tunnel_details { u32 context; @@ -91,7 +81,7 @@ define sw_if_l2tpv3_tunnel_dump u32 context; }; -define l2tpv3_interface_enable_disable +autoreply define l2tpv3_interface_enable_disable { u32 client_index; u32 context; @@ -99,13 +89,7 @@ define l2tpv3_interface_enable_disable u32 sw_if_index; }; -define l2tpv3_interface_enable_disable_reply -{ - u32 context; - i32 retval; -}; - -define l2tpv3_set_lookup_key +autoreply define l2tpv3_set_lookup_key { u32 client_index; u32 context; @@ -113,12 +97,6 @@ define l2tpv3_set_lookup_key u8 key; }; -define l2tpv3_set_lookup_key_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/lisp-cp/lisp.api b/src/vnet/lisp-cp/lisp.api index a50a5ccb..8bed71b3 100644 --- a/src/vnet/lisp-cp/lisp.api +++ b/src/vnet/lisp-cp/lisp.api @@ -59,7 +59,7 @@ define lisp_add_del_locator_set_reply @param priority - priority of the lisp locator @param weight - weight of the lisp locator */ -define lisp_add_del_locator +autoreply define lisp_add_del_locator { u32 client_index; u32 context; @@ -70,16 +70,6 @@ define lisp_add_del_locator u8 weight; }; -/** \brief Reply for locator add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_locator_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete lisp eid-table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -98,7 +88,7 @@ define lisp_add_del_locator_reply HMAC_SHA_256_128 2 @param key - secret key */ -define lisp_add_del_local_eid +autoreply define lisp_add_del_local_eid { u32 client_index; u32 context; @@ -112,16 +102,6 
@@ define lisp_add_del_local_eid u8 key[64]; }; -/** \brief Reply for local_eid add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_local_eid_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add/delete map server @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -129,7 +109,7 @@ define lisp_add_del_local_eid_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - map server IP address */ -define lisp_add_del_map_server +autoreply define lisp_add_del_map_server { u32 client_index; u32 context; @@ -138,16 +118,6 @@ define lisp_add_del_map_server u8 ip_address[16]; }; -/** \brief Reply for lisp_add_del_map_server - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_map_server_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map-resolver @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -155,7 +125,7 @@ define lisp_add_del_map_server_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - array of address bytes */ -define lisp_add_del_map_resolver +autoreply define lisp_add_del_map_resolver { u32 client_index; u32 context; @@ -164,45 +134,25 @@ define lisp_add_del_map_resolver u8 ip_address[16]; }; -/** \brief Reply for map_resolver add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_map_resolver_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable LISP feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define lisp_enable_disable +autoreply define lisp_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable LISP PITR node @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param ls_name - locator set name @param is_add - add locator set if non-zero, else disable pitr */ -define lisp_pitr_set_locator_set +autoreply define lisp_pitr_set_locator_set { u32 client_index; u32 context; @@ -210,16 +160,6 @@ define lisp_pitr_set_locator_set u8 ls_name[64]; }; -/** \brief Reply for lisp_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_pitr_set_locator_set_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable use of PETR @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -227,7 +167,7 @@ define lisp_pitr_set_locator_set_reply @param address - PETR IP address @param is_add - add locator set if non-zero, else disable pitr */ -define lisp_use_petr +autoreply define lisp_use_petr { u32 client_index; u32 context; @@ -236,16 +176,6 @@ define lisp_use_petr u8 is_add; }; -/** \brief Reply for lisp_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return 
code -*/ -define lisp_use_petr_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for LISP PETR status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -298,45 +228,25 @@ define show_lisp_rloc_probe_state_reply @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define lisp_rloc_probe_enable_disable +autoreply define lisp_rloc_probe_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for lisp_rloc_probe_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_rloc_probe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable/disable LISP map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define lisp_map_register_enable_disable +autoreply define lisp_map_register_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for lisp_map_register_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_map_register_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get state of LISP map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -366,23 +276,13 @@ define show_lisp_map_register_state_reply 0 - destination only 1 - source/destaination */ -define lisp_map_request_mode +autoreply define lisp_map_request_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for lisp_map_request_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_map_request_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for LISP map-request mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -430,7 +330,7 @@ typeonly manual_endian manual_print define remote_locator @param rloc_num - number of remote locators @param rlocs - remote locator records */ -manual_print manual_endian define lisp_add_del_remote_mapping +autoreply manual_print manual_endian define lisp_add_del_remote_mapping { u32 client_index; u32 context; @@ -448,16 +348,6 @@ manual_print manual_endian define lisp_add_del_remote_mapping vl_api_remote_locator_t rlocs[rloc_num]; }; -/** \brief Reply for lisp_add_del_remote_mapping - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_remote_mapping_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete LISP adjacency adjacency @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +360,7 @@ define lisp_add_del_remote_mapping_reply @param reid - remote EID @param leid - local EID */ -define lisp_add_del_adjacency +autoreply define lisp_add_del_adjacency { u32 client_index; u32 context; @@ -483,23 +373,13 @@ define lisp_add_del_adjacency u8 leid_len; }; -/** \brief Reply for lisp_add_del_adjacency - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_adjacency_reply -{ - u32 context; - i32 retval; -}; - /** 
\brief add or delete map request itr rlocs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete @param locator_set_name - locator set name */ -define lisp_add_del_map_request_itr_rlocs +autoreply define lisp_add_del_map_request_itr_rlocs { u32 client_index; u32 context; @@ -512,12 +392,6 @@ define lisp_add_del_map_request_itr_rlocs @param retval - return code */ -define lisp_add_del_map_request_itr_rlocs_reply -{ - u32 context; - i32 retval; -}; - /** \brief map/unmap vni/bd_index to vrf @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -525,7 +399,7 @@ define lisp_add_del_map_request_itr_rlocs_reply @param dp_table - virtual network id/bridge domain index @param vrf - vrf */ -define lisp_eid_table_add_del_map +autoreply define lisp_eid_table_add_del_map { u32 client_index; u32 context; @@ -535,16 +409,6 @@ define lisp_eid_table_add_del_map u8 is_l2; }; -/** \brief Reply for lisp_eid_table_add_del_map - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_eid_table_add_del_map_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for map lisp locator status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/lisp-cp/one.api b/src/vnet/lisp-cp/one.api index ca82f694..2fa1edf6 100644 --- a/src/vnet/lisp-cp/one.api +++ b/src/vnet/lisp-cp/one.api @@ -59,7 +59,7 @@ define one_add_del_locator_set_reply @param priority - priority of the locator @param weight - weight of the locator */ -define one_add_del_locator +autoreply define one_add_del_locator { u32 client_index; u32 context; @@ -70,16 +70,6 @@ define one_add_del_locator u8 weight; }; -/** \brief Reply for locator add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_locator_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete ONE eid-table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -98,7 +88,7 @@ define one_add_del_locator_reply HMAC_SHA_256_128 2 @param key - secret key */ -define one_add_del_local_eid +autoreply define one_add_del_local_eid { u32 client_index; u32 context; @@ -112,16 +102,6 @@ define one_add_del_local_eid u8 key[64]; }; -/** \brief Reply for local_eid add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_local_eid_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add/delete map server @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -129,7 +109,7 @@ define one_add_del_local_eid_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - map server IP address */ -define one_add_del_map_server +autoreply define one_add_del_map_server { u32 client_index; u32 context; @@ -138,16 +118,6 @@ define one_add_del_map_server u8 ip_address[16]; }; -/** \brief Reply for one_add_del_map_server - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_map_server_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map-resolver @param client_index - opaque cookie to 
identify the sender @param context - sender context, to match reply w/ request @@ -155,7 +125,7 @@ define one_add_del_map_server_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - array of address bytes */ -define one_add_del_map_resolver +autoreply define one_add_del_map_resolver { u32 client_index; u32 context; @@ -164,45 +134,25 @@ define one_add_del_map_resolver u8 ip_address[16]; }; -/** \brief Reply for map_resolver add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_map_resolver_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable ONE feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define one_enable_disable +autoreply define one_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable ONE PITR node @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param ls_name - locator set name @param is_add - add locator set if non-zero, else disable pitr */ -define one_pitr_set_locator_set +autoreply define one_pitr_set_locator_set { u32 client_index; u32 context; @@ -210,16 +160,6 @@ define one_pitr_set_locator_set u8 ls_name[64]; }; -/** \brief Reply for one_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_pitr_set_locator_set_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable use of PETR @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -227,7 +167,7 @@ define one_pitr_set_locator_set_reply @param address - PETR IP address @param is_add - add locator set if non-zero, else disable PETR */ -define one_use_petr +autoreply define one_use_petr { u32 client_index; u32 context; @@ -236,16 +176,6 @@ define one_use_petr u8 is_add; }; -/** \brief Reply for one_use_petr - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_use_petr_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for ONE PETR status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -298,45 +228,25 @@ define show_one_rloc_probe_state_reply @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define one_rloc_probe_enable_disable +autoreply define one_rloc_probe_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for one_rloc_probe_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_rloc_probe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable/disable ONE map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define one_map_register_enable_disable +autoreply define one_map_register_enable_disable 
{ u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for one_map_register_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_map_register_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get state of ONE map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -366,23 +276,13 @@ define show_one_map_register_state_reply 0 - destination only 1 - source/destaination */ -define one_map_request_mode +autoreply define one_map_request_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for one_map_request_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_map_request_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for ONE map-request mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -430,7 +330,7 @@ typeonly manual_endian manual_print define one_remote_locator @param rloc_num - number of remote locators @param rlocs - remote locator records */ -manual_print manual_endian define one_add_del_remote_mapping +autoreply manual_print manual_endian define one_add_del_remote_mapping { u32 client_index; u32 context; @@ -448,16 +348,6 @@ manual_print manual_endian define one_add_del_remote_mapping vl_api_one_remote_locator_t rlocs[rloc_num]; }; -/** \brief Reply for one_add_del_remote_mapping - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_remote_mapping_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete ONE adjacency adjacency @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +360,7 @@ define one_add_del_remote_mapping_reply @param reid - remote EID @param leid - local EID */ -define one_add_del_adjacency +autoreply define one_add_del_adjacency { u32 client_index; u32 context; @@ -483,23 +373,13 @@ define one_add_del_adjacency u8 leid_len; }; -/** \brief Reply for one_add_del_adjacency - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_adjacency_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map request itr rlocs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete @param locator_set_name - locator set name */ -define one_add_del_map_request_itr_rlocs +autoreply define one_add_del_map_request_itr_rlocs { u32 client_index; u32 context; @@ -507,17 +387,6 @@ define one_add_del_map_request_itr_rlocs u8 locator_set_name[64]; }; -/** \brief Reply for one_add_del_map_request_itr_rlocs - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define one_add_del_map_request_itr_rlocs_reply -{ - u32 context; - i32 retval; -}; - /** \brief map/unmap vni/bd_index to vrf @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -525,7 +394,7 @@ define one_add_del_map_request_itr_rlocs_reply @param dp_table - virtual network id/bridge domain index @param vrf - vrf */ -define one_eid_table_add_del_map +autoreply define one_eid_table_add_del_map { u32 client_index; 
u32 context; @@ -535,16 +404,6 @@ define one_eid_table_add_del_map u8 is_l2; }; -/** \brief Reply for one_eid_table_add_del_map - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_eid_table_add_del_map_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for map one locator status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -901,31 +760,19 @@ define one_stats_details u32 bytes; }; -define one_stats_flush +autoreply define one_stats_flush { u32 client_index; u32 context; }; -define one_stats_flush_reply -{ - u32 context; - i32 retval; -}; - -define one_stats_enable_disable +autoreply define one_stats_enable_disable { u32 client_index; u32 context; u8 is_en; }; -define one_stats_enable_disable_reply -{ - u32 context; - i32 retval; -}; - define show_one_stats_enable_disable { u32 client_index; diff --git a/src/vnet/lisp-gpe/lisp_gpe.api b/src/vnet/lisp-gpe/lisp_gpe.api index 43a6a6cd..f79d18c1 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.api +++ b/src/vnet/lisp-gpe/lisp_gpe.api @@ -43,7 +43,7 @@ typeonly manual_print manual_endian define gpe_locator @param loc_num - number of locators @param locs - array of remote locators */ -manual_print manual_endian define gpe_add_del_fwd_entry +autoreply manual_print manual_endian define gpe_add_del_fwd_entry { u32 client_index; u32 context; @@ -60,44 +60,24 @@ manual_print manual_endian define gpe_add_del_fwd_entry vl_api_gpe_locator_t locs[loc_num]; }; -/** \brief Reply for gpe_fwd_entry add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_add_del_fwd_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable gpe protocol @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define gpe_enable_disable +autoreply define gpe_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete gpe_iface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete */ -define gpe_add_del_iface +autoreply define gpe_add_del_iface { u32 client_index; u32 context; @@ -107,16 +87,6 @@ define gpe_add_del_iface u32 vni; }; -/** \brief Reply for gpe_iface add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_add_del_iface_reply -{ - u32 context; - i32 retval; -}; - define gpe_fwd_entries_get { u32 client_index; @@ -163,23 +133,13 @@ manual_endian manual_print define gpe_fwd_entry_path_details @param context - sender context, to match reply w/ request @param mode - LISP (value 0) or VXLAN (value 1) */ -define gpe_set_encap_mode +autoreply define gpe_set_encap_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for set_encap_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_set_encap_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief get GPE encapsulation mode @param client_index - opaque cookie to identify the 
sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/map/map.api b/src/vnet/map/map.api index 4e4be85e..d68f13f0 100644 --- a/src/vnet/map/map.api +++ b/src/vnet/map/map.api @@ -62,22 +62,13 @@ define map_add_domain_reply @param context - sender context, to match reply w/ request @param index - MAP Domain index */ -define map_del_domain +autoreply define map_del_domain { u32 client_index; u32 context; u32 index; }; -/** \brief Reply for MAP domain del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define map_del_domain_reply -{ - u32 context; - i32 retval; -}; /** \brief Add or Delete MAP rule from a domain (Only used for shared IPv4 per subscriber) @param client_index - opaque cookie to identify the sender @@ -87,7 +78,7 @@ define map_del_domain_reply @param ip6_dst - MAP CE IPv6 address @param psid - Rule PSID */ -define map_add_del_rule +autoreply define map_add_del_rule { u32 client_index; u32 context; @@ -97,15 +88,6 @@ define map_add_del_rule u16 psid; }; -/** \brief Reply for MAP rule add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define map_add_del_rule_reply -{ - u32 context; - i32 retval; -}; /** \brief Get list of map domains @param client_index - opaque cookie to identify the sender diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api index a1e1270a..c8a3ffb7 100644 --- a/src/vnet/mpls/mpls.api +++ b/src/vnet/mpls/mpls.api @@ -26,7 +26,7 @@ @param mb_address_length - Length of IP prefix @param mb_address[16] - IP prefix/ */ -define mpls_ip_bind_unbind +autoreply define mpls_ip_bind_unbind { u32 client_index; u32 context; @@ -40,16 +40,6 @@ define mpls_ip_bind_unbind u8 mb_address[16]; }; -/** \brief Reply for MPLS IP bind/unbind request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_ip_bind_unbind_reply -{ - u32 context; - i32 retval; -}; - /** \brief MPLS tunnel Add / del route @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -172,7 +162,7 @@ manual_endian manual_print define mpls_tunnel_details @param mr_next_hop_out_label_stack - the next-hop output label stack, outer most first @param next_hop_via_label - The next-hop is a resolved via a local label */ -define mpls_route_add_del +autoreply define mpls_route_add_del { u32 client_index; u32 context; @@ -199,16 +189,6 @@ define mpls_route_add_del u32 mr_next_hop_out_label_stack[mr_next_hop_n_out_labels]; }; -/** \brief Reply for MPLS route add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_route_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump MPLS fib table @param client_index - opaque cookie to identify the sender */ @@ -240,4 +220,4 @@ manual_endian manual_print define mpls_fib_details * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api index e207e46f..4aef09da 100644 --- a/src/vnet/session/session.api +++ b/src/vnet/session/session.api @@ -49,26 +49,17 @@ define application_attach_reply { @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ - define application_detach { +autoreply define application_detach { u32 client_index; u32 context; }; - /** \brief detach 
reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define application_detach_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, please map an additional shared memory segment @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param segment_name - */ -define map_another_segment { +autoreply define map_another_segment { u32 client_index; u32 context; u32 segment_size; @@ -83,7 +74,7 @@ define map_another_segment { "tcp://::/0/80" [ipv6] etc. @param options - socket options, fifo sizes, etc. */ -define bind_uri { +autoreply define bind_uri { u32 client_index; u32 context; u32 accept_cookie; @@ -97,7 +88,7 @@ define bind_uri { "tcp://::/0/80" [ipv6], etc. @param options - socket options, fifo sizes, etc. */ -define unbind_uri { +autoreply define unbind_uri { u32 client_index; u32 context; u8 uri[128]; @@ -122,24 +113,6 @@ define connect_uri { u64 options[16]; }; -/** \brief Bind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bind_uri_reply { - u32 context; - i32 retval; -}; - -/** \brief unbind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define unbind_uri_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, connect reply @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -165,15 +138,6 @@ define connect_uri_reply { u8 segment_name[128]; }; -/** \brief client->vpp - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define map_another_segment_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, accept this session @param context - sender context, to match reply w/ request @param listener_handle - tells client which listener this pertains to @@ -290,7 +254,7 @@ define bind_sock { @param context - sender context, to match reply w/ request @param handle - bind handle obtained from bind reply */ -define unbind_sock { +autoreply define unbind_sock { u32 client_index; u32 context; u64 handle; @@ -339,15 +303,6 @@ define bind_sock_reply { u8 segment_name[128]; }; -/** \brief unbind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define unbind_sock_reply { - u32 context; - i32 retval; -}; - /** \brief vpp/server->client, connect reply @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -378,23 +333,14 @@ define connect_sock_reply { @param context - sender context, to match reply w/ request @param is_enable - disable session layer if 0, enable otherwise */ -define session_enable_disable { +autoreply define session_enable_disable { u32 client_index; u32 context; u8 is_enable; }; -/** \brief Reply for session enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define session_enable_disable_reply { - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") * End: - */ \ No newline at end of file + */ diff --git a/src/vnet/span/span.api b/src/vnet/span/span.api index 4babdd83..914fd8d0 100644 --- a/src/vnet/span/span.api +++ b/src/vnet/span/span.api @@ -21,7 +21,7 @@ @param sw_if_index_to - interface where the traffic is mirrored @param state - 0 = 
disabled, 1 = rx enabled, 2 = tx enabled, 3 tx & rx enabled */ -define sw_interface_span_enable_disable { +autoreply define sw_interface_span_enable_disable { u32 client_index; u32 context; u32 sw_if_index_from; @@ -29,14 +29,6 @@ define sw_interface_span_enable_disable { u8 state; }; -/** \brief Reply to SPAN enable/disable request - @param context - sender context which was passed in the request -*/ -define sw_interface_span_enable_disable_reply { - u32 context; - i32 retval; -}; - /** \brief SPAN dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/sr/sr.api b/src/vnet/sr/sr.api index 5feadcb0..9e900741 100644 --- a/src/vnet/sr/sr.api +++ b/src/vnet/sr/sr.api @@ -25,7 +25,7 @@ @param fib_table FIB table in which we should install the localsid entry @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. */ -define sr_localsid_add_del +autoreply define sr_localsid_add_del { u32 client_index; u32 context; @@ -39,16 +39,6 @@ define sr_localsid_add_del u8 nh_addr[16]; }; -/** \brief IPv6 SR LocalSID add/del request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_localsid_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy add @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -59,7 +49,7 @@ define sr_localsid_add_del_reply @param fib_table is the VRF where to install the FIB entry for the BSID @param segments is a vector of IPv6 address composing the segment list */ -define sr_policy_add +autoreply define sr_policy_add { u32 client_index; u32 context; @@ -72,16 +62,6 @@ define sr_policy_add u8 segments[0]; }; -/** \brief IPv6 SR Policy add request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_add_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy modification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -94,7 +74,7 @@ define sr_policy_add_reply @param weight is the weight of the sid list. optional. @param is_encap Mode. Encapsulation or SRH insertion. 
*/ -define sr_policy_mod +autoreply define sr_policy_mod { u32 client_index; u32 context; @@ -108,23 +88,13 @@ define sr_policy_mod u8 segments[0]; }; -/** \brief IPv6 SR Policy modification request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_mod_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy deletion @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bsid is the bindingSID of the SR Policy @param index is the index of the SR policy */ -define sr_policy_del +autoreply define sr_policy_del { u32 client_index; u32 context; @@ -132,16 +102,6 @@ define sr_policy_del u32 sr_policy_index; }; -/** \brief IPv6 SR Policy deletion request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR steering add/del @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -154,7 +114,7 @@ define sr_policy_del_reply @param sw_if_index is the incoming interface for L2 traffic @param traffic_type describes the type of traffic */ -define sr_steering_add_del +autoreply define sr_steering_add_del { u32 client_index; u32 context; @@ -168,16 +128,6 @@ define sr_steering_add_del u8 traffic_type; }; -/** \brief IPv6 SR steering add/del request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_steering_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump the list of SR LocalSIDs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/unix/tap.api b/src/vnet/unix/tap.api index 1fd0bb09..d9fba371 100644 --- a/src/vnet/unix/tap.api +++ b/src/vnet/unix/tap.api @@ -93,23 +93,13 @@ define tap_modify_reply @param context - sender context, to match reply w/ request @param sw_if_index - interface index of existing tap interface */ -define tap_delete +autoreply define tap_delete { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Reply for tap delete request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define tap_delete_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump tap interfaces request */ define sw_interface_tap_dump { diff --git a/src/vnet/vxlan/vxlan.api b/src/vnet/vxlan/vxlan.api index 048220fb..6c331a58 100644 --- a/src/vnet/vxlan/vxlan.api +++ b/src/vnet/vxlan/vxlan.api @@ -61,7 +61,7 @@ define vxlan_tunnel_details @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass @param enable - if non-zero enable, else disable */ -define sw_interface_set_vxlan_bypass +autoreply define sw_interface_set_vxlan_bypass { u32 client_index; u32 context; @@ -69,13 +69,3 @@ define sw_interface_set_vxlan_bypass u8 is_ipv6; u8 enable; }; - -/** \brief Interface set vxlan-bypass response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_vxlan_bypass_reply -{ - u32 context; - i32 retval; -}; \ No newline at end of file diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index a4ba180d..7c07c822 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -80,7 +80,7 @@ define 
create_vlan_subif_reply @param sw_if_index - index of the interface @param enable - if non-zero enable, else disable */ -define sw_interface_set_mpls_enable +autoreply define sw_interface_set_mpls_enable { u32 client_index; u32 context; @@ -88,16 +88,6 @@ define sw_interface_set_mpls_enable u8 enable; }; -/** \brief Reply for MPLS state on an interface - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_set_mpls_enable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Proxy ARP add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -106,7 +96,7 @@ define sw_interface_set_mpls_enable_reply @param low_address[4] - Low address of the Proxy ARP range @param hi_address[4] - High address of the Proxy ARP range */ -define proxy_arp_add_del +autoreply define proxy_arp_add_del { u32 client_index; u32 context; @@ -116,23 +106,13 @@ define proxy_arp_add_del u8 hi_address[4]; }; -/** \brief Reply for proxy arp add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define proxy_arp_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Proxy ARP add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - Which interface to enable / disable Proxy Arp on @param enable_disable - 1 to enable Proxy ARP on interface, 0 to disable */ -define proxy_arp_intfc_enable_disable +autoreply define proxy_arp_intfc_enable_disable { u32 client_index; u32 context; @@ -141,23 +121,13 @@ define proxy_arp_intfc_enable_disable u8 enable_disable; }; -/** \brief Reply for Proxy ARP interface enable / disable request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define proxy_arp_intfc_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Reset VRF (remove all routes etc) request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4 @param vrf_id - ID of th FIB table / VRF to reset */ -define reset_vrf +autoreply define reset_vrf { u32 client_index; u32 context; @@ -165,16 +135,6 @@ define reset_vrf u32 vrf_id; }; -/** \brief Reply for Reset VRF request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define reset_vrf_reply -{ - u32 context; - i32 retval; -}; - /** \brief Is Address Reachable request - DISABLED @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -200,7 +160,7 @@ define is_address_reachable @param enable_disable - 1 = enable stats, 0 = disable @param pid - pid of process requesting stats updates */ -define want_stats +autoreply define want_stats { u32 client_index; u32 context; @@ -208,16 +168,6 @@ define want_stats u32 pid; }; -/** \brief Reply for Want Stats request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_stats_reply -{ - u32 context; - i32 retval; -}; - typeonly manual_print manual_endian define ip4_fib_counter { u32 address; @@ -331,7 +281,7 @@ define oam_event @param enable_disable- enable if non-zero, else disable @param pid - pid of the requesting process */ -define want_oam_events +autoreply 
define want_oam_events { u32 client_index; u32 context; @@ -339,16 +289,6 @@ define want_oam_events u32 pid; }; -/** \brief Want OAM events response - @param context - sender context, to match reply w/ request - @param retval - return code for the want oam stats request -*/ -define want_oam_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief OAM add / del target request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -357,7 +297,7 @@ define want_oam_events_reply @param dst_address[] - destination address of the target @param is_add - add target if non-zero, else delete */ -define oam_add_del +autoreply define oam_add_del { u32 client_index; u32 context; @@ -367,23 +307,13 @@ define oam_add_del u8 is_add; }; -/** \brief OAM add / del target response - @param context - sender context, to match reply w/ request - @param retval - return code of the request -*/ -define oam_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Reset fib table request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param vrf_id - vrf/table id of the fib table to reset @param is_ipv6 - an ipv6 fib to reset if non-zero, else ipv4 */ -define reset_fib +autoreply define reset_fib { u32 client_index; u32 context; @@ -391,16 +321,6 @@ define reset_fib u8 is_ipv6; }; -/** \brief Reset fib response - @param context - sender context, to match reply w/ request - @param retval - return code for the reset bfib request -*/ -define reset_fib_reply -{ - u32 context; - i32 retval; -}; - /** \brief Create loopback interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -458,23 +378,13 @@ define create_loopback_instance_reply @param context - sender context, to match reply w/ request @param sw_if_index - sw index of the interface that was created */ -define delete_loopback +autoreply define delete_loopback { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Delete loopback interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_loopback_reply -{ - u32 context; - i32 retval; -}; - /** \brief Control ping from client to api server request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -543,7 +453,7 @@ define cli_inband_reply @param is_ipv6 - neighbor limit if non-zero, else ARP limit @param arp_neighbor_limit - the new limit, defaults are ~ 50k */ -define set_arp_neighbor_limit +autoreply define set_arp_neighbor_limit { u32 client_index; u32 context; @@ -551,16 +461,6 @@ define set_arp_neighbor_limit u32 arp_neighbor_limit; }; -/** \brief Set max allowed ARP or ip6 neighbor entries response - @param context - sender context, to match reply w/ request - @param retval - return code for request -*/ -define set_arp_neighbor_limit_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface patch add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -568,7 +468,7 @@ define set_arp_neighbor_limit_reply @param tx_sw_if_index - transmit side interface @param is_add - if non-zero set up the interface patch, else remove it */ -define l2_patch_add_del +autoreply define l2_patch_add_del { u32 client_index; u32 context; @@ -577,23 +477,13 @@ 
define l2_patch_add_del u8 is_add; }; -/** \brief L2 interface patch add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_patch_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface set vpath request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface used to reach neighbor @param enable - if non-zero enable, else disable */ -define sw_interface_set_vpath +autoreply define sw_interface_set_vpath { u32 client_index; u32 context; @@ -601,16 +491,6 @@ define sw_interface_set_vpath u8 enable; }; -/** \brief Interface set vpath response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_vpath_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set L2 XConnect between two interfaces request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -618,7 +498,7 @@ define sw_interface_set_vpath_reply @param tx_sw_if_index - Transmit interface index @param enable - enable xconnect if not 0, else set to L3 mode */ -define sw_interface_set_l2_xconnect +autoreply define sw_interface_set_l2_xconnect { u32 client_index; u32 context; @@ -627,16 +507,6 @@ define sw_interface_set_l2_xconnect u8 enable; }; -/** \brief Set L2 XConnect response - @param context - sender context, to match reply w/ request - @param retval - L2 XConnect request return code -*/ -define sw_interface_set_l2_xconnect_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface bridge mode request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -646,7 +516,7 @@ define sw_interface_set_l2_xconnect_reply @param shg - Shared horizon group, for bridge mode only @param enable - Enable beige mode if not 0, else set to L3 mode */ -define sw_interface_set_l2_bridge +autoreply define sw_interface_set_l2_bridge { u32 client_index; u32 context; @@ -657,16 +527,6 @@ define sw_interface_set_l2_bridge u8 enable; }; -/** \brief Interface bridge mode response - @param context - sender context, to match reply w/ request - @param retval - Bridge mode request return code -*/ -define sw_interface_set_l2_bridge_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set bridge domain ip to mac entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -676,7 +536,7 @@ define sw_interface_set_l2_bridge_reply @param mac_address - MAC address @param */ -define bd_ip_mac_add_del +autoreply define bd_ip_mac_add_del { u32 client_index; u32 context; @@ -687,16 +547,6 @@ define bd_ip_mac_add_del u8 mac_address[6]; }; -/** \brief Set bridge domain ip to mac entry response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request -*/ -define bd_ip_mac_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset the classification table for an interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -704,7 +554,7 @@ define bd_ip_mac_add_del_reply @param sw_if_index - interface to associate with the table @param table_index - index of the table, if ~0 unset the table */ -define classify_set_interface_ip_table 
+autoreply define classify_set_interface_ip_table { u32 client_index; u32 context; @@ -713,16 +563,6 @@ define classify_set_interface_ip_table u32 table_index; /* ~0 => off */ }; -/** \brief Set/unset interface classification table response - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define classify_set_interface_ip_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset l2 classification tables for an interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -731,7 +571,7 @@ define classify_set_interface_ip_table_reply @param ip6_table_index - ip6 index @param other_table_index - other index */ -define classify_set_interface_l2_tables +autoreply define classify_set_interface_l2_tables { u32 client_index; u32 context; @@ -743,16 +583,6 @@ define classify_set_interface_l2_tables u8 is_input; }; -/** \brief Set/unset l2 classification tables for an interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define classify_set_interface_l2_tables_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get node index using name request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -809,7 +639,7 @@ define add_node_next_reply @param sw_if_index - interface to enable/disable filtering on @param enable_disable - if non-zero enable filtering, else disable */ -define l2_interface_efp_filter +autoreply define l2_interface_efp_filter { u32 client_index; u32 context; @@ -817,16 +647,6 @@ define l2_interface_efp_filter u32 enable_disable; }; -/** \brief L2 interface ethernet flow point filtering response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_efp_filter_reply -{ - u32 context; - i32 retval; -}; - define create_subif { u32 client_index; @@ -882,7 +702,7 @@ define show_version_reply }; /* Gross kludge, DGMS */ -define interface_name_renumber +autoreply define interface_name_renumber { u32 client_index; u32 context; @@ -890,12 +710,6 @@ define interface_name_renumber u32 new_show_dev_instance; }; -define interface_name_renumber_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for ip4 arp resolution events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -903,7 +717,7 @@ define interface_name_renumber_reply @param pid - sender's pid @param address - the exact ip4 address of interest */ -define want_ip4_arp_events +autoreply define want_ip4_arp_events { u32 client_index; u32 context; @@ -912,16 +726,6 @@ define want_ip4_arp_events u32 address; }; -/** \brief Reply for interface events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_ip4_arp_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about an ip4 arp resolution event @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -949,7 +753,7 @@ define ip4_arp_event @param pid - sender's pid @param address - the exact ip6 address of interest */ -define want_ip6_nd_events +autoreply define want_ip6_nd_events { u32 client_index; u32 context; @@ -958,16 +762,6 @@ define want_ip6_nd_events u8 address[16]; }; -/** \brief Reply for 
ip6 nd resolution events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_ip6_nd_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about an ip6 nd resolution or mac/ip event @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -999,7 +793,7 @@ define ip6_nd_event Note: User is recommeneded to use just one valid table_index per call. (ip4_table_index, ip6_table_index, or l2_table_index) */ -define input_acl_set_interface +autoreply define input_acl_set_interface { u32 client_index; u32 context; @@ -1010,16 +804,6 @@ define input_acl_set_interface u8 is_add; }; -/** \brief Set/unset input ACL interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define input_acl_set_interface_reply -{ - u32 context; - i32 retval; -}; - define get_node_graph { u32 client_index; @@ -1048,7 +832,7 @@ define get_node_graph_reply @param pow_enable - Proof of Work enabled or not flag @param trace_enable - iOAM Trace enabled or not flag */ -define ioam_enable +autoreply define ioam_enable { u32 client_index; u32 context; @@ -1060,38 +844,18 @@ define ioam_enable u32 node_id; }; -/** \brief iOAM Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ioam_enable_reply -{ - u32 context; - i32 retval; -}; - /** \brief iOAM disable @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param index - MAP Domain index */ -define ioam_disable +autoreply define ioam_disable { u32 client_index; u32 context; u16 id; }; -/** \brief iOAM disable response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ioam_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Query relative index via node names @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1149,7 +913,7 @@ define pg_create_interface_reply @param count - number of packets to be captured @param pcap_file - pacp file name to store captured packets */ -define pg_capture +autoreply define pg_capture { u32 client_index; u32 context; @@ -1160,23 +924,13 @@ define pg_capture u8 pcap_file_name[pcap_name_length]; }; -/** \brief PacketGenerator capture packets response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pg_capture_reply -{ - u32 context; - i32 retval; -}; - /** \brief Enable / disable packet generator request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enabled - 1 if enabling streams, 0 if disabling @param stream - stream name to be enable/disabled, if not specified handle all streams */ -define pg_enable_disable +autoreply define pg_enable_disable { u32 client_index; u32 context; @@ -1185,16 +939,6 @@ define pg_enable_disable u8 stream_name[stream_name_length]; }; -/** \brief Reply for enable / disable packet generator - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define pg_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Configure IP source and L4 port-range check @param client_index - opaque 
cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1208,7 +952,7 @@ define pg_enable_disable_reply @param vrf_id - fib table/vrf id to associate the source and port-range check with @note To specify a single port set low_port and high_port entry the same */ -define ip_source_and_port_range_check_add_del +autoreply define ip_source_and_port_range_check_add_del { u32 client_index; u32 context; @@ -1222,16 +966,6 @@ define ip_source_and_port_range_check_add_del u32 vrf_id; }; -/** \brief Configure IP source and L4 port-range check reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_source_and_port_range_check_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set interface source and L4 port-range request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1239,7 +973,7 @@ define ip_source_and_port_range_check_add_del_reply @param tcp_vrf_id - VRF associated with source and TCP port-range check @param udp_vrf_id - VRF associated with source and TCP port-range check */ -define ip_source_and_port_range_check_interface_add_del +autoreply define ip_source_and_port_range_check_interface_add_del { u32 client_index; u32 context; @@ -1251,36 +985,17 @@ define ip_source_and_port_range_check_interface_add_del u32 udp_out_vrf_id; }; -/** \brief Set interface source and L4 port-range response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ip_source_and_port_range_check_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete sub interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - sw index of the interface that was created by create_subif */ -define delete_subif { +autoreply define delete_subif { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Delete sub interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_subif_reply { - u32 context; - i32 retval; -}; - /** \brief Punt traffic to the host @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1289,7 +1004,7 @@ define delete_subif_reply { @param l4_protocol - L4 protocol to be punted, only UDP (0x11) is supported @param l4_port - TCP/UDP port to be punted */ -define punt { +autoreply define punt { u32 client_index; u32 context; u8 is_add; @@ -1298,23 +1013,13 @@ define punt { u16 l4_port; }; -/** \brief Reply to the punt request - @param context - sender context which was passed in the request - @param retval - return code of punt request -*/ -define punt_reply -{ - u32 context; - i32 retval; -}; - /** \brief Feature path enable/disable request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - the interface @param enable - 1 = on, 0 = off */ -define feature_enable_disable { +autoreply define feature_enable_disable { u32 client_index; u32 context; u32 sw_if_index; @@ -1323,16 +1028,6 @@ define feature_enable_disable { u8 feature_name[64]; }; -/** \brief Reply to the eature path enable/disable request - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ 
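Every hunk in this change applies the same mechanical transformation: a hand-written <name>_reply message that carries nothing beyond a context and a retval is deleted, and the matching request gains the autoreply keyword, so the API compiler generates an equivalent reply automatically. A minimal before/after sketch of the pattern, using a placeholder message name (my_feature is illustrative, not one of the messages touched here):

    /* before: request plus hand-written trivial reply */
    define my_feature
    {
      u32 client_index;
      u32 context;
      u8 is_enable;
    };
    define my_feature_reply
    {
      u32 context;
      i32 retval;
    };

    /* after: one definition; the generated my_feature_reply
     * still carries context and retval */
    autoreply define my_feature
    {
      u32 client_index;
      u32 context;
      u8 is_enable;
    };

Replies that return more than a bare retval (for example a created interface's sw_if_index) stay hand-written; only the trivial context/retval replies are converted.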
-define feature_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") -- cgit 1.2.3-korg From 417488643611acbf8e3e2525d1e6c83f5eb107fa Mon Sep 17 00:00:00 2001 From: Steven Date: Tue, 25 Apr 2017 13:49:51 -0700 Subject: vhost: core dump on quit with worker threads Patch 6347 removed the socket file when the interface is deleted and when the VPP process is exiting. The CLI for deleting the interface has a built-in vlib_worker_thread_barrier_sync to prevent the worker threads from running. Unfortunately, the quit CLI does not, so a worker thread may crash. The fix is to add vlib_worker_thread_barrier_sync in vhost_user_exit. Change-Id: I1eff81170e131098f1799662f0ab48d6fca3def7 Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 4f4f038a..bcfef9f5 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2589,11 +2589,13 @@ vhost_user_exit (vlib_main_t * vm) vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui; + vlib_worker_thread_barrier_sync (vlib_get_main ()); /* *INDENT-OFF* */ pool_foreach (vui, vum->vhost_user_interfaces, { vhost_user_delete_if (vnm, vm, vui->sw_if_index); }); /* *INDENT-ON* */ + vlib_worker_thread_barrier_release (vlib_get_main ()); return 0; } -- cgit 1.2.3-korg From bed5489a4eb109efe91dac8d259a03b1834b0db8 Mon Sep 17 00:00:00 2001 From: Pierre Pfister Date: Thu, 20 Apr 2017 15:34:00 +0200 Subject: vhost: Fix mmap size calculation I had a bug where a requested size of 1G resulted in an aligned size of '1G + 2M', causing an OOM error. The previous code added one full huge page when the size was already aligned.
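The arithmetic behind the fix is the standard round-up-to-alignment idiom: rounding sz up to a multiple of a power-of-two page_sz is (sz + page_sz - 1) & ~(page_sz - 1); the old form (sz + page_sz) & ~(page_sz - 1) adds one whole extra page whenever sz is already aligned. A standalone illustration (plain C, not VPP code):

    #include <assert.h>
    #include <stddef.h>

    /* Round sz up to the next multiple of page_sz.
       page_sz must be a power of two. */
    static size_t
    round_to_page (size_t sz, size_t page_sz)
    {
      return (sz + page_sz - 1) & ~(page_sz - 1);
    }

    int
    main (void)
    {
      size_t one_g = 1UL << 30, two_m = 2UL << 20;

      /* fixed form: an already-aligned request stays unchanged */
      assert (round_to_page (one_g, two_m) == one_g);

      /* old form: the same request grew to 1G + 2M, hence the OOM */
      assert (((one_g + two_m) & ~(two_m - 1)) == one_g + two_m);
      return 0;
    }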
Change-Id: Idd3aa0e9b893fb3efccba6ae1c7161e26d3f9456 Signed-off-by: Pierre Pfister --- src/vnet/devices/virtio/vhost-user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index bcfef9f5..3ac76977 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -303,7 +303,7 @@ unmap_all_mem_regions (vhost_user_intf_t * vui) ssize_t map_sz = (vui->regions[i].memory_size + vui->regions[i].mmap_offset + - page_sz) & ~(page_sz - 1); + page_sz - 1) & ~(page_sz - 1); r = munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset, @@ -917,7 +917,7 @@ vhost_user_socket_read (unix_file_t * uf) /* align size to 2M page */ ssize_t map_sz = (vui->regions[i].memory_size + vui->regions[i].mmap_offset + - page_sz) & ~(page_sz - 1); + page_sz - 1) & ~(page_sz - 1); vui->region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED, fds[i], 0); @@ -1168,7 +1168,7 @@ vhost_user_socket_read (unix_file_t * uf) /* align size to 2M page */ long page_sz = get_huge_page_size (fd); ssize_t map_sz = - (msg.log.size + msg.log.offset + page_sz) & ~(page_sz - 1); + (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1); vui->log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); -- cgit 1.2.3-korg From 5445f5fd9b9d020b285d48e571c86528932ac071 Mon Sep 17 00:00:00 2001 From: Steven Date: Tue, 25 Apr 2017 16:16:00 -0700 Subject: vhost: Disallow duplicate path name for vhost interface When creating or modifying a vhost interface, verify whether the path name already exists and reject the command if it does. Change-Id: I8b2d33b77c847f774492874f7d194fa72c488479 Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 36 +++++++++++++++++++++++++++++++++++- src/vnet/devices/virtio/vhost-user.h | 2 ++ 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 3ac76977..acc7bf82 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -1349,6 +1349,8 @@ vhost_user_init (vlib_main_t * vm) vum->random = random_default_seed (); + mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword)); + return 0; } @@ -2525,6 +2527,7 @@ static void vhost_user_term_if (vhost_user_intf_t * vui) { int q; + vhost_user_main_t *vum = &vhost_user_main; // Delete configured thread pinning vec_reset_length (vui->workers); @@ -2546,6 +2549,9 @@ vhost_user_term_if (vhost_user_intf_t * vui) vui->unix_server_index = ~0; unlink (vui->sock_filename); } + + mhash_unset (&vum->if_index_by_sock_name, vui->sock_filename, + &vui->if_index); } int @@ -2692,13 +2698,14 @@ vhost_user_vui_init (vnet_main_t * vnm, vnet_sw_interface_t *sw; sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); int q; + vhost_user_main_t *vum = &vhost_user_main; if (server_sock_fd != -1) { unix_file_t template = { 0 }; template.read_function = vhost_user_socksvr_accept_ready; template.file_descriptor = server_sock_fd; - template.private_data = vui - vhost_user_main.vhost_user_interfaces; //hw index + template.private_data = vui - vum->vhost_user_interfaces; //hw index vui->unix_server_index = unix_file_add (&unix_main, &template); } else @@ -2715,6 +2722,9 @@ vhost_user_vui_init (vnet_main_t * vnm, vui->unix_file_index = ~0; vui->log_base_addr = 0; vui->operation_mode = operation_mode; + vui->if_index = vui -
vum->vhost_user_interfaces; + mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename, + &vui->if_index, 0); for (q = 0; q < VHOST_VRING_MAX_N; q++) vhost_user_vring_init (vui, q); @@ -2842,6 +2852,7 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, int rv = 0; int server_sock_fd = -1; vhost_user_main_t *vum = &vhost_user_main; + uword *if_index; if ((operation_mode != VHOST_USER_POLLING_MODE) && (operation_mode != VHOST_USER_INTERRUPT_MODE)) @@ -2852,6 +2863,17 @@ return VNET_API_ERROR_INVALID_ARGUMENT; } + if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename); + if (if_index) + { + if (sw_if_index) + { + vui = &vum->vhost_user_interfaces[*if_index]; + *sw_if_index = vui->sw_if_index; + } + return VNET_API_ERROR_IF_ALREADY_EXISTS; + } + if (is_server) { if ((rv = @@ -2901,6 +2923,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, int server_sock_fd = -1; int rv = 0; vnet_hw_interface_t *hwif; + uword *if_index; if ((operation_mode != VHOST_USER_POLLING_MODE) && (operation_mode != VHOST_USER_INTERRUPT_MODE)) @@ -2909,8 +2932,19 @@ hwif->dev_class_index != vhost_user_dev_class.index) return VNET_API_ERROR_INVALID_SW_IF_INDEX; + if (sock_filename == NULL || !(strlen (sock_filename) > 0)) + return VNET_API_ERROR_INVALID_ARGUMENT; + vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance); + /* + * Disallow changing the interface to have the same path name + * as another interface + */ + if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename); + if (if_index && (*if_index != vui->if_index)) + return VNET_API_ERROR_IF_ALREADY_EXISTS; + // First try to open server socket if (is_server) if ((rv = vhost_user_init_server_sock (sock_filename, diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index 80f58a20..56b65477 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -228,6 +228,7 @@ typedef struct u32 unix_file_index; char sock_filename[256]; int sock_errno; + uword if_index; u32 hw_if_index, sw_if_index; //Feature negotiation @@ -311,6 +312,7 @@ typedef struct typedef struct { + mhash_t if_index_by_sock_name; u32 mtu_bytes; vhost_user_intf_t *vhost_user_interfaces; u32 *show_dev_instance_by_real_dev_instance; -- cgit 1.2.3-korg From c855b73f785b3c4c1756927ad542de13ba193b6f Mon Sep 17 00:00:00 2001 From: Ray Kinsella Date: Fri, 21 Apr 2017 12:24:43 +0100 Subject: af_packet: reflect admin device state on host Setting the interface state on an af_packet device in VPP was not being reflected on the host, so the user had to set the device state in VPP and then again on the host in order to bring the interface up. This change makes the device state consistent between VPP and the host.
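The mechanism reduces to three standard Linux ioctls: resolve the interface name from its index (SIOCGIFNAME), read the current flags (SIOCGIFFLAGS), then set or clear IFF_UP (SIOCSIFFLAGS). A standalone sketch of that sequence (the helper name is hypothetical; the diff below folds this logic into af_packet_interface_admin_up_down and skips it for bridge members):

    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>

    /* Mirror an admin up/down request onto the host interface.
       Returns 0 on success, -1 on any ioctl failure. */
    static int
    set_host_if_admin_state (int host_if_index, int up)
    {
      struct ifreq ifr;
      int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0);

      if (fd < 0)
        return -1;

      memset (&ifr, 0, sizeof (ifr));
      /* look the name up by index, in case it changed on the host */
      ifr.ifr_ifindex = host_if_index;
      if (ioctl (fd, SIOCGIFNAME, &ifr) < 0
          || ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
        {
          close (fd);
          return -1;
        }

      if (up)
        ifr.ifr_flags |= IFF_UP;
      else
        ifr.ifr_flags &= ~IFF_UP;

      rv = ioctl (fd, SIOCSIFFLAGS, &ifr);
      close (fd);
      return rv < 0 ? -1 : 0;
    }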
Change-Id: I6dc6aee79503e04576683db937b861337a2b375b Signed-off-by: Ray Kinsella --- src/vnet/devices/af_packet/af_packet.c | 50 ++++++++++++++++++++++++++-------- src/vnet/devices/af_packet/af_packet.h | 1 + src/vnet/devices/af_packet/device.c | 42 ++++++++++++++++++++++++++-- 3 files changed, 78 insertions(+), 15 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 7464d4e6..92bd1092 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -82,26 +83,35 @@ af_packet_fd_read_ready (unix_file_t * uf) } static int -create_packet_v2_sock (u8 * name, tpacket_req_t * rx_req, +is_bridge (const u8 * host_if_name) +{ + u8 *s; + DIR *dir = NULL; + + s = format (0, "/sys/class/net/%s/bridge%c", host_if_name, 0); + dir = opendir ((char *) s); + vec_free (s); + + if (dir) + { + closedir (dir); + return 0; + } + + return -1; +} + +static int +create_packet_v2_sock (int host_if_index, tpacket_req_t * rx_req, tpacket_req_t * tx_req, int *fd, u8 ** ring) { int ret, err; struct sockaddr_ll sll; - uint host_if_index; int ver = TPACKET_V2; socklen_t req_sz = sizeof (struct tpacket_req); u32 ring_sz = rx_req->tp_block_size * rx_req->tp_block_nr + tx_req->tp_block_size * tx_req->tp_block_nr; - host_if_index = if_nametoindex ((const char *) name); - - if (!host_if_index) - { - DBG_SOCK ("Wrong host interface name"); - ret = VNET_API_ERROR_INVALID_INTERFACE; - goto error; - } - if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0) { DBG_SOCK ("Failed to create socket"); @@ -190,6 +200,7 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, uword *p; uword if_index; u8 *host_if_name_dup = vec_dup (host_if_name); + int host_if_index = -1; p = mhash_get (&apm->if_index_by_host_if_name, host_if_name); if (p) @@ -209,15 +220,29 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR; tx_req->tp_frame_nr = AF_PACKET_TX_FRAME_NR; - ret = create_packet_v2_sock (host_if_name, rx_req, tx_req, &fd, &ring); + host_if_index = if_nametoindex ((const char *) host_if_name); + + if (!host_if_index) + { + DBG_SOCK ("Wrong host interface name"); + return VNET_API_ERROR_INVALID_INTERFACE; + } + + ret = create_packet_v2_sock (host_if_index, rx_req, tx_req, &fd, &ring); if (ret != 0) goto error; + ret = is_bridge (host_if_name); + + if (ret == 0) /* is a bridge, ignore state */ + host_if_index = -1; + /* So far everything looks good, let's create interface */ pool_get (apm->interfaces, apif); if_index = apif - apm->interfaces; + apif->host_if_index = host_if_index; apif->fd = fd; apif->rx_ring = ring; apif->tx_ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr; @@ -341,6 +366,7 @@ af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name) vec_free (apif->host_if_name); apif->host_if_name = NULL; + apif->host_if_index = -1; mhash_unset (&apm->if_index_by_host_if_name, host_if_name, &if_index); diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h index 77a2c7a3..194977f0 100644 --- a/src/vnet/devices/af_packet/af_packet.h +++ b/src/vnet/devices/af_packet/af_packet.h @@ -24,6 +24,7 @@ typedef struct CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); clib_spinlock_t lockp; u8 *host_if_name; + int host_if_index; int fd; struct tpacket_req *rx_req; struct tpacket_req *tx_req; diff --git 
a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c index 2ba3f579..2a17e6b3 100644 --- a/src/vnet/devices/af_packet/device.c +++ b/src/vnet/devices/af_packet/device.c @@ -18,6 +18,8 @@ */ #include +#include +#include #include #include @@ -205,17 +207,51 @@ af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, hw->dev_instance); u32 hw_flags; + int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0); + struct ifreq ifr; + + /* if interface is a bridge ignore */ + if (apif->host_if_index < 0) + return 0; /* no error */ + + /* use host_if_index in case host name has changed */ + ifr.ifr_ifindex = apif->host_if_index; + if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0) + { + clib_unix_warning ("af_packet_%s ioctl could not retrieve eth name", + apif->host_if_name); + } apif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + if ((rv = ioctl (fd, SIOCGIFFLAGS, &ifr)) < 0) + { + clib_unix_warning ("af_packet_%s error: %d", + apif->is_admin_up ? "up" : "down", rv); + } + if (apif->is_admin_up) - hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP; + { + hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP; + ifr.ifr_flags |= IFF_UP; + } else - hw_flags = 0; + { + hw_flags = 0; + ifr.ifr_flags &= ~IFF_UP; + } + + if ((rv = ioctl (fd, SIOCSIFFLAGS, &ifr)) < 0) + { + clib_unix_warning ("af_packet_%s error: %d", + apif->is_admin_up ? "up" : "down", rv); + } vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); - return 0; + close (fd); + + return 0; /* no error */ } static clib_error_t * -- cgit 1.2.3-korg From 4403690cda44134af3b9ea78d33a5cbf78a5acc9 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 28 Apr 2017 12:29:15 +0200 Subject: Add interface rx mode commands, unify rx mode and placement CLI Change-Id: Ib506c3e9d66170f29e3266ad6dc4d32b829befba Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/init.c | 10 +- src/vnet/api_errno.h | 3 +- src/vnet/devices/af_packet/af_packet.c | 18 ++- src/vnet/devices/devices.c | 172 +++--------------------- src/vnet/devices/devices.h | 34 ++--- src/vnet/interface.h | 15 +++ src/vnet/interface_cli.c | 238 +++++++++++++++++++++++++++++++++ src/vnet/interface_format.c | 17 +++ src/vnet/interface_funcs.h | 1 + 9 files changed, 326 insertions(+), 182 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 6f51ff64..2d21bfd9 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -704,8 +704,8 @@ dpdk_lib_init (dpdk_main_t * dm) sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->hw_if_index); xd->vlib_sw_if_index = sw->sw_if_index; - vnet_set_device_input_node (dm->vnet_main, xd->hw_if_index, - dpdk_input_node.index); + vnet_hw_interface_set_input_node (dm->vnet_main, xd->hw_if_index, + dpdk_input_node.index); if (devconf->workers) { @@ -713,7 +713,7 @@ dpdk_lib_init (dpdk_main_t * dm) q = 0; /* *INDENT-OFF* */ clib_bitmap_foreach (i, devconf->workers, ({ - vnet_device_input_assign_thread (dm->vnet_main, xd->hw_if_index, q++, + vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q++, vdm->first_worker_thread_index + i); })); /* *INDENT-ON* */ @@ -721,8 +721,8 @@ dpdk_lib_init (dpdk_main_t * dm) else for (q = 0; q < xd->rx_q_used; q++) { - vnet_device_input_assign_thread (dm->vnet_main, xd->hw_if_index, q, /* any */ - ~1); + vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q, /* any */ + ~1); } hi = vnet_get_hw_interface (dm->vnet_main, 
xd->hw_if_index); diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 0d5b2227..b87c197f 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -109,7 +109,8 @@ _(ENTRY_ALREADY_EXISTS, -116, "Entry already exists") \ _(SVM_SEGMENT_CREATE_FAIL, -117, "svm segment create fail") \ _(APPLICATION_NOT_ATTACHED, -118, "application not attached") \ _(BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \ -_(BD_IN_USE, -120, "Bridge domain has member interfaces") +_(BD_IN_USE, -120, "Bridge domain has member interfaces") \ +_(UNSUPPORTED, -121, "Unsupported") typedef enum { diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 92bd1092..cb52e6da 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -195,6 +195,7 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, u8 hw_addr[6]; clib_error_t *error; vnet_sw_interface_t *sw; + vnet_hw_interface_t *hw; vlib_thread_main_t *tm = vlib_get_thread_main (); vnet_main_t *vnm = vnet_get_main (); uword *p; @@ -294,17 +295,21 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, } sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index); + hw = vnet_get_hw_interface (vnm, apif->hw_if_index); apif->sw_if_index = sw->sw_if_index; - vnet_set_device_input_node (vnm, apif->hw_if_index, - af_packet_input_node.index); - vnet_device_input_assign_thread (vnm, apif->hw_if_index, 0, /* queue */ - ~0 /* any cpu */ ); - vnet_device_input_set_mode (vnm, apif->hw_if_index, 0, - VNET_DEVICE_INPUT_MODE_INTERRUPT); + vnet_hw_interface_set_input_node (vnm, apif->hw_if_index, + af_packet_input_node.index); + vnet_hw_interface_assign_rx_thread (vnm, apif->hw_if_index, 0, /* queue */ + ~0 /* any cpu */ ); + + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; vnet_hw_interface_set_flags (vnm, apif->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); + vnet_hw_interface_set_rx_mode (vnm, apif->hw_if_index, 0, + VNET_HW_INTERFACE_RX_MODE_INTERRUPT); + mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index, 0); if (sw_if_index) @@ -340,6 +345,7 @@ af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name) /* bring down the interface */ vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0); + vnet_hw_interface_unassign_rx_thread (vnm, apif->hw_if_index, 0); /* clean up */ if (apif->unix_file_index != ~0) diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index 2f55adcb..d75d905a 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -119,8 +119,8 @@ vnet_device_queue_update (vnet_main_t * vnm, vnet_device_input_runtime_t * rt) } void -vnet_device_input_assign_thread (vnet_main_t * vnm, u32 hw_if_index, - u16 queue_id, uword thread_index) +vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id, uword thread_index) { vnet_device_main_t *vdm = &vnet_device_main; vlib_main_t *vm; @@ -149,16 +149,19 @@ vnet_device_input_assign_thread (vnet_main_t * vnm, u32 hw_if_index, dq->hw_if_index = hw_if_index; dq->dev_instance = hw->dev_instance; dq->queue_id = queue_id; + dq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING; vnet_device_queue_update (vnm, rt); vec_validate (hw->input_node_thread_index_by_queue, queue_id); + vec_validate (hw->rx_mode_by_queue, queue_id); hw->input_node_thread_index_by_queue[queue_id] = thread_index; + hw->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_POLLING; vlib_node_set_state (vm, hw->input_node_index, 
rt->enabled_node_state); } int -vnet_device_input_unassign_thread (vnet_main_t * vnm, u32 hw_if_index, - u16 queue_id, uword thread_index) +vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id) { vlib_main_t *vm; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); @@ -190,6 +193,7 @@ vnet_device_input_unassign_thread (vnet_main_t * vnm, u32 hw_if_index, deleted: vnet_device_queue_update (vnm, rt); + hw->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_UNKNOWN; if (vec_len (rt->devices_and_queues) == 0) vlib_node_set_state (vm, hw->input_node_index, VLIB_NODE_STATE_DISABLED); @@ -199,21 +203,28 @@ deleted: int -vnet_device_input_set_mode (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id, - vnet_device_input_mode_t mode) +vnet_hw_interface_set_rx_mode (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id, vnet_hw_interface_rx_mode mode) { vlib_main_t *vm; uword thread_index; vnet_device_and_queue_t *dq; vlib_node_state_t enabled_node_state; - ASSERT (mode < VNET_DEVICE_INPUT_N_MODES); + ASSERT (mode < VNET_HW_INTERFACE_NUM_RX_MODES); vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); vnet_device_input_runtime_t *rt; int is_polling = 0; - if (hw->input_node_thread_index_by_queue == 0) + if (hw->input_node_thread_index_by_queue == 0 || hw->rx_mode_by_queue == 0) return VNET_API_ERROR_INVALID_INTERFACE; + if (hw->rx_mode_by_queue[queue_id] == mode) + return 0; + + if (mode != VNET_HW_INTERFACE_RX_MODE_POLLING && + (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE) == 0) + return VNET_API_ERROR_UNSUPPORTED; + thread_index = hw->input_node_thread_index_by_queue[queue_id]; vm = vlib_mains[thread_index]; @@ -223,7 +234,7 @@ vnet_device_input_set_mode (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id, { if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id) dq->mode = mode; - if (dq->mode == VNET_DEVICE_INPUT_MODE_POLLING) + if (dq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING) is_polling = 1; } @@ -244,8 +255,8 @@ vnet_device_input_set_mode (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id, } int -vnet_device_input_get_mode (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id, - vnet_device_input_mode_t * mode) +vnet_hw_interface_get_rx_mode (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id, vnet_hw_interface_rx_mode * mode) { vlib_main_t *vm; uword thread_index; @@ -271,146 +282,7 @@ vnet_device_input_get_mode (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id, return VNET_API_ERROR_INVALID_INTERFACE; } -static clib_error_t * -show_device_placement_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - u8 *s = 0; - vnet_main_t *vnm = vnet_get_main (); - vnet_device_input_runtime_t *rt; - vnet_device_and_queue_t *dq; - vlib_node_t *pn = vlib_get_node_by_name (vm, (u8 *) "device-input"); - uword si; - int index = 0; - - /* *INDENT-OFF* */ - foreach_vlib_main (({ - clib_bitmap_foreach (si, pn->sibling_bitmap, - ({ - rt = vlib_node_get_runtime_data (this_vlib_main, si); - - if (vec_len (rt->devices_and_queues)) - s = format (s, " node %U:\n", format_vlib_node_name, vm, si); - - vec_foreach (dq, rt->devices_and_queues) - { - s = format (s, " %U queue %u (%s)\n", - format_vnet_sw_if_index_name, vnm, dq->hw_if_index, - dq->queue_id, - dq->mode == VNET_DEVICE_INPUT_MODE_POLLING ? 
- "polling" : "interrupt"); - } - })); - if (vec_len (s) > 0) - { - vlib_cli_output(vm, "Thread %u (%v):\n%v", index, - vlib_worker_threads[index].name, s); - vec_reset_length (s); - } - index++; - })); - /* *INDENT-ON* */ - - vec_free (s); - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (memif_delete_command, static) = { - .path = "show interface placement", - .short_help = "show interface placement", - .function = show_device_placement_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_device_placement (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - clib_error_t *error = 0; - unformat_input_t _line_input, *line_input = &_line_input; - vnet_main_t *vnm = vnet_get_main (); - vnet_device_main_t *vdm = &vnet_device_main; - vnet_device_input_mode_t mode; - u32 hw_if_index = (u32) ~ 0; - u32 queue_id = (u32) 0; - u32 thread_index = (u32) ~ 0; - int rv; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) - ; - else if (unformat (line_input, "queue %d", &queue_id)) - ; - else if (unformat (line_input, "main", &thread_index)) - thread_index = 0; - else if (unformat (line_input, "worker %d", &thread_index)) - thread_index += vdm->first_worker_thread_index; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - unformat_free (line_input); - return error; - } - } - - unformat_free (line_input); - - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); - - if (thread_index > vdm->last_worker_thread_index) - return clib_error_return (0, - "please specify valid worker thread or main"); - - rv = vnet_device_input_get_mode (vnm, hw_if_index, queue_id, &mode); - - if (rv) - return clib_error_return (0, "not found"); - - rv = vnet_device_input_unassign_thread (vnm, hw_if_index, queue_id, - thread_index); - if (rv) - return clib_error_return (0, "not found"); - - vnet_device_input_assign_thread (vnm, hw_if_index, queue_id, thread_index); - vnet_device_input_set_mode (vnm, hw_if_index, queue_id, mode); - - return 0; -} - -/*? - * This command is used to assign a given interface, and optionally a - * given queue, to a different thread. If the 'queue' is not provided, - * it defaults to 0. 
- * - * @cliexpar - * Example of how to display the interface placement: - * @cliexstart{show interface placement} - * Thread 1 (vpp_wk_0): - * GigabitEthernet0/8/0 queue 0 - * GigabitEthernet0/9/0 queue 0 - * Thread 2 (vpp_wk_1): - * GigabitEthernet0/8/0 queue 1 - * GigabitEthernet0/9/0 queue 1 - * @cliexend - * Example of how to assign a interface and queue to a thread: - * @cliexcmd{set interface placement GigabitEthernet0/8/0 queue 1 thread 1} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { - .path = "set interface placement", - .short_help = "set interface placement [queue ] [thread | main]", - .function = set_device_placement, -}; -/* *INDENT-ON* */ static clib_error_t * vnet_device_init (vlib_main_t * vm) diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index baf03b7c..f1f7e778 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -55,19 +55,12 @@ typedef struct uword next_worker_thread_index; } vnet_device_main_t; -typedef enum -{ - VNET_DEVICE_INPUT_MODE_POLLING = 0, - VNET_DEVICE_INPUT_MODE_INTERRUPT, - VNET_DEVICE_INPUT_N_MODES, -} vnet_device_input_mode_t; - typedef struct { u32 hw_if_index; u32 dev_instance; u16 queue_id; - vnet_device_input_mode_t mode; + vnet_hw_interface_rx_mode mode; uword interrupt_pending; } vnet_device_and_queue_t; @@ -82,22 +75,23 @@ extern vlib_node_registration_t device_input_node; extern const u32 device_input_next_node_advance[]; static inline void -vnet_set_device_input_node (vnet_main_t * vnm, u32 hw_if_index, - u32 node_index) +vnet_hw_interface_set_input_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) { vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); hw->input_node_index = node_index; } -void vnet_device_input_assign_thread (vnet_main_t * vnm, u32 hw_if_index, - u16 queue_id, uword thread_index); -int vnet_device_input_unassign_thread (vnet_main_t * vnm, u32 hw_if_index, - u16 queue_id, uword thread_index); -int vnet_device_input_set_mode (vnet_main_t * vnm, u32 hw_if_index, - u16 queue_id, vnet_device_input_mode_t mode); -int vnet_device_input_get_mode (vnet_main_t * vnm, u32 hw_if_index, - u16 queue_id, - vnet_device_input_mode_t * mode); +void vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id, uword thread_index); +int vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id); +int vnet_hw_interface_set_rx_mode (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id, + vnet_hw_interface_rx_mode mode); +int vnet_hw_interface_get_rx_mode (vnet_main_t * vnm, u32 hw_if_index, + u16 queue_id, + vnet_hw_interface_rx_mode * mode); static inline u64 vnet_get_aggregate_rx_packets (void) @@ -161,7 +155,7 @@ vnet_device_input_set_interrupt_pending (vnet_main_t * vnm, u32 hw_if_index, #define foreach_device_and_queue(var,vec) \ for (var = (vec); var < vec_end (vec); var++) \ if (clib_smp_swap (&((var)->interrupt_pending), 0) || \ - var->mode == VNET_DEVICE_INPUT_MODE_POLLING) + var->mode == VNET_HW_INTERFACE_RX_MODE_POLLING) #endif /* included_vnet_vnet_device_h */ diff --git a/src/vnet/interface.h b/src/vnet/interface.h index 9c223040..2344348b 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -405,6 +405,9 @@ typedef struct vnet_hw_interface_t #define VNET_HW_INTERFACE_FLAG_L2OUTPUT_SHIFT 9 #define VNET_HW_INTERFACE_FLAG_L2OUTPUT_MAPPED (1 << 9) + /* rx mode flags */ +#define VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE (1 << 10) + /* Hardware address as vector. 
Zero (e.g. zero-length vector) if no address for this class (e.g. PPP). */ u8 *hw_address; @@ -470,6 +473,9 @@ typedef struct vnet_hw_interface_t /* input node cpu index by queue */ u32 *input_node_thread_index_by_queue; + /* vnet_hw_interface_rx_mode by queue */ + u8 *rx_mode_by_queue; + /* device input device_and_queue runtime index */ uword *dq_runtime_index_by_queue; @@ -486,6 +492,15 @@ typedef enum VNET_SW_INTERFACE_TYPE_SUB, } vnet_sw_interface_type_t; +typedef enum +{ + VNET_HW_INTERFACE_RX_MODE_UNKNOWN, + VNET_HW_INTERFACE_RX_MODE_POLLING, + VNET_HW_INTERFACE_RX_MODE_INTERRUPT, + VNET_HW_INTERFACE_RX_MODE_ADAPTIVE, + VNET_HW_INTERFACE_NUM_RX_MODES, +} vnet_hw_interface_rx_mode; + typedef struct { /* diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index 94eb7ea9..bfce03e1 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -1175,7 +1175,245 @@ VLIB_CLI_COMMAND (clear_tag_command, static) = { }; /* *INDENT-ON* */ +static clib_error_t * +set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hw; + u32 hw_if_index = (u32) ~ 0; + u32 queue_id = (u32) ~ 0; + vnet_hw_interface_rx_mode mode = VNET_HW_INTERFACE_RX_MODE_UNKNOWN; + int i, rv = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + ; + else if (unformat (line_input, "queue %d", &queue_id)) + ; + else if (unformat (line_input, "polling")) + mode = VNET_HW_INTERFACE_RX_MODE_POLLING; + else if (unformat (line_input, "interrupt")) + mode = VNET_HW_INTERFACE_RX_MODE_INTERRUPT; + else if (unformat (line_input, "adaptive")) + mode = VNET_HW_INTERFACE_RX_MODE_ADAPTIVE; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + unformat_free (line_input); + return error; + } + } + + unformat_free (line_input); + + if (hw_if_index == (u32) ~ 0) + return clib_error_return (0, "please specify valid interface name"); + if (mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN) + return clib_error_return (0, "please specify valid rx-mode"); + + hw = vnet_get_hw_interface (vnm, hw_if_index); + + if (queue_id == ~0) + for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++) + { + rv = vnet_hw_interface_set_rx_mode (vnm, hw_if_index, i, mode); + if (rv) + goto error; + } + else + rv = vnet_hw_interface_set_rx_mode (vnm, hw_if_index, queue_id, mode); + + if (rv) + goto error; + + return 0; + +error: + if (rv == VNET_API_ERROR_UNSUPPORTED) + return clib_error_return (0, "unsupported"); + + if (rv == VNET_API_ERROR_INVALID_INTERFACE) + return clib_error_return (0, "invalid interface"); + + return clib_error_return (0, "unknown error"); + +/*? + * This command is used to set the rx processing mode (polling, interrupt, + * or adaptive) of a given interface, and optionally a given queue. If + * 'queue' is not provided, the mode is applied to all queues of the + * interface. 
+ * + * @cliexpar + * Example of how to set the rx-mode of an interface and queue: + * @cliexcmd{set interface rx-mode GigabitEthernet0/8/0 queue 0 interrupt} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_if_rx_mode,static) = { + .path = "set interface rx-mode", + .short_help = "set interface rx-mode [queue ] [polling | interrupt | adaptive]", + .function = set_interface_rx_mode, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_interface_rx_placement_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *s = 0; + vnet_main_t *vnm = vnet_get_main (); + vnet_device_input_runtime_t *rt; + vnet_device_and_queue_t *dq; + vlib_node_t *pn = vlib_get_node_by_name (vm, (u8 *) "device-input"); + uword si; + int index = 0; + + /* *INDENT-OFF* */ + foreach_vlib_main (({ + clib_bitmap_foreach (si, pn->sibling_bitmap, + ({ + rt = vlib_node_get_runtime_data (this_vlib_main, si); + + if (vec_len (rt->devices_and_queues)) + s = format (s, " node %U:\n", format_vlib_node_name, vm, si); + + vec_foreach (dq, rt->devices_and_queues) + { + s = format (s, " %U queue %u (%U)\n", + format_vnet_sw_if_index_name, vnm, dq->hw_if_index, + dq->queue_id, + format_vnet_hw_interface_rx_mode, dq->mode); + } + })); + if (vec_len (s) > 0) + { + vlib_cli_output(vm, "Thread %u (%v):\n%v", index, + vlib_worker_threads[index].name, s); + vec_reset_length (s); + } + index++; + })); + /* *INDENT-ON* */ + + vec_free (s); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_interface_rx_placement, static) = { + .path = "show interface rx-placement", + .short_help = "show interface rx-placement", + .function = show_interface_rx_placement_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_interface_rx_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + vnet_device_main_t *vdm = &vnet_device_main; + vnet_hw_interface_rx_mode mode; + u32 hw_if_index = (u32) ~ 0; + u32 queue_id = (u32) 0; + u32 thread_index = (u32) ~ 0; + int rv; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + ; + else if (unformat (line_input, "queue %d", &queue_id)) + ; + else if (unformat (line_input, "main", &thread_index)) + thread_index = 0; + else if (unformat (line_input, "worker %d", &thread_index)) + thread_index += vdm->first_worker_thread_index; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + unformat_free (line_input); + return error; + } + } + + unformat_free (line_input); + + if (hw_if_index == (u32) ~ 0) + return clib_error_return (0, "please specify valid interface name"); + + if (thread_index > vdm->last_worker_thread_index) + return clib_error_return (0, + "please specify valid worker thread or main"); + + rv = vnet_hw_interface_get_rx_mode (vnm, hw_if_index, queue_id, &mode); + + if (rv) + return clib_error_return (0, "not found"); + + rv = vnet_hw_interface_unassign_rx_thread (vnm, hw_if_index, queue_id); 
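/* The sequence here preserves the queue's rx mode across a thread
 * reassignment: the current mode is read above with
 * vnet_hw_interface_get_rx_mode, the queue is detached from its old thread
 * here, and below it is re-attached to the requested thread and the saved
 * mode is restored. A minimal sketch of the same sequence for queue 0,
 * with a hypothetical new_thread value:
 *
 *   vnet_hw_interface_rx_mode m;
 *   vnet_hw_interface_get_rx_mode (vnm, hw_if_index, 0, &m);
 *   vnet_hw_interface_unassign_rx_thread (vnm, hw_if_index, 0);
 *   vnet_hw_interface_assign_rx_thread (vnm, hw_if_index, 0, new_thread);
 *   vnet_hw_interface_set_rx_mode (vnm, hw_if_index, 0, m);
 */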
+ + if (rv) + return clib_error_return (0, "not found"); + + vnet_hw_interface_assign_rx_thread (vnm, hw_if_index, queue_id, + thread_index); + vnet_hw_interface_set_rx_mode (vnm, hw_if_index, queue_id, mode); + + return 0; +} + +/*? + * This command is used to assign a given interface, and optionally a + * given queue, to a different thread. If the 'queue' is not provided, + * it defaults to 0. + * + * @cliexpar + * Example of how to display the interface placement: + * @cliexstart{show interface rx-placement} + * Thread 1 (vpp_wk_0): + * GigabitEthernet0/8/0 queue 0 + * GigabitEthernet0/9/0 queue 0 + * Thread 2 (vpp_wk_1): + * GigabitEthernet0/8/0 queue 1 + * GigabitEthernet0/9/0 queue 1 + * @cliexend + * Example of how to assign an interface and queue to a worker thread: + * @cliexcmd{set interface rx-placement GigabitEthernet0/8/0 queue 1 worker 1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_if_rx_placement,static) = { + .path = "set interface rx-placement", + .short_help = "set interface rx-placement [queue ] [worker | main]", + .function = set_interface_rx_placement, +}; + +/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c index b961c778..03caf5c6 100644 --- a/src/vnet/interface_format.c +++ b/src/vnet/interface_format.c @@ -58,6 +58,23 @@ format_vnet_sw_interface_flags (u8 * s, va_list * args) return s; } +u8 * +format_vnet_hw_interface_rx_mode (u8 * s, va_list * args) +{ + vnet_hw_interface_rx_mode mode = va_arg (*args, vnet_hw_interface_rx_mode); + + if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + return format (s, "polling"); + + if (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) + return format (s, "interrupt"); + + if (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE) + return format (s, "adaptive"); + + return format (s, "unknown"); +} + u8 * format_vnet_hw_interface (u8 * s, va_list * args) { diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h index b3aca2fd..999b72e5 100644 --- a/src/vnet/interface_funcs.h +++ b/src/vnet/interface_funcs.h @@ -277,6 +277,7 @@ clib_error_t *vnet_hw_interface_change_mac_address (vnet_main_t * vnm, /* Formats sw/hw interface. 
*/ format_function_t format_vnet_hw_interface; +format_function_t format_vnet_hw_interface_rx_mode; format_function_t format_vnet_sw_interface; format_function_t format_vnet_sw_interface_name; format_function_t format_vnet_sw_interface_name_override; -- cgit 1.2.3-korg From 7bee80c823ca77de3aca803fdede77e4c7385a52 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 26 Apr 2017 15:32:12 +0200 Subject: Fix remaining 32-bit compile issues Change-Id: I9664214652229b663c3e3ba7406b4ede96bfb123 Signed-off-by: Damjan Marion --- Makefile | 8 ++++---- build-data/platforms/vpp.mk | 5 +++++ src/plugins/dpdk/buffer.c | 6 +++--- src/plugins/ixge/ixge.c | 5 +++-- src/svm/svm.c | 11 ++++++----- src/svm/svmtool.c | 4 ++-- src/tools/vppapigen/gram.y | 6 +++--- src/tools/vppapigen/node.c | 4 ++-- src/uri/uri_tcp_test.c | 21 ++++++++++++--------- src/uri/uri_udp_test.c | 15 +++++++++------ src/vat/api_format.c | 10 +++++----- src/vlib/threads.c | 2 +- src/vlibmemory/memory_client.c | 2 +- src/vlibmemory/memory_vlib.c | 3 ++- src/vnet/devices/virtio/vhost-user.c | 2 +- src/vnet/session/application_interface.c | 2 +- src/vnet/session/session_api.c | 14 +++++++------- src/vnet/tcp/builtin_client.c | 19 +++++++++++-------- src/vnet/tcp/builtin_server.c | 2 +- src/vppinfra/mheap.c | 2 +- 20 files changed, 80 insertions(+), 63 deletions(-) (limited to 'src/vnet/devices') diff --git a/Makefile b/Makefile index 8240e789..b344f377 100644 --- a/Makefile +++ b/Makefile @@ -263,9 +263,9 @@ define test TEST_DIR=$(WS_ROOT)/test \ VPP_TEST_BUILD_DIR=$(BR)/build-$(2)-native \ VPP_TEST_BIN=$(BR)/install-$(2)-native/vpp/bin/vpp \ - VPP_TEST_PLUGIN_PATH=$(BR)/install-$(2)-native/vpp/lib64/vpp_plugins \ + VPP_TEST_PLUGIN_PATH=$(wildcard $(BR)/install-$(2)-native/vpp/lib*/vpp_plugins) \ VPP_TEST_INSTALL_PATH=$(BR)/install-$(2)-native/ \ - LD_LIBRARY_PATH=$(BR)/install-$(2)-native/vpp/lib64/ \ + LD_LIBRARY_PATH=$(subst $(subst ,, ),:,$(wildcard $(BR)/install-$(2)-native/vpp/lib*/)) \ EXTENDED_TESTS=$(EXTENDED_TESTS) \ PYTHON=$(PYTHON) \ $(3) @@ -325,12 +325,12 @@ define run @echo "WARNING: STARTUP_CONF not defined or file doesn't exist." 
@echo " Running with minimal startup config: $(MINIMAL_STARTUP_CONF)\n" @cd $(STARTUP_DIR) && \ - sudo $(2) $(1)/vpp/bin/vpp $(MINIMAL_STARTUP_CONF) plugin_path $(1)/vpp/lib64/vpp_plugins + sudo $(2) $(1)/vpp/bin/vpp $(MINIMAL_STARTUP_CONF) plugin_path $(wildcard $(1)/vpp/lib*/vpp_plugins) endef else define run @cd $(STARTUP_DIR) && \ - sudo $(2) $(1)/vpp/bin/vpp $(shell cat $(STARTUP_CONF) | sed -e 's/#.*//') plugin_path $(1)/vpp/lib64/vpp_plugins + sudo $(2) $(1)/vpp/bin/vpp $(shell cat $(STARTUP_CONF) | sed -e 's/#.*//') plugin_path $(wildcard $(1)/vpp/lib*/vpp_plugins) endef endif diff --git a/build-data/platforms/vpp.mk b/build-data/platforms/vpp.mk index 5aafdd76..4577fa2e 100644 --- a/build-data/platforms/vpp.mk +++ b/build-data/platforms/vpp.mk @@ -46,6 +46,11 @@ vpp_root_packages = vpp gmod # vpp_dpdk_lib_dir = /usr/lib # vpp_dpdk_shared_lib = yes +# load balancer plugin is not portable on 32 bit platform +ifeq ($(MACHINE),i686) +vpp_configure_args_vpp = --disable-lb-plugin +endif + vpp_debug_TAG_CFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \ -fstack-protector-all -fPIC -Werror vpp_debug_TAG_LDFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \ diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c index c80b3fa8..2d4762ab 100644 --- a/src/plugins/dpdk/buffer.c +++ b/src/plugins/dpdk/buffer.c @@ -455,8 +455,8 @@ vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, uword save_vpm_start, save_vpm_end, save_vpm_size; struct rte_mempool_memhdr *memhdr; - this_pool_start = ~0ULL; - this_pool_end = 0LL; + this_pool_start = ~0; + this_pool_end = 0; STAILQ_FOREACH (memhdr, &rmp->mem_list, next) { @@ -465,7 +465,7 @@ vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, if (((uword) memhdr->addr) < this_pool_start) this_pool_start = (uword) (memhdr->addr); } - ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); + ASSERT (this_pool_start < ~0 && this_pool_end > 0); this_pool_size = this_pool_end - this_pool_start; if (CLIB_DEBUG > 1) diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c index 08f5b692..0d287250 100644 --- a/src/plugins/ixge/ixge.c +++ b/src/plugins/ixge/ixge.c @@ -20,7 +20,7 @@ * Please use supported DPDK driver instead. */ -#if __x86_64__ +#if __x86_64__ || __i386__ #include #ifndef CLIB_HAVE_VEC128 @@ -2929,7 +2929,6 @@ ixge_set_next_node (ixge_rx_next_t next, char *name) break; } } -#endif /* *INDENT-OFF* */ VLIB_PLUGIN_REGISTER () = { @@ -2937,8 +2936,10 @@ VLIB_PLUGIN_REGISTER () = { .default_disabled = 1, .description = "Intel 82599 Family Native Driver (experimental)", }; +#endif /* *INDENT-ON* */ + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/svm/svm.c b/src/svm/svm.c index 97add5a7..c96135cf 100644 --- a/src/svm/svm.c +++ b/src/svm/svm.c @@ -491,7 +491,7 @@ svm_map_region (svm_map_region_args_t * a) return (0); } - rp = mmap ((void *) a->baseva, a->size, + rp = mmap (uword_to_pointer (a->baseva, void *), a->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); if (rp == (svm_region_t *) MAP_FAILED) @@ -533,9 +533,10 @@ svm_map_region (svm_map_region_args_t * a) rp->virtual_size = a->size; rp->region_heap = - mheap_alloc_with_flags ((void *) (a->baseva + MMAP_PAGESIZE), - (a->pvt_heap_size != 0) ? - a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, + mheap_alloc_with_flags (uword_to_pointer + (a->baseva + MMAP_PAGESIZE, void *), + (a->pvt_heap_size != + 0) ? 
a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, MHEAP_FLAG_DISABLE_VM); oldheap = svm_push_pvt_heap (rp); @@ -661,7 +662,7 @@ svm_map_region (svm_map_region_args_t * a) a->size = rp->virtual_size; munmap (rp, MMAP_PAGESIZE); - rp = (void *) mmap ((void *) a->baseva, a->size, + rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); if ((uword) rp == (uword) MAP_FAILED) diff --git a/src/svm/svmtool.c b/src/svm/svmtool.c index b3195514..01ae4221 100644 --- a/src/svm/svmtool.c +++ b/src/svm/svmtool.c @@ -172,7 +172,7 @@ svm_map_region_nolock (svm_map_region_args_t * a) a->size = rp->virtual_size; munmap (rp, MMAP_PAGESIZE); - rp = (void *) mmap ((void *) a->baseva, a->size, + rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); if ((uword) rp == (uword) MAP_FAILED) @@ -401,7 +401,7 @@ repair (char *chroot_path, int crash_root_region) a->size = root_rp->virtual_size; munmap (root_rp, MMAP_PAGESIZE); - root_rp = (void *) mmap ((void *) a->baseva, a->size, + root_rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); if ((uword) root_rp == (uword) MAP_FAILED) diff --git a/src/tools/vppapigen/gram.y b/src/tools/vppapigen/gram.y index 9cea6023..52bb65c5 100644 --- a/src/tools/vppapigen/gram.y +++ b/src/tools/vppapigen/gram.y @@ -53,9 +53,9 @@ stmt: flist defn {$$ = set_flags($1, $2);} | defn {$$ = $1;} ; -flist: flist flag {$$ = (YYSTYPE)(unsigned long long) - ((unsigned long long) $1 - | (unsigned long long) $2);} +flist: flist flag {$$ = (YYSTYPE)(unsigned long) + ((unsigned long) $1 + | (unsigned long) $2);} | flag {$$ = $1;} ; diff --git a/src/tools/vppapigen/node.c b/src/tools/vppapigen/node.c index 9f234037..15868ee5 100644 --- a/src/tools/vppapigen/node.c +++ b/src/tools/vppapigen/node.c @@ -397,7 +397,7 @@ void node_define_generate (node_t *this, enum passid which, FILE *fp) fprintf(fp, ",\n"); } indent_me(fp); - fprintf (fp, "{\"crc\" : \"0x%08x\"}\n", (u32)(u64)CDATA3); + fprintf (fp, "{\"crc\" : \"0x%08x\"}\n", (u32)(uword)CDATA3); indent -= 4; indent_me(fp); fprintf(fp, "]"); @@ -1219,7 +1219,7 @@ void generate_msg_name_crc_list (YYSTYPE a1, FILE *fp) if (!(np->flags & NODE_FLAG_TYPEONLY)) { fprintf (fp, "\\\n_(VL_API_%s, %s, %08x) ", uppercase (np->data[0]), (i8 *) np->data[0], - (u32)(u64)np->data[3]); + (u32)(uword)np->data[3]); } } np = np->peer; diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c index b15fd6ce..22f246e5 100755 --- a/src/uri/uri_tcp_test.c +++ b/src/uri/uri_tcp_test.c @@ -262,7 +262,8 @@ vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t * } utm->our_event_queue = - (unix_shared_memory_queue_t *) mp->app_event_queue_address; + uword_to_pointer (mp->app_event_queue_address, + unix_shared_memory_queue_t *); utm->state = STATE_ATTACHED; } @@ -524,8 +525,9 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) return; } - utm->vpp_event_queue = (unix_shared_memory_queue_t *) - mp->vpp_event_queue_address; + utm->vpp_event_queue = + uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); /* * Setup session @@ -534,9 +536,9 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) pool_get (utm->sessions, session); session_index = session - utm->sessions; - rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *); 
rx_fifo->client_session_index = session_index; - tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); tx_fifo->client_session_index = session_index; session->server_rx_fifo = rx_fifo; @@ -858,16 +860,17 @@ vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) ip_str = format (0, "%U", format_ip46_address, &mp->ip, mp->is_ip4); clib_warning ("Accepted session from: %s:%d", ip_str, clib_net_to_host_u16 (mp->port)); - utm->vpp_event_queue = (unix_shared_memory_queue_t *) - mp->vpp_event_queue_address; + utm->vpp_event_queue = + uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); /* Allocate local session and set it up */ pool_get (utm->sessions, session); session_index = session - utm->sessions; - rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *); rx_fifo->client_session_index = session_index; - tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); tx_fifo->client_session_index = session_index; session->server_rx_fifo = rx_fifo; diff --git a/src/uri/uri_udp_test.c b/src/uri/uri_udp_test.c index 266215c8..8fb12ed2 100644 --- a/src/uri/uri_udp_test.c +++ b/src/uri/uri_udp_test.c @@ -232,7 +232,8 @@ vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t * } utm->our_event_queue = - (unix_shared_memory_queue_t *) mp->app_event_queue_address; + uword_to_pointer (mp->app_event_queue_address, + unix_shared_memory_queue_t *); } static void @@ -581,7 +582,8 @@ send_reply: vec_free (a->segment_name); - client_q = (unix_shared_memory_queue_t *) mp->client_queue_address; + client_q = + uword_to_pointer (mp->client_queue_address, unix_shared_memory_queue_t *); vl_msg_api_send_shmem (client_q, (u8 *) & rmp); } @@ -608,14 +610,15 @@ vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) if (start_time == 0.0) start_time = clib_time_now (&utm->clib_time); - utm->vpp_event_queue = (unix_shared_memory_queue_t *) - mp->vpp_event_queue_address; + utm->vpp_event_queue = + uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); pool_get (utm->sessions, session); - rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *); rx_fifo->client_session_index = session - utm->sessions; - tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); tx_fifo->client_session_index = session - utm->sessions; session->server_rx_fifo = rx_fifo; diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 28b227b4..495b660e 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -1037,7 +1037,7 @@ vl_api_cli_reply_t_handler (vl_api_cli_reply_t * mp) i32 retval = ntohl (mp->retval); vam->retval = retval; - vam->shmem_result = (u8 *) mp->reply_in_shmem; + vam->shmem_result = uword_to_pointer (mp->reply_in_shmem, u8 *); vam->result_ready = 1; } @@ -1058,7 +1058,7 @@ vl_api_cli_reply_t_handler_json (vl_api_cli_reply_t * mp) pthread_mutex_lock (&am->vlib_rp->mutex); oldheap = svm_push_data_heap (am->vlib_rp); - reply = (u8 *) (mp->reply_in_shmem); + reply = uword_to_pointer (mp->reply_in_shmem, u8 *); vec_free (reply); svm_pop_heap (oldheap); @@ -2405,7 +2405,7 @@ static void vl_api_get_node_graph_reply_t_handler if (retval != 0) return; - reply = (u8 *) (mp->reply_in_shmem); + reply = uword_to_pointer (mp->reply_in_shmem, u8 *); pvt_copy = vec_dup (reply); /* Toss the 
shared-memory original... */ @@ -2456,7 +2456,7 @@ static void vl_api_get_node_graph_reply_t_handler_json vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); vat_json_object_add_uint (&node, "reply_in_shmem", mp->reply_in_shmem); - reply = (u8 *) (mp->reply_in_shmem); + reply = uword_to_pointer (mp->reply_in_shmem, u8 *); /* Toss the shared-memory original... */ pthread_mutex_lock (&am->vlib_rp->mutex); @@ -4959,7 +4959,7 @@ exec (vat_main_t * vam) svm_pop_heap (oldheap); pthread_mutex_unlock (&am->vlib_rp->mutex); - mp->cmd_in_shmem = (u64) cmd; + mp->cmd_in_shmem = pointer_to_uword (cmd); S (mp); timeout = vat_time_now (vam) + 10.0; diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 4a111f8d..9ccfd3a2 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -1125,7 +1125,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) VLIB_EARLY_CONFIG_FUNCTION (cpu_config, "cpu"); -#if !defined (__x86_64__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__) +#if !defined (__x86_64__) && !defined (__i386__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__) void __sync_fetch_and_add_8 (void) { diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c index d48a4fa1..a162d6bb 100644 --- a/src/vlibmemory/memory_client.c +++ b/src/vlibmemory/memory_client.c @@ -137,7 +137,7 @@ vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp) am->msg_index_by_name_and_crc = hash_create_string (0, sizeof (uword)); /* Recreate the vnet-side API message handler table */ - tblv = (u8 *) mp->message_table; + tblv = uword_to_pointer (mp->message_table, u8 *); serialize_open_vector (sm, tblv); unserialize_integer (sm, &nmsgs, sizeof (u32)); diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 29a5c2c2..acba8b3f 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -216,7 +216,8 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) am->shmem_hdr->application_restarts); rp->context = mp->context; rp->response = ntohl (rv); - rp->message_table = (u64) am->serialized_message_table_in_shmem; + rp->message_table = + pointer_to_uword (am->serialized_message_table_in_shmem); vl_msg_api_send_shmem (q, (u8 *) & rp); } diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index acc7bf82..6ccc0d87 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -719,7 +719,7 @@ vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui, } if (is_host_address) { - addr = (u64) map_user_mem (vui, (uword) addr); + addr = pointer_to_uword (map_user_mem (vui, (uword) addr)); } if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size)) { diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index ad44baa1..f74b0cfe 100644 --- a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -247,7 +247,7 @@ vnet_application_attach (vnet_app_attach_args_t * a) a->session_cb_vft))) return rv; - a->app_event_queue_address = (u64) app->event_queue; + a->app_event_queue_address = pointer_to_uword (app->event_queue); sm = segment_manager_get (app->first_segment_manager); segment_manager_get_segment_info (sm->segment_indices[0], &seg_name, &a->segment_size); diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index 5a02a08e..8266922c 100755 --- a/src/vnet/session/session_api.c +++ 
b/src/vnet/session/session_api.c @@ -102,9 +102,9 @@ send_session_accept_callback (stream_session_t * s) tc = tp_vft->get_connection (s->connection_index, s->thread_index); mp->listener_handle = listen_session_get_handle (listener); mp->handle = stream_session_handle (s); - mp->server_rx_fifo = (u64) s->server_rx_fifo; - mp->server_tx_fifo = (u64) s->server_tx_fifo; - mp->vpp_event_queue_address = (u64) vpp_queue; + mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo); + mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo); + mp->vpp_event_queue_address = pointer_to_uword (vpp_queue); mp->port = tc->rmt_port; mp->is_ip4 = tc->is_ip4; clib_memcpy (&mp->ip, &tc->rmt_ip, sizeof (tc->rmt_ip)); @@ -172,10 +172,10 @@ send_session_connected_callback (u32 app_index, u32 api_context, if (!is_fail) { vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); - mp->server_rx_fifo = (u64) s->server_rx_fifo; - mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo); + mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo); mp->handle = stream_session_handle (s); - mp->vpp_event_queue_address = (u64) vpp_queue; + mp->vpp_event_queue_address = pointer_to_uword (vpp_queue); mp->retval = 0; } else @@ -225,7 +225,7 @@ redirect_connect_callback (u32 server_api_client_index, void *mp_arg) } /* Tell the server the client's API queue address, so it can reply */ - mp->client_queue_address = (u64) client_q; + mp->client_queue_address = pointer_to_uword (client_q); app = application_lookup (mp->client_index); if (!app) { diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c index 32d69a96..6f890874 100644 --- a/src/vnet/tcp/builtin_client.c +++ b/src/vnet/tcp/builtin_client.c @@ -274,11 +274,12 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) return; } - tm->our_event_queue = (unix_shared_memory_queue_t *) - mp->vpp_event_queue_address; - - tm->vpp_event_queue = (unix_shared_memory_queue_t *) - mp->vpp_event_queue_address; + tm->our_event_queue = + uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); + tm->vpp_event_queue = + uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); /* * Setup session @@ -288,9 +289,11 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) session_index = session - tm->sessions; session->bytes_to_receive = session->bytes_to_send = tm->bytes_to_send; - session->server_rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + session->server_rx_fifo = + uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *); session->server_rx_fifo->client_session_index = session_index; - session->server_tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + session->server_tx_fifo = + uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); session->server_tx_fifo->client_session_index = session_index; session->vpp_session_handle = mp->handle; @@ -321,7 +324,7 @@ create_api_loopback (tclient_main_t * tm) memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = VL_API_MEMCLNT_CREATE; mp->context = 0xFEEDFACE; - mp->input_queue = (u64) tm->vl_input_queue; + mp->input_queue = pointer_to_uword (tm->vl_input_queue); strncpy ((char *) mp->name, "tcp_tester", sizeof (mp->name) - 1); vl_api_memclnt_create_t_handler (mp); diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c index 34682699..621ce02a 100644 --- a/src/vnet/tcp/builtin_server.c +++ b/src/vnet/tcp/builtin_server.c @@ -244,7 +244,7 @@ create_api_loopback (vlib_main_t * vm) memset 
(mp, 0, sizeof (*mp)); mp->_vl_msg_id = VL_API_MEMCLNT_CREATE; mp->context = 0xFEEDFACE; - mp->input_queue = (u64) bsm->vl_input_queue; + mp->input_queue = pointer_to_uword (bsm->vl_input_queue); strncpy ((char *) mp->name, "tcp_test_server", sizeof (mp->name) - 1); vl_api_memclnt_create_t_handler (mp); diff --git a/src/vppinfra/mheap.c b/src/vppinfra/mheap.c index b8828f9e..192732db 100644 --- a/src/vppinfra/mheap.c +++ b/src/vppinfra/mheap.c @@ -304,7 +304,7 @@ mheap_small_object_cache_mask (mheap_small_object_cache_t * c, uword bin) uword mask; /* $$$$ ELIOT FIXME: add Altivec version of this routine */ -#if !defined (CLIB_HAVE_VEC128) || defined (__ALTIVEC__) +#if !defined (CLIB_HAVE_VEC128) || defined (__ALTIVEC__) || defined (__i386__) mask = 0; #else u8x16 b = u8x16_splat (bin); -- cgit 1.2.3-korg From e3a395c8406a292becb719495052374449fcd152 Mon Sep 17 00:00:00 2001 From: Steven Date: Tue, 9 May 2017 16:19:50 -0700 Subject: device: Add callback for set interface rx-mode - When the interface rx-mode is changed via CLI, the corresponding device may want to know about it and reset the driver. This patch adds the callback. - The function vnet_hw_interface_set_rx_mode appears to be missing the line hw->rx_mode_by_queue[queue_id] = mode, because the function checks whether the new mode is the same as hw->rx_mode_by_queue, which is initialized to POLLING. So if the function is called to change the mode to interrupt, it just returns without doing anything. This is the check in question, in the same function: if (hw->rx_mode_by_queue[queue_id] == mode) return 0; Change-Id: Iaca2651c43e0ae3fda6fd8dc128e247b0851cc65 Signed-off-by: Steven --- src/vnet/devices/devices.c | 1 + src/vnet/interface.h | 26 +++++++++++------ src/vnet/interface_cli.c | 72 ++++++++++++++++++++++++++++++++++------------ 3 files changed, 72 insertions(+), 27 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index d75d905a..e71be602 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -225,6 +225,7 @@ vnet_hw_interface_set_rx_mode (vnet_main_t * vnm, u32 hw_if_index, (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE) == 0) return VNET_API_ERROR_UNSUPPORTED; + hw->rx_mode_by_queue[queue_id] = mode; thread_index = hw->input_node_thread_index_by_queue[queue_id]; vm = vlib_mains[thread_index]; diff --git a/src/vnet/interface.h b/src/vnet/interface.h index 2344348b..1c985558 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -48,6 +48,15 @@ struct vnet_hw_interface_t; struct vnet_sw_interface_t; struct ip46_address_t; +typedef enum +{ + VNET_HW_INTERFACE_RX_MODE_UNKNOWN, + VNET_HW_INTERFACE_RX_MODE_POLLING, + VNET_HW_INTERFACE_RX_MODE_INTERRUPT, + VNET_HW_INTERFACE_RX_MODE_ADAPTIVE, + VNET_HW_INTERFACE_NUM_RX_MODES, +} vnet_hw_interface_rx_mode; + /* Interface up/down callback. */ typedef clib_error_t *(vnet_interface_function_t) (struct vnet_main_t * vnm, u32 if_index, u32 flags); @@ -61,6 +70,11 @@ typedef clib_error_t *(vnet_subif_add_del_function_t) typedef clib_error_t *(vnet_interface_set_mac_address_function_t) (struct vnet_hw_interface_t * hi, char *address); +/* Interface set rx mode callback. 
*/ +typedef clib_error_t *(vnet_interface_set_rx_mode_function_t) + (struct vnet_main_t * vnm, u32 if_index, u32 queue_id, + vnet_hw_interface_rx_mode mode); + typedef enum vnet_interface_function_priority_t_ { VNET_ITF_FUNC_PRIORITY_LOW, @@ -134,6 +148,9 @@ typedef struct _vnet_device_class /* Function to call when sub-interface is added/deleted */ vnet_subif_add_del_function_t *subif_add_del_function; + /* Function to call when interface rx mode is changed */ + vnet_interface_set_rx_mode_function_t *rx_mode_change_function; + /* Redistribute flag changes/existence of this interface class. */ u32 redistribute; @@ -492,15 +509,6 @@ typedef enum VNET_SW_INTERFACE_TYPE_SUB, } vnet_sw_interface_type_t; -typedef enum -{ - VNET_HW_INTERFACE_RX_MODE_UNKNOWN, - VNET_HW_INTERFACE_RX_MODE_POLLING, - VNET_HW_INTERFACE_RX_MODE_INTERRUPT, - VNET_HW_INTERFACE_RX_MODE_ADAPTIVE, - VNET_HW_INTERFACE_NUM_RX_MODES, -} vnet_hw_interface_rx_mode; - typedef struct { /* diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index bfce03e1..e18a80fc 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -1175,6 +1175,54 @@ VLIB_CLI_COMMAND (clear_tag_command, static) = { }; /* *INDENT-ON* */ +static clib_error_t * +set_hw_interface_rx_mode (vnet_main_t * vnm, u32 hw_if_index, + u32 queue_id, vnet_hw_interface_rx_mode mode) +{ + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + vnet_device_class_t *dev_class = + vnet_get_device_class (vnm, hw->dev_class_index); + clib_error_t *error; + vnet_hw_interface_rx_mode old_mode; + int rv; + + rv = vnet_hw_interface_get_rx_mode (vnm, hw_if_index, queue_id, &old_mode); + switch (rv) + { + case 0: + if (old_mode == mode) + return 0; /* same rx-mode, no change */ + break; + case VNET_API_ERROR_INVALID_INTERFACE: + return clib_error_return (0, "invalid interface"); + default: + return clib_error_return (0, "unknown error"); + } + + if (dev_class->rx_mode_change_function) + { + error = dev_class->rx_mode_change_function (vnm, hw_if_index, queue_id, + mode); + if (error) + return (error); + } + + rv = vnet_hw_interface_set_rx_mode (vnm, hw_if_index, queue_id, mode); + switch (rv) + { + case 0: + break; + case VNET_API_ERROR_UNSUPPORTED: + return clib_error_return (0, "unsupported"); + case VNET_API_ERROR_INVALID_INTERFACE: + return clib_error_return (0, "invalid interface"); + default: + return clib_error_return (0, "unknown error"); + } + + return 0; +} + static clib_error_t * set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -1186,7 +1234,7 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input, u32 hw_if_index = (u32) ~ 0; u32 queue_id = (u32) ~ 0; vnet_hw_interface_rx_mode mode = VNET_HW_INTERFACE_RX_MODE_UNKNOWN; - int i, rv = 0; + int i; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -1226,26 +1274,14 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input, if (queue_id == ~0) for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++) { - rv = vnet_hw_interface_set_rx_mode (vnm, hw_if_index, i, mode); - if (rv) - goto error; + error = set_hw_interface_rx_mode (vnm, hw_if_index, i, mode); + if (error) + break; } else - rv = vnet_hw_interface_set_rx_mode (vnm, hw_if_index, queue_id, mode); - - if (rv) - goto error; - - return 0; - -error: - if (rv == VNET_API_ERROR_UNSUPPORTED) - return clib_error_return (0, "unsupported"); - - if (rv == VNET_API_ERROR_INVALID_INTERFACE) - return clib_error_return (0, "invalid interface"); + error 
= set_hw_interface_rx_mode (vnm, hw_if_index, queue_id, mode); - return clib_error_return (0, "unknown error"); + return (error); } /*? -- cgit 1.2.3-korg From 7bfa119ead548fcca869bac7e777ce7137dc933b Mon Sep 17 00:00:00 2001 From: Ray Kinsella Date: Mon, 15 May 2017 11:52:43 +0100 Subject: af_packet: support changing the mtu size Added support to the af_packet device to change the MTU size. Change-Id: I9c9e1e17323721f3efccf70a10b753e12eef94d5 Signed-off-by: Ray Kinsella --- src/vnet/devices/af_packet/af_packet.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index cb52e6da..ea52878d 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include #include @@ -61,7 +64,26 @@ static u32 af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) { - /* nothing for now */ + clib_error_t *error; + u8 *s; + af_packet_main_t *apm = &af_packet_main; + af_packet_if_t *apif = + pool_elt_at_index (apm->interfaces, hi->dev_instance); + + if (ETHERNET_INTERFACE_FLAG_MTU == (flags & ETHERNET_INTERFACE_FLAG_MTU)) + { + s = format (0, "/sys/class/net/%s/mtu%c", apif->host_if_name, 0); + + error = vlib_sysfs_write ((char *) s, "%d", hi->max_packet_bytes); + vec_free (s); + + if (error) + { + clib_error_report (error); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + } + return 0; } -- cgit 1.2.3-korg From 025d4151e2d7627aa771d577d405464a276039ad Mon Sep 17 00:00:00 2001 From: Steven Date: Tue, 16 May 2017 21:26:13 -0700 Subject: vhost: bad packet assembled from descriptor chaining When the descriptor is chained via multiple parts, vhost is supposed to reassemble the different parts to form a packet prior to passing the packet to the next input node. However, bad packets were seen, with bad ethertype, source, and destination mac addresses. The problem was due to the destination pointer not being incremented as each chain is processed. The result was that the first chain was copied to the beginning of the buffer, the next chain was copied to the same place, and the last chain was also copied to the beginning of the buffer. As a result, the ethertype, source and destination mac, etc., were overwritten by the very last chain of the descriptor. Change-Id: I78f9a91de68c85574047912576dcc311d7597e21 Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 6ccc0d87..64d55b65 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -1813,7 +1813,8 @@ vhost_user_if_input (vlib_main_t * vm, desc_table[desc_current].len - desc_data_offset; cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length; cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len; - cpy->dst = (uword) vlib_buffer_get_current (b_current); + cpy->dst = (uword) (vlib_buffer_get_current (b_current) + + b_current->current_length); cpy->src = desc_table[desc_current].addr + desc_data_offset; desc_data_offset += cpy->len; -- cgit 1.2.3-korg From 2038ad010b54ea6c2252bf487837d3e72448040f Mon Sep 17 00:00:00 2001 From: Ray Kinsella Date: Thu, 18 May 2017 11:56:28 +0100 Subject: af_packet: set mac address support Added support for setting the interface mac address; a sketch of the underlying ioctl pattern follows. 
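The host-side half of the new callback reduces to a standard Linux SIOCSIFHWADDR ioctl on a control socket. A minimal self-contained sketch of that pattern, with the interface addressed by name for brevity (the patch itself first resolves the name from host_if_index with SIOCGIFNAME):

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <unistd.h>

/* Set the mac address of a host interface; returns 0 on success. */
int
set_host_if_mac (const char *name, const unsigned char mac[6])
{
  struct ifreq ifr;
  int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0);

  if (fd < 0)
    return -1;
  memset (&ifr, 0, sizeof (ifr));
  strncpy (ifr.ifr_name, name, IFNAMSIZ - 1);
  ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;  /* ethernet hardware address */
  memcpy (ifr.ifr_hwaddr.sa_data, mac, 6);
  rv = ioctl (fd, SIOCSIFHWADDR, &ifr);     /* apply the new mac */
  close (fd);
  return rv;
}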
Resolved an fd leak when the interface is a bridge. Change-Id: I6608c51b11a50bd0ae4aabe0dc5788c4301b5a1e Signed-off-by: Ray Kinsella --- src/vnet/devices/af_packet/device.c | 48 ++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c index 2a17e6b3..4607d888 100644 --- a/src/vnet/devices/af_packet/device.c +++ b/src/vnet/devices/af_packet/device.c @@ -18,8 +18,10 @@ */ #include +#include #include #include +#include #include #include @@ -212,7 +214,7 @@ af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, /* if interface is a bridge ignore */ if (apif->host_if_index < 0) - return 0; /* no error */ + goto error; /* no error */ /* use host_if_index in case host name has changed */ ifr.ifr_ifindex = apif->host_if_index; @@ -220,6 +222,7 @@ af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, { clib_unix_warning ("af_packet_%s ioctl could not retrieve eth name", apif->host_if_name); + goto error; } apif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; @@ -228,6 +231,7 @@ af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, { clib_unix_warning ("af_packet_%s error: %d", apif->is_admin_up ? "up" : "down", rv); + goto error; } if (apif->is_admin_up) @@ -245,10 +249,12 @@ af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, { clib_unix_warning ("af_packet_%s error: %d", apif->is_admin_up ? "up" : "down", rv); + goto error; } vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); +error: close (fd); return 0; /* no error */ @@ -263,6 +269,45 @@ af_packet_subif_add_del_function (vnet_main_t * vnm, return 0; } +static clib_error_t *af_packet_set_mac_address_function + (struct vnet_hw_interface_t *hi, char *address) +{ + af_packet_main_t *apm = &af_packet_main; + af_packet_if_t *apif = + pool_elt_at_index (apm->interfaces, hi->dev_instance); + int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0); + struct ifreq ifr; + + /* if interface is a bridge ignore */ + if (apif->host_if_index < 0) + goto error; /* no error */ + + /* use host_if_index in case host name has changed */ + ifr.ifr_ifindex = apif->host_if_index; + if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0) + { + clib_unix_warning + ("af_packet_%s ioctl could not retrieve eth name, error: %d", + apif->host_if_name, rv); + goto error; + } + + clib_memcpy (ifr.ifr_hwaddr.sa_data, address, 6); + ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; + + if ((rv = ioctl (fd, SIOCSIFHWADDR, &ifr)) < 0) + { + clib_unix_warning ("af_packet_%s ioctl could not set mac, error: %d", + apif->host_if_name, rv); + goto error; + } + +error: + close (fd); + + return 0; /* no error */ +} + /* *INDENT-OFF* */ VNET_DEVICE_CLASS (af_packet_device_class) = { .name = "af-packet", @@ -276,6 +321,7 @@ VNET_DEVICE_CLASS (af_packet_device_class) = { .clear_counters = af_packet_clear_hw_interface_counters, .admin_up_down_function = af_packet_interface_admin_up_down, .subif_add_del_function = af_packet_subif_add_del_function, + .mac_addr_change_function = af_packet_set_mac_address_function, }; VLIB_DEVICE_TX_FUNCTION_MULTIARCH (af_packet_device_class, -- cgit 1.2.3-korg From 95827e430762a2858f4e56e1248a4a93d629a938 Mon Sep 17 00:00:00 2001 From: Steven Date: Thu, 18 May 2017 21:22:00 -0700 Subject: vhost: buffers leak and interface disable upon vring descriptor out of mmap When processing a vring descriptor which is outside of mmap, we disable the interface and 
spit a message to shut/no shut the interface. This is not practical, as an application using vhost cannot constantly check the logs and do the recovery. The proposed fix is to log an error, like other errors that we encounter. The other bug is a buffer leak in the rewind function. At the end of the while loop when b_current != b_head, we still have to give back 1 more buffer or add 1 to rx_buffers_len. Change-Id: I68c0b24f070e644cd8878f42272a7b518f14393f Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 64d55b65..451ae434 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -1557,6 +1557,7 @@ vhost_user_input_rewind_buffers (vlib_main_t * vm, b_current->current_length = 0; b_current->flags = 0; } + cpu->rx_buffers_len++; } static u32 @@ -1735,7 +1736,8 @@ vhost_user_if_input (vlib_main_t * vm, desc_current = 0; if (PREDICT_FALSE (desc_table == 0)) { - //FIXME: Handle error by shutdown the queue + vlib_error_count (vm, node->node_index, + VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); goto out; } } @@ -1868,13 +1870,8 @@ vhost_user_if_input (vlib_main_t * vm, (vhost_user_input_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { - clib_warning - ("Memory mapping error on interface hw_if_index=%d " - "(Shutting down - Switch interface down and up to restart)", - vui->hw_if_index); - vui->admin_up = 0; - copy_len = 0; - break; + vlib_error_count (vm, node->node_index, + VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); } copy_len = 0; @@ -1893,10 +1890,8 @@ vhost_user_if_input (vlib_main_t * vm, (vhost_user_input_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { - clib_warning ("Memory mapping error on interface hw_if_index=%d " - "(Shutting down - Switch interface down and up to restart)", - vui->hw_if_index); - vui->admin_up = 0; + vlib_error_count (vm, node->node_index, + VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1); } /* give buffers back to driver */ @@ -2324,10 +2319,8 @@ done: (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { - clib_warning ("Memory mapping error on interface hw_if_index=%d " - "(Shutting down - Switch interface down and up to restart)", - vui->hw_if_index); - vui->admin_up = 0; + vlib_error_count (vm, node->node_index, + VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); } CLIB_MEMORY_BARRIER (); -- cgit 1.2.3-korg From f3b53643e87e7521c57cccc157385d2fa4bd0d80 Mon Sep 17 00:00:00 2001 From: Steven Date: Mon, 1 May 2017 14:03:02 -0700 Subject: vhost: migrate to use device infra for worker thread assignment, rx-mode, and add adaptive mode support to receive queue - Migrate vhost to use device infra which does the interface/queue to worker thread assignment. - Retire vhost thread CLI and corresponding code which assigns interface/queue to worker thread. set interface rx-placement should be used instead to customize the interface/queue to worker thread assignment. - Retire vhost interrupt/polling option when creating vhost-user interface. Instead, set interface rx-mode should be used. - Add code in vnet_device_input_unassign_thread to change the node state to interrupt if the last polling interface has left the worker thread for the device of the corresponding interface/queue. - Add adaptive mode support. The node state is set to interrupt initially. 
When the scheduler detects a burst of traffic, it switches the input node to polling. Then we inform the device that we don't need interrupt notification. When the traffic subsides, the scheduler switches the input node back to interrupt. Then we immediately tell the driver that we want interrupt notification again. - Remove some duplicate code in vlib/main.c Change-Id: Id19bb1b9e50e6521c6464f470f5825c26924d3a8 Signed-off-by: Steven --- src/vat/api_format.c | 67 +-- src/vlib/main.c | 11 +- src/vnet/devices/devices.c | 20 + src/vnet/devices/virtio/vhost-user.c | 748 +++++++++++++------------------ src/vnet/devices/virtio/vhost-user.h | 45 +- src/vnet/devices/virtio/vhost_user_api.c | 7 +- 6 files changed, 342 insertions(+), 556 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index aac59bb8..f3e6f64c 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -403,46 +403,6 @@ api_unformat_sw_if_index (unformat_input_t * input, va_list * args) } #endif /* VPP_API_TEST_BUILTIN */ -#define VHOST_USER_POLLING_MODE 0 -#define VHOST_USER_INTERRUPT_MODE 1 -#define VHOST_USER_ADAPTIVE_MODE 2 - -static u8 * -api_format_vhost_user_operation_mode (u8 * s, va_list * va) -{ - int operation_mode = va_arg (*va, int); - - switch (operation_mode) - { - case VHOST_USER_POLLING_MODE: - s = format (s, "%-9s", "polling"); - break; - case VHOST_USER_INTERRUPT_MODE: - s = format (s, "%-9s", "interrupt"); - break; - default: - s = format (s, "%-9s", "invalid"); - } - return s; -} - -static uword -api_unformat_vhost_user_operation_mode (unformat_input_t * input, - va_list * args) -{ - u8 *operation_mode = va_arg (*args, u8 *); - uword rc = 1; - - if (unformat (input, "interrupt")) - *operation_mode = VHOST_USER_INTERRUPT_MODE; - else if (unformat (input, "polling")) - *operation_mode = VHOST_USER_POLLING_MODE; - else - rc = 0; - - return rc; -} - static uword unformat_policer_rate_type (unformat_input_t * input, va_list * args) { @@ -11481,7 +11441,6 @@ api_create_vhost_user_if (vat_main_t * vam) u8 use_custom_mac = 0; u8 *tag = 0; int ret; - u8 operation_mode = VHOST_USER_POLLING_MODE; /* Shut up coverity */ memset (hwaddr, 0, sizeof (hwaddr)); @@ -11500,10 +11459,6 @@ api_create_vhost_user_if (vat_main_t * vam) is_server = 1; else if (unformat (i, "tag %s", &tag)) ; - else if (unformat (i, "mode %U", - api_unformat_vhost_user_operation_mode, - &operation_mode)) - ; else break; } @@ -11523,7 +11478,6 @@ api_create_vhost_user_if (vat_main_t * vam) M (CREATE_VHOST_USER_IF, mp); - mp->operation_mode = operation_mode; mp->is_server = is_server; clib_memcpy (mp->sock_filename, file_name, vec_len (file_name)); vec_free (file_name); @@ -11555,7 +11509,6 @@ api_modify_vhost_user_if (vat_main_t * vam) u8 sw_if_index_set = 0; u32 sw_if_index = (u32) ~ 0; int ret; - u8 operation_mode = VHOST_USER_POLLING_MODE; while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { @@ -11571,10 +11524,6 @@ api_modify_vhost_user_if (vat_main_t * vam) ; else if (unformat (i, "server")) is_server = 1; - else if (unformat (i, "mode %U", - api_unformat_vhost_user_operation_mode, - &operation_mode)) - ; else break; } @@ -11600,7 +11549,6 @@ api_modify_vhost_user_if (vat_main_t * vam) M (MODIFY_VHOST_USER_IF, mp); - mp->operation_mode = operation_mode; mp->sw_if_index = ntohl (sw_if_index); mp->is_server = is_server; clib_memcpy (mp->sock_filename, file_name, vec_len (file_name)); @@ -11656,12 +11604,11 @@ static void vl_api_sw_interface_vhost_user_details_t_handler { vat_main_t *vam = 
&vat_main; - print (vam->ofp, "%-25s %3" PRIu32 " %6" PRIu32 " %8x %6d %7d %U %s", + print (vam->ofp, "%-25s %3" PRIu32 " %6" PRIu32 " %8x %6d %7d %s", (char *) mp->interface_name, ntohl (mp->sw_if_index), ntohl (mp->virtio_net_hdr_sz), clib_net_to_host_u64 (mp->features), mp->is_server, - ntohl (mp->num_regions), api_format_vhost_user_operation_mode, - mp->operation_mode, (char *) mp->sock_filename); + ntohl (mp->num_regions), (char *) mp->sock_filename); print (vam->ofp, " Status: '%s'", strerror (ntohl (mp->sock_errno))); } @@ -11690,7 +11637,6 @@ static void vl_api_sw_interface_vhost_user_details_t_handler_json vat_json_object_add_string_copy (node, "sock_filename", mp->sock_filename); vat_json_object_add_uint (node, "num_regions", ntohl (mp->num_regions)); vat_json_object_add_uint (node, "sock_errno", ntohl (mp->sock_errno)); - vat_json_object_add_uint (node, "mode", mp->operation_mode); } static int @@ -11700,8 +11646,7 @@ api_sw_interface_vhost_user_dump (vat_main_t * vam) vl_api_control_ping_t *mp_ping; int ret; print (vam->ofp, - "Interface name idx hdr_sz features server regions mode" - " filename"); + "Interface name idx hdr_sz features server regions filename"); /* Get list of vhost-user interfaces */ M (SW_INTERFACE_VHOST_USER_DUMP, mp); @@ -19001,12 +18946,10 @@ _(l2_interface_vlan_tag_rewrite, \ "[translate-2-[1|2]] [push_dot1q 0] tag1 tag2 ") \ _(create_vhost_user_if, \ "socket [server] [renumber ] " \ - "[mac ] " \ - "[mode ]") \ + "[mac ]") \ _(modify_vhost_user_if, \ " | sw_if_index socket \n" \ - "[server] [renumber ] " \ - "[mode ]") \ + "[server] [renumber ]") \ _(delete_vhost_user_if, " | sw_if_index ") \ _(sw_interface_vhost_user_dump, "") \ _(show_version, "") \ diff --git a/src/vlib/main.c b/src/vlib/main.c index 8af1e7a9..0e6d66cd 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -1473,20 +1473,11 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (!nm->interrupt_threshold_vector_length) nm->interrupt_threshold_vector_length = 5; - if (is_main) - { - if (!nm->polling_threshold_vector_length) - nm->polling_threshold_vector_length = 10; - if (!nm->interrupt_threshold_vector_length) - nm->interrupt_threshold_vector_length = 5; - - nm->current_process_index = ~0; - } - /* Start all processes. 
*/ if (is_main) { uword i; + nm->current_process_index = ~0; for (i = 0; i < vec_len (nm->processes); i++) cpu_time_now = dispatch_process (vm, nm->processes[i], /* frame */ 0, cpu_time_now); diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index e71be602..58c72077 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -150,6 +150,7 @@ vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, dq->dev_instance = hw->dev_instance; dq->queue_id = queue_id; dq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING; + rt->enabled_node_state = VLIB_NODE_STATE_POLLING; vnet_device_queue_update (vnm, rt); vec_validate (hw->input_node_thread_index_by_queue, queue_id); @@ -168,6 +169,7 @@ vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, vnet_device_input_runtime_t *rt; vnet_device_and_queue_t *dq; uword old_thread_index; + vnet_hw_interface_rx_mode mode; if (hw->input_node_thread_index_by_queue == 0) return VNET_API_ERROR_INVALID_INTERFACE; @@ -184,6 +186,7 @@ vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, vec_foreach (dq, rt->devices_and_queues) if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id) { + mode = dq->mode; vec_del1 (rt->devices_and_queues, dq - rt->devices_and_queues); goto deleted; } @@ -197,6 +200,23 @@ deleted: if (vec_len (rt->devices_and_queues) == 0) vlib_node_set_state (vm, hw->input_node_index, VLIB_NODE_STATE_DISABLED); + else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + { + /* + * if the deleted interface is polling, we may need to set the node state + * to interrupt if there is no more polling interface for this device's + * corresponding thread. This is because mixed interfaces + * (polling and interrupt), assigned to the same thread, set the + * thread to polling prior to the deletion. + */ + vec_foreach (dq, rt->devices_and_queues) + { + if (dq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + return 0; + } + rt->enabled_node_state = VLIB_NODE_STATE_INTERRUPT; + vlib_node_set_state (vm, hw->input_node_index, rt->enabled_node_state); + } return 0; } diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 451ae434..23188934 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -362,140 +362,71 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui) } } +/** + * @brief Unassign existing interface/queue to thread mappings and re-assign + * new interface/queue to thread mappings + */ static void vhost_user_rx_thread_placement () { vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui; - vhost_cpu_t *vhc; - u32 *workers = 0; - u32 thread_index; - vlib_main_t *vm; - - //Let's list all workers cpu indexes - u32 i; - for (i = vum->input_cpu_first_index; - i < vum->input_cpu_first_index + vum->input_cpu_count; i++) - { - vlib_node_set_state (vlib_mains[i], vhost_user_input_node.index, - VLIB_NODE_STATE_DISABLED); - vec_add1 (workers, i); - } - - vec_foreach (vhc, vum->cpus) - { - vec_reset_length (vhc->rx_queues); - } + vhost_user_vring_t *txvq; + vnet_main_t *vnm = vnet_get_main (); + u32 qid; + int rv; + u16 *queue; - i = 0; - vhost_iface_and_queue_t iaq; + // Scrap all existing mappings for all interfaces/queues /* *INDENT-OFF* */ pool_foreach (vui, vum->vhost_user_interfaces, { - u32 *vui_workers = vec_len (vui->workers) ? 
vui->workers : workers; - u32 qid; - for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++) + vec_foreach (queue, vui->rx_queues) { - vhost_user_vring_t *txvq = - &vui->vrings[VHOST_VRING_IDX_TX (qid)]; - if (!txvq->started) - continue; - - i %= vec_len (vui_workers); - thread_index = vui_workers[i]; - i++; - vhc = &vum->cpus[thread_index]; - txvq->interrupt_thread_index = thread_index; - - iaq.qid = qid; - iaq.vhost_iface_index = vui - vum->vhost_user_interfaces; - vec_add1 (vhc->rx_queues, iaq); + rv = vnet_hw_interface_unassign_rx_thread (vnm, vui->hw_if_index, + *queue); + if (rv) + clib_warning ("Warning: unable to unassign interface %d, " + "queue %d: rc=%d", vui->hw_if_index, *queue, rv); } + vec_reset_length (vui->rx_queues); }); /* *INDENT-ON* */ - vec_foreach (vhc, vum->cpus) - { - vhost_iface_and_queue_t *vhiq; - u8 mode = VHOST_USER_INTERRUPT_MODE; - - vec_foreach (vhiq, vhc->rx_queues) - { - vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; - if (vui->operation_mode == VHOST_USER_POLLING_MODE) + // Create the rx_queues for all interfaces + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++) { - /* At least one interface is polling, cpu is set to polling */ - mode = VHOST_USER_POLLING_MODE; - break; + txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; + if (txvq->started) + { + if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN) + /* Set polling as the default */ + txvq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING; + vec_add1 (vui->rx_queues, qid); + } } - } - vhc->operation_mode = mode; - } - - for (thread_index = vum->input_cpu_first_index; - thread_index < vum->input_cpu_first_index + vum->input_cpu_count; - thread_index++) - { - vlib_node_state_t state = VLIB_NODE_STATE_POLLING; + }); + /* *INDENT-ON* */ - vhc = &vum->cpus[thread_index]; - vm = vlib_mains ? 
vlib_mains[thread_index] : &vlib_global_main; - switch (vhc->operation_mode) + // Assign new mappings for all interfaces/queues + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + vnet_hw_interface_set_input_node (vnm, vui->hw_if_index, + vhost_user_input_node.index); + vec_foreach (queue, vui->rx_queues) { - case VHOST_USER_INTERRUPT_MODE: - state = VLIB_NODE_STATE_INTERRUPT; - break; - case VHOST_USER_POLLING_MODE: - state = VLIB_NODE_STATE_POLLING; - break; - default: - clib_warning ("BUG: bad operation mode %d", vhc->operation_mode); - break; + vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, *queue, + ~0); + txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; + rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, *queue, + txvq->mode); + if (rv) + clib_warning ("Warning: unable to set rx mode for interface %d, " + "queue %d: rc=%d", vui->hw_if_index, *queue, rv); } - vlib_node_set_state (vm, vhost_user_input_node.index, state); - } - - vec_free (workers); -} - -static int -vhost_user_thread_placement (u32 sw_if_index, u32 worker_thread_index, u8 del) -{ - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - vnet_hw_interface_t *hw; - - if (worker_thread_index < vum->input_cpu_first_index || - worker_thread_index >= - vum->input_cpu_first_index + vum->input_cpu_count) - return -1; - - if (!(hw = vnet_get_sup_hw_interface (vnet_get_main (), sw_if_index))) - return -2; - - vui = pool_elt_at_index (vum->vhost_user_interfaces, hw->dev_instance); - u32 found = ~0, *w; - vec_foreach (w, vui->workers) - { - if (*w == worker_thread_index) - { - found = w - vui->workers; - break; - } - } - - if (del) - { - if (found == ~0) - return -3; - vec_del1 (vui->workers, found); - } - else if (found == ~0) - { - vec_add1 (vui->workers, worker_thread_index); - } - - vhost_user_rx_thread_placement (); - return 0; + }); + /* *INDENT-ON* */ } /** @brief Returns whether at least one TX and one RX vring are enabled */ @@ -532,37 +463,17 @@ vhost_user_update_iface_state (vhost_user_intf_t * vui) static void vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq) { - vhost_user_main_t *vum = &vhost_user_main; - vhost_cpu_t *vhc; - u32 thread_index; - vlib_main_t *vm; - u32 ifq2, qid; - vhost_user_vring_t *txvq; + u32 qid; + vnet_main_t *vnm = vnet_get_main (); qid = ifq & 0xff; - if ((qid % 2) == 0) - /* Only care about the odd number virtqueue which is TX */ + if ((qid & 1) == 0) + /* Only care about the odd number, or TX, virtqueue */ return; if (vhost_user_intf_ready (vui)) - { - txvq = &vui->vrings[qid]; - thread_index = txvq->interrupt_thread_index; - vhc = &vum->cpus[thread_index]; - if (vhc->operation_mode == VHOST_USER_INTERRUPT_MODE) - { - vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main; - /* - * Convert virtqueue number in the lower byte to vring - * queue index for the input node process. Top bytes contain - * the interface, lower byte contains the queue index. 
- */ - ifq2 = ((ifq >> 8) << 8) | qid / 2; - vhc->pending_input_bitmap = - clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1); - vlib_node_set_interrupt_pending (vm, vhost_user_input_node.index); - } - } + // qid >> 1 is to convert virtqueue number to vring queue index + vnet_device_input_set_interrupt_pending (vnm, vui->hw_if_index, qid >> 1); } static clib_error_t * @@ -570,14 +481,10 @@ vhost_user_callfd_read_ready (unix_file_t * uf) { __attribute__ ((unused)) int n; u8 buff[8]; - vhost_user_intf_t *vui = - pool_elt_at_index (vhost_user_main.vhost_user_interfaces, - uf->private_data >> 8); n = read (uf->file_descriptor, ((char *) &buff), 8); DBG_SOCK ("if %d CALL queue %d", uf->private_data >> 8, uf->private_data & 0xff); - vhost_user_set_interrupt_pending (vui, uf->private_data); return 0; } @@ -1001,12 +908,8 @@ vhost_user_socket_read (unix_file_t * uf) vui->vrings[msg.state.index].last_avail_idx = vui->vrings[msg.state.index].used->idx; - if (vui->operation_mode == VHOST_USER_POLLING_MODE) - /* tell driver that we don't want interrupts */ - vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY; - else - /* tell driver that we want interrupts */ - vui->vrings[msg.state.index].used->flags = 0; + /* tell driver that we don't want interrupts */ + vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY; break; case VHOST_USER_SET_OWNER: @@ -1315,8 +1218,6 @@ vhost_user_init (vlib_main_t * vm) clib_error_t *error; vhost_user_main_t *vum = &vhost_user_main; vlib_thread_main_t *tm = vlib_get_thread_main (); - vlib_thread_registration_t *tr; - uword *p; error = vlib_call_init_function (vm, ip4_init); if (error) @@ -1335,18 +1236,6 @@ vhost_user_init (vlib_main_t * vm) cpu->rx_buffers_len = 0; } - /* find out which cpus will be used for input */ - vum->input_cpu_first_index = 0; - vum->input_cpu_count = 1; - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - vum->input_cpu_first_index = tr->first_index; - vum->input_cpu_count = tr->count; - } - vum->random = random_default_seed (); mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword)); @@ -1447,9 +1336,16 @@ vhost_user_send_call (vlib_main_t * vm, vhost_user_vring_t * vq) vhost_user_main_t *vum = &vhost_user_main; u64 x = 1; int fd = UNIX_GET_FD (vq->callfd_idx); - int rv __attribute__ ((unused)); - /* TODO: pay attention to rv */ + int rv; + rv = write (fd, &x, sizeof (x)); + if (rv <= 0) + { + clib_unix_warning + ("Error: Could not write to unix socket for callfd %d", fd); + return; + } + vq->n_since_last_int = 0; vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time; } @@ -1564,7 +1460,8 @@ static u32 vhost_user_if_input (vlib_main_t * vm, vhost_user_main_t * vum, vhost_user_intf_t * vui, - u16 qid, vlib_node_runtime_t * node) + u16 qid, vlib_node_runtime_t * node, + vnet_hw_interface_rx_mode mode) { vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; u16 n_rx_packets = 0; @@ -1590,6 +1487,26 @@ vhost_user_if_input (vlib_main_t * vm, vhost_user_send_call (vm, rxvq); } + /* + * For adaptive mode, it is optimized to reduce interrupts. + * If the scheduler switches the input node to polling due + * to burst of traffic, we tell the driver no interrupt. + * When the traffic subsides, the scheduler switches the node back to + * interrupt mode. We must tell the driver we want interrupt. 
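+ * (A note on the mechanism, assuming standard virtio semantics as used
+ * in the code below: writing VRING_USED_F_NO_NOTIFY into used->flags
+ * asks the guest driver to suppress kicks while we poll; writing 0
+ * re-enables guest notification.)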
+ */ + if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) + { + if ((node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) || + !(node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) + /* Tell driver we want notification */ + txvq->used->flags = 0; + else + /* Tell driver we don't want notification */ + txvq->used->flags = VRING_USED_F_NO_NOTIFY; + } + if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE)) return 0; @@ -1926,34 +1843,23 @@ vhost_user_input (vlib_main_t * vm, { vhost_user_main_t *vum = &vhost_user_main; uword n_rx_packets = 0; - u32 thread_index = vlib_get_thread_index (); - vhost_iface_and_queue_t *vhiq; vhost_user_intf_t *vui; - vhost_cpu_t *vhc; + vnet_device_input_runtime_t *rt = + (vnet_device_input_runtime_t *) node->runtime_data; + vnet_device_and_queue_t *dq; - vhc = &vum->cpus[thread_index]; - if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE)) - { - vec_foreach (vhiq, vum->cpus[thread_index].rx_queues) + vec_foreach (dq, rt->devices_and_queues) + { + if (clib_smp_swap (&dq->interrupt_pending, 0) || + (node->state == VLIB_NODE_STATE_POLLING)) { - vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; - n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node); + vui = + pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance); + n_rx_packets = vhost_user_if_input (vm, vum, vui, dq->queue_id, node, + dq->mode); } - } - else - { - int i; - - /* *INDENT-OFF* */ - clib_bitmap_foreach (i, vhc->pending_input_bitmap, ({ - int qid = i & 0xff; + } - clib_bitmap_set (vhc->pending_input_bitmap, i, 0); - vui = pool_elt_at_index (vum->vhost_user_interfaces, i >> 8); - n_rx_packets += vhost_user_if_input (vm, vum, vui, qid, node); - })); - /* *INDENT-ON* */ - } return n_rx_packets; } @@ -2371,6 +2277,161 @@ done3: return frame->n_vectors; } +static uword +vhost_user_send_interrupt_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + vhost_user_intf_t *vui; + f64 timeout = 3153600000.0 /* 100 years */ ; + uword event_type, *event_data = 0; + vhost_user_main_t *vum = &vhost_user_main; + u16 *queue; + f64 now, poll_time_remaining; + f64 next_timeout; + u8 stop_timer = 0; + + while (1) + { + poll_time_remaining = + vlib_process_wait_for_event_or_clock (vm, timeout); + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + /* + * Use the remaining timeout if it is less than coalesce time to avoid + * resetting the existing timer in the middle of expiration + */ + timeout = poll_time_remaining; + if (vlib_process_suspend_time_is_zero (timeout) || + (timeout > vum->coalesce_time)) + timeout = vum->coalesce_time; + + now = vlib_time_now (vm); + switch (event_type) + { + case VHOST_USER_EVENT_STOP_TIMER: + stop_timer = 1; + break; + + case VHOST_USER_EVENT_START_TIMER: + stop_timer = 0; + if (!vlib_process_suspend_time_is_zero (poll_time_remaining)) + break; + /* fall through */ + + case ~0: + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + next_timeout = timeout; + vec_foreach (queue, vui->rx_queues) + { + vhost_user_vring_t *rxvq = + &vui->vrings[VHOST_VRING_IDX_RX (*queue)]; + vhost_user_vring_t *txvq = + &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; + + if (txvq->n_since_last_int) + { + if (now >= txvq->int_deadline) + vhost_user_send_call (vm, txvq); + else + next_timeout = txvq->int_deadline - now; + } + + if (rxvq->n_since_last_int) + { + if (now >= rxvq->int_deadline) + vhost_user_send_call (vm, rxvq); + else + 
next_timeout = rxvq->int_deadline - now; + } + + if ((next_timeout < timeout) && (next_timeout > 0.0)) + timeout = next_timeout; + } + }); + /* *INDENT-ON* */ + break; + + default: + clib_warning ("BUG: unhandled event type %d", event_type); + break; + } + /* No less than 1 millisecond */ + if (timeout < 1e-3) + timeout = 1e-3; + if (stop_timer) + timeout = 3153600000.0; + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = { + .function = vhost_user_send_interrupt_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "vhost-user-send-interrupt-process", +}; +/* *INDENT-ON* */ + +static clib_error_t * +vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, + u32 qid, vnet_hw_interface_rx_mode mode) +{ + vlib_main_t *vm = vnm->vlib_main; + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui = + pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance); + vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; + + if ((mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || + (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) + { + if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + { + vum->ifq_count++; + // Start the timer if this is the first encounter on interrupt + // interface/queue + if ((vum->ifq_count == 1) && + (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0)) + vlib_process_signal_event (vm, + vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_START_TIMER, 0); + } + } + else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + { + if (((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || + (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) && + vum->ifq_count) + { + vum->ifq_count--; + // Stop the timer if there is no more interrupt interface/queue + if ((vum->ifq_count == 0) && + (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0)) + vlib_process_signal_event (vm, + vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_STOP_TIMER, 0); + } + } + + txvq->mode = mode; + if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + txvq->used->flags = VRING_USED_F_NO_NOTIFY; + else if ((mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE) || + (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT)) + txvq->used->flags = 0; + else + { + clib_warning ("BUG: unhandled mode %d changed for if %d queue %d", mode, + hw_if_index, qid); + return clib_error_return (0, "unsupported"); + } + + return 0; +} + static clib_error_t * vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) @@ -2399,6 +2460,7 @@ VNET_DEVICE_CLASS (vhost_user_dev_class,static) = { .format_device_name = format_vhost_user_interface_name, .name_renumber = vhost_user_name_renumber, .admin_up_down_function = vhost_user_interface_admin_up_down, + .rx_mode_change_function = vhost_user_interface_rx_mode_change, .format_tx_trace = format_vhost_trace, }; @@ -2523,8 +2585,6 @@ vhost_user_term_if (vhost_user_intf_t * vui) int q; vhost_user_main_t *vum = &vhost_user_main; - // Delete configured thread pinning - vec_reset_length (vui->workers); // disconnect interface sockets vhost_user_if_disconnect (vui); vhost_user_update_iface_state (vui); @@ -2555,6 +2615,7 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) vhost_user_intf_t *vui; int rv = 0; vnet_hw_interface_t *hwif; + u16 *queue; if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) || hwif->dev_class_index != vhost_user_dev_class.index) @@ -2565,6 +2626,28 @@ 
vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance); + vec_foreach (queue, vui->rx_queues) + { + vhost_user_vring_t *txvq; + + txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; + if ((vum->ifq_count > 0) && + ((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || + (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))) + { + vum->ifq_count--; + // Stop the timer if there is no more interrupt interface/queue + if ((vum->ifq_count == 0) && + (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0)) + { + vlib_process_signal_event (vm, + vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_STOP_TIMER, 0); + break; + } + } + } + // Disable and reset interface vhost_user_term_if (vui); @@ -2687,13 +2770,15 @@ vhost_user_vui_init (vnet_main_t * vnm, vhost_user_intf_t * vui, int server_sock_fd, const char *sock_filename, - u64 feature_mask, u32 * sw_if_index, u8 operation_mode) + u64 feature_mask, u32 * sw_if_index) { vnet_sw_interface_t *sw; - sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); int q; vhost_user_main_t *vum = &vhost_user_main; + vnet_hw_interface_t *hw; + hw = vnet_get_hw_interface (vnm, vui->hw_if_index); + sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); if (server_sock_fd != -1) { unix_file_t template = { 0 }; @@ -2715,7 +2800,6 @@ vhost_user_vui_init (vnet_main_t * vnm, vui->feature_mask = feature_mask; vui->unix_file_index = ~0; vui->log_base_addr = 0; - vui->operation_mode = operation_mode; vui->if_index = vui - vum->vhost_user_interfaces; mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename, &vui->if_index, 0); @@ -2723,6 +2807,7 @@ vhost_user_vui_init (vnet_main_t * vnm, for (q = 0; q < VHOST_VRING_MAX_N; q++) vhost_user_vring_init (vui, q); + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); if (sw_if_index) @@ -2740,106 +2825,13 @@ vhost_user_vui_init (vnet_main_t * vnm, vhost_user_tx_thread_placement (vui); } -static uword -vhost_user_send_interrupt_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - vhost_user_intf_t *vui; - f64 timeout = 3153600000.0 /* 100 years */ ; - uword event_type, *event_data = 0; - vhost_user_main_t *vum = &vhost_user_main; - vhost_iface_and_queue_t *vhiq; - vhost_cpu_t *vhc; - f64 now, poll_time_remaining; - - while (1) - { - poll_time_remaining = - vlib_process_wait_for_event_or_clock (vm, timeout); - event_type = vlib_process_get_events (vm, &event_data); - vec_reset_length (event_data); - - /* - * Use the remaining timeout if it is less than coalesce time to avoid - * resetting the existing timer in the middle of expiration - */ - timeout = poll_time_remaining; - if (vlib_process_suspend_time_is_zero (timeout) || - (timeout > vum->coalesce_time)) - timeout = vum->coalesce_time; - - now = vlib_time_now (vm); - switch (event_type) - { - case VHOST_USER_EVENT_START_TIMER: - if (!vlib_process_suspend_time_is_zero (poll_time_remaining)) - break; - /* fall through */ - - case ~0: - vec_foreach (vhc, vum->cpus) - { - u32 thread_index = vhc - vum->cpus; - f64 next_timeout; - - next_timeout = timeout; - vec_foreach (vhiq, vum->cpus[thread_index].rx_queues) - { - vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; - vhost_user_vring_t *rxvq = - &vui->vrings[VHOST_VRING_IDX_RX (vhiq->qid)]; - vhost_user_vring_t *txvq = - &vui->vrings[VHOST_VRING_IDX_TX (vhiq->qid)]; - - if (txvq->n_since_last_int) - { - if (now >= txvq->int_deadline) - 
vhost_user_send_call (vm, txvq); - else - next_timeout = txvq->int_deadline - now; - } - - if (rxvq->n_since_last_int) - { - if (now >= rxvq->int_deadline) - vhost_user_send_call (vm, rxvq); - else - next_timeout = rxvq->int_deadline - now; - } - - if ((next_timeout < timeout) && (next_timeout > 0.0)) - timeout = next_timeout; - } - } - break; - - default: - clib_warning ("BUG: unhandled event type %d", event_type); - break; - } - /* No less than 1 millisecond */ - if (timeout < 1e-3) - timeout = 1e-3; - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = { - .function = vhost_user_send_interrupt_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "vhost-user-send-interrupt-process", -}; -/* *INDENT-ON* */ - int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 * sw_if_index, u64 feature_mask, - u8 renumber, u32 custom_dev_instance, u8 * hwaddr, - u8 operation_mode) + u8 renumber, u32 custom_dev_instance, u8 * hwaddr) { vhost_user_intf_t *vui = NULL; u32 sw_if_idx = ~0; @@ -2848,10 +2840,6 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_main_t *vum = &vhost_user_main; uword *if_index; - if ((operation_mode != VHOST_USER_POLLING_MODE) && - (operation_mode != VHOST_USER_INTERRUPT_MODE)) - return VNET_API_ERROR_UNIMPLEMENTED; - if (sock_filename == NULL || !(strlen (sock_filename) > 0)) { return VNET_API_ERROR_INVALID_ARGUMENT; @@ -2881,7 +2869,7 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_create_ethernet (vnm, vm, vui, hwaddr); vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename, - feature_mask, &sw_if_idx, operation_mode); + feature_mask, &sw_if_idx); if (renumber) vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); @@ -2892,14 +2880,6 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, // Process node must connect vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0); - if ((operation_mode == VHOST_USER_INTERRUPT_MODE) && - !vum->interrupt_mode && (vum->coalesce_time > 0.0) && - (vum->coalesce_frames > 0)) - { - vum->interrupt_mode = 1; - vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index, - VHOST_USER_EVENT_START_TIMER, 0); - } return rv; } @@ -2908,8 +2888,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 sw_if_index, - u64 feature_mask, u8 renumber, u32 custom_dev_instance, - u8 operation_mode) + u64 feature_mask, u8 renumber, u32 custom_dev_instance) { vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui = NULL; @@ -2919,9 +2898,6 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, vnet_hw_interface_t *hwif; uword *if_index; - if ((operation_mode != VHOST_USER_POLLING_MODE) && - (operation_mode != VHOST_USER_INTERRUPT_MODE)) - return VNET_API_ERROR_UNIMPLEMENTED; if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) || hwif->dev_class_index != vhost_user_dev_class.index) return VNET_API_ERROR_INVALID_SW_IF_INDEX; @@ -2947,8 +2923,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_term_if (vui); vhost_user_vui_init (vnm, vui, server_sock_fd, - sock_filename, feature_mask, &sw_if_idx, - operation_mode); + sock_filename, feature_mask, &sw_if_idx); if (renumber) vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); @@ -2956,33 +2931,9 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, // Process node must connect vlib_process_signal_event (vm, 
vhost_user_process_node.index, 0, 0); - if ((operation_mode == VHOST_USER_INTERRUPT_MODE) && - !vum->interrupt_mode && (vum->coalesce_time > 0.0) && - (vum->coalesce_frames > 0)) - { - vum->interrupt_mode = 1; - vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index, - VHOST_USER_EVENT_START_TIMER, 0); - } return rv; } -static uword -unformat_vhost_user_operation_mode (unformat_input_t * input, va_list * args) -{ - u8 *operation_mode = va_arg (*args, u8 *); - uword rc = 1; - - if (unformat (input, "interrupt")) - *operation_mode = VHOST_USER_INTERRUPT_MODE; - else if (unformat (input, "polling")) - *operation_mode = VHOST_USER_POLLING_MODE; - else - rc = 0; - - return rc; -} - clib_error_t * vhost_user_connect_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -2998,7 +2949,6 @@ vhost_user_connect_command_fn (vlib_main_t * vm, u8 hwaddr[6]; u8 *hw = NULL; clib_error_t *error = NULL; - u8 operation_mode = VHOST_USER_POLLING_MODE; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -3020,9 +2970,6 @@ vhost_user_connect_command_fn (vlib_main_t * vm, { renumber = 1; } - else if (unformat (line_input, "mode %U", - unformat_vhost_user_operation_mode, &operation_mode)) - ; else { error = clib_error_return (0, "unknown input `%U'", @@ -3036,8 +2983,7 @@ vhost_user_connect_command_fn (vlib_main_t * vm, int rv; if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename, is_server, &sw_if_index, feature_mask, - renumber, custom_dev_instance, hw, - operation_mode))) + renumber, custom_dev_instance, hw))) { error = clib_error_return (0, "vhost_user_create_if returned %d", rv); goto done; @@ -3127,7 +3073,6 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance); vec_add2 (r_vuids, vuid, 1); - vuid->operation_mode = vui->operation_mode; vuid->sw_if_index = vui->sw_if_index; vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz; vuid->features = vui->features; @@ -3152,25 +3097,6 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, return rv; } -static u8 * -format_vhost_user_operation_mode (u8 * s, va_list * va) -{ - int operation_mode = va_arg (*va, int); - - switch (operation_mode) - { - case VHOST_USER_POLLING_MODE: - s = format (s, "%s", "polling"); - break; - case VHOST_USER_INTERRUPT_MODE: - s = format (s, "%s", "interrupt"); - break; - default: - s = format (s, "%s", "invalid"); - } - return s; -} - clib_error_t * show_vhost_user_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -3182,10 +3108,8 @@ show_vhost_user_command_fn (vlib_main_t * vm, vhost_user_intf_t *vui; u32 hw_if_index, *hw_if_indices = 0; vnet_hw_interface_t *hi; - vhost_cpu_t *vhc; - vhost_iface_and_queue_t *vhiq; + u16 *queue; u32 ci; - int i, j, q; int show_descr = 0; struct feat_struct @@ -3238,6 +3162,8 @@ show_vhost_user_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "Virtio vhost-user interfaces"); vlib_cli_output (vm, "Global:\n coalesce frames %d time %e", vum->coalesce_frames, vum->coalesce_time); + vlib_cli_output (vm, " number of rx virtqueues in interrupt mode: %d", + vum->ifq_count); for (i = 0; i < vec_len (hw_if_indices); i++) { @@ -3279,23 +3205,21 @@ show_vhost_user_command_fn (vlib_main_t * vm, (vui->unix_server_index != ~0) ? 
"server" : "client", strerror (vui->sock_errno)); - vlib_cli_output (vm, " configured mode: %U\n", - format_vhost_user_operation_mode, vui->operation_mode); vlib_cli_output (vm, " rx placement: "); - vec_foreach (vhc, vum->cpus) + + vec_foreach (queue, vui->rx_queues) { - vec_foreach (vhiq, vhc->rx_queues) - { - if (vhiq->vhost_iface_index == vui - vum->vhost_user_interfaces) - { - vlib_cli_output (vm, " thread %d on vring %d\n", - vhc - vum->cpus, - VHOST_VRING_IDX_TX (vhiq->qid)); - vlib_cli_output (vm, " mode: %U\n", - format_vhost_user_operation_mode, - vhc->operation_mode); - } - } + vnet_main_t *vnm = vnet_get_main (); + uword thread_index; + vnet_hw_interface_rx_mode mode; + + thread_index = vnet_get_device_input_thread_index (vnm, + vui->hw_if_index, + *queue); + vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, *queue, &mode); + vlib_cli_output (vm, " thread %d on vring %d, %U\n", + thread_index, VHOST_VRING_IDX_TX (*queue), + format_vnet_hw_interface_rx_mode, mode); } vlib_cli_output (vm, " tx placement: %s\n", @@ -3444,8 +3368,7 @@ done: VLIB_CLI_COMMAND (vhost_user_connect_command, static) = { .path = "create vhost-user", .short_help = "create vhost-user socket [server] " - "[feature-mask ] [hwaddr ] [renumber ] " - "[mode {interrupt | polling}]", + "[feature-mask ] [hwaddr ] [renumber ] ", .function = vhost_user_connect_command_fn, }; /* *INDENT-ON* */ @@ -3648,69 +3571,6 @@ vhost_user_unmap_all (void) } } -static clib_error_t * -vhost_thread_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u32 worker_thread_index; - u32 sw_if_index; - u8 del = 0; - int rv; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - if (!unformat - (line_input, "%U %d", unformat_vnet_sw_interface, vnet_get_main (), - &sw_if_index, &worker_thread_index)) - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - - if (unformat (line_input, "del")) - del = 1; - - if ((rv = - vhost_user_thread_placement (sw_if_index, worker_thread_index, del))) - { - error = clib_error_return (0, "vhost_user_thread_placement returned %d", - rv); - goto done; - } - -done: - unformat_free (line_input); - - return error; -} - - -/*? - * This command is used to move the RX processing for the given - * interfaces to the provided thread. If the 'del' option is used, - * the forced thread assignment is removed and the thread assigment is - * reassigned automatically. Use 'show vhost-user ' - * to see the thread assignment. 
- * - * @cliexpar - * Example of how to move the RX processing for a given interface to a given thread: - * @cliexcmd{vhost thread VirtualEthernet0/0/0 1} - * Example of how to remove the forced thread assignment for a given interface: - * @cliexcmd{vhost thread VirtualEthernet0/0/0 1 del} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (vhost_user_thread_command, static) = { - .path = "vhost thread", - .short_help = "vhost thread [del]", - .function = vhost_thread_command_fn, -}; -/* *INDENT-ON* */ - /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index 56b65477..ceced342 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -66,13 +66,11 @@ typedef enum int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 * sw_if_index, u64 feature_mask, - u8 renumber, u32 custom_dev_instance, u8 * hwaddr, - u8 operation_mode); + u8 renumber, u32 custom_dev_instance, u8 * hwaddr); int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 sw_if_index, u64 feature_mask, - u8 renumber, u32 custom_dev_instance, - u8 operation_mode); + u8 renumber, u32 custom_dev_instance); int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index); @@ -210,14 +208,13 @@ typedef struct u32 callfd_idx; u32 kickfd_idx; u64 log_guest_addr; - u32 interrupt_thread_index; -} vhost_user_vring_t; -#define VHOST_USER_POLLING_MODE 0 -#define VHOST_USER_INTERRUPT_MODE 1 -#define VHOST_USER_ADAPTIVE_MODE 2 + /* The rx queue policy (interrupt/adaptive/polling) for this queue */ + u32 mode; +} vhost_user_vring_t; #define VHOST_USER_EVENT_START_TIMER 1 +#define VHOST_USER_EVENT_STOP_TIMER 2 typedef struct { @@ -258,18 +255,10 @@ typedef struct u8 use_tx_spinlock; u16 *per_cpu_tx_qid; - /* Vector of workers for this interface */ - u32 *workers; - - u8 operation_mode; + /* Vector of active rx queues for this interface */ + u16 *rx_queues; } vhost_user_intf_t; -typedef struct -{ - u16 vhost_iface_index; - u16 qid; -} vhost_iface_and_queue_t; - typedef struct { uword dst; @@ -292,7 +281,6 @@ typedef struct typedef struct { - vhost_iface_and_queue_t *rx_queues; u32 rx_buffers_len; u32 rx_buffers[VHOST_USER_RX_BUFFERS_N]; @@ -302,12 +290,6 @@ typedef struct /* This is here so it doesn't end-up * using stack or registers. 
*/ vhost_trace_t *current_trace; - - /* bitmap of pending rx interfaces */ - uword *pending_input_bitmap; - - /* The operation mode computed per cpu based on interface setting */ - u8 operation_mode; } vhost_cpu_t; typedef struct @@ -320,20 +302,14 @@ typedef struct f64 coalesce_time; int dont_dump_vhost_user_memory; - /** first cpu index */ - u32 input_cpu_first_index; - - /** total cpu count */ - u32 input_cpu_count; - /** Per-CPU data for vhost-user */ vhost_cpu_t *cpus; /** Pseudo random iterator */ u32 random; - /* Node is in interrupt mode */ - u8 interrupt_mode; + /* The number of rx interface/queue pairs in interrupt mode */ + u32 ifq_count; } vhost_user_main_t; typedef struct @@ -346,7 +322,6 @@ typedef struct u8 sock_filename[256]; u32 num_regions; int sock_errno; - u8 operation_mode; } vhost_user_intf_details_t; int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c index ac7afa61..8dbd032b 100644 --- a/src/vnet/devices/virtio/vhost_user_api.c +++ b/src/vnet/devices/virtio/vhost_user_api.c @@ -81,8 +81,7 @@ vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp) rv = vhost_user_create_if (vnm, vm, (char *) mp->sock_filename, mp->is_server, &sw_if_index, (u64) ~ 0, mp->renumber, ntohl (mp->custom_dev_instance), - (mp->use_custom_mac) ? mp->mac_address : NULL, - mp->operation_mode); + (mp->use_custom_mac) ? mp->mac_address : NULL); /* Remember an interface tag for the new interface */ if (rv == 0) @@ -117,8 +116,7 @@ vl_api_modify_vhost_user_if_t_handler (vl_api_modify_vhost_user_if_t * mp) rv = vhost_user_modify_if (vnm, vm, (char *) mp->sock_filename, mp->is_server, sw_if_index, (u64) ~ 0, - mp->renumber, ntohl (mp->custom_dev_instance), - mp->operation_mode); + mp->renumber, ntohl (mp->custom_dev_instance)); REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY); } @@ -164,7 +162,6 @@ send_sw_interface_vhost_user_details (vpe_api_main_t * am, mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz); mp->features = clib_net_to_host_u64 (vui->features); mp->is_server = vui->is_server; - mp->operation_mode = vui->operation_mode; mp->num_regions = ntohl (vui->num_regions); mp->sock_errno = ntohl (vui->sock_errno); mp->context = context; -- cgit 1.2.3-korg From e6cc9cc77c9dcefa3d52e0cd90db35435f0eb64e Mon Sep 17 00:00:00 2001 From: Ray Kinsella Date: Sat, 20 May 2017 13:42:30 +0100 Subject: af_packet: fix coverity error Fix coverity error associated with fd. 
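The issue: socket(2) may fail and return -1, and the code previously used and unconditionally close()d that descriptor. A minimal sketch of the pattern the hunks below apply (illustrative only, not the literal diff):

    int fd = socket (AF_UNIX, SOCK_DGRAM, 0);
    if (fd < 0)
      {
        clib_unix_warning ("could not open socket");
        return 0;          /* bail out: nothing was opened, nothing to close */
      }
    /* ... ioctl () work using fd, with 'goto error' on the skip paths ... */
    error:
      if (fd >= 0)
        close (fd);        /* defensive: only ever close a valid descriptor */
      return 0;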
Change-Id: I0648aebaf356308bc03cc7217922479bfc4e22f7 Signed-off-by: Ray Kinsella --- src/vnet/devices/af_packet/device.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c index 4607d888..e01b1c71 100644 --- a/src/vnet/devices/af_packet/device.c +++ b/src/vnet/devices/af_packet/device.c @@ -212,6 +212,13 @@ af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0); struct ifreq ifr; + if (0 > fd) + { + clib_unix_warning ("af_packet_%s could not open socket", + apif->host_if_name); + return 0; + } + /* if interface is a bridge ignore */ if (apif->host_if_index < 0) goto error; /* no error */ @@ -255,7 +262,8 @@ af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); error: - close (fd); + if (0 <= fd) + close (fd); return 0; /* no error */ } @@ -278,6 +286,13 @@ static clib_error_t *af_packet_set_mac_address_function int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0); struct ifreq ifr; + if (0 > fd) + { + clib_unix_warning ("af_packet_%s could not open socket", + apif->host_if_name); + return 0; + } + /* if interface is a bridge ignore */ if (apif->host_if_index < 0) goto error; /* no error */ @@ -303,7 +318,9 @@ static clib_error_t *af_packet_set_mac_address_function } error: - close (fd); + + if (0 <= fd) + close (fd); return 0; /* no error */ } -- cgit 1.2.3-korg From 388e51aaae5a845524410a8acdf1c0cbb4a13897 Mon Sep 17 00:00:00 2001 From: Steven Date: Thu, 1 Jun 2017 12:49:23 -0700 Subject: vhost: add debug vhost-user on | off CLI Add a runtime 'debug vhost-user on | off' CLI to facilitate troubleshooting, so the code no longer has to be recompiled to debug vhost issues. The debugging messages are kept off the data path to avoid a performance hit. Change-Id: I4c40f65dbb222557cba3fb8706fa3b7b62eec95f Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 76 ++++++++++++++++++++++++++---------- src/vnet/devices/virtio/vhost-user.h | 3 ++ 2 files changed, 59 insertions(+), 20 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 23188934..2e2b49c2 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -49,14 +49,14 @@ */ -#define VHOST_USER_DEBUG_SOCKET 0 #define VHOST_DEBUG_VQ 0 -#if VHOST_USER_DEBUG_SOCKET == 1 -#define DBG_SOCK(args...) clib_warning(args); -#else -#define DBG_SOCK(args...) -#endif +#define DBG_SOCK(args...) \ { \ vhost_user_main_t *_vum = &vhost_user_main; \ if (_vum->debug) \ clib_warning(args); \ }; #if VHOST_DEBUG_VQ == 1 #define DBG_VQ(args...)
clib_warning(args); @@ -483,8 +483,6 @@ vhost_user_callfd_read_ready (unix_file_t * uf) u8 buff[8]; n = read (uf->file_descriptor, ((char *) &buff), 8); - DBG_SOCK ("if %d CALL queue %d", uf->private_data >> 8, - uf->private_data & 0xff); return 0; } @@ -921,7 +919,7 @@ vhost_user_socket_read (unix_file_t * uf) break; case VHOST_USER_SET_VRING_CALL: - DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL u64 %d", + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL %d", vui->hw_if_index, msg.u64); q = (u8) (msg.u64 & 0xFF); @@ -954,7 +952,7 @@ vhost_user_socket_read (unix_file_t * uf) break; case VHOST_USER_SET_VRING_KICK: - DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK u64 %d", + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK %d", vui->hw_if_index, msg.u64); q = (u8) (msg.u64 & 0xFF); @@ -992,7 +990,7 @@ vhost_user_socket_read (unix_file_t * uf) break; case VHOST_USER_SET_VRING_ERR: - DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR u64 %d", + DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR %d", vui->hw_if_index, msg.u64); q = (u8) (msg.u64 & 0xFF); @@ -1020,9 +1018,6 @@ vhost_user_socket_read (unix_file_t * uf) break; case VHOST_USER_GET_VRING_BASE: - DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d", - vui->hw_if_index, msg.state.index, msg.state.num); - if (msg.state.index >= VHOST_VRING_MAX_N) { DBG_SOCK ("invalid vring index VHOST_USER_GET_VRING_BASE:" @@ -1040,6 +1035,8 @@ vhost_user_socket_read (unix_file_t * uf) /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */ vhost_user_vring_close (vui, msg.state.index); + DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d", + vui->hw_if_index, msg.state.index, msg.state.num); break; case VHOST_USER_NONE: @@ -1101,28 +1098,30 @@ vhost_user_socket_read (unix_file_t * uf) break; case VHOST_USER_GET_PROTOCOL_FEATURES: - DBG_SOCK ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES", - vui->hw_if_index); - msg.flags |= 4; msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | (1 << VHOST_USER_PROTOCOL_F_MQ); msg.size = sizeof (msg.u64); + DBG_SOCK + ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES - reply 0x%016llx", + vui->hw_if_index, msg.u64); break; case VHOST_USER_SET_PROTOCOL_FEATURES: - DBG_SOCK ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%lx", - vui->hw_if_index, msg.u64); + DBG_SOCK + ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%016llx", + vui->hw_if_index, msg.u64); vui->protocol_features = msg.u64; break; case VHOST_USER_GET_QUEUE_NUM: - DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM", vui->hw_if_index); msg.flags |= 4; msg.u64 = VHOST_VRING_MAX_N; msg.size = sizeof (msg.u64); + DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM - reply %d", + vui->hw_if_index, msg.u64); break; case VHOST_USER_SET_VRING_ENABLE: @@ -3533,6 +3532,43 @@ VLIB_CLI_COMMAND (show_vhost_user_command, static) = { }; /* *INDENT-ON* */ +clib_error_t * +debug_vhost_user_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + vhost_user_main_t *vum = &vhost_user_main; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "on")) + vum->debug = 1; + else if (unformat (line_input, "off")) + vum->debug = 0; + else + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (debug_vhost_user_command, static) = { + .path = "debug vhost-user", + .short_help = "debug vhost-user ", + .function = debug_vhost_user_command_fn, +}; +/* *INDENT-ON* */ + static clib_error_t * vhost_user_config (vlib_main_t * vm, unformat_input_t * input) { diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index ceced342..ed147a47 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -310,6 +310,9 @@ typedef struct /* The number of rx interface/queue pairs in interrupt mode */ u32 ifq_count; + + /* debug on or off */ + u8 debug; } vhost_user_main_t; typedef struct -- cgit 1.2.3-korg From 6f9ac6559ba155cc15dbc1153900f7bd3c4c9e13 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 15 Jun 2017 19:01:31 +0200 Subject: Add missing barrier sync to rx placement infra code Change-Id: I25ccf8260dbe7e1550aee3904a688fc135ce1f03 Signed-off-by: Damjan Marion --- src/vnet/devices/devices.c | 20 +++++++++++++++----- src/vnet/interface_cli.c | 4 +++- 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index 58c72077..e3311c43 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -123,7 +123,7 @@ vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id, uword thread_index) { vnet_device_main_t *vdm = &vnet_device_main; - vlib_main_t *vm; + vlib_main_t *vm, *vm0; vnet_device_input_runtime_t *rt; vnet_device_and_queue_t *dq; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); @@ -143,6 +143,10 @@ vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, } vm = vlib_mains[thread_index]; + vm0 = vlib_get_main (); + + vlib_worker_thread_barrier_sync (vm0); + rt = vlib_node_get_runtime_data (vm, hw->input_node_index); vec_add2 (rt->devices_and_queues, dq, 1); @@ -157,6 +161,9 @@ vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, vec_validate (hw->rx_mode_by_queue, queue_id); hw->input_node_thread_index_by_queue[queue_id] = thread_index; hw->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_POLLING; + + vlib_worker_thread_barrier_release (vm0); + vlib_node_set_state (vm, hw->input_node_index, rt->enabled_node_state); } @@ -164,7 +171,7 @@ int vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, u16 queue_id) { - vlib_main_t *vm; + vlib_main_t *vm, *vm0; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); vnet_device_input_runtime_t *rt; vnet_device_and_queue_t *dq; @@ -187,16 +194,19 @@ vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id) { mode = dq->mode; - vec_del1 (rt->devices_and_queues, dq - rt->devices_and_queues); - goto deleted; + goto delete; } return VNET_API_ERROR_INVALID_INTERFACE; -deleted: +delete: + vm0 = vlib_get_main (); + vlib_worker_thread_barrier_sync (vm0); + vec_del1 (rt->devices_and_queues, dq - rt->devices_and_queues); 
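+ /* Note (assuming VPP's usual threading model): worker input nodes
+ iterate rt->devices_and_queues, so this vector must not be modified
+ while workers are running; the barrier sync above parks the workers
+ until the barrier release a few lines below. */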
vnet_device_queue_update (vnm, rt); hw->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_UNKNOWN; + vlib_worker_thread_barrier_release (vm0); if (vec_len (rt->devices_and_queues) == 0) vlib_node_set_state (vm, hw->input_node_index, VLIB_NODE_STATE_DISABLED); diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index e18a80fc..d37c7894 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -1445,8 +1445,10 @@ set_interface_rx_placement (vlib_main_t * vm, unformat_input_t * input, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_set_if_rx_placement,static) = { .path = "set interface rx-placement", - .short_help = "set interface rx-placement [queue ] [thread | main]", + .short_help = "set interface rx-placement [queue ] " + "[worker | main]", .function = set_interface_rx_placement, + .is_mp_safe = 1, }; /* *INDENT-ON* */ -- cgit 1.2.3-korg From 4e53a0d0f01c8c81842d1f50fb5cf0d26e0c1713 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 21 Jun 2017 14:29:44 +0200 Subject: Introduce default rx mode for device drivers If an interface is down and its queues are not configured, we are not able to change the rx-mode. This change introduces a default mode, stored per interface, which is applied if the driver wants it. Change-Id: I70149c21c1530eafc148d5e4aa03fbee53dec62f Signed-off-by: Damjan Marion --- src/plugins/memif/memif.c | 2 +- src/vnet/devices/devices.c | 3 +++ src/vnet/interface.c | 1 + src/vnet/interface.h | 2 ++ src/vnet/interface_cli.c | 18 ++++++++++++------ 5 files changed, 19 insertions(+), 7 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index ec67023b..fffb94c9 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -222,7 +222,7 @@ memif_connect (memif_if_t * mif) } vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, i, ~0); rv = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, i, - VNET_HW_INTERFACE_RX_MODE_INTERRUPT); + VNET_HW_INTERFACE_RX_MODE_DEFAULT); if (rv) clib_warning ("Warning: unable to set rx mode for interface %d queue %d: " diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index e3311c43..f64c6e0d 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -245,6 +245,9 @@ vnet_hw_interface_set_rx_mode (vnet_main_t * vnm, u32 hw_if_index, vnet_device_input_runtime_t *rt; int is_polling = 0; + if (mode == VNET_HW_INTERFACE_RX_MODE_DEFAULT) + mode = hw->default_rx_mode; + if (hw->input_node_thread_index_by_queue == 0 || hw->rx_mode_by_queue == 0) return VNET_API_ERROR_INVALID_INTERFACE; diff --git a/src/vnet/interface.c b/src/vnet/interface.c index e9042ae8..1370d048 100644 --- a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -706,6 +706,7 @@ vnet_register_interface (vnet_main_t * vnm, hw_index = hw - im->hw_interfaces; hw->hw_if_index = hw_index; + hw->default_rx_mode = VNET_HW_INTERFACE_RX_MODE_POLLING; if (dev_class->format_device_name) hw->name = format (0, "%U", dev_class->format_device_name, dev_instance); diff --git a/src/vnet/interface.h b/src/vnet/interface.h index 095fe961..d684e356 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -54,6 +54,7 @@ typedef enum VNET_HW_INTERFACE_RX_MODE_POLLING, VNET_HW_INTERFACE_RX_MODE_INTERRUPT, VNET_HW_INTERFACE_RX_MODE_ADAPTIVE, + VNET_HW_INTERFACE_RX_MODE_DEFAULT, VNET_HW_INTERFACE_NUM_RX_MODES, } vnet_hw_interface_rx_mode; @@ -492,6 +493,7 @@ typedef struct vnet_hw_interface_t /* vnet_hw_interface_rx_mode by queue */ u8 *rx_mode_by_queue; +
vnet_hw_interface_rx_mode default_rx_mode; /* device input device_and_queue runtime index */ uword *dq_runtime_index_by_queue; diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index d37c7894..bf2873ac 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -1186,6 +1186,9 @@ set_hw_interface_rx_mode (vnet_main_t * vnm, u32 hw_if_index, vnet_hw_interface_rx_mode old_mode; int rv; + if (mode == VNET_HW_INTERFACE_RX_MODE_DEFAULT) + mode = hw->default_rx_mode; + rv = vnet_hw_interface_get_rx_mode (vnm, hw_if_index, queue_id, &old_mode); switch (rv) { @@ -1272,12 +1275,15 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input, hw = vnet_get_hw_interface (vnm, hw_if_index); if (queue_id == ~0) - for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++) - { - error = set_hw_interface_rx_mode (vnm, hw_if_index, i, mode); - if (error) - break; - } + { + for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++) + { + error = set_hw_interface_rx_mode (vnm, hw_if_index, i, mode); + if (error) + break; + } + hw->default_rx_mode = mode; + } else error = set_hw_interface_rx_mode (vnm, hw_if_index, queue_id, mode); -- cgit 1.2.3-korg From 26054ea1d1bad8d0d383bac59bfbe50912aee146 Mon Sep 17 00:00:00 2001 From: Christophe Fontaine Date: Tue, 20 Jun 2017 13:57:47 +0200 Subject: Fix SIGBUS on aarch64 A call to 'clib_smp_swap (&((dq)->interrupt_pending), 0)' was causing a SIGBUS. Instead of making dq->interrupt_pending 64-bit aligned, we reduce its size from uword (u64) to u32, as the number of pending interrupts will never exceed what a u32 can hold. Change-Id: Ifa5a6d3b7adee222329a671be01305cf50853b33 Signed-off-by: Christophe Fontaine --- src/vnet/devices/devices.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index f1f7e778..b74e3713 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -61,7 +61,7 @@ typedef struct u32 dev_instance; u16 queue_id; vnet_hw_interface_rx_mode mode; - uword interrupt_pending; + u32 interrupt_pending; } vnet_device_and_queue_t; typedef struct -- cgit 1.2.3-korg From 2ee2d57c3ac63c8fdcdae53366e29b7dcdb2014d Mon Sep 17 00:00:00 2001 From: Steven Date: Fri, 21 Jul 2017 16:38:41 -0700 Subject: vhost: debug vhost-user command needs better error checking on the syntax (VPP-916) The syntax for debug vhost-user is 'debug vhost-user on | off'. However, the code currently does not reject invalid commands such as 'debug vhost-user', 'debug vhost-user on blah', and 'debug vhost-user off blah'. The fix is to enforce the correct syntax and reject the command when an invalid option is entered. Change-Id: I1a04ae8ddb6dd299aa6d15b043362964e685ddde Signed-off-by: Steven (cherry picked from commit 6a4de2764d9e6cadf36af824dddb3f33c2d6dc7e) --- src/vnet/devices/virtio/vhost-user.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 2e2b49c2..82f76533 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -3540,22 +3540,43 @@ debug_vhost_user_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; clib_error_t *error = NULL; vhost_user_main_t *vum = &vhost_user_main; + u8 onoff = 0; + u8 input_found = 0; /* Get a line of input.
*/ if (!unformat_user (input, unformat_line_input, line_input)) - return 0; + return clib_error_return (0, "missing argument"); while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (input_found) + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + if (unformat (line_input, "on")) - vum->debug = 1; + { + input_found = 1; + onoff = 1; + } else if (unformat (line_input, "off")) - vum->debug = 0; + { + input_found = 1; + onoff = 0; + } else - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + vum->debug = onoff; + +done: unformat_free (line_input); return error; -- cgit 1.2.3-korg From 15ac81c16fba83033090299413a3a2dbb848a0f9 Mon Sep 17 00:00:00 2001 From: Pavel Kotucek Date: Tue, 20 Jun 2017 14:00:26 +0200 Subject: P2P Ethernet Change-Id: Idb97e573961b3bc2acdeef77582314590795f8c3 Signed-off-by: Pavel Kotucek --- src/vat/api_format.c | 17 +- src/vnet.am | 1 + src/vnet/api_errno.h | 3 +- src/vnet/devices/devices.c | 6 + src/vnet/ethernet/ethernet.h | 1 + src/vnet/ethernet/interface.c | 54 +++- src/vnet/ethernet/node.c | 17 +- src/vnet/ethernet/p2p_ethernet.api | 1 + src/vnet/ethernet/p2p_ethernet.c | 165 +++++++++- src/vnet/ethernet/p2p_ethernet.h | 42 ++- src/vnet/ethernet/p2p_ethernet_api.c | 17 +- src/vnet/ethernet/p2p_ethernet_input.c | 247 +++++++++++++++ src/vnet/interface.c | 4 + src/vnet/interface.h | 15 + src/vnet/interface_funcs.h | 3 +- test/test_p2p_ethernet.py | 538 +++++++++++++++++++++++++++++++++ test/vpp_papi_provider.py | 25 ++ test/vpp_sub_interface.py | 23 ++ 18 files changed, 1145 insertions(+), 34 deletions(-) create mode 100644 src/vnet/ethernet/p2p_ethernet_input.c create mode 100644 test/test_p2p_ethernet.py (limited to 'src/vnet/devices') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 5a0c4580..6a2d36de 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -19161,6 +19161,7 @@ api_p2p_ethernet_add (vat_main_t * vam) unformat_input_t *i = vam->input; vl_api_p2p_ethernet_add_t *mp; u32 parent_if_index = ~0; + u32 sub_id = ~0; u8 remote_mac[6]; u8 mac_set = 0; int ret; @@ -19176,6 +19177,8 @@ api_p2p_ethernet_add (vat_main_t * vam) if (unformat (i, "remote_mac %U", unformat_ethernet_address, remote_mac)) mac_set++; + else if (unformat (i, "sub_id %d", &sub_id)) + ; else { clib_warning ("parse error '%U'", format_unformat_error, i); @@ -19193,9 +19196,15 @@ api_p2p_ethernet_add (vat_main_t * vam) errmsg ("missing remote mac address"); return -99; } + if (sub_id == ~0) + { + errmsg ("missing sub-interface id"); + return -99; + } M (P2P_ETHERNET_ADD, mp); mp->parent_if_index = ntohl (parent_if_index); + mp->subif_id = ntohl (sub_id); clib_memcpy (mp->remote_mac, remote_mac, sizeof (remote_mac)); S (mp); @@ -20094,11 +20103,10 @@ _(l2_xconnect_dump, "") \ _(sw_interface_set_mtu, " | sw_if_index mtu ") \ _(ip_neighbor_dump, "[ip6] | sw_if_index ") \ _(sw_interface_get_table, " | sw_if_index [ipv6]") \ -_(p2p_ethernet_add, " | sw_if_index remote_mac ") \ +_(p2p_ethernet_add, " | sw_if_index remote_mac sub_id ") \ _(p2p_ethernet_del, " | sw_if_index remote_mac ") \ -_(lldp_config, "system-name tx-hold tx-interval ") \ -_(sw_interface_set_lldp, \ - " | sw_if_index [port-desc ] [disable]") +_(lldp_config, "system-name tx-hold tx-interval ") \ +_(sw_interface_set_lldp, " | sw_if_index [port-desc ] [disable]") /* List of 
command functions, CLI names map directly to functions */ #define foreach_cli_function \ @@ -20122,7 +20130,6 @@ _(search_node_table, "usage: search_node_table ...") \ _(set, "usage: set ") \ _(script, "usage: script ") \ _(unset, "usage: unset ") - #define _(N,n) \ static void vl_api_##n##_t_handler_uni \ (vl_api_##n##_t * mp) \ diff --git a/src/vnet.am b/src/vnet.am index 060e3f38..ad84c028 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -115,6 +115,7 @@ libvnet_la_SOURCES += \ vnet/ethernet/pg.c \ vnet/ethernet/sfp.c \ vnet/ethernet/p2p_ethernet.c \ + vnet/ethernet/p2p_ethernet_input.c \ vnet/ethernet/p2p_ethernet_api.c nobase_include_HEADERS += \ diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index b22bb3a8..747c65e7 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -112,7 +112,8 @@ _(BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \ _(BD_IN_USE, -120, "Bridge domain has member interfaces") \ _(BD_NOT_MODIFIABLE, -121, "Bridge domain 0 can't be deleted/modified") \ _(BD_ID_EXCEED_MAX, -122, "Bridge domain ID exceed 16M limit") \ -_(UNSUPPORTED, -123, "Unsupported") +_(UNSUPPORTED, -123, "Unsupported") \ +_(SUBIF_DOESNT_EXIST, -124, "Subinterface doesn't exist") typedef enum { diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index f64c6e0d..2eb8e30e 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -77,6 +77,12 @@ VNET_FEATURE_INIT (span_input, static) = { .runs_before = VNET_FEATURES ("ethernet-input"), }; +VNET_FEATURE_INIT (p2p_ethernet_node, static) = { + .arc_name = "device-input", + .node_name = "p2p-ethernet-input", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; + VNET_FEATURE_INIT (ethernet_input, static) = { .arc_name = "device-input", .node_name = "ethernet-input", diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h index d9ab8c10..9ca256c9 100644 --- a/src/vnet/ethernet/ethernet.h +++ b/src/vnet/ethernet/ethernet.h @@ -169,6 +169,7 @@ typedef struct #define SUBINT_CONFIG_MATCH_3_TAG (1<<3) #define SUBINT_CONFIG_VALID (1<<4) #define SUBINT_CONFIG_L2 (1<<5) +#define SUBINT_CONFIG_P2P (1<<6) } subint_config_t; diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index 9ac30bc6..3e78a49d 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -89,7 +89,10 @@ ethernet_build_rewrite (vnet_main_t * vnm, ethernet_type_t type; uword n_bytes = sizeof (h[0]); u8 *rewrite = NULL; + u8 is_p2p = 0; + if (sub_sw->type == VNET_SW_INTERFACE_TYPE_P2P) + is_p2p = 1; if (sub_sw != sup_sw) { if (sub_sw->sub.eth.flags.one_tag) @@ -100,13 +103,24 @@ ethernet_build_rewrite (vnet_main_t * vnm, { n_bytes += 2 * (sizeof (ethernet_vlan_header_t)); } - // Check for encaps that are not supported for L3 interfaces - if (!(sub_sw->sub.eth.flags.exact_match) || - (sub_sw->sub.eth.flags.default_sub) || - (sub_sw->sub.eth.flags.outer_vlan_id_any) || - (sub_sw->sub.eth.flags.inner_vlan_id_any)) + else if (PREDICT_FALSE (is_p2p)) { - return 0; + n_bytes = sizeof (ethernet_header_t); + } + if (PREDICT_FALSE (!is_p2p)) + { + // Check for encaps that are not supported for L3 interfaces + if (!(sub_sw->sub.eth.flags.exact_match) || + (sub_sw->sub.eth.flags.default_sub) || + (sub_sw->sub.eth.flags.outer_vlan_id_any) || + (sub_sw->sub.eth.flags.inner_vlan_id_any)) + { + return 0; + } + } + else + { + n_bytes = sizeof (ethernet_header_t); } } @@ -126,12 +140,20 @@ ethernet_build_rewrite (vnet_main_t * vnm, h = (ethernet_header_t *) rewrite; ei = pool_elt_at_index 
(em->interfaces, hw->hw_instance); clib_memcpy (h->src_address, ei->address, sizeof (h->src_address)); - if (dst_address) - clib_memcpy (h->dst_address, dst_address, sizeof (h->dst_address)); + if (is_p2p) + { + clib_memcpy (h->dst_address, sub_sw->p2p.client_mac, + sizeof (h->dst_address)); + } else - memset (h->dst_address, ~0, sizeof (h->dst_address)); /* broadcast */ + { + if (dst_address) + clib_memcpy (h->dst_address, dst_address, sizeof (h->dst_address)); + else + memset (h->dst_address, ~0, sizeof (h->dst_address)); /* broadcast */ + } - if (sub_sw->sub.eth.flags.one_tag) + if (PREDICT_FALSE (!is_p2p) && sub_sw->sub.eth.flags.one_tag) { ethernet_vlan_header_t *outer = (void *) (h + 1); @@ -143,7 +165,7 @@ ethernet_build_rewrite (vnet_main_t * vnm, outer->type = clib_host_to_net_u16 (type); } - else if (sub_sw->sub.eth.flags.two_tags) + else if (PREDICT_FALSE (!is_p2p) && sub_sw->sub.eth.flags.two_tags) { ethernet_vlan_header_t *outer = (void *) (h + 1); ethernet_vlan_header_t *inner = (void *) (outer + 1); @@ -174,7 +196,12 @@ ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) adj = adj_get (ai); - if (FIB_PROTOCOL_IP4 == adj->ia_nh_proto) + vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); + if (si->type == VNET_SW_INTERFACE_TYPE_P2P) + { + default_update_adjacency (vnm, sw_if_index, ai); + } + else if (FIB_PROTOCOL_IP4 == adj->ia_nh_proto) { arp_update_adjacency (vnm, sw_if_index, ai); } @@ -719,7 +746,8 @@ vnet_delete_sub_interface (u32 sw_if_index) vnet_interface_main_t *im = &vnm->interface_main; vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); - if (si->type == VNET_SW_INTERFACE_TYPE_SUB) + if (si->type == VNET_SW_INTERFACE_TYPE_SUB || + si->type == VNET_SW_INTERFACE_TYPE_P2P) { vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); u64 sup_and_sub_key = diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index 421d501a..f216216d 100755 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -823,7 +824,21 @@ ethernet_sw_interface_get_config (vnet_main_t * vnm, // Locate the subint for the given ethernet config si = vnet_get_sw_interface (vnm, sw_if_index); - if (si->sub.eth.flags.default_sub) + if (si->type == VNET_SW_INTERFACE_TYPE_P2P) + { + p2p_ethernet_main_t *p2pm = &p2p_main; + u32 p2pe_sw_if_index = + p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac); + if (p2pe_sw_if_index == ~0) + { + pool_get (p2pm->p2p_subif_pool, subint); + si->p2p.pool_index = subint - p2pm->p2p_subif_pool; + } + else + subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index); + *flags = SUBINT_CONFIG_P2P; + } + else if (si->sub.eth.flags.default_sub) { subint = &main_intf->default_subint; *flags = SUBINT_CONFIG_MATCH_0_TAG | diff --git a/src/vnet/ethernet/p2p_ethernet.api b/src/vnet/ethernet/p2p_ethernet.api index 72a73423..8fb66376 100644 --- a/src/vnet/ethernet/p2p_ethernet.api +++ b/src/vnet/ethernet/p2p_ethernet.api @@ -18,6 +18,7 @@ define p2p_ethernet_add u32 client_index; u32 context; u32 parent_if_index; + u32 subif_id; u8 remote_mac[6]; }; diff --git a/src/vnet/ethernet/p2p_ethernet.c b/src/vnet/ethernet/p2p_ethernet.c index 3c077318..e3f667b5 100644 --- a/src/vnet/ethernet/p2p_ethernet.c +++ b/src/vnet/ethernet/p2p_ethernet.c @@ -18,12 +18,152 @@ #include #include #include +#include + +p2p_ethernet_main_t p2p_main; + +static void +create_p2pe_key (p2p_key_t * p2pe_key, u32 parent_if_index, u8 * 
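For a P2P sub-interface the L3 rewrite built above collapses to a bare 14-byte Ethernet header whose destination is the configured client MAC, which is why the P2P path can skip ARP/ND and fall back to default_update_adjacency. A standalone sketch of that idea; the helper and struct are hypothetical, not ethernet_build_rewrite itself.

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Sketch: a P2P rewrite is a fixed Ethernet header, destination known
       a priori from configuration, no VLAN tags and no neighbor discovery. */
    typedef struct { uint8_t dst[6]; uint8_t src[6]; uint16_t type; } eth_hdr_t;

    static void
    build_p2p_rewrite (eth_hdr_t * h, const uint8_t src[6],
                       const uint8_t client_mac[6], uint16_t ethertype)
    {
      memcpy (h->dst, client_mac, 6);   /* the one permitted peer */
      memcpy (h->src, src, 6);
      h->type = htons (ethertype);      /* network byte order on the wire */
    }

    int
    main (void)
    {
      const uint8_t src[6] = { 0x02, 0, 0, 0, 0, 0x01 };
      const uint8_t peer[6] = { 0xde, 0xad, 0, 0, 0, 0x01 };
      eth_hdr_t h;
      build_p2p_rewrite (&h, src, peer, 0x0800 /* IPv4 */);
      printf ("dst starts %02x:%02x\n", h.dst[0], h.dst[1]);
      return 0;
    }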
client_mac) +{ + clib_memcpy (p2pe_key->mac, client_mac, 6); + p2pe_key->pad1 = 0; + p2pe_key->hw_if_index = parent_if_index; + p2pe_key->pad2 = 0; +} + +u32 +p2p_ethernet_lookup (u32 parent_if_index, u8 * client_mac) +{ + p2p_ethernet_main_t *p2pm = &p2p_main; + p2p_key_t p2pe_key; + uword *p; + + create_p2pe_key (&p2pe_key, parent_if_index, client_mac); + p = hash_get_mem (p2pm->p2p_ethernet_by_key, &p2pe_key); + if (p) + return p[0]; + + return ~0; +} int p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, - u8 * client_mac, int is_add) + u8 * client_mac, u32 p2pe_subif_id, int is_add, + u32 * p2pe_if_index) { - return 0; + vnet_main_t *vnm = vnet_get_main (); + p2p_ethernet_main_t *p2pm = &p2p_main; + vnet_interface_main_t *im = &vnm->interface_main; + + u32 p2pe_sw_if_index = ~0; + p2pe_sw_if_index = p2p_ethernet_lookup (parent_if_index, client_mac); + + if (p2pe_if_index) + *p2pe_if_index = ~0; + + if (is_add) + { + if (p2pe_sw_if_index == ~0) + { + vnet_hw_interface_t *hi; + + hi = vnet_get_hw_interface (vnm, parent_if_index); + if (hi->bond_info == VNET_HW_INTERFACE_BOND_INFO_SLAVE) + return VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED; + + u64 sup_and_sub_key = + ((u64) (hi->sw_if_index) << 32) | (u64) p2pe_subif_id; + uword *p; + p = hash_get_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key); + if (p) + { + if (CLIB_DEBUG > 0) + clib_warning + ("p2p ethernet sub-interface on sw_if_index %d with sub id %d already exists\n", + hi->sw_if_index, p2pe_subif_id); + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + } + vnet_sw_interface_t template = { + .type = VNET_SW_INTERFACE_TYPE_P2P, + .flood_class = VNET_FLOOD_CLASS_NORMAL, + .sup_sw_if_index = hi->sw_if_index, + .sub.id = p2pe_subif_id + }; + + clib_memcpy (template.p2p.client_mac, client_mac, + sizeof (template.p2p.client_mac)); + + if (vnet_create_sw_interface (vnm, &template, &p2pe_sw_if_index)) + return VNET_API_ERROR_SUBIF_CREATE_FAILED; + + vnet_interface_main_t *im = &vnm->interface_main; + sup_and_sub_key = + ((u64) (hi->sw_if_index) << 32) | (u64) p2pe_subif_id; + u64 *kp = clib_mem_alloc (sizeof (*kp)); + + *kp = sup_and_sub_key; + hash_set (hi->sub_interface_sw_if_index_by_id, p2pe_subif_id, + p2pe_sw_if_index); + hash_set_mem (im->sw_if_index_by_sup_and_sub, kp, p2pe_sw_if_index); + + p2p_key_t *p_p2pe_key; + p_p2pe_key = clib_mem_alloc (sizeof (*p_p2pe_key)); + create_p2pe_key (p_p2pe_key, parent_if_index, client_mac); + hash_set_mem (p2pm->p2p_ethernet_by_key, p_p2pe_key, + p2pe_sw_if_index); + + if (p2pe_if_index) + *p2pe_if_index = p2pe_sw_if_index; + + vec_validate (p2pm->p2p_ethernet_by_sw_if_index, parent_if_index); + if (p2pm->p2p_ethernet_by_sw_if_index[parent_if_index] == 0) + { + vnet_feature_enable_disable ("device-input", + "p2p-ethernet-input", + parent_if_index, 1, 0, 0); + /* Set promiscuous mode on the l2 interface */ + ethernet_set_flags (vnm, parent_if_index, + ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); + + } + p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]++; + /* set the interface mode */ + set_int_l2_mode (vm, vnm, MODE_L3, p2pe_subif_id, 0, 0, 0, 0); + return 0; + } + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + } + else + { + if (p2pe_sw_if_index == ~0) + return VNET_API_ERROR_SUBIF_DOESNT_EXIST; + else + { + int rv = 0; + rv = vnet_delete_sub_interface (p2pe_sw_if_index); + if (!rv) + { + vec_validate (p2pm->p2p_ethernet_by_sw_if_index, + parent_if_index); + if (p2pm->p2p_ethernet_by_sw_if_index[parent_if_index] == 1) + { + vnet_feature_enable_disable ("device-input", + "p2p-ethernet-input", 
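The 64-bit sup_and_sub_key used above packs the parent sw_if_index into the high word and the sub-interface id into the low word, so a single hash lookup resolves a (parent, subif-id) pair. A minimal sketch of the packing and its inverse:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Sketch of the sub-interface key: parent index in the high 32 bits,
       sub-interface id in the low 32 bits. */
    static uint64_t
    make_sup_and_sub_key (uint32_t sup_sw_if_index, uint32_t sub_id)
    {
      return ((uint64_t) sup_sw_if_index << 32) | (uint64_t) sub_id;
    }

    int
    main (void)
    {
      uint64_t key = make_sup_and_sub_key (5, 42);
      assert ((uint32_t) (key >> 32) == 5);     /* parent recovered */
      assert ((uint32_t) key == 42);            /* sub id recovered */
      printf ("key = 0x%016llx\n", (unsigned long long) key);
      return 0;
    }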
+ parent_if_index, 0, 0, 0); + /* Disable promiscuous mode on the l2 interface */ + ethernet_set_flags (vnm, parent_if_index, 0); + } + p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]--; + + /* Remove p2p_ethernet from hash map */ + p2p_key_t *p_p2pe_key; + p_p2pe_key = clib_mem_alloc (sizeof (*p_p2pe_key)); + create_p2pe_key (p_p2pe_key, parent_if_index, client_mac); + hash_unset_mem (p2pm->p2p_ethernet_by_key, p_p2pe_key); + } + return rv; + } + } } static clib_error_t * @@ -35,6 +175,7 @@ vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input, int is_add = 1; int remote_mac = 0; u32 hw_if_index = ~0; + u32 sub_id = ~0; u8 client_mac[6]; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) @@ -44,6 +185,8 @@ vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input, ; else if (unformat (input, "%U", unformat_ethernet_address, &client_mac)) remote_mac = 1; + else if (unformat (input, "sub-id %d", &sub_id)) + ; else if (unformat (input, "del")) is_add = 0; else @@ -54,9 +197,11 @@ vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input, return clib_error_return (0, "Please specify parent interface ..."); if (!remote_mac) return clib_error_return (0, "Please specify client MAC address ..."); + if (sub_id == ~0 && is_add) + return clib_error_return (0, "Please specify sub-interface id ..."); u32 rv; - rv = p2p_ethernet_add_del (vm, hw_if_index, client_mac, is_add); + rv = p2p_ethernet_add_del (vm, hw_if_index, client_mac, sub_id, is_add, 0); switch (rv) { case VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED: @@ -77,17 +222,21 @@ vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) = { - .path = "p2p_ethernet ", - .function = vnet_p2p_ethernet_add_del, - .short_help = "p2p_ethernet [del]",}; -/* *INDENT-ON* */ +.path = "p2p_ethernet ",.function = vnet_p2p_ethernet_add_del,.short_help = + "p2p_ethernet [sub-id | del]",}; static clib_error_t * p2p_ethernet_init (vlib_main_t * vm) { + p2p_ethernet_main_t *p2pm = &p2p_main; + + p2pm->vlib_main = vm; + p2pm->vnet_main = vnet_get_main (); + p2pm->p2p_ethernet_by_key = + hash_create_mem (0, sizeof (p2p_key_t), sizeof (uword)); + return 0; } diff --git a/src/vnet/ethernet/p2p_ethernet.h b/src/vnet/ethernet/p2p_ethernet.h index 31b93d82..bb1e2896 100644 --- a/src/vnet/ethernet/p2p_ethernet.h +++ b/src/vnet/ethernet/p2p_ethernet.h @@ -18,6 +18,46 @@ #include #include -int p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, u8 * client_mac, int is_add); + +typedef struct { + /** + * Hash mapping parent sw_if_index and client mac address to p2p_ethernet sub-interface + */ + uword * p2p_ethernet_by_key; + + u32 *p2p_ethernet_by_sw_if_index; + + // Pool of p2p subifs; + subint_config_t *p2p_subif_pool; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} p2p_ethernet_main_t; + +extern p2p_ethernet_main_t p2p_main; + +typedef struct +{ + u32 sw_if_index; + u32 p2pe_sw_if_index; + u8 client_mac[6]; +} p2p_ethernet_trace_t; + +/** + * @brief Key struct for P2P Ethernet + * Key fields: parent sw_if_index and client mac address + * all fields in NET byte order + */ + +typedef struct { + u8 mac[6]; + u16 pad1; // padding for u64 mac address + u32 hw_if_index; + u32 pad2; // padding for u64 +} p2p_key_t; + +u32 p2p_ethernet_lookup (u32 parent_sw_if_index, u8* client_mac); +int p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, u8 * client_mac, u32 sub_id, int is_add, u32 
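Since p2p_ethernet_by_key is created with hash_create_mem over sizeof (p2p_key_t) raw bytes, two logically equal keys must also be bytewise equal, which is why create_p2pe_key zeroes pad1 and pad2 explicitly instead of leaving compiler padding undefined. A standalone sketch of that rule, using a hypothetical struct that mirrors p2p_key_t:

    #include <stdint.h>
    #include <string.h>

    /* Sketch: keys hashed/compared as raw bytes need deterministic padding. */
    typedef struct
    {
      uint8_t mac[6];
      uint16_t pad1;            /* explicit padding, always zeroed */
      uint32_t hw_if_index;
      uint32_t pad2;            /* pads the struct to 16 bytes */
    } p2p_key_sketch_t;

    static void
    make_key (p2p_key_sketch_t * k, uint32_t hw_if_index, const uint8_t mac[6])
    {
      memset (k, 0, sizeof (*k));       /* zero everything, padding included */
      memcpy (k->mac, mac, 6);
      k->hw_if_index = hw_if_index;
    }

    int
    main (void)
    {
      const uint8_t mac[6] = { 0xde, 0xad, 0, 0, 0, 1 };
      p2p_key_sketch_t a, b;
      make_key (&a, 7, mac);
      make_key (&b, 7, mac);
      return memcmp (&a, &b, sizeof (a)) != 0;  /* 0: identical byte images */
    }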
*p2pe_if_index); #endif /* included_vnet_p2p_ethernet_h */ diff --git a/src/vnet/ethernet/p2p_ethernet_api.c b/src/vnet/ethernet/p2p_ethernet_api.c index 1d9eaeb0..f2c730b4 100644 --- a/src/vnet/ethernet/p2p_ethernet_api.c +++ b/src/vnet/ethernet/p2p_ethernet_api.c @@ -51,12 +51,21 @@ vl_api_p2p_ethernet_add_t_handler (vl_api_p2p_ethernet_add_t * mp) int rv; u32 parent_if_index = htonl (mp->parent_if_index); + u32 sub_id = htonl (mp->subif_id); + u32 p2pe_if_index; u8 remote_mac[6]; clib_memcpy (remote_mac, mp->remote_mac, 6); - rv = p2p_ethernet_add_del (vm, parent_if_index, remote_mac, 1); - - REPLY_MACRO (VL_API_P2P_ETHERNET_ADD_REPLY); + rv = + p2p_ethernet_add_del (vm, parent_if_index, remote_mac, sub_id, 1, + &p2pe_if_index); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_P2P_ETHERNET_ADD_REPLY, + ({ + rmp->sw_if_index = htonl(p2pe_if_index); + })); + /* *INDENT-ON* */ } void @@ -70,7 +79,7 @@ vl_api_p2p_ethernet_del_t_handler (vl_api_p2p_ethernet_del_t * mp) u8 remote_mac[6]; clib_memcpy (remote_mac, mp->remote_mac, 6); - rv = p2p_ethernet_add_del (vm, parent_if_index, remote_mac, 0); + rv = p2p_ethernet_add_del (vm, parent_if_index, remote_mac, ~0, 0, 0); REPLY_MACRO (VL_API_P2P_ETHERNET_DEL_REPLY); } diff --git a/src/vnet/ethernet/p2p_ethernet_input.c b/src/vnet/ethernet/p2p_ethernet_input.c new file mode 100644 index 00000000..a58b832a --- /dev/null +++ b/src/vnet/ethernet/p2p_ethernet_input.c @@ -0,0 +1,247 @@ +/* + * node.c: p2p ethernet vpp node + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
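The API handler above converts every u32 that crosses the shared-memory boundary: request fields are byte-swapped on receipt, and the reply's sw_if_index is swapped back before REPLY_MACRO2 sends it. A self-contained sketch of that round-trip discipline, with hypothetical message structs rather than the generated VPP API types:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Sketch: binary API fields travel in network byte order. */
    struct add_req { uint32_t parent_if_index; uint32_t subif_id; };
    struct add_rep { uint32_t sw_if_index; };

    static void
    handle_add (const struct add_req *mp, struct add_rep *rmp)
    {
      uint32_t parent = ntohl (mp->parent_if_index);    /* wire -> host */
      uint32_t sub_id = ntohl (mp->subif_id);
      uint32_t new_if = parent * 1000 + sub_id; /* stand-in for creation */
      rmp->sw_if_index = htonl (new_if);                /* host -> wire */
    }

    int
    main (void)
    {
      struct add_req req = { htonl (2), htonl (7) };
      struct add_rep rep;
      handle_add (&req, &rep);
      printf ("reply sw_if_index %u\n", ntohl (rep.sw_if_index));
      return 0;
    }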
+ */ + +#include +#include +#include + +#include + +#include +#include + +vlib_node_registration_t p2p_ethernet_input_node; + +/* packet trace format function */ +u8 * +format_p2p_ethernet_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + p2p_ethernet_trace_t *t = va_arg (*args, p2p_ethernet_trace_t *); + + vnet_main_t *vnm = &vnet_main; + s = format (s, "P2P ethernet: %U -> %U", + format_vnet_sw_if_index_name, vnm, t->sw_if_index, + format_vnet_sw_if_index_name, vnm, t->p2pe_sw_if_index); + + return s; +} + +#define foreach_p2p_ethernet_error \ +_(HITS, "P2P ethernet incoming packets processed") + +typedef enum +{ +#define _(sym,str) P2PE_ERROR_##sym, + foreach_p2p_ethernet_error +#undef _ + P2PE_N_ERROR, +} p2p_ethernet_error_t; + +static char *p2p_ethernet_error_strings[] = { +#define _(sym,string) string, + foreach_p2p_ethernet_error +#undef _ +}; + +static uword +p2p_ethernet_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_trace = vlib_get_trace_count (vm, node); + u32 n_left_from, *from, *to_next; + u32 next_index; + u32 n_p2p_ethernet_packets = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0 = 0, next1 = 0; + u32 sw_if_index0, sw_if_index1; + ethernet_header_t *en0, *en1; + u32 rx0, rx1; + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + en0 = vlib_buffer_get_current (b0); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + en1 = vlib_buffer_get_current (b1); + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + vnet_feature_next (sw_if_index0, &next0, b0); + vnet_feature_next (sw_if_index1, &next1, b1); + + rx0 = p2p_ethernet_lookup (sw_if_index0, en0->src_address); + rx1 = p2p_ethernet_lookup (sw_if_index1, en1->src_address); + + if (rx0 != ~0) + { + /* Send pkt to p2p_ethernet RX interface */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = rx0; + n_p2p_ethernet_packets += 1; + + if (PREDICT_FALSE (n_trace > 0)) + { + p2p_ethernet_trace_t *t0; + vlib_trace_buffer (vm, node, next_index, b0, + 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + t0->sw_if_index = sw_if_index0; + t0->p2pe_sw_if_index = rx0; + } + } + if (rx1 != ~0) + { + /* Send pkt to p2p_ethernet RX interface */ + vnet_buffer (b1)->sw_if_index[VLIB_RX] = rx1; + n_p2p_ethernet_packets += 1; + + if (PREDICT_FALSE (n_trace > 0)) + { + p2p_ethernet_trace_t *t1; + vlib_trace_buffer (vm, node, next_index, b1, + 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t1 = vlib_add_trace (vm, node, b1, sizeof (*t1)); + t1->sw_if_index = sw_if_index1; + t1->p2pe_sw_if_index = rx1; + } + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi1, next1); + } + + while (n_left_from > 0 && 
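The new input node follows the usual VPP shape: a dual loop that handles two buffers per iteration while at least four remain (leaving headroom to prefetch the next pair), then a single-buffer cleanup loop. A toy rendering of just that loop structure, with prefetching and the speculative-enqueue bookkeeping omitted:

    #include <stdio.h>

    static void
    process (int pkt)
    {
      printf ("pkt %d\n", pkt);
    }

    int
    main (void)
    {
      int pkts[10], n = 10, i = 0;
      for (int k = 0; k < n; k++)
        pkts[k] = k;

      while (n - i >= 4)        /* dual loop: two at a time */
        {
          process (pkts[i + 0]);
          process (pkts[i + 1]);
          i += 2;
        }
      while (i < n)             /* single-packet cleanup loop */
        process (pkts[i++]);
      return 0;
    }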
n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = 0; + u32 sw_if_index0; + ethernet_header_t *en0; + u32 rx0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + en0 = vlib_buffer_get_current (b0); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + vnet_feature_next (sw_if_index0, &next0, b0); + + rx0 = p2p_ethernet_lookup (sw_if_index0, en0->src_address); + if (rx0 != ~0) + { + /* Send pkt to p2p_ethernet RX interface */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = rx0; + n_p2p_ethernet_packets += 1; + + if (PREDICT_FALSE (n_trace > 0)) + { + p2p_ethernet_trace_t *t0; + vlib_trace_buffer (vm, node, next_index, b0, + 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + t0->sw_if_index = sw_if_index0; + t0->p2pe_sw_if_index = rx0; + } + } + else + { + if (PREDICT_FALSE (n_trace > 0)) + { + node->flags |= VLIB_NODE_FLAG_TRACE; + } + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, p2p_ethernet_input_node.index, + P2PE_ERROR_HITS, n_p2p_ethernet_packets); + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (p2p_ethernet_input_node) = { + .function = p2p_ethernet_input_node_fn, + .name = "p2p-ethernet-input", + .vector_size = sizeof (u32), + .format_trace = format_p2p_ethernet_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(p2p_ethernet_error_strings), + .error_strings = p2p_ethernet_error_strings, + + .n_next_nodes = 1, + + /* edit / add dispositions here */ + .next_nodes = { + [0] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (p2p_ethernet_input_node, + p2p_ethernet_input_node_fn) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/interface.c b/src/vnet/interface.c index dad1f315..721259a7 100644 --- a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -1151,6 +1151,10 @@ vnet_hw_interface_compare (vnet_main_t * vnm, int vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index) { + vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); + if (si->type == VNET_SW_INTERFACE_TYPE_P2P) + return 1; + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); vnet_hw_interface_class_t *hc = vnet_get_hw_interface_class (vnm, hw->hw_class_index); diff --git a/src/vnet/interface.h b/src/vnet/interface.h index fb75ff34..5ca489db 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -505,6 +505,7 @@ typedef enum /* A sub-interface. */ VNET_SW_INTERFACE_TYPE_SUB, + VNET_SW_INTERFACE_TYPE_P2P, } vnet_sw_interface_type_t; typedef struct @@ -538,6 +539,17 @@ typedef struct } eth; } vnet_sub_interface_t; +typedef struct +{ + /* + * Subinterface ID. A number 0-N to uniquely identify + * this subinterface under the main interface + */ + u32 id; + u32 pool_index; + u8 client_mac[6]; +} vnet_p2p_sub_interface_t; + typedef enum { /* Always flood */ @@ -594,6 +606,9 @@ typedef struct /* VNET_SW_INTERFACE_TYPE_SUB. */ vnet_sub_interface_t sub; + + /* VNET_SW_INTERFACE_TYPE_P2P. 
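vnet_sw_interface_t now keeps the P2P state in the same anonymous union as the classic sub-interface state, discriminated by the type field; that discriminator is also what the vnet_sw_interface_is_p2p change above tests first. A minimal sketch of the tagged-union pattern (C11 anonymous union, simplified fields):

    #include <stdint.h>
    #include <stdio.h>

    typedef enum { IF_TYPE_HW, IF_TYPE_SUB, IF_TYPE_P2P } if_type_t;

    typedef struct
    {
      if_type_t type;           /* selects the valid union member */
      union
      {
        struct { uint32_t id; } sub;                      /* IF_TYPE_SUB */
        struct { uint32_t id; uint8_t client_mac[6]; } p2p; /* IF_TYPE_P2P */
      };
    } sw_if_sketch_t;

    static int
    is_p2p (const sw_if_sketch_t * si)
    {
      return si->type == IF_TYPE_P2P;
    }

    int
    main (void)
    {
      sw_if_sketch_t si = { .type = IF_TYPE_P2P,
                            .p2p = {.id = 1,.client_mac = {0xde, 0xad}} };
      printf ("p2p? %d, sub id %u\n", is_p2p (&si), si.p2p.id);
      return 0;
    }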
*/ + vnet_p2p_sub_interface_t p2p; }; vnet_flood_class_t flood_class; diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h index 26eef9b9..142bef57 100644 --- a/src/vnet/interface_funcs.h +++ b/src/vnet/interface_funcs.h @@ -73,7 +73,8 @@ always_inline vnet_sw_interface_t * vnet_get_sup_sw_interface (vnet_main_t * vnm, u32 sw_if_index) { vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index); - if (sw->type == VNET_SW_INTERFACE_TYPE_SUB) + if (sw->type == VNET_SW_INTERFACE_TYPE_SUB || + sw->type == VNET_SW_INTERFACE_TYPE_P2P) sw = vnet_get_sw_interface (vnm, sw->sup_sw_if_index); return sw; } diff --git a/test/test_p2p_ethernet.py b/test/test_p2p_ethernet.py new file mode 100644 index 00000000..37a1d18b --- /dev/null +++ b/test/test_p2p_ethernet.py @@ -0,0 +1,538 @@ +#!/usr/bin/env python +import random +import unittest +import datetime +import re + +from scapy.packet import Raw +from scapy.layers.l2 import Ether +from scapy.layers.inet import IP, UDP +from scapy.layers.inet6 import IPv6 + +from framework import VppTestCase, VppTestRunner, running_extended_tests +from vpp_sub_interface import VppP2PSubint +from vpp_ip_route import VppIpRoute, VppRoutePath +from util import mactobinary + + +class P2PEthernetAPI(VppTestCase): + """P2P Ethernet tests""" + + p2p_sub_ifs = [] + + @classmethod + def setUpClass(cls): + super(P2PEthernetAPI, cls).setUpClass() + + # Create pg interfaces + cls.create_pg_interfaces(range(4)) + + # Set up all interfaces + for i in cls.pg_interfaces: + i.admin_up() + + def create_p2p_ethernet(self, parent_if, sub_id, remote_mac): + p2p = VppP2PSubint(self, parent_if, sub_id, mactobinary(remote_mac)) + self.p2p_sub_ifs.append(p2p) + + def delete_p2p_ethernet(self, parent_if, remote_mac): + self.vapi.delete_p2pethernet_subif(parent_if.sw_if_index, + mactobinary(remote_mac)) + + def test_api(self): + """delete/create p2p subif""" + self.logger.info("FFP_TEST_START_0000") + + self.create_p2p_ethernet(self.pg0, 1, "de:ad:00:00:00:01") + self.create_p2p_ethernet(self.pg0, 2, "de:ad:00:00:00:02") + intfs = self.vapi.cli("show interface") + + self.assertNotEqual(intfs.find('pg0.1'), -1) + self.assertNotEqual(intfs.find('pg0.2'), -1) + self.assertEqual(intfs.find('pg0.5'), -1) + + # create pg2.5 subif + self.create_p2p_ethernet(self.pg0, 5, "de:ad:00:00:00:ff") + intfs = self.vapi.cli("show interface") + self.assertNotEqual(intfs.find('pg0.5'), -1) + # delete pg2.5 subif + self.delete_p2p_ethernet(self.pg0, "de:ad:00:00:00:ff") + + intfs = self.vapi.cli("show interface") + + self.assertNotEqual(intfs.find('pg0.1'), -1) + self.assertNotEqual(intfs.find('pg0.2'), -1) + self.assertEqual(intfs.find('pg0.5'), -1) + + self.logger.info("FFP_TEST_FINISH_0000") + + def test_p2p_subif_creation_1k(self): + """create 1k of p2p subifs""" + self.logger.info("FFP_TEST_START_0001") + + macs = [] + clients = 1000 + mac = int("dead00000000", 16) + + for i in range(1, clients+1): + try: + macs.append(':'.join(re.findall('..', '{:02x}'.format(mac+i)))) + self.vapi.create_p2pethernet_subif(self.pg2.sw_if_index, + mactobinary(macs[i-1]), + i) + except Exception: + print "Failed to create subif %d %s" % (i, macs[i-1]) + raise + + intfs = self.vapi.cli("show interface").split("\n") + count = 0 + for intf in intfs: + if intf.startswith('pg2.'): + count += 1 + self.assertEqual(count, clients) + + self.logger.info("FFP_TEST_FINISH_0001") + + @unittest.skipUnless(running_extended_tests(), "part of extended tests") + def test_p2p_subif_creation_10k(self): + """create 100k of p2p 
subifs""" + self.logger.info("FFP_TEST_START_0001") + + macs = [] + clients = 100000 + mac = int("dead00000000", 16) + + s_time = datetime.datetime.now() + for i in range(1, clients+1): + if i % 1000 == 0: + e_time = datetime.datetime.now() + print "Created 1000 subifs in %s secs" % (e_time - s_time) + s_time = e_time + try: + macs.append(':'.join(re.findall('..', '{:02x}'.format(mac+i)))) + self.vapi.create_p2pethernet_subif(self.pg3.sw_if_index, + mactobinary(macs[i-1]), + i) + except Exception: + print "Failed to create subif %d %s" % (i, macs[i-1]) + raise + + intfs = self.vapi.cli("show interface").split("\n") + count = 0 + for intf in intfs: + if intf.startswith('pg3.'): + count += 1 + self.assertEqual(count, clients) + + self.logger.info("FFP_TEST_FINISH_0001") + + +class P2PEthernetIPV6(VppTestCase): + """P2P Ethernet IPv6 tests""" + + p2p_sub_ifs = [] + packets = [] + + @classmethod + def setUpClass(cls): + super(P2PEthernetIPV6, cls).setUpClass() + + # Create pg interfaces + cls.create_pg_interfaces(range(3)) + + # Packet sizes + cls.pg_if_packet_sizes = [64, 512, 1518, 9018] + + # Set up all interfaces + for i in cls.pg_interfaces: + i.admin_up() + + cls.pg0.generate_remote_hosts(3) + cls.pg0.configure_ipv6_neighbors() + + cls.pg1.config_ip6() + cls.pg1.generate_remote_hosts(3) + cls.pg1.configure_ipv6_neighbors() + cls.pg1.disable_ipv6_ra() + + def setUp(self): + super(P2PEthernetIPV6, self).setUp() + for p in self.packets: + self.packets.remove(p) + self.create_p2p_ethernet(self.pg0, 1, self.pg0._remote_hosts[0].mac) + self.create_p2p_ethernet(self.pg0, 2, self.pg0._remote_hosts[1].mac) + self.p2p_sub_ifs[0].config_ip6() + self.p2p_sub_ifs[1].config_ip6() + self.vapi.cli("trace add p2p-ethernet-input 50") + + def tearDown(self): + self.delete_p2p_ethernet(self.pg0, self.pg0._remote_hosts[0].mac) + self.delete_p2p_ethernet(self.pg0, self.pg0._remote_hosts[1].mac) + super(P2PEthernetIPV6, self).tearDown() + + def create_p2p_ethernet(self, parent_if, sub_id, remote_mac): + p2p = VppP2PSubint(self, parent_if, sub_id, mactobinary(remote_mac)) + p2p.admin_up() + p2p.config_ip6() + p2p.disable_ipv6_ra() + self.p2p_sub_ifs.append(p2p) + + def delete_p2p_ethernet(self, parent_if, remote_mac): + self.vapi.delete_p2pethernet_subif(parent_if.sw_if_index, + mactobinary(remote_mac)) + + def create_stream(self, src_mac=None, dst_mac=None, + src_ip=None, dst_ip=None, size=None): + pkt_size = size + if size is None: + pkt_size = random.choice(self.pg_if_packet_sizes) + p = Ether(src=src_mac, dst=dst_mac) + p /= IPv6(src=src_ip, dst=dst_ip) + p /= (UDP(sport=1234, dport=4321) / Raw('\xa5' * 20)) + self.extend_packet(p, pkt_size) + return p + + def send_packets(self, src_if=None, dst_if=None, packets=None, count=None): + self.pg_enable_capture([dst_if]) + if packets is None: + packets = self.packets + src_if.add_stream(packets) + self.pg_start() + if count is None: + count = len(packets) + return dst_if.get_capture(count) + + def verify_counters(self, counter_id, expected_value): + counters = self.vapi.cli("sh errors").split('\n') + counter_value = -1 + for i in range(1, len(counters)-1): + results = counters[i].split() + if results[1] == counter_id: + counter_value = int(results[0]) + break + self.assertEqual(counter_value, expected_value) + + def test_no_p2p_subif(self): + """standard routing without p2p subinterfaces""" + self.logger.info("FFP_TEST_START_0001") + + route_8000 = VppIpRoute(self, "8000::", 64, + [VppRoutePath(self.pg0.remote_ip6, + self.pg0.sw_if_index, + is_ip6=1)], + 
is_ip6=1) + route_8000.add_vpp_config() + + self.packets = [(Ether(dst=self.pg1.local_mac, + src=self.pg1.remote_mac) / + IPv6(src="3001::1", dst="8000::100") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100))] + self.send_packets(self.pg1, self.pg0) + + self.logger.info("FFP_TEST_FINISH_0001") + + def test_ip6_rx_p2p_subif(self): + """receive ipv6 packet via p2p subinterface""" + self.logger.info("FFP_TEST_START_0002") + + route_9001 = VppIpRoute(self, "9001::", 64, + [VppRoutePath(self.pg1.remote_ip6, + self.pg1.sw_if_index, + is_ip6=1)], + is_ip6=1) + route_9001.add_vpp_config() + + self.packets.append( + self.create_stream(src_mac=self.pg0._remote_hosts[0].mac, + dst_mac=self.pg0.local_mac, + src_ip=self.p2p_sub_ifs[0].remote_ip6, + dst_ip="9001::100")) + + self.send_packets(self.pg0, self.pg1, self.packets) + self.verify_counters('p2p-ethernet-input', 1) + + route_9001.remove_vpp_config() + self.logger.info("FFP_TEST_FINISH_0002") + + def test_ip6_rx_p2p_subif_route(self): + """route rx ip6 packet not matching p2p subinterface""" + self.logger.info("FFP_TEST_START_0003") + + self.pg0.config_ip6() + + route_3 = VppIpRoute(self, "9000::", 64, + [VppRoutePath(self.pg1._remote_hosts[0].ip6, + self.pg1.sw_if_index, + is_ip6=1)], + is_ip6=1) + route_3.add_vpp_config() + + self.packets.append( + self.create_stream(src_mac="02:03:00:00:ff:ff", + dst_mac=self.pg0.local_mac, + src_ip="a000::100", + dst_ip="9000::100")) + + self.send_packets(self.pg0, self.pg1) + + self.pg0.unconfig_ip6() + + route_3.remove_vpp_config() + + self.logger.info("FFP_TEST_FINISH_0003") + + def test_ip6_rx_p2p_subif_drop(self): + """drop rx packet not matching p2p subinterface""" + self.logger.info("FFP_TEST_START_0004") + + route_9001 = VppIpRoute(self, "9000::", 64, + [VppRoutePath(self.pg1._remote_hosts[0].ip6, + self.pg1.sw_if_index, + is_ip6=1)], + is_ip6=1) + route_9001.add_vpp_config() + + self.packets.append( + self.create_stream(src_mac="02:03:00:00:ff:ff", + dst_mac=self.pg0.local_mac, + src_ip="a000::100", + dst_ip="9000::100")) + + # no packet received + self.send_packets(self.pg0, self.pg1, count=0) + self.logger.info("FFP_TEST_FINISH_0004") + + def test_ip6_tx_p2p_subif(self): + """send packet via p2p subinterface""" + self.logger.info("FFP_TEST_START_0005") + + route_8000 = VppIpRoute(self, "8000::", 64, + [VppRoutePath(self.pg0.remote_ip6, + self.pg0.sw_if_index, + is_ip6=1)], + is_ip6=1) + route_8000.add_vpp_config() + route_8001 = VppIpRoute(self, "8001::", 64, + [VppRoutePath(self.p2p_sub_ifs[0].remote_ip6, + self.p2p_sub_ifs[0].sw_if_index, + is_ip6=1)], + is_ip6=1) + route_8001.add_vpp_config() + route_8002 = VppIpRoute(self, "8002::", 64, + [VppRoutePath(self.p2p_sub_ifs[1].remote_ip6, + self.p2p_sub_ifs[1].sw_if_index, + is_ip6=1)], + is_ip6=1) + route_8002.add_vpp_config() + + for i in range(0, 3): + self.packets.append( + self.create_stream(src_mac=self.pg1.remote_mac, + dst_mac=self.pg1.local_mac, + src_ip=self.pg1.remote_ip6, + dst_ip="800%d::100" % i)) + + self.send_packets(self.pg1, self.pg0, count=3) + + route_8000.remove_vpp_config() + route_8001.remove_vpp_config() + route_8002.remove_vpp_config() + + self.logger.info("FFP_TEST_FINISH_0005") + + def test_ip6_tx_p2p_subif_drop(self): + """drop tx ip6 packet not matching p2p subinterface""" + self.logger.info("FFP_TEST_START_0006") + + self.packets.append( + self.create_stream(src_mac="02:03:00:00:ff:ff", + dst_mac=self.pg0.local_mac, + src_ip="a000::100", + dst_ip="9000::100")) + + # no packet received + self.send_packets(self.pg0, 
self.pg1, count=0) + self.logger.info("FFP_TEST_FINISH_0006") + + +class P2PEthernetIPV4(VppTestCase): + """P2P Ethernet IPv4 tests""" + + p2p_sub_ifs = [] + packets = [] + + @classmethod + def setUpClass(cls): + super(P2PEthernetIPV4, cls).setUpClass() + + # Create pg interfaces + cls.create_pg_interfaces(range(3)) + + # Packet sizes + cls.pg_if_packet_sizes = [64, 512, 1518, 9018] + + # Set up all interfaces + for i in cls.pg_interfaces: + i.admin_up() + + cls.pg0.config_ip4() + cls.pg0.generate_remote_hosts(5) + cls.pg0.configure_ipv4_neighbors() + + cls.pg1.config_ip4() + cls.pg1.generate_remote_hosts(5) + cls.pg1.configure_ipv4_neighbors() + + def setUp(self): + super(P2PEthernetIPV4, self).setUp() + for p in self.packets: + self.packets.remove(p) + self.create_p2p_ethernet(self.pg0, 1, self.pg0._remote_hosts[0].mac) + self.create_p2p_ethernet(self.pg0, 2, self.pg0._remote_hosts[1].mac) + self.p2p_sub_ifs[0].config_ip4() + self.p2p_sub_ifs[1].config_ip4() + self.vapi.cli("trace add p2p-ethernet-input 50") + + def tearDown(self): + self.delete_p2p_ethernet(self.pg0, self.pg0._remote_hosts[0].mac) + self.delete_p2p_ethernet(self.pg0, self.pg0._remote_hosts[1].mac) + super(P2PEthernetIPV4, self).tearDown() + + def create_stream(self, src_mac=None, dst_mac=None, + src_ip=None, dst_ip=None, size=None): + pkt_size = size + if size is None: + pkt_size = random.choice(self.pg_if_packet_sizes) + p = Ether(src=src_mac, dst=dst_mac) + p /= IP(src=src_ip, dst=dst_ip) + p /= (UDP(sport=1234, dport=4321) / Raw('\xa5' * 20)) + self.extend_packet(p, pkt_size) + return p + + def send_packets(self, src_if=None, dst_if=None, packets=None, count=None): + self.pg_enable_capture([dst_if]) + if packets is None: + packets = self.packets + src_if.add_stream(packets) + self.pg_start() + if count is None: + count = len(packets) + return dst_if.get_capture(count) + + def verify_counters(self, counter_id, expected_value): + counters = self.vapi.cli("sh errors").split('\n') + counter_value = -1 + for i in range(1, len(counters)-1): + results = counters[i].split() + if results[1] == counter_id: + counter_value = int(results[0]) + break + self.assertEqual(counter_value, expected_value) + + def create_p2p_ethernet(self, parent_if, sub_id, remote_mac): + p2p = VppP2PSubint(self, parent_if, sub_id, mactobinary(remote_mac)) + p2p.admin_up() + p2p.config_ip4() + self.p2p_sub_ifs.append(p2p) + + def delete_p2p_ethernet(self, parent_if, remote_mac): + self.vapi.delete_p2pethernet_subif(parent_if.sw_if_index, + mactobinary(remote_mac)) + + def test_ip4_rx_p2p_subif(self): + """receive ipv4 packet via p2p subinterface""" + self.logger.info("FFP_TEST_START_0002") + + route_9000 = VppIpRoute(self, "9.0.0.0", 16, + [VppRoutePath(self.pg1.remote_ip4, + self.pg1.sw_if_index)]) + route_9000.add_vpp_config() + + self.packets.append( + self.create_stream(src_mac=self.pg0._remote_hosts[0].mac, + dst_mac=self.pg0.local_mac, + src_ip=self.p2p_sub_ifs[0].remote_ip4, + dst_ip="9.0.0.100")) + + self.send_packets(self.pg0, self.pg1, self.packets) + + self.verify_counters('p2p-ethernet-input', 1) + + route_9000.remove_vpp_config() + self.logger.info("FFP_TEST_FINISH_0002") + + def test_ip4_rx_p2p_subif_route(self): + """route rx packet not matching p2p subinterface""" + self.logger.info("FFP_TEST_START_0003") + + route_9001 = VppIpRoute(self, "9.0.0.0", 24, + [VppRoutePath(self.pg1.remote_ip4, + self.pg1.sw_if_index)]) + route_9001.add_vpp_config() + + self.packets.append( + self.create_stream(src_mac="02:01:00:00:ff:ff", + 
dst_mac=self.pg0.local_mac, + src_ip="8.0.0.100", + dst_ip="9.0.0.100")) + + self.send_packets(self.pg0, self.pg1) + + route_9001.remove_vpp_config() + + self.logger.info("FFP_TEST_FINISH_0003") + + def test_ip4_tx_p2p_subif(self): + """send ip4 packet via p2p subinterface""" + self.logger.info("FFP_TEST_START_0005") + + route_9100 = VppIpRoute(self, "9.1.0.100", 24, + [VppRoutePath(self.pg0.remote_ip4, + self.pg0.sw_if_index, + )]) + route_9100.add_vpp_config() + route_9200 = VppIpRoute(self, "9.2.0.100", 24, + [VppRoutePath(self.p2p_sub_ifs[0].remote_ip4, + self.p2p_sub_ifs[0].sw_if_index, + )]) + route_9200.add_vpp_config() + route_9300 = VppIpRoute(self, "9.3.0.100", 24, + [VppRoutePath(self.p2p_sub_ifs[1].remote_ip4, + self.p2p_sub_ifs[1].sw_if_index + )]) + route_9300.add_vpp_config() + + for i in range(0, 3): + self.packets.append( + self.create_stream(src_mac=self.pg1.remote_mac, + dst_mac=self.pg1.local_mac, + src_ip=self.pg1.remote_ip4, + dst_ip="9.%d.0.100" % (i+1))) + + self.send_packets(self.pg1, self.pg0) + + # route_7000.remove_vpp_config() + route_9100.remove_vpp_config() + route_9200.remove_vpp_config() + route_9300.remove_vpp_config() + + self.logger.info("FFP_TEST_FINISH_0005") + + def test_ip4_tx_p2p_subif_drop(self): + """drop tx ip4 packet not matching p2p subinterface""" + self.logger.info("FFP_TEST_START_0006") + + self.packets.append( + self.create_stream(src_mac="02:01:00:00:ff:ff", + dst_mac=self.pg0.local_mac, + src_ip="8.0.0.100", + dst_ip="9.0.0.100")) + + # no packet received + self.send_packets(self.pg0, self.pg1, count=0) + self.logger.info("FFP_TEST_FINISH_0006") + + +if __name__ == '__main__': + unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 204d9e31..801a6c2d 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -601,6 +601,19 @@ class VppPapiProvider(object): 'outer_vlan_id': outer_vlan, 'inner_vlan_id': inner_vlan}) + def create_p2pethernet_subif(self, sw_if_index, remote_mac, subif_id): + """Create p2p ethernet subinterface + + :param sw_if_index: main (parent) interface + :param remote_mac: client (remote) mac address + + """ + return self.api( + self.papi.p2p_ethernet_add, + {'parent_if_index': sw_if_index, + 'remote_mac': remote_mac, + 'subif_id': subif_id}) + def delete_subif(self, sw_if_index): """Delete subinterface @@ -609,6 +622,18 @@ class VppPapiProvider(object): return self.api(self.papi.delete_subif, {'sw_if_index': sw_if_index}) + def delete_p2pethernet_subif(self, sw_if_index, remote_mac): + """Delete p2p ethernet subinterface + + :param sw_if_index: main (parent) interface + :param remote_mac: client (remote) mac address + + """ + return self.api( + self.papi.p2p_ethernet_del, + {'parent_if_index': sw_if_index, + 'remote_mac': remote_mac}) + def create_vlan_subif(self, sw_if_index, vlan): """ diff --git a/test/vpp_sub_interface.py b/test/vpp_sub_interface.py index dcd82da2..cabee88d 100644 --- a/test/vpp_sub_interface.py +++ b/test/vpp_sub_interface.py @@ -188,3 +188,26 @@ class VppDot1ADSubint(VppSubInterface): def remove_dot1_layer(self, packet): return self.remove_dot1ad_layer(packet, self.outer_vlan, self.inner_vlan) + + +class VppP2PSubint(VppSubInterface): + + def __init__(self, test, parent, sub_id, remote_mac): + r = test.vapi.create_p2pethernet_subif(parent.sw_if_index, + remote_mac, sub_id) + self._sw_if_index = r.sw_if_index + super(VppP2PSubint, self).__init__(test, parent, sub_id) + + def add_dot1_layer(self, packet): + return packet + + 
def remove_dot1_layer(self, packet): + return packet + + def create_arp_req(self): + packet = VppPGInterface.create_arp_req(self) + return packet + + def create_ndp_req(self): + packet = VppPGInterface.create_ndp_req(self) + return packet -- cgit 1.2.3-korg From a07bd708002a9c3d3c584f0d692deed1a758b517 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Mon, 7 Aug 2017 07:53:49 -0700 Subject: Dedicated SW Interface Event Change-Id: I06a10a4291e61aec3f1396d2514ed6fe3901897a Signed-off-by: Neale Ranns Signed-off-by: Marek Gradzki --- src/vat/api_format.c | 17 ++++++----------- src/vnet/devices/virtio/vhost_user_api.c | 6 +++--- src/vnet/interface.api | 19 ++++++++++++++++--- src/vnet/interface_api.c | 8 ++++---- src/vnet/unix/tap_api.c | 6 +++--- .../CallbackJVppFacadeNotificationExample.java | 2 +- .../examples/CallbackNotificationApiExample.java | 18 ++++++------------ .../core/examples/FutureApiNotificationExample.java | 2 +- .../fd/vpp/jvpp/core/examples/NotificationUtils.java | 5 ++--- src/vpp-api/java/jvpp/gen/jvppgen/util.py | 3 +-- src/vpp/api/custom_dump.c | 20 ++++++++++++++++++++ test/vpp_papi_provider.py | 9 ++------- 12 files changed, 65 insertions(+), 50 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 009cf173..ddcd5621 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -972,8 +972,8 @@ static void vl_api_sw_interface_details_t_handler_json } #if VPP_API_TEST_BUILTIN == 0 -static void vl_api_sw_interface_set_flags_t_handler - (vl_api_sw_interface_set_flags_t * mp) +static void vl_api_sw_interface_event_t_handler + (vl_api_sw_interface_event_t * mp) { vat_main_t *vam = &vat_main; if (vam->interface_event_display) @@ -984,8 +984,8 @@ static void vl_api_sw_interface_set_flags_t_handler } #endif -static void vl_api_sw_interface_set_flags_t_handler_json - (vl_api_sw_interface_set_flags_t * mp) +static void vl_api_sw_interface_event_t_handler_json + (vl_api_sw_interface_event_t * mp) { /* JSON output not supported */ } @@ -5026,7 +5026,7 @@ _(LLDP_CONFIG_REPLY, lldp_config_reply) \ _(SW_INTERFACE_SET_LLDP_REPLY, sw_interface_set_lldp_reply) #define foreach_standalone_reply_msg \ -_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ +_(SW_INTERFACE_EVENT, sw_interface_event) \ _(VNET_INTERFACE_SIMPLE_COUNTERS, vnet_interface_simple_counters) \ _(VNET_INTERFACE_COMBINED_COUNTERS, vnet_interface_combined_counters) \ _(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ @@ -5772,7 +5772,7 @@ api_sw_interface_set_flags (vat_main_t * vam) vl_api_sw_interface_set_flags_t *mp; u32 sw_if_index; u8 sw_if_index_set = 0; - u8 admin_up = 0, link_up = 0; + u8 admin_up = 0; int ret; /* Parse args required to build the message */ @@ -5782,10 +5782,6 @@ api_sw_interface_set_flags (vat_main_t * vam) admin_up = 1; else if (unformat (i, "admin-down")) admin_up = 0; - else if (unformat (i, "link-up")) - link_up = 1; - else if (unformat (i, "link-down")) - link_up = 0; else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) sw_if_index_set = 1; @@ -5805,7 +5801,6 @@ api_sw_interface_set_flags (vat_main_t * vam) M (SW_INTERFACE_SET_FLAGS, mp); mp->sw_if_index = ntohl (sw_if_index); mp->admin_up_down = admin_up; - mp->link_up_down = link_up; /* send it... 
*/ S (mp); diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c index 8dbd032b..3f0aac9e 100644 --- a/src/vnet/devices/virtio/vhost_user_api.c +++ b/src/vnet/devices/virtio/vhost_user_api.c @@ -52,11 +52,11 @@ _(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) * WARNING: replicated pending api refactor completion */ static void -send_sw_interface_flags_deleted (vpe_api_main_t * am, +send_sw_interface_event_deleted (vpe_api_main_t * am, unix_shared_memory_queue_t * q, u32 sw_if_index) { - vl_api_sw_interface_set_flags_t *mp; + vl_api_sw_interface_event_t *mp; mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); @@ -143,7 +143,7 @@ vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp) return; vnet_clear_sw_interface_tag (vnm, sw_if_index); - send_sw_interface_flags_deleted (vam, q, sw_if_index); + send_sw_interface_event_deleted (vam, q, sw_if_index); } } diff --git a/src/vnet/interface.api b/src/vnet/interface.api index 14ff6d5a..a1890706 100644 --- a/src/vnet/interface.api +++ b/src/vnet/interface.api @@ -4,7 +4,6 @@ @param sw_if_index - index of the interface to set flags on @param admin_up_down - set the admin state, 1 = up, 0 = down @param link_up_down - Oper state sent on change event, not used in config. - @param deleted - interface was deleted */ autoreply define sw_interface_set_flags { @@ -13,8 +12,6 @@ autoreply define sw_interface_set_flags u32 sw_if_index; /* 1 = up, 0 = down */ u8 admin_up_down; - u8 link_up_down; - u8 deleted; }; /** \brief Set interface MTU @@ -31,6 +28,22 @@ autoreply define sw_interface_set_mtu u16 mtu; }; +/** \brief Interface Event generated by want_interface_events + @param context - sender context, to match reply w/ request + @param sw_if_index - index of the interface of the event + @param admin_up_down - The administrative state; 1 = up, 0 = down + @param link_up_down - The operational state; 1 = up, 0 = down + @param deleted - interface was deleted +*/ +define sw_interface_event +{ + u32 context; + u32 sw_if_index; + u8 admin_up_down; + u8 link_up_down; + u8 deleted; +}; + /** \brief Register for interface events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index ab0b255a..c56fef68 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -571,18 +571,18 @@ event_data_cmp (void *a1, void *a2) } static void -send_sw_interface_flags (vpe_api_main_t * am, +send_sw_interface_event (vpe_api_main_t * am, unix_shared_memory_queue_t * q, vnet_sw_interface_t * swif) { - vl_api_sw_interface_set_flags_t *mp; + vl_api_sw_interface_event_t *mp; vnet_main_t *vnm = am->vnet_main; vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, swif->sw_if_index); mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_FLAGS); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT); mp->sw_if_index = ntohl (swif->sw_if_index); mp->admin_up_down = (swif->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 
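The heart of this commit is visible in the two message definitions above: sw_interface_set_flags shrinks to a pure configuration request carrying only the admin state, while the new sw_interface_event carries the full observed state, admin plus link plus deleted. A sketch of the resulting division of fields, with hypothetical structs rather than the generated API types:

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch: config request vs. asynchronous state event after the split. */
    struct set_flags_req        /* client -> VPP: intent only */
    {
      uint32_t sw_if_index;
      uint8_t admin_up_down;
    };

    struct if_event             /* VPP -> client: observed state */
    {
      uint32_t sw_if_index;
      uint8_t admin_up_down;
      uint8_t link_up_down;
      uint8_t deleted;
    };

    int
    main (void)
    {
      struct if_event e = { 3, 1, 0, 0 };       /* admin up, link still down */
      printf ("if %u admin %u link %u deleted %u\n",
              e.sw_if_index, e.admin_up_down, e.link_up_down, e.deleted);
      return 0;
    }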
1 : 0; @@ -638,7 +638,7 @@ link_state_process (vlib_main_t * vm, event_data[i])) { swif = vnet_get_sw_interface (vnm, event_data[i]); - send_sw_interface_flags (vam, q, swif); + send_sw_interface_event (vam, q, swif); } } })); diff --git a/src/vnet/unix/tap_api.c b/src/vnet/unix/tap_api.c index 9b8d52a6..7e812c4f 100644 --- a/src/vnet/unix/tap_api.c +++ b/src/vnet/unix/tap_api.c @@ -59,11 +59,11 @@ _(SW_INTERFACE_TAP_DUMP, sw_interface_tap_dump) * WARNING: replicated pending api refactor completion */ static void -send_sw_interface_flags_deleted (vpe_api_main_t * am, +send_sw_interface_event_deleted (vpe_api_main_t * am, unix_shared_memory_queue_t * q, u32 sw_if_index) { - vl_api_sw_interface_set_flags_t *mp; + vl_api_sw_interface_event_t *mp; mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); @@ -196,7 +196,7 @@ vl_api_tap_delete_t_handler (vl_api_tap_delete_t * mp) vl_msg_api_send_shmem (q, (u8 *) & rmp); if (!rv) - send_sw_interface_flags_deleted (vam, q, sw_if_index); + send_sw_interface_event_deleted (vam, q, sw_if_index); } static void diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java index b8b108b6..308dad9f 100644 --- a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java +++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java @@ -36,7 +36,7 @@ public class CallbackJVppFacadeNotificationExample { System.out.println("Successfully connected to VPP"); final AutoCloseable notificationListenerReg = - jvppCallbackFacade.getNotificationRegistry().registerSwInterfaceSetFlagsNotificationCallback( + jvppCallbackFacade.getNotificationRegistry().registerSwInterfaceEventNotificationCallback( NotificationUtils::printNotification ); diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java index 6ee2de31..7d56b7ea 100644 --- a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java +++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java @@ -26,10 +26,9 @@ import io.fd.vpp.jvpp.JVppRegistry; import io.fd.vpp.jvpp.JVppRegistryImpl; import io.fd.vpp.jvpp.VppCallbackException; import io.fd.vpp.jvpp.core.JVppCoreImpl; -import io.fd.vpp.jvpp.core.callback.SwInterfaceSetFlagsCallback; -import io.fd.vpp.jvpp.core.callback.SwInterfaceSetFlagsNotificationCallback; +import io.fd.vpp.jvpp.core.callback.SwInterfaceEventNotificationCallback; import io.fd.vpp.jvpp.core.callback.WantInterfaceEventsCallback; -import io.fd.vpp.jvpp.core.dto.SwInterfaceSetFlagsNotification; +import io.fd.vpp.jvpp.core.dto.SwInterfaceEventNotification; import io.fd.vpp.jvpp.core.dto.SwInterfaceSetFlagsReply; import io.fd.vpp.jvpp.core.dto.WantInterfaceEventsReply; @@ -65,12 +64,12 @@ public class CallbackNotificationApiExample { testCallbackApi(); } - private static class TestCallback implements SwInterfaceSetFlagsNotificationCallback, - WantInterfaceEventsCallback, SwInterfaceSetFlagsCallback { + private static class TestCallback implements SwInterfaceEventNotificationCallback, + WantInterfaceEventsCallback { @Override - public void onSwInterfaceSetFlagsNotification( - final SwInterfaceSetFlagsNotification msg) { + public void 
onSwInterfaceEventNotification( + final SwInterfaceEventNotification msg) { printNotification(msg); } @@ -79,11 +78,6 @@ public class CallbackNotificationApiExample { System.out.println("Interface notification stream updated"); } - @Override - public void onSwInterfaceSetFlagsReply(final SwInterfaceSetFlagsReply swInterfaceSetFlagsReply) { - System.out.println("Interface flags set successfully"); - } - @Override public void onError(VppCallbackException ex) { System.out.printf("Received onError exception in getNodeIndexCallback: call=%s, reply=%d, context=%d%n", diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java index f445dcc8..7460401e 100644 --- a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java +++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java @@ -33,7 +33,7 @@ public class FutureApiNotificationExample { final FutureJVppCoreFacade jvppFacade = new FutureJVppCoreFacade(registry, new JVppCoreImpl()); final AutoCloseable notificationListenerReg = jvppFacade.getNotificationRegistry() - .registerSwInterfaceSetFlagsNotificationCallback(NotificationUtils::printNotification)) { + .registerSwInterfaceEventNotificationCallback(NotificationUtils::printNotification)) { System.out.println("Successfully connected to VPP"); jvppFacade.wantInterfaceEvents(getEnableInterfaceNotificationsReq()).toCompletableFuture().get(); System.out.println("Interface events started"); diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java index 7791cafe..d3f9dd2c 100644 --- a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java +++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java @@ -18,14 +18,14 @@ package io.fd.vpp.jvpp.core.examples; import java.io.PrintStream; import io.fd.vpp.jvpp.core.dto.SwInterfaceSetFlags; -import io.fd.vpp.jvpp.core.dto.SwInterfaceSetFlagsNotification; +import io.fd.vpp.jvpp.core.dto.SwInterfaceEventNotification; import io.fd.vpp.jvpp.core.dto.WantInterfaceEvents; final class NotificationUtils { private NotificationUtils() {} - static PrintStream printNotification(final SwInterfaceSetFlagsNotification msg) { + static PrintStream printNotification(final SwInterfaceEventNotification msg) { return System.out.printf("Received interface notification: ifc: %s%n", msg); } @@ -33,7 +33,6 @@ final class NotificationUtils { final SwInterfaceSetFlags swInterfaceSetFlags = new SwInterfaceSetFlags(); swInterfaceSetFlags.swIfIndex = 0; swInterfaceSetFlags.adminUpDown = 1; - swInterfaceSetFlags.deleted = 0; return swInterfaceSetFlags; } diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/util.py b/src/vpp-api/java/jvpp/gen/jvppgen/util.py index 947fc31d..42394419 100644 --- a/src/vpp-api/java/jvpp/gen/jvppgen/util.py +++ b/src/vpp-api/java/jvpp/gen/jvppgen/util.py @@ -161,7 +161,6 @@ unconventional_naming_rep_req = { # # FIXME no convention in the naming of events (notifications) in vpe.api notifications_message_suffixes = ("event", "counters") -notification_messages_reused = ["sw_interface_set_flags"] # messages that must be ignored. 
These messages are INSUFFICIENTLY marked as disabled in vpe.api # FIXME @@ -170,7 +169,7 @@ ignored_messages = [] def is_notification(name): """ Returns true if the structure is a notification regardless of its no other use """ - return is_just_notification(name) or name.lower() in notification_messages_reused + return is_just_notification(name) def is_just_notification(name): diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index a57799cb..0342476a 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -103,6 +103,22 @@ static void *vl_api_sw_interface_set_flags_t_print s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (mp->admin_up_down) + s = format (s, "admin-up "); + else + s = format (s, "admin-down "); + + FINISH; +} + +static void *vl_api_sw_interface_event_t_print + (vl_api_sw_interface_event_t * mp, void *handle) +{ + u8 *s; + s = format (0, "SCRIPT: sw_interface_event "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (mp->admin_up_down) s = format (s, "admin-up "); else @@ -113,6 +129,9 @@ static void *vl_api_sw_interface_set_flags_t_print else s = format (s, "link-down"); + if (mp->deleted) + s = format (s, " deleted"); + FINISH; } @@ -3010,6 +3029,7 @@ foreach_custom_print_no_arg_function _(CREATE_LOOPBACK, create_loopback) \ _(CREATE_LOOPBACK_INSTANCE, create_loopback_instance) \ _(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ +_(SW_INTERFACE_EVENT, sw_interface_event) \ _(SW_INTERFACE_ADD_DEL_ADDRESS, sw_interface_add_del_address) \ _(SW_INTERFACE_SET_TABLE, sw_interface_set_table) \ _(SW_INTERFACE_SET_MPLS_ENABLE, sw_interface_set_mpls_enable) \ diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 4d017c1f..c99d4583 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -550,21 +550,16 @@ class VppPapiProvider(object): 'tag1': tag1, 'tag2': tag2}) - def sw_interface_set_flags(self, sw_if_index, admin_up_down, - link_up_down=0, deleted=0): + def sw_interface_set_flags(self, sw_if_index, admin_up_down): """ :param admin_up_down: :param sw_if_index: - :param link_up_down: (Default value = 0) - :param deleted: (Default value = 0) """ return self.api(self.papi.sw_interface_set_flags, {'sw_if_index': sw_if_index, - 'admin_up_down': admin_up_down, - 'link_up_down': link_up_down, - 'deleted': deleted}) + 'admin_up_down': admin_up_down}) def create_subif(self, sw_if_index, sub_id, outer_vlan, inner_vlan, no_tags=0, one_tag=0, two_tags=0, dot1ad=0, exact_match=0, -- cgit 1.2.3-korg From 0714541603ef9dd80d724c38984e723d111e2886 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Fri, 18 Aug 2017 02:34:28 -0700 Subject: Use correct msg ID in the sw-interface-event from TAP and VHOST Change-Id: I0124fa264f7f390fc7cd9722da59be03116831c5 Signed-off-by: Neale Ranns --- src/vnet/devices/virtio/vhost_user_api.c | 2 +- src/vnet/unix/tap_api.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c index 3f0aac9e..78599241 100644 --- a/src/vnet/devices/virtio/vhost_user_api.c +++ b/src/vnet/devices/virtio/vhost_user_api.c @@ -60,7 +60,7 @@ send_sw_interface_event_deleted (vpe_api_main_t * am, mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_FLAGS); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT); mp->sw_if_index = ntohl (sw_if_index); mp->admin_up_down = 0; diff --git 
a/src/vnet/unix/tap_api.c b/src/vnet/unix/tap_api.c index 7e812c4f..3a64e100 100644 --- a/src/vnet/unix/tap_api.c +++ b/src/vnet/unix/tap_api.c @@ -67,7 +67,7 @@ send_sw_interface_event_deleted (vpe_api_main_t * am, mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_FLAGS); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT); mp->sw_if_index = ntohl (sw_if_index); mp->admin_up_down = 0; -- cgit 1.2.3-korg From 4ba75f54461c31fbb32b8ef9c05fe38da74cb295 Mon Sep 17 00:00:00 2001 From: Steven Date: Mon, 28 Aug 2017 11:34:47 -0700 Subject: vhost: Remove operation mode in the API create/delete/modify vhost_user APIs no longer support the operation mode (polling/interrupt/adaptive). They are now done via the generic interface. Change-Id: I9e9bd503f9b56c953ecd2b271b3e2007da20c72a Signed-off-by: Steven --- src/vnet/devices/virtio/vhost_user.api | 6 ------ 1 file changed, 6 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api index df7ce7ab..28d5e891 100644 --- a/src/vnet/devices/virtio/vhost_user.api +++ b/src/vnet/devices/virtio/vhost_user.api @@ -19,7 +19,6 @@ @param sock_filename - unix socket filename, used to speak with frontend @param use_custom_mac - enable or disable the use of the provided hardware address @param mac_address - hardware address to use if 'use_custom_mac' is set - @param operation_mode - polling=0, interrupt=1, or adaptive=2 */ define create_vhost_user_if { @@ -32,7 +31,6 @@ define create_vhost_user_if u8 use_custom_mac; u8 mac_address[6]; u8 tag[64]; - u8 operation_mode; }; /** \brief vhost-user interface create response @@ -51,7 +49,6 @@ define create_vhost_user_if_reply @param client_index - opaque cookie to identify the sender @param is_server - our side is socket server @param sock_filename - unix socket filename, used to speak with frontend - @param operation_mode - polling=0, interrupt=1, or adaptive=2 */ autoreply define modify_vhost_user_if { @@ -62,7 +59,6 @@ autoreply define modify_vhost_user_if u8 sock_filename[256]; u8 renumber; u32 custom_dev_instance; - u8 operation_mode; }; /** \brief vhost-user interface delete request @@ -83,7 +79,6 @@ autoreply define delete_vhost_user_if @param is_server - vhost-user server socket @param sock_filename - socket filename @param num_regions - number of used memory regions - @param operation_mode - polling=0, interrupt=1, or adaptive=2 */ define sw_interface_vhost_user_details { @@ -96,7 +91,6 @@ define sw_interface_vhost_user_details u8 sock_filename[256]; u32 num_regions; i32 sock_errno; - u8 operation_mode; }; define sw_interface_vhost_user_dump -- cgit 1.2.3-korg From 978788903b54a9ecdab045c67f94c4ba79633a9f Mon Sep 17 00:00:00 2001 From: Steven Date: Tue, 29 Aug 2017 09:23:26 -0700 Subject: vhost: Cache qsz_mask instead of qsz in vhost_user_vring_t In the data path, we grab qsz from vhost_user_vring_t to compute qsz_mask and store it in a stack variable to use on many occasions. We never use qsz for any meaningful purpose. It is more useful to cache qsz_mask in vhost_user_vring_t to avoid the needless computation in the data path. 
Change-Id: Idf4d94a9754d5c75c899f1f4f59602275b9904a6 Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 58 +++++++++++++++++------------------- src/vnet/devices/virtio/vhost-user.h | 2 +- 2 files changed, 28 insertions(+), 32 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 82f76533..bfd3e73e 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -854,7 +854,7 @@ vhost_user_socket_read (unix_file_t * uf) (msg.state.num == 0) || /* it cannot be zero */ ((msg.state.num - 1) & msg.state.num)) /* must be power of 2 */ goto close_socket; - vui->vrings[msg.state.index].qsz = msg.state.num; + vui->vrings[msg.state.index].qsz_mask = msg.state.num - 1; break; case VHOST_USER_SET_VRING_ADDR: @@ -1288,9 +1288,8 @@ vhost_user_rx_trace (vhost_trace_t * t, vlib_buffer_t * b, vhost_user_vring_t * txvq) { vhost_user_main_t *vum = &vhost_user_main; - u32 qsz_mask = txvq->qsz - 1; u32 last_avail_idx = txvq->last_avail_idx; - u32 desc_current = txvq->avail->ring[last_avail_idx & qsz_mask]; + u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask]; vring_desc_t *hdr_desc = 0; virtio_net_hdr_mrg_rxbuf_t *hdr; u32 hint = 0; @@ -1409,19 +1408,19 @@ vhost_user_rx_discard_packet (vlib_main_t * vm, */ u32 discarded_packets = 0; u32 avail_idx = txvq->avail->idx; - u16 qsz_mask = txvq->qsz - 1; while (discarded_packets != discard_max) { if (avail_idx == txvq->last_avail_idx) goto out; u16 desc_chain_head = - txvq->avail->ring[txvq->last_avail_idx & qsz_mask]; + txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask]; txvq->last_avail_idx++; - txvq->used->ring[txvq->last_used_idx & qsz_mask].id = desc_chain_head; - txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0; + txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id = + desc_chain_head; + txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0; vhost_user_log_dirty_ring (vui, txvq, - ring[txvq->last_used_idx & qsz_mask]); + ring[txvq->last_used_idx & txvq->qsz_mask]); txvq->last_used_idx++; discarded_packets++; } @@ -1469,7 +1468,6 @@ vhost_user_if_input (vlib_main_t * vm, u32 n_left_to_next, *to_next; u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; u32 n_trace = vlib_get_trace_count (vm, node); - u16 qsz_mask; u32 map_hint = 0; u16 thread_index = vlib_get_thread_index (); u16 copy_len = 0; @@ -1529,7 +1527,7 @@ vhost_user_if_input (vlib_main_t * vm, return 0; } - if (PREDICT_FALSE (n_left == txvq->qsz)) + if (PREDICT_FALSE (n_left == (txvq->qsz_mask + 1))) { /* * Informational error logging when VPP is not @@ -1539,8 +1537,6 @@ vhost_user_if_input (vlib_main_t * vm, VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1); } - qsz_mask = txvq->qsz - 1; - if (n_left > VLIB_FRAME_SIZE) n_left = VLIB_FRAME_SIZE; @@ -1605,7 +1601,8 @@ vhost_user_if_input (vlib_main_t * vm, break; } - desc_current = txvq->avail->ring[txvq->last_avail_idx & qsz_mask]; + desc_current = + txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask]; vum->cpus[thread_index].rx_buffers_len--; bi_current = (vum->cpus[thread_index].rx_buffers) [vum->cpus[thread_index].rx_buffers_len]; @@ -1621,10 +1618,12 @@ vhost_user_if_input (vlib_main_t * vm, rx_buffers_len - 1], LOAD); /* Just preset the used descriptor id and length for later */ - txvq->used->ring[txvq->last_used_idx & qsz_mask].id = desc_current; - txvq->used->ring[txvq->last_used_idx & qsz_mask].len = 0; + txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id = 
+ desc_current; + txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0; vhost_user_log_dirty_ring (vui, txvq, - ring[txvq->last_used_idx & qsz_mask]); + ring[txvq->last_used_idx & + txvq->qsz_mask]); /* The buffer should already be initialized */ b_head->total_length_not_including_first_buffer = 0; @@ -1889,9 +1888,8 @@ vhost_user_tx_trace (vhost_trace_t * t, vlib_buffer_t * b, vhost_user_vring_t * rxvq) { vhost_user_main_t *vum = &vhost_user_main; - u32 qsz_mask = rxvq->qsz - 1; u32 last_avail_idx = rxvq->last_avail_idx; - u32 desc_current = rxvq->avail->ring[last_avail_idx & qsz_mask]; + u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask]; vring_desc_t *hdr_desc = 0; u32 hint = 0; @@ -1979,7 +1977,6 @@ vhost_user_tx (vlib_main_t * vm, pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance); u32 qid = ~0; vhost_user_vring_t *rxvq; - u16 qsz_mask; u8 error; u32 thread_index = vlib_get_thread_index (); u32 map_hint = 0; @@ -2006,8 +2003,6 @@ vhost_user_tx (vlib_main_t * vm, if (PREDICT_FALSE (vui->use_tx_spinlock)) vhost_user_vring_lock (vui, qid); - qsz_mask = rxvq->qsz - 1; /* qsz is always power of 2 */ - retry: error = VHOST_USER_TX_FUNC_ERROR_NONE; tx_headers_len = 0; @@ -2043,7 +2038,7 @@ retry: desc_table = rxvq->desc; desc_head = desc_index = - rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask]; + rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask]; /* Go deeper in case of indirect descriptor * I don't know of any driver providing indirect for RX. */ @@ -2108,13 +2103,13 @@ retry: &vum->cpus[thread_index].tx_headers[tx_headers_len - 1]; //Move from available to used buffer - rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id = + rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head; - rxvq->used->ring[rxvq->last_used_idx & qsz_mask].len = + rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len; vhost_user_log_dirty_ring (vui, rxvq, ring[rxvq->last_used_idx & - qsz_mask]); + rxvq->qsz_mask]); rxvq->last_avail_idx++; rxvq->last_used_idx++; @@ -2133,7 +2128,7 @@ retry: desc_table = rxvq->desc; desc_head = desc_index = - rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask]; + rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask]; if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT)) { @@ -2201,10 +2196,10 @@ retry: } //Move from available to used ring - rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id = desc_head; - rxvq->used->ring[rxvq->last_used_idx & qsz_mask].len = desc_len; + rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head; + rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len; vhost_user_log_dirty_ring (vui, rxvq, - ring[rxvq->last_used_idx & qsz_mask]); + ring[rxvq->last_used_idx & rxvq->qsz_mask]); rxvq->last_avail_idx++; rxvq->last_used_idx++; @@ -3263,7 +3258,8 @@ show_vhost_user_command_fn (vlib_main_t * vm, vlib_cli_output (vm, " qsz %d last_avail_idx %d last_used_idx %d\n", - vui->vrings[q].qsz, vui->vrings[q].last_avail_idx, + vui->vrings[q].qsz_mask + 1, + vui->vrings[q].last_avail_idx, vui->vrings[q].last_used_idx); if (vui->vrings[q].avail && vui->vrings[q].used) @@ -3286,7 +3282,7 @@ show_vhost_user_command_fn (vlib_main_t * vm, " id addr len flags next user_addr\n"); vlib_cli_output (vm, " ===== ================== ===== ====== ===== ==================\n"); - for (j = 0; j < vui->vrings[q].qsz; j++) + for (j = 0; j < vui->vrings[q].qsz_mask + 1; j++) { u32 mem_hint = 0; vlib_cli_output (vm, diff --git 
a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index ed147a47..ad6c4219 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -191,7 +191,7 @@ typedef struct vhost_user_msg { typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u16 qsz; + u16 qsz_mask; u16 last_avail_idx; u16 last_used_idx; u16 n_since_last_int; -- cgit 1.2.3-korg From 49a04b9545de22beaef5218d3985896d6ba37982 Mon Sep 17 00:00:00 2001 From: Steven Date: Sat, 29 Jul 2017 08:56:08 -0700 Subject: vhost: Disallow interrupt mode config if driver opts out of interrupt support According to the spec, supporting interrupt mode is optional for the driver, not mandatory. When interrupt mode is configured on the interface, we should check that the driver did not opt out of kickfd support, and reject the configuration if it did. Change-Id: I7d3dbaddde65458e1a6a802754a3768ae8685a0e Signed-off-by: Steven --- src/vnet/devices/virtio/vhost-user.c | 11 ++++++++--- src/vnet/devices/virtio/vhost-user.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index bfd3e73e..5fe378cb 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -933,7 +933,7 @@ vhost_user_socket_read (unix_file_t * uf) vui->vrings[q].callfd_idx = ~0; } - if (!(msg.u64 & 0x100)) + if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK)) { if (number_of_fds != 1) { @@ -965,7 +965,7 @@ vhost_user_socket_read (unix_file_t * uf) vui->vrings[q].kickfd_idx = ~0; } - if (!(msg.u64 & 0x100)) + if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK)) { if (number_of_fds != 1) { @@ -998,7 +998,7 @@ vhost_user_socket_read (unix_file_t * uf) if (vui->vrings[q].errfd != -1) close (vui->vrings[q].errfd); - if (!(msg.u64 & 0x100)) + if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK)) { if (number_of_fds != 1) goto close_socket; @@ -2382,6 +2382,11 @@ vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, if ((mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) { + if (txvq->kickfd_idx == ~0) + { + // We cannot support interrupt mode if the driver opts out + return clib_error_return (0, "Driver does not support interrupt"); + } if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING) { vum->ifq_count++; diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index ad6c4219..ae3b88e8 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -23,6 +23,7 @@ #define VHOST_VRING_IDX_RX(qid) (2*qid) #define VHOST_VRING_IDX_TX(qid) (2*qid + 1) +#define VHOST_USER_VRING_NOFD_MASK 0x100 #define VIRTQ_DESC_F_NEXT 1 #define VIRTQ_DESC_F_INDIRECT 4 #define VHOST_USER_REPLY_MASK (0x1 << 2) -- cgit 1.2.3-korg From bd8a611c7e4558f18e3280ace9e48ecae5673800 Mon Sep 17 00:00:00 2001 From: Steven Date: Sun, 30 Jul 2017 10:29:26 -0700 Subject: Devices: Set interface rx-mode may cause SIGSEGV with nonexistent queue When I typed set interface rx-mode with a nonexistent queue, I got a crash. It looks like the vm is NULL when vlib_node_get_runtime is called; the traceback appears after the sketch below.
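
As an illustrative aside, the failure shape and the guard this patch adds reduce to a bounds check before indexing a per-queue vector. The sketch below uses hypothetical stand-in names; only the -125 "Invalid queue" error value and the vec_len()-style comparison mirror the actual diff further down.

/* Standalone sketch (hypothetical names): reject an out-of-range
 * queue id instead of indexing past the end of the per-queue vector
 * and then dereferencing a garbage thread index. */
#include <stdio.h>

#define DEMO_API_ERROR_INVALID_QUEUE (-125)

typedef struct
{
  unsigned *input_thread_by_queue;	/* one entry per configured queue */
  unsigned n_queues;			/* stands in for vec_len () */
} demo_hw_if_t;

static int
demo_set_rx_mode (demo_hw_if_t * hw, unsigned queue_id)
{
  /* the missing check behind the SIGSEGV below; equivalent to
     "vec_len (...) < queue_id + 1" without the unsigned overflow */
  if (queue_id >= hw->n_queues)
    return DEMO_API_ERROR_INVALID_QUEUE;
  printf ("queue %u handled by thread %u\n",
	  queue_id, hw->input_thread_by_queue[queue_id]);
  return 0;
}

int
main (void)
{
  unsigned threads[1] = { 0 };
  demo_hw_if_t hw = { threads, 1 };
  demo_set_rx_mode (&hw, 0);	/* valid queue */
  if (demo_set_rx_mode (&hw, 1) == DEMO_API_ERROR_INVALID_QUEUE)
    printf ("queue 1 rejected instead of crashing\n");
  return 0;
}
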
DBGvpp# sh int rx Thread 0 (vpp_main): node dpdk-input: TenGigabitEthernet5/0/0 queue 0 (polling) TenGigabitEthernet5/0/1 queue 0 (polling) TenGigabitEthernet7/0/0 queue 0 (polling) TenGigabitEthernet7/0/1 queue 0 (polling) node vhost-user-input: VirtualEthernet0/0/2 queue 0 (adaptive) DBGvpp# set interface rx-mode VirtualEthernet0/0/2 queue 1 polling Thread 1 "vpp_main" received signal SIGSEGV, Segmentation fault. 0x00007ffff6d4e0bc in vlib_node_get_runtime (vm=0x0, node_index=125) at /home/sluong/vpp/build-data/../src/vlib/node_funcs.h:92 92 vlib_node_t *n = vec_elt (nm->nodes, node_index); (gdb) where at /home/sluong/vpp/build-data/../src/vlib/node_funcs.h:92 at /home/sluong/vpp/build-data/../src/vlib/node_funcs.h:112 vnm=0x6f0fa0 , hw_if_index=7, queue_id=1, mode=0x7fffb62099e8) at /home/sluong/vpp/build-data/../src/vnet/devices/devices.c:307 hw_if_index=7, queue_id=1, mode=VNET_HW_INTERFACE_RX_MODE_POLLING) at /home/sluong/vpp/build-data/../src/vnet/interface_cli.c:1192 vm=0x7ffff7b9d440 , input=0x7fffb6209ef0, cmd=0x7fffb61d5d14) at /home/sluong/vpp/build-data/../src/vnet/interface_cli.c:1288 vm=0x7ffff7b9d440 , cm=0x7ffff7b9d630 , input=0x7fffb6209ef0, parent_command_index=18) at /home/sluong/vpp/build-data/../src/vlib/cli.c:588 vm=0x7ffff7b9d440 , cm=0x7ffff7b9d630 , input=0x7fffb6209ef0, parent_command_index=12) The fix is to add a check for vec_len(hw->input_node_thread_index_by_queue) and vec_len (hw->rx_mode_by_queue) to reject the command if the queue_id is out of bounds. While at it, I noticed that inputting queue_id=-1 is interpreted as all queues. An easy fix is to not overload the queue_id variable with -1 to mean something else. Change-Id: Id70ec3e7d06ccc67635e6d28ef53420bdac4a988 Signed-off-by: Steven --- src/vnet/api_errno.h | 3 ++- src/vnet/devices/devices.c | 8 ++++++++ src/vnet/interface_cli.c | 9 +++++++-- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 22522f34..c0deb1d0 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -114,7 +114,8 @@ _(BD_NOT_MODIFIABLE, -121, "Bridge domain 0 can't be deleted/modified") \ _(BD_ID_EXCEED_MAX, -122, "Bridge domain ID exceed 16M limit") \ _(SUBIF_DOESNT_EXIST, -123, "Subinterface doesn't exist") \ _(L2_MACS_EVENT_CLINET_PRESENT, -124, "Client already exist for L2 MACs events") \ -_(UNSUPPORTED, -125, "Unsupported") +_(INVALID_QUEUE, -125, "Invalid queue") \ +_(UNSUPPORTED, -126, "Unsupported") typedef enum { diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index 2eb8e30e..a38ecd2d 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -264,6 +264,10 @@ vnet_hw_interface_set_rx_mode (vnet_main_t * vnm, u32 hw_if_index, (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE) == 0) return VNET_API_ERROR_UNSUPPORTED; + if ((vec_len (hw->input_node_thread_index_by_queue) < queue_id + 1) || + (vec_len (hw->rx_mode_by_queue) < queue_id + 1)) + return VNET_API_ERROR_INVALID_QUEUE; + hw->rx_mode_by_queue[queue_id] = mode; thread_index = hw->input_node_thread_index_by_queue[queue_id]; vm = vlib_mains[thread_index]; @@ -307,6 +311,10 @@ vnet_hw_interface_get_rx_mode (vnet_main_t * vnm, u32 hw_if_index, if (hw->input_node_thread_index_by_queue == 0) return VNET_API_ERROR_INVALID_INTERFACE; + if ((vec_len (hw->input_node_thread_index_by_queue) < queue_id + 1) || + (vec_len (hw->rx_mode_by_queue) < queue_id + 1)) + return VNET_API_ERROR_INVALID_QUEUE; + thread_index =
hw->input_node_thread_index_by_queue[queue_id]; vm = vlib_mains[thread_index]; diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index f37f139b..a6680c5b 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -1313,6 +1313,8 @@ set_hw_interface_rx_mode (vnet_main_t * vnm, u32 hw_if_index, break; case VNET_API_ERROR_INVALID_INTERFACE: return clib_error_return (0, "invalid interface"); + case VNET_API_ERROR_INVALID_QUEUE: + return clib_error_return (0, "invalid queue"); default: return clib_error_return (0, "unknown error"); } @@ -1334,6 +1336,8 @@ set_hw_interface_rx_mode (vnet_main_t * vnm, u32 hw_if_index, return clib_error_return (0, "unsupported"); case VNET_API_ERROR_INVALID_INTERFACE: return clib_error_return (0, "invalid interface"); + case VNET_API_ERROR_INVALID_QUEUE: + return clib_error_return (0, "invalid queue"); default: return clib_error_return (0, "unknown error"); } @@ -1353,6 +1357,7 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input, u32 queue_id = (u32) ~ 0; vnet_hw_interface_rx_mode mode = VNET_HW_INTERFACE_RX_MODE_UNKNOWN; int i; + u8 input_queue_id = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -1363,7 +1368,7 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input, (line_input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) ; else if (unformat (line_input, "queue %d", &queue_id)) - ; + input_queue_id = 1; else if (unformat (line_input, "polling")) mode = VNET_HW_INTERFACE_RX_MODE_POLLING; else if (unformat (line_input, "interrupt")) @@ -1389,7 +1394,7 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input, hw = vnet_get_hw_interface (vnm, hw_if_index); - if (queue_id == ~0) + if (input_queue_id == 0) { for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++) { -- cgit 1.2.3-korg From 3b64d6334b4e8d0759cff043a55042f88d1ccb0e Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 8 Sep 2017 12:26:12 +0200 Subject: vlib: move linux-specific code to vlib/linux Change-Id: Id79d2c2be7a98e15416a537c890a8f2dd6d4464d Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/init.c | 1 + src/plugins/memif/memif.c | 1 + src/plugins/memif/private.h | 30 -- src/vlib.am | 7 +- src/vlib/linux/pci.c | 666 +++++++++++++++++++++++++++++++++ src/vlib/linux/physmem.c | 411 ++++++++++++++++++++ src/vlib/linux/syscall.h | 58 +++ src/vlib/linux/sysfs.c | 250 +++++++++++++ src/vlib/linux/sysfs.h | 44 +++ src/vlib/pci/linux_pci.c | 665 -------------------------------- src/vlib/threads_cli.c | 1 + src/vlib/unix/physmem.c | 439 ---------------------- src/vlib/unix/unix.h | 17 - src/vlib/unix/util.c | 219 ----------- src/vnet/devices/af_packet/af_packet.c | 1 + 15 files changed, 1438 insertions(+), 1372 deletions(-) create mode 100644 src/vlib/linux/pci.c create mode 100644 src/vlib/linux/physmem.c create mode 100644 src/vlib/linux/syscall.h create mode 100644 src/vlib/linux/sysfs.c create mode 100644 src/vlib/linux/sysfs.h delete mode 100644 src/vlib/pci/linux_pci.c delete mode 100644 src/vlib/unix/physmem.c (limited to 'src/vnet/devices') diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index e23542f7..4ef3b676 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 7e2d947f..4c387b92 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -33,6 +33,7 @@ 
#include #include +#include #include #include #include diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h index 985ac5ec..b5f2f8ff 100644 --- a/src/plugins/memif/private.h +++ b/src/plugins/memif/private.h @@ -228,24 +228,6 @@ int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args); int memif_delete_if (vlib_main_t * vm, memif_if_t * mif); clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm); -#ifndef __NR_memfd_create -#if defined __x86_64__ -#define __NR_memfd_create 319 -#elif defined __arm__ -#define __NR_memfd_create 385 -#elif defined __aarch64__ -#define __NR_memfd_create 279 -#else -#error "__NR_memfd_create unknown for this architecture" -#endif -#endif - -static inline int -memfd_create (const char *name, unsigned int flags) -{ - return syscall (__NR_memfd_create, name, flags); -} - static_always_inline void * memif_get_buffer (memif_if_t * mif, memif_ring_t * ring, u16 slot) { @@ -253,18 +235,6 @@ memif_get_buffer (memif_if_t * mif, memif_ring_t * ring, u16 slot) return mif->regions[region].shm + ring->desc[slot].offset; } -#ifndef F_LINUX_SPECIFIC_BASE -#define F_LINUX_SPECIFIC_BASE 1024 -#endif -#define MFD_ALLOW_SEALING 0x0002U -#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) -#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) - -#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ -#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ -#define F_SEAL_GROW 0x0004 /* prevent file from growing */ -#define F_SEAL_WRITE 0x0008 /* prevent writes */ - /* memif.c */ clib_error_t *memif_init_regions_and_queues (memif_if_t * mif); clib_error_t *memif_connect (memif_if_t * mif); diff --git a/src/vlib.am b/src/vlib.am index cab90e2d..41d68690 100644 --- a/src/vlib.am +++ b/src/vlib.am @@ -32,13 +32,15 @@ libvlib_la_SOURCES = \ vlib/format.c \ vlib/i2c.c \ vlib/init.c \ + vlib/linux/pci.c \ + vlib/linux/physmem.c \ + vlib/linux/sysfs.c \ vlib/main.c \ vlib/mc.c \ vlib/node.c \ vlib/node_cli.c \ vlib/node_format.c \ vlib/pci/pci.c \ - vlib/pci/linux_pci.c \ vlib/threads.c \ vlib/threads_cli.c \ vlib/trace.c @@ -58,6 +60,8 @@ nobase_include_HEADERS += \ vlib/global_funcs.h \ vlib/i2c.h \ vlib/init.h \ + vlib/linux/sysfs.h \ + vlib/linux/syscall.h \ vlib/main.h \ vlib/mc.h \ vlib/node_funcs.h \ @@ -79,7 +83,6 @@ libvlib_la_SOURCES += \ vlib/unix/mc_socket.c \ vlib/unix/plugin.c \ vlib/unix/plugin.h \ - vlib/unix/physmem.c \ vlib/unix/util.c nobase_include_HEADERS += \ diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c new file mode 100644 index 00000000..cd2affdc --- /dev/null +++ b/src/vlib/linux/pci.c @@ -0,0 +1,666 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * pci.c: Linux user space PCI bus management. 
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct +{ + /* /sys/bus/pci/devices/... directory name for this device. */ + u8 *dev_dir_name; + + /* Resource file descriptors. */ + int *resource_fds; + + /* File descriptor for config space read/write. */ + int config_fd; + + /* File descriptor for /dev/uio%d */ + int uio_fd; + + /* Minor device for uio device. */ + u32 uio_minor; + + /* Index given by unix_file_add. */ + u32 unix_file_index; + +} linux_pci_device_t; + +/* Pool of PCI devices. */ +typedef struct +{ + vlib_main_t *vlib_main; + linux_pci_device_t *linux_pci_devices; +} linux_pci_main_t; + +extern linux_pci_main_t linux_pci_main; + +/* Call to allocate/initialize the pci subsystem. + This is not an init function so that users can explicitly enable + pci only when it's needed. */ +clib_error_t *pci_bus_init (vlib_main_t * vm); + +clib_error_t *vlib_pci_bind_to_uio (vlib_pci_device_t * d, + char *uio_driver_name); + +linux_pci_main_t linux_pci_main; + +clib_error_t * +vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name) +{ + clib_error_t *error = 0; + u8 *s = 0, *driver_name = 0; + DIR *dir = 0; + struct dirent *e; + int fd, clear_driver_override = 0; + u8 *dev_dir_name = format (0, "/sys/bus/pci/devices/%U", + format_vlib_pci_addr, &d->bus_address); + + s = format (s, "%v/driver%c", dev_dir_name, 0); + driver_name = vlib_sysfs_link_to_name ((char *) s); + vec_reset_length (s); + + if (driver_name && + ((strcmp ("vfio-pci", (char *) driver_name) == 0) || + (strcmp ("uio_pci_generic", (char *) driver_name) == 0) || + (strcmp ("igb_uio", (char *) driver_name) == 0))) + goto done; + + /* walk through all linux interfaces and if an interface belonging to + this device is found, check if the interface is admin up */ + dir = opendir ("/sys/class/net"); + s = format (s, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); + + if (!dir) + { + error = clib_error_return (0, "Skipping PCI device %U: failed to " + "read /sys/class/net", + format_vlib_pci_addr, &d->bus_address); + goto done; + } + + fd = socket (PF_INET, SOCK_DGRAM, 0); + if (fd < 0) + { + error = clib_error_return_unix (0, "socket"); + goto done; + } + + while ((e = readdir (dir))) + { + struct ifreq ifr; + struct ethtool_drvinfo drvinfo; + + if (e->d_name[0] == '.') /* skip . and ..
*/ + continue; + + memset (&ifr, 0, sizeof ifr); + memset (&drvinfo, 0, sizeof drvinfo); + ifr.ifr_data = (char *) &drvinfo; + strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1); + drvinfo.cmd = ETHTOOL_GDRVINFO; + if (ioctl (fd, SIOCETHTOOL, &ifr) < 0) + { + /* Some interfaces (eg "lo") don't support this ioctl */ + if ((errno != ENOTSUP) && (errno != ENODEV)) + clib_unix_warning ("ioctl fetch intf %s bus info error", + e->d_name); + continue; + } + + if (strcmp ((char *) s, drvinfo.bus_info)) + continue; + + memset (&ifr, 0, sizeof (ifr)); + strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1); + if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl fetch intf %s flags", + e->d_name); + close (fd); + goto done; + } + + if (ifr.ifr_flags & IFF_UP) + { + error = clib_error_return (0, "Skipping PCI device %U as host " + "interface %s is up", + format_vlib_pci_addr, &d->bus_address, + e->d_name); + close (fd); + goto done; + } + } + + close (fd); + vec_reset_length (s); + + s = format (s, "%v/driver/unbind%c", dev_dir_name, 0); + vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); + vec_reset_length (s); + + s = format (s, "%v/driver_override%c", dev_dir_name, 0); + if (access ((char *) s, F_OK) == 0) + { + vlib_sysfs_write ((char *) s, "%s", uio_driver_name); + clear_driver_override = 1; + } + else + { + vec_reset_length (s); + s = format (s, "/sys/bus/pci/drivers/%s/new_id%c", uio_driver_name, 0); + vlib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id, + d->device_id); + } + vec_reset_length (s); + + s = format (s, "/sys/bus/pci/drivers/%s/bind%c", uio_driver_name, 0); + vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); + vec_reset_length (s); + + if (clear_driver_override) + { + s = format (s, "%v/driver_override%c", dev_dir_name, 0); + vlib_sysfs_write ((char *) s, "%c", 0); + vec_reset_length (s); + } + +done: + closedir (dir); + vec_free (s); + vec_free (dev_dir_name); + vec_free (driver_name); + return error; +} + + +static clib_error_t * +scan_uio_dir (void *arg, u8 * path_name, u8 * file_name) +{ + linux_pci_device_t *l = arg; + unformat_input_t input; + + unformat_init_string (&input, (char *) file_name, vec_len (file_name)); + + if (!unformat (&input, "uio%d", &l->uio_minor)) + abort (); + + unformat_free (&input); + return 0; +} + +static clib_error_t * +linux_pci_uio_read_ready (unix_file_t * uf) +{ + vlib_pci_main_t *pm = &pci_main; + vlib_pci_device_t *d; + int __attribute__ ((unused)) rv; + + u32 icount; + rv = read (uf->file_descriptor, &icount, 4); + + d = pool_elt_at_index (pm->pci_devs, uf->private_data); + + if (d->interrupt_handler) + d->interrupt_handler (d); + + vlib_pci_intr_enable (d); + + return /* no error */ 0; +} + +static clib_error_t * +linux_pci_uio_error_ready (unix_file_t * uf) +{ + u32 error_index = (u32) uf->private_data; + + return clib_error_return (0, "pci device %d: error", error_index); +} + +static void +add_device (vlib_pci_device_t * dev, linux_pci_device_t * pdev) +{ + vlib_pci_main_t *pm = &pci_main; + linux_pci_main_t *lpm = &linux_pci_main; + linux_pci_device_t *l; + + pool_get (lpm->linux_pci_devices, l); + l[0] = pdev[0]; + + l->dev_dir_name = vec_dup (l->dev_dir_name); + + dev->os_handle = l - lpm->linux_pci_devices; + + { + u8 *uio_dir = format (0, "%s/uio", l->dev_dir_name); + foreach_directory_file ((char *) uio_dir, scan_uio_dir, l, /* scan_dirs */ + 1); + vec_free (uio_dir); + } + + { + char *uio_name = (char *) format (0, "/dev/uio%d%c", l->uio_minor, 
0); + l->uio_fd = open (uio_name, O_RDWR); + if (l->uio_fd < 0) + clib_unix_error ("open `%s'", uio_name); + vec_free (uio_name); + } + + { + unix_file_t template = { 0 }; + unix_main_t *um = &unix_main; + + template.read_function = linux_pci_uio_read_ready; + template.file_descriptor = l->uio_fd; + template.error_function = linux_pci_uio_error_ready; + template.private_data = dev - pm->pci_devs; + + l->unix_file_index = unix_file_add (um, &template); + } +} + +static void +linux_pci_device_free (linux_pci_device_t * l) +{ + int i; + for (i = 0; i < vec_len (l->resource_fds); i++) + if (l->resource_fds[i] > 0) + close (l->resource_fds[i]); + if (l->config_fd > 0) + close (l->config_fd); + if (l->uio_fd > 0) + close (l->uio_fd); + vec_free (l->resource_fds); + vec_free (l->dev_dir_name); +} + +/* Configuration space read/write. */ +clib_error_t * +vlib_pci_read_write_config (vlib_pci_device_t * dev, + vlib_read_or_write_t read_or_write, + uword address, void *data, u32 n_bytes) +{ + linux_pci_main_t *lpm = &linux_pci_main; + linux_pci_device_t *p; + int n; + + p = pool_elt_at_index (lpm->linux_pci_devices, dev->os_handle); + + if (read_or_write == VLIB_READ) + n = pread (p->config_fd, data, n_bytes, address); + else + n = pwrite (p->config_fd, data, n_bytes, address); + + if (n != n_bytes) + return clib_error_return_unix (0, "%s", + read_or_write == VLIB_READ + ? "read" : "write"); + + return 0; +} + +static clib_error_t * +os_map_pci_resource_internal (uword os_handle, + u32 resource, u8 * addr, void **result) +{ + linux_pci_main_t *pm = &linux_pci_main; + linux_pci_device_t *p; + struct stat stat_buf; + u8 *file_name; + int fd; + clib_error_t *error; + int flags = MAP_SHARED; + + error = 0; + p = pool_elt_at_index (pm->linux_pci_devices, os_handle); + + file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0); + fd = open ((char *) file_name, O_RDWR); + if (fd < 0) + { + error = clib_error_return_unix (0, "open `%s'", file_name); + goto done; + } + + if (fstat (fd, &stat_buf) < 0) + { + error = clib_error_return_unix (0, "fstat `%s'", file_name); + goto done; + } + + vec_validate (p->resource_fds, resource); + p->resource_fds[resource] = fd; + if (addr != 0) + flags |= MAP_FIXED; + + *result = mmap (addr, + /* size */ stat_buf.st_size, + PROT_READ | PROT_WRITE, flags, + /* file */ fd, + /* offset */ 0); + if (*result == (void *) -1) + { + error = clib_error_return_unix (0, "mmap `%s'", file_name); + goto done; + } + +done: + if (error) + { + if (fd >= 0) + close (fd); + } + vec_free (file_name); + return error; +} + +clib_error_t * +vlib_pci_map_resource (vlib_pci_device_t * dev, u32 resource, void **result) +{ + return (os_map_pci_resource_internal + (dev->os_handle, resource, 0 /* addr */ , + result)); +} + +clib_error_t * +vlib_pci_map_resource_fixed (vlib_pci_device_t * dev, + u32 resource, u8 * addr, void **result) +{ + return (os_map_pci_resource_internal + (dev->os_handle, resource, addr, result)); +} + +void +vlib_pci_free_device (vlib_pci_device_t * dev) +{ + linux_pci_main_t *pm = &linux_pci_main; + linux_pci_device_t *l; + + l = pool_elt_at_index (pm->linux_pci_devices, dev->os_handle); + linux_pci_device_free (l); + pool_put (pm->linux_pci_devices, l); +} + +pci_device_registration_t * __attribute__ ((unused)) +pci_device_next_registered (pci_device_registration_t * r) +{ + uword i; + + /* Null vendor id marks end of initialized list. 
*/ + for (i = 0; r->supported_devices[i].vendor_id != 0; i++) + ; + + return clib_elf_section_data_next (r, i * sizeof (r->supported_devices[0])); +} + +static clib_error_t * +init_device_from_registered (vlib_main_t * vm, + vlib_pci_device_t * dev, + linux_pci_device_t * pdev) +{ + vlib_pci_main_t *pm = &pci_main; + pci_device_registration_t *r; + pci_device_id_t *i; + clib_error_t *error; + + r = pm->pci_device_registrations; + + while (r) + { + for (i = r->supported_devices; i->vendor_id != 0; i++) + if (i->vendor_id == dev->vendor_id && i->device_id == dev->device_id) + { + error = vlib_pci_bind_to_uio (dev, "uio_pci_generic"); + if (error) + { + clib_error_report (error); + continue; + } + + add_device (dev, pdev); + dev->interrupt_handler = r->interrupt_handler; + return r->init_function (vm, dev); + } + r = r->next_registration; + } + /* No driver, close the PCI config-space FD */ + close (pdev->config_fd); + return 0; +} + +static clib_error_t * +init_device (vlib_main_t * vm, + vlib_pci_device_t * dev, linux_pci_device_t * pdev) +{ + return init_device_from_registered (vm, dev, pdev); +} + +static clib_error_t * +scan_device (void *arg, u8 * dev_dir_name, u8 * ignored) +{ + vlib_main_t *vm = arg; + vlib_pci_main_t *pm = &pci_main; + int fd; + u8 *f; + clib_error_t *error = 0; + vlib_pci_device_t *dev; + linux_pci_device_t pdev = { 0 }; + u32 tmp; + + f = format (0, "%v/config%c", dev_dir_name, 0); + fd = open ((char *) f, O_RDWR); + + /* Try read-only access if write fails. */ + if (fd < 0) + fd = open ((char *) f, O_RDONLY); + + if (fd < 0) + { + error = clib_error_return_unix (0, "open `%s'", f); + goto done; + } + + pool_get (pm->pci_devs, dev); + + /* You can only read more than 64 bytes of config space as root; so we try to + read the full space but fall back to just the first 64 bytes. */ + if (read (fd, &dev->config_data, sizeof (dev->config_data)) != + sizeof (dev->config_data) + && read (fd, &dev->config0, + sizeof (dev->config0)) != sizeof (dev->config0)) + { + pool_put (pm->pci_devs, dev); + error = clib_error_return_unix (0, "read `%s'", f); + close (fd); + goto done; + } + + { + static pci_config_header_t all_ones; + if (all_ones.vendor_id == 0) + memset (&all_ones, ~0, sizeof (all_ones)); + + if (!memcmp (&dev->config0.header, &all_ones, sizeof (all_ones))) + { + pool_put (pm->pci_devs, dev); + error = clib_error_return (0, "invalid PCI config for `%s'", f); + close (fd); + goto done; + } + } + + if (dev->config0.header.header_type == 0) + pci_config_type0_little_to_host (&dev->config0); + else + pci_config_type1_little_to_host (&dev->config1); + + /* Parse bus, dev, function from directory name.
*/ + { + unformat_input_t input; + + unformat_init_string (&input, (char *) dev_dir_name, + vec_len (dev_dir_name)); + + if (!unformat (&input, "/sys/bus/pci/devices/%U", + unformat_vlib_pci_addr, &dev->bus_address)) + abort (); + + unformat_free (&input); + + } + + + pdev.config_fd = fd; + pdev.dev_dir_name = dev_dir_name; + + hash_set (pm->pci_dev_index_by_pci_addr, dev->bus_address.as_u32, + dev - pm->pci_devs); + + vec_reset_length (f); + f = format (f, "%v/vpd%c", dev_dir_name, 0); + fd = open ((char *) f, O_RDONLY); + if (fd >= 0) + { + while (1) + { + u8 tag[3]; + u8 *data = 0; + int len; + + if (read (fd, &tag, 3) != 3) + break; + + if (tag[0] != 0x82 && tag[0] != 0x90 && tag[0] != 0x91) + break; + + len = (tag[2] << 8) | tag[1]; + vec_validate (data, len); + + if (read (fd, data, len) != len) + { + vec_free (data); + break; + } + if (tag[0] == 0x82) + dev->product_name = data; + else if (tag[0] == 0x90) + dev->vpd_r = data; + else if (tag[0] == 0x91) + dev->vpd_w = data; + + data = 0; + } + close (fd); + } + + dev->numa_node = -1; + vec_reset_length (f); + f = format (f, "%v/numa_node%c", dev_dir_name, 0); + vlib_sysfs_read ((char *) f, "%u", &dev->numa_node); + + vec_reset_length (f); + f = format (f, "%v/class%c", dev_dir_name, 0); + vlib_sysfs_read ((char *) f, "0x%x", &tmp); + dev->device_class = tmp >> 8; + + vec_reset_length (f); + f = format (f, "%v/vendor%c", dev_dir_name, 0); + vlib_sysfs_read ((char *) f, "0x%x", &tmp); + dev->vendor_id = tmp; + + vec_reset_length (f); + f = format (f, "%v/device%c", dev_dir_name, 0); + vlib_sysfs_read ((char *) f, "0x%x", &tmp); + dev->device_id = tmp; + + error = init_device (vm, dev, &pdev); + + vec_reset_length (f); + f = format (f, "%v/driver%c", dev_dir_name, 0); + dev->driver_name = vlib_sysfs_link_to_name ((char *) f); + +done: + vec_free (f); + return error; +} + +clib_error_t * +linux_pci_init (vlib_main_t * vm) +{ + vlib_pci_main_t *pm = &pci_main; + clib_error_t *error; + + pm->vlib_main = vm; + + if ((error = vlib_call_init_function (vm, unix_input_init))) + return error; + + ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32)); + pm->pci_dev_index_by_pci_addr = hash_create (0, sizeof (uword)); + + error = foreach_directory_file ("/sys/bus/pci/devices", scan_device, vm, + /* scan_dirs */ 0); + + /* Complain and continue. might not be root, etc. */ + if (error) + clib_error_report (error); + + return error; +} + +VLIB_INIT_FUNCTION (linux_pci_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c new file mode 100644 index 00000000..6731295c --- /dev/null +++ b/src/vlib/linux/physmem.c @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * physmem.c: Unix physical memory + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static void * +unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx, + uword n_bytes, uword alignment) +{ + vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); + uword lo_offset, hi_offset; + uword *to_free = 0; + + if (pr->heap == 0) + return 0; + + /* IO memory is always at least cache aligned. */ + alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES); + + while (1) + { + mheap_get_aligned (pr->heap, n_bytes, + /* align */ alignment, + /* align offset */ 0, + &lo_offset); + + /* Allocation failed? */ + if (lo_offset == ~0) + break; + + if (pr->flags & VLIB_PHYSMEM_F_FAKE) + break; + + /* Make sure allocation does not span DMA physical chunk boundary. */ + hi_offset = lo_offset + n_bytes - 1; + + if ((lo_offset >> pr->log2_page_size) == + (hi_offset >> pr->log2_page_size)) + break; + + /* Allocation would span chunk boundary, queue it to be freed as soon as + we find suitable chunk. */ + vec_add1 (to_free, lo_offset); + } + + if (to_free != 0) + { + uword i; + for (i = 0; i < vec_len (to_free); i++) + mheap_put (pr->heap, to_free[i]); + vec_free (to_free); + } + + return lo_offset != ~0 ? pr->heap + lo_offset : 0; +} + +static void +unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x) +{ + vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); + /* Return object to region's heap. 
*/ + mheap_put (pr->heap, x - pr->heap); +} + +static u64 +get_page_paddr (int fd, uword addr) +{ + int pagesize = sysconf (_SC_PAGESIZE); + u64 seek, pagemap = 0; + + seek = ((u64) addr / pagesize) * sizeof (u64); + if (lseek (fd, seek, SEEK_SET) != seek) + { + clib_unix_warning ("lseek to 0x%llx", seek); + return 0; + } + if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) + { + clib_unix_warning ("read ptbits"); + return 0; + } + if ((pagemap & (1ULL << 63)) == 0) + return 0; + + pagemap &= pow2_mask (55); + + return pagemap * pagesize; +} + +static clib_error_t * +unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, + u8 numa_node, u32 flags, + vlib_physmem_region_index_t * idx) +{ + vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_region_t *pr; + clib_error_t *error = 0; + int pagemap_fd = -1; + u8 *mount_dir = 0; + u8 *filename = 0; + struct stat st; + int old_mpol; + int mmap_flags; + struct bitmask *old_mask = numa_allocate_nodemask (); + + if (geteuid () != 0 && (flags & VLIB_PHYSMEM_F_FAKE) == 0) + return clib_error_return (0, "not allowed"); + + pool_get (vpm->regions, pr); + + if ((pr - vpm->regions) >= 256) + { + error = clib_error_return (0, "maximum number of regions reached"); + goto error; + } + + pr->index = pr - vpm->regions; + pr->fd = -1; + pr->flags = flags; + + if (get_mempolicy (&old_mpol, old_mask->maskp, old_mask->size + 1, NULL, 0) + == -1) + { + error = clib_error_return_unix (0, "get_mempolicy"); + goto error; + } + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + if ((pagemap_fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) + { + error = clib_error_return_unix (0, "open '/proc/self/pagemap'"); + goto error; + } + + mount_dir = format (0, "%s/physmem_region%d%c", + vlib_unix_get_runtime_dir (), pr->index, 0); + filename = format (0, "%s/mem%c", mount_dir, 0); + + unlink ((char *) mount_dir); + + error = vlib_unix_recursive_mkdir ((char *) mount_dir); + if (error) + goto error; + + if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) + { + error = clib_error_return_unix (0, "mount hugetlb directory '%s'", + mount_dir); + goto error; + } + + if ((pr->fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) + { + error = clib_error_return_unix (0, "open"); + goto error; + } + + mmap_flags = MAP_SHARED | MAP_HUGETLB | MAP_LOCKED; + } + else + { + if ((pr->fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1) + return clib_error_return_unix (0, "memfd_create"); + + if ((fcntl (pr->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) + { + error = + clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); + goto error; + } + mmap_flags = MAP_SHARED; + } + + if (fstat (pr->fd, &st)) + { + error = clib_error_return_unix (0, "fstat"); + goto error; + } + + pr->log2_page_size = min_log2 (st.st_blksize); + pr->n_pages = ((size - 1) >> pr->log2_page_size) + 1; + size = pr->n_pages * (1 << pr->log2_page_size); + + if ((ftruncate (pr->fd, size)) == -1) + { + error = clib_error_return_unix (0, "ftruncate length: %d", size); + goto error; + } + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + error = vlib_sysfs_prealloc_hugepages (numa_node, + 1 << (pr->log2_page_size - 10), + pr->n_pages); + if (error) + goto error; + } + + numa_set_preferred (numa_node); + + pr->mem = mmap (0, size, (PROT_READ | PROT_WRITE), mmap_flags, pr->fd, 0); + + if (pr->mem == MAP_FAILED) + { + pr->mem = 0; + error = clib_error_return_unix (0, "mmap"); + goto error; + } + + if (set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1) 
+ { + error = clib_error_return_unix (0, "set_mempolicy"); + goto error; + } + + pr->size = pr->n_pages << pr->log2_page_size; + pr->page_mask = (1 << pr->log2_page_size) - 1; + pr->numa_node = numa_node; + pr->name = format (0, "%s", name); + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + int i; + for (i = 0; i < pr->n_pages; i++) + { + void *ptr = pr->mem + (i << pr->log2_page_size); + int node; + move_pages (0, 1, &ptr, 0, &node, 0); + if (numa_node != node) + { + clib_warning + ("physmem page for region \'%s\' allocated on the wrong" + " numa node (requested %u actual %u)", pr->name, + pr->numa_node, node, i); + break; + } + } + } + + if (flags & VLIB_PHYSMEM_F_INIT_MHEAP) + { + pr->heap = mheap_alloc_with_flags (pr->mem, pr->size, + /* Don't want mheap mmap/munmap with IO memory. */ + MHEAP_FLAG_DISABLE_VM | + MHEAP_FLAG_THREAD_SAFE); + fformat (stdout, "%U", format_mheap, pr->heap, /* verbose */ 1); + } + + if (flags & VLIB_PHYSMEM_F_HAVE_BUFFERS) + { + vlib_buffer_add_mem_range (vm, pointer_to_uword (pr->mem), pr->size); + } + + *idx = pr->index; + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + int i; + for (i = 0; i < pr->n_pages; i++) + { + uword vaddr = + pointer_to_uword (pr->mem) + (((u64) i) << pr->log2_page_size); + u64 page_paddr = get_page_paddr (pagemap_fd, vaddr); + vec_add1 (pr->page_table, page_paddr); + } + } + + goto done; + +error: + if (pr->fd > -1) + close (pr->fd); + + if (pr->mem) + munmap (pr->mem, size); + + memset (pr, 0, sizeof (*pr)); + pool_put (vpm->regions, pr); + +done: + if (mount_dir) + { + umount2 ((char *) mount_dir, MNT_DETACH); + rmdir ((char *) mount_dir); + vec_free (mount_dir); + } + numa_free_cpumask (old_mask); + vec_free (filename); + if (pagemap_fd > -1) + close (pagemap_fd); + return error; +} + +static void +unix_physmem_region_free (vlib_main_t * vm, vlib_physmem_region_index_t idx) +{ + vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); + + if (pr->fd > 0) + close (pr->fd); + munmap (pr->mem, pr->size); + vec_free (pr->name); + pool_put (vpm->regions, pr); +} + +clib_error_t * +unix_physmem_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + + /* Avoid multiple calls. 
*/ + if (vm->os_physmem_alloc_aligned) + return error; + + vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned; + vm->os_physmem_free = unix_physmem_free; + vm->os_physmem_region_alloc = unix_physmem_region_alloc; + vm->os_physmem_region_free = unix_physmem_region_free; + + return error; +} + +static clib_error_t * +show_physmem (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_region_t *pr; + + /* *INDENT-OFF* */ + pool_foreach (pr, vpm->regions, ( + { + vlib_cli_output (vm, "index %u name '%s' page-size %uKB num-pages %d " + "numa-node %u fd %d\n", + pr->index, pr->name, (1 << (pr->log2_page_size -10)), + pr->n_pages, pr->numa_node, pr->fd); + if (pr->heap) + vlib_cli_output (vm, " %U", format_mheap, pr->heap, /* verbose */ 1); + else + vlib_cli_output (vm, " no heap\n"); + })); + /* *INDENT-ON* */ + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_physmem_command, static) = { + .path = "show physmem", + .short_help = "Show physical memory allocation", + .function = show_physmem, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/linux/syscall.h b/src/vlib/linux/syscall.h new file mode 100644 index 00000000..9e37997e --- /dev/null +++ b/src/vlib/linux/syscall.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_linux_syscall_h +#define included_linux_syscall_h + +#ifndef __NR_memfd_create +#if defined __x86_64__ +#define __NR_memfd_create 319 +#elif defined __arm__ +#define __NR_memfd_create 385 +#elif defined __aarch64__ +#define __NR_memfd_create 279 +#else +#error "__NR_memfd_create unknown for this architecture" +#endif +#endif + +static inline int +memfd_create (const char *name, unsigned int flags) +{ + return syscall (__NR_memfd_create, name, flags); +} + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif +#define MFD_ALLOW_SEALING 0x0002U +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ + + +#endif /* included_linux_syscall_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/linux/sysfs.c b/src/vlib/linux/sysfs.c new file mode 100644 index 00000000..f92f9ef5 --- /dev/null +++ b/src/vlib/linux/sysfs.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include + +clib_error_t * +vlib_sysfs_write (char *file_name, char *fmt, ...) +{ + u8 *s; + int fd; + clib_error_t *error = 0; + + fd = open (file_name, O_WRONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + va_list va; + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + + if (write (fd, s, vec_len (s)) < 0) + error = clib_error_return_unix (0, "write `%s'", file_name); + + vec_free (s); + close (fd); + return error; +} + +clib_error_t * +vlib_sysfs_read (char *file_name, char *fmt, ...) +{ + unformat_input_t input; + u8 *s = 0; + int fd; + ssize_t sz; + uword result; + + fd = open (file_name, O_RDONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + vec_validate (s, 4095); + + sz = read (fd, s, vec_len (s)); + if (sz < 0) + { + close (fd); + vec_free (s); + return clib_error_return_unix (0, "read `%s'", file_name); + } + + _vec_len (s) = sz; + unformat_init_vector (&input, s); + + va_list va; + va_start (va, fmt); + result = va_unformat (&input, fmt, &va); + va_end (va); + + vec_free (s); + close (fd); + + if (result == 0) + return clib_error_return (0, "unformat error"); + + return 0; +} + +u8 * +vlib_sysfs_link_to_name (char *link) +{ + char *p, buffer[64]; + unformat_input_t in; + u8 *s = 0; + int r; + + r = readlink (link, buffer, sizeof (buffer) - 1); + + if (r < 0) + return 0; + + buffer[r] = 0; + p = strrchr (buffer, '/'); + + if (!p) + return 0; + + unformat_init_string (&in, p + 1, strlen (p + 1)); + if (unformat (&in, "%s", &s) != 1) + clib_unix_warning ("no string?"); + unformat_free (&in); + + return s; +} + +clib_error_t * +vlib_sysfs_set_nr_hugepages (unsigned int numa_node, int page_size, int nr) +{ + clib_error_t *error = 0; + struct stat sb; + u8 *p = 0; + + p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); + + if (stat ((char *) p, &sb) == 0) + { + if (S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' is not directory", p); + goto done; + } + } + else if (numa_node == 0) + { + vec_reset_length (p); + p = format (p, "/sys/kernel/mm%c", 0); + if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' does not exist or it is not " + "directory", p); + goto done; + } + } + else + { + error = clib_error_return (0, "'%s' does not exist", p); + goto done; + } + + _vec_len (p) -= 1; + p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0); + vlib_sysfs_write ((char *) p, "%d", nr); + +done: + vec_free (p); + return error; +} + + +static clib_error_t * +vlib_sysfs_get_xxx_hugepages (char *type, unsigned int numa_node, + int page_size, int *val) +{ + clib_error_t *error = 0; + struct stat sb; + u8 *p = 0; + + p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); + + if (stat ((char *) p, &sb) == 0) + { + if (S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' is not directory", p); + goto done; + } + } + else if (numa_node == 0) + { + vec_reset_length (p); + p = format (p, "/sys/kernel/mm%c", 
0); + if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' does not exist or it is not " + "directory", p); + goto done; + } + } + else + { + error = clib_error_return (0, "'%s' does not exist", p); + goto done; + } + + _vec_len (p) -= 1; + p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size, + type, 0); + error = vlib_sysfs_read ((char *) p, "%d", val); + +done: + vec_free (p); + return error; +} + +clib_error_t * +vlib_sysfs_get_free_hugepages (unsigned int numa_node, int page_size, int *v) +{ + return vlib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v); +} + +clib_error_t * +vlib_sysfs_get_nr_hugepages (unsigned int numa_node, int page_size, int *v) +{ + return vlib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v); +} + +clib_error_t * +vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, int page_size, + int *v) +{ + return vlib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v); +} + +clib_error_t * +vlib_sysfs_prealloc_hugepages (unsigned int numa_node, int page_size, int nr) +{ + clib_error_t *error = 0; + int n, needed; + error = vlib_sysfs_get_free_hugepages (numa_node, page_size, &n); + if (error) + return error; + needed = nr - n; + if (needed <= 0) + return 0; + + error = vlib_sysfs_get_nr_hugepages (numa_node, page_size, &n); + if (error) + return error; + clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u", + needed, page_size, numa_node); + return vlib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed); +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/linux/sysfs.h b/src/vlib/linux/sysfs.h new file mode 100644 index 00000000..14b71317 --- /dev/null +++ b/src/vlib/linux/sysfs.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_linux_sysfs_h +#define included_linux_sysfs_h + +clib_error_t *vlib_sysfs_write (char *file_name, char *fmt, ...); + +clib_error_t *vlib_sysfs_read (char *file_name, char *fmt, ...); + +u8 *vlib_sysfs_link_to_name (char *link); + +clib_error_t *vlib_sysfs_set_nr_hugepages (unsigned int numa_node, + int page_size, int nr); +clib_error_t *vlib_sysfs_get_nr_hugepages (unsigned int numa_node, + int page_size, int *v); +clib_error_t *vlib_sysfs_get_free_hugepages (unsigned int numa_node, + int page_size, int *v); +clib_error_t *vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, + int page_size, int *v); +clib_error_t *vlib_sysfs_prealloc_hugepages (unsigned int numa_node, + int page_size, int nr); + +#endif /* included_linux_sysfs_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/pci/linux_pci.c b/src/vlib/pci/linux_pci.c deleted file mode 100644 index 2d3c0a88..00000000 --- a/src/vlib/pci/linux_pci.c +++ /dev/null @@ -1,665 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * pci.c: Linux user space PCI bus management. - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -typedef struct -{ - /* /sys/bus/pci/devices/... directory name for this device. */ - u8 *dev_dir_name; - - /* Resource file descriptors. */ - int *resource_fds; - - /* File descriptor for config space read/write. */ - int config_fd; - - /* File descriptor for /dev/uio%d */ - int uio_fd; - - /* Minor device for uio device. */ - u32 uio_minor; - - /* Index given by unix_file_add. */ - u32 unix_file_index; - -} linux_pci_device_t; - -/* Pool of PCI devices. 
*/ -typedef struct -{ - vlib_main_t *vlib_main; - linux_pci_device_t *linux_pci_devices; -} linux_pci_main_t; - -extern linux_pci_main_t linux_pci_main; - -/* Call to allocate/initialize the pci subsystem. - This is not an init function so that users can explicitly enable - pci only when it's needed. */ -clib_error_t *pci_bus_init (vlib_main_t * vm); - -clib_error_t *vlib_pci_bind_to_uio (vlib_pci_device_t * d, - char *uio_driver_name); - -linux_pci_main_t linux_pci_main; - -clib_error_t * -vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name) -{ - clib_error_t *error = 0; - u8 *s = 0, *driver_name = 0; - DIR *dir = 0; - struct dirent *e; - int fd, clear_driver_override = 0; - u8 *dev_dir_name = format (0, "/sys/bus/pci/devices/%U", - format_vlib_pci_addr, &d->bus_address); - - s = format (s, "%v/driver%c", dev_dir_name, 0); - driver_name = vlib_sysfs_link_to_name ((char *) s); - vec_reset_length (s); - - if (driver_name && - ((strcmp ("vfio-pci", (char *) driver_name) == 0) || - (strcmp ("uio_pci_generic", (char *) driver_name) == 0) || - (strcmp ("igb_uio", (char *) driver_name) == 0))) - goto done; - - /* walk through all linux interfaces and if interface belonging to - this device is found, check if interface is admin up */ - dir = opendir ("/sys/class/net"); - s = format (s, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); - - if (!dir) - { - error = clib_error_return (0, "Skipping PCI device %U: failed to " - "read /sys/class/net", - format_vlib_pci_addr, &d->bus_address); - goto done; - } - - fd = socket (PF_INET, SOCK_DGRAM, 0); - if (fd < 0) - { - error = clib_error_return_unix (0, "socket"); - goto done; - } - - while ((e = readdir (dir))) - { - struct ifreq ifr; - struct ethtool_drvinfo drvinfo; - - if (e->d_name[0] == '.') /* skip . and ..
*/ - continue; - - memset (&ifr, 0, sizeof ifr); - memset (&drvinfo, 0, sizeof drvinfo); - ifr.ifr_data = (char *) &drvinfo; - strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1); - drvinfo.cmd = ETHTOOL_GDRVINFO; - if (ioctl (fd, SIOCETHTOOL, &ifr) < 0) - { - /* Some interfaces (eg "lo") don't support this ioctl */ - if ((errno != ENOTSUP) && (errno != ENODEV)) - clib_unix_warning ("ioctl fetch intf %s bus info error", - e->d_name); - continue; - } - - if (strcmp ((char *) s, drvinfo.bus_info)) - continue; - - memset (&ifr, 0, sizeof (ifr)); - strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1); - if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0) - { - error = clib_error_return_unix (0, "ioctl fetch intf %s flags", - e->d_name); - close (fd); - goto done; - } - - if (ifr.ifr_flags & IFF_UP) - { - error = clib_error_return (0, "Skipping PCI device %U as host " - "interface %s is up", - format_vlib_pci_addr, &d->bus_address, - e->d_name); - close (fd); - goto done; - } - } - - close (fd); - vec_reset_length (s); - - s = format (s, "%v/driver/unbind%c", dev_dir_name, 0); - vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); - vec_reset_length (s); - - s = format (s, "%v/driver_override%c", dev_dir_name, 0); - if (access ((char *) s, F_OK) == 0) - { - vlib_sysfs_write ((char *) s, "%s", uio_driver_name); - clear_driver_override = 1; - } - else - { - vec_reset_length (s); - s = format (s, "/sys/bus/pci/drivers/%s/new_id%c", uio_driver_name, 0); - vlib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id, - d->device_id); - } - vec_reset_length (s); - - s = format (s, "/sys/bus/pci/drivers/%s/bind%c", uio_driver_name, 0); - vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); - vec_reset_length (s); - - if (clear_driver_override) - { - s = format (s, "%v/driver_override%c", dev_dir_name, 0); - vlib_sysfs_write ((char *) s, "%c", 0); - vec_reset_length (s); - } - -done: - closedir (dir); - vec_free (s); - vec_free (dev_dir_name); - vec_free (driver_name); - return error; -} - - -static clib_error_t * -scan_uio_dir (void *arg, u8 * path_name, u8 * file_name) -{ - linux_pci_device_t *l = arg; - unformat_input_t input; - - unformat_init_string (&input, (char *) file_name, vec_len (file_name)); - - if (!unformat (&input, "uio%d", &l->uio_minor)) - abort (); - - unformat_free (&input); - return 0; -} - -static clib_error_t * -linux_pci_uio_read_ready (unix_file_t * uf) -{ - vlib_pci_main_t *pm = &pci_main; - vlib_pci_device_t *d; - int __attribute__ ((unused)) rv; - - u32 icount; - rv = read (uf->file_descriptor, &icount, 4); - - d = pool_elt_at_index (pm->pci_devs, uf->private_data); - - if (d->interrupt_handler) - d->interrupt_handler (d); - - vlib_pci_intr_enable (d); - - return /* no error */ 0; -} - -static clib_error_t * -linux_pci_uio_error_ready (unix_file_t * uf) -{ - u32 error_index = (u32) uf->private_data; - - return clib_error_return (0, "pci device %d: error", error_index); -} - -static void -add_device (vlib_pci_device_t * dev, linux_pci_device_t * pdev) -{ - vlib_pci_main_t *pm = &pci_main; - linux_pci_main_t *lpm = &linux_pci_main; - linux_pci_device_t *l; - - pool_get (lpm->linux_pci_devices, l); - l[0] = pdev[0]; - - l->dev_dir_name = vec_dup (l->dev_dir_name); - - dev->os_handle = l - lpm->linux_pci_devices; - - { - u8 *uio_dir = format (0, "%s/uio", l->dev_dir_name); - foreach_directory_file ((char *) uio_dir, scan_uio_dir, l, /* scan_dirs */ - 1); - vec_free (uio_dir); - } - - { - char *uio_name = (char *) format (0, "/dev/uio%d%c", l->uio_minor, 
0); - l->uio_fd = open (uio_name, O_RDWR); - if (l->uio_fd < 0) - clib_unix_error ("open `%s'", uio_name); - vec_free (uio_name); - } - - { - unix_file_t template = { 0 }; - unix_main_t *um = &unix_main; - - template.read_function = linux_pci_uio_read_ready; - template.file_descriptor = l->uio_fd; - template.error_function = linux_pci_uio_error_ready; - template.private_data = dev - pm->pci_devs; - - l->unix_file_index = unix_file_add (um, &template); - } -} - -static void -linux_pci_device_free (linux_pci_device_t * l) -{ - int i; - for (i = 0; i < vec_len (l->resource_fds); i++) - if (l->resource_fds[i] > 0) - close (l->resource_fds[i]); - if (l->config_fd > 0) - close (l->config_fd); - if (l->uio_fd > 0) - close (l->uio_fd); - vec_free (l->resource_fds); - vec_free (l->dev_dir_name); -} - -/* Configuration space read/write. */ -clib_error_t * -vlib_pci_read_write_config (vlib_pci_device_t * dev, - vlib_read_or_write_t read_or_write, - uword address, void *data, u32 n_bytes) -{ - linux_pci_main_t *lpm = &linux_pci_main; - linux_pci_device_t *p; - int n; - - p = pool_elt_at_index (lpm->linux_pci_devices, dev->os_handle); - - if (read_or_write == VLIB_READ) - n = pread (p->config_fd, data, n_bytes, address); - else - n = pwrite (p->config_fd, data, n_bytes, address); - - if (n != n_bytes) - return clib_error_return_unix (0, "%s", - read_or_write == VLIB_READ - ? "read" : "write"); - - return 0; -} - -static clib_error_t * -os_map_pci_resource_internal (uword os_handle, - u32 resource, u8 * addr, void **result) -{ - linux_pci_main_t *pm = &linux_pci_main; - linux_pci_device_t *p; - struct stat stat_buf; - u8 *file_name; - int fd; - clib_error_t *error; - int flags = MAP_SHARED; - - error = 0; - p = pool_elt_at_index (pm->linux_pci_devices, os_handle); - - file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0); - fd = open ((char *) file_name, O_RDWR); - if (fd < 0) - { - error = clib_error_return_unix (0, "open `%s'", file_name); - goto done; - } - - if (fstat (fd, &stat_buf) < 0) - { - error = clib_error_return_unix (0, "fstat `%s'", file_name); - goto done; - } - - vec_validate (p->resource_fds, resource); - p->resource_fds[resource] = fd; - if (addr != 0) - flags |= MAP_FIXED; - - *result = mmap (addr, - /* size */ stat_buf.st_size, - PROT_READ | PROT_WRITE, flags, - /* file */ fd, - /* offset */ 0); - if (*result == (void *) -1) - { - error = clib_error_return_unix (0, "mmap `%s'", file_name); - goto done; - } - -done: - if (error) - { - if (fd >= 0) - close (fd); - } - vec_free (file_name); - return error; -} - -clib_error_t * -vlib_pci_map_resource (vlib_pci_device_t * dev, u32 resource, void **result) -{ - return (os_map_pci_resource_internal - (dev->os_handle, resource, 0 /* addr */ , - result)); -} - -clib_error_t * -vlib_pci_map_resource_fixed (vlib_pci_device_t * dev, - u32 resource, u8 * addr, void **result) -{ - return (os_map_pci_resource_internal - (dev->os_handle, resource, addr, result)); -} - -void -vlib_pci_free_device (vlib_pci_device_t * dev) -{ - linux_pci_main_t *pm = &linux_pci_main; - linux_pci_device_t *l; - - l = pool_elt_at_index (pm->linux_pci_devices, dev->os_handle); - linux_pci_device_free (l); - pool_put (pm->linux_pci_devices, l); -} - -pci_device_registration_t * __attribute__ ((unused)) -pci_device_next_registered (pci_device_registration_t * r) -{ - uword i; - - /* Null vendor id marks end of initialized list. 
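The linux_pci_uio_read_ready handler above relies on the standard Linux UIO protocol. A standalone sketch of that protocol (plain POSIX, not VPP code; /dev/uio0 is an assumed device): each blocking read() of the uio device returns a 4-byte cumulative interrupt count, and interrupts must then be re-armed at the device or PCI level, which the real handler does via vlib_pci_intr_enable.

/* Minimal UIO interrupt wait: block until the next interrupt and
 * print the running count the kernel reports. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main (void)
{
  int fd = open ("/dev/uio0", O_RDWR);
  uint32_t icount;

  if (fd < 0)
    return 1;
  /* Blocks until an interrupt fires; icount is the total so far. */
  if (read (fd, &icount, sizeof (icount)) == sizeof (icount))
    printf ("interrupt #%u\n", icount);
  close (fd);
  return 0;
}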
*/ - for (i = 0; r->supported_devices[i].vendor_id != 0; i++) - ; - - return clib_elf_section_data_next (r, i * sizeof (r->supported_devices[0])); -} - -static clib_error_t * -init_device_from_registered (vlib_main_t * vm, - vlib_pci_device_t * dev, - linux_pci_device_t * pdev) -{ - vlib_pci_main_t *pm = &pci_main; - pci_device_registration_t *r; - pci_device_id_t *i; - clib_error_t *error; - - r = pm->pci_device_registrations; - - while (r) - { - for (i = r->supported_devices; i->vendor_id != 0; i++) - if (i->vendor_id == dev->vendor_id && i->device_id == dev->device_id) - { - error = vlib_pci_bind_to_uio (dev, "uio_pci_generic"); - if (error) - { - clib_error_report (error); - continue; - } - - add_device (dev, pdev); - dev->interrupt_handler = r->interrupt_handler; - return r->init_function (vm, dev); - } - r = r->next_registration; - } - /* No driver, close the PCI config-space FD */ - close (pdev->config_fd); - return 0; -} - -static clib_error_t * -init_device (vlib_main_t * vm, - vlib_pci_device_t * dev, linux_pci_device_t * pdev) -{ - return init_device_from_registered (vm, dev, pdev); -} - -static clib_error_t * -scan_device (void *arg, u8 * dev_dir_name, u8 * ignored) -{ - vlib_main_t *vm = arg; - vlib_pci_main_t *pm = &pci_main; - int fd; - u8 *f; - clib_error_t *error = 0; - vlib_pci_device_t *dev; - linux_pci_device_t pdev = { 0 }; - u32 tmp; - - f = format (0, "%v/config%c", dev_dir_name, 0); - fd = open ((char *) f, O_RDWR); - - /* Try read-only access if write fails. */ - if (fd < 0) - fd = open ((char *) f, O_RDONLY); - - if (fd < 0) - { - error = clib_error_return_unix (0, "open `%s'", f); - goto done; - } - - pool_get (pm->pci_devs, dev); - - /* You can only read more than 64 bytes of config space as root; so we try to - read the full space but fall back to just the first 64 bytes. */ - if (read (fd, &dev->config_data, sizeof (dev->config_data)) != - sizeof (dev->config_data) - && read (fd, &dev->config0, - sizeof (dev->config0)) != sizeof (dev->config0)) - { - pool_put (pm->pci_devs, dev); - error = clib_error_return_unix (0, "read `%s'", f); - close (fd); - goto done; - } - - { - static pci_config_header_t all_ones; - if (all_ones.vendor_id == 0) - memset (&all_ones, ~0, sizeof (all_ones)); - - if (!memcmp (&dev->config0.header, &all_ones, sizeof (all_ones))) - { - pool_put (pm->pci_devs, dev); - error = clib_error_return (0, "invalid PCI config for `%s'", f); - close (fd); - goto done; - } - } - - if (dev->config0.header.header_type == 0) - pci_config_type0_little_to_host (&dev->config0); - else - pci_config_type1_little_to_host (&dev->config1); - - /* Parse bus, dev, function from directory name.
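The all-ones comparison above guards against config space that reads back as 0xff in every byte, which is what a removed or powered-down device returns. A minimal standalone illustration of the same check (names here are hypothetical):

/* Reject a PCI config block that is all ones, i.e. no device is
 * actually answering behind this config space. */
#include <stddef.h>
#include <stdint.h>

static int
pci_header_is_valid (const uint8_t * cfg, size_t len)
{
  size_t i;
  for (i = 0; i < len; i++)
    if (cfg[i] != 0xff)
      return 1;			/* at least one real byte seen */
  return 0;			/* all ones: nothing behind this address */
}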
*/ - { - unformat_input_t input; - - unformat_init_string (&input, (char *) dev_dir_name, - vec_len (dev_dir_name)); - - if (!unformat (&input, "/sys/bus/pci/devices/%U", - unformat_vlib_pci_addr, &dev->bus_address)) - abort (); - - unformat_free (&input); - - } - - - pdev.config_fd = fd; - pdev.dev_dir_name = dev_dir_name; - - hash_set (pm->pci_dev_index_by_pci_addr, dev->bus_address.as_u32, - dev - pm->pci_devs); - - vec_reset_length (f); - f = format (f, "%v/vpd%c", dev_dir_name, 0); - fd = open ((char *) f, O_RDONLY); - if (fd >= 0) - { - while (1) - { - u8 tag[3]; - u8 *data = 0; - int len; - - if (read (fd, &tag, 3) != 3) - break; - - if (tag[0] != 0x82 && tag[0] != 0x90 && tag[0] != 0x91) - break; - - len = (tag[2] << 8) | tag[1]; - vec_validate (data, len); - - if (read (fd, data, len) != len) - { - vec_free (data); - break; - } - if (tag[0] == 0x82) - dev->product_name = data; - else if (tag[0] == 0x90) - dev->vpd_r = data; - else if (tag[0] == 0x91) - dev->vpd_w = data; - - data = 0; - } - close (fd); - } - - dev->numa_node = -1; - vec_reset_length (f); - f = format (f, "%v/numa_node%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "%u", &dev->numa_node); - - vec_reset_length (f); - f = format (f, "%v/class%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); - dev->device_class = tmp >> 8; - - vec_reset_length (f); - f = format (f, "%v/vendor%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); - dev->vendor_id = tmp; - - vec_reset_length (f); - f = format (f, "%v/device%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); - dev->device_id = tmp; - - error = init_device (vm, dev, &pdev); - - vec_reset_length (f); - f = format (f, "%v/driver%c", dev_dir_name, 0); - dev->driver_name = vlib_sysfs_link_to_name ((char *) f); - -done: - vec_free (f); - return error; -} - -clib_error_t * -linux_pci_init (vlib_main_t * vm) -{ - vlib_pci_main_t *pm = &pci_main; - clib_error_t *error; - - pm->vlib_main = vm; - - if ((error = vlib_call_init_function (vm, unix_input_init))) - return error; - - ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32)); - pm->pci_dev_index_by_pci_addr = hash_create (0, sizeof (uword)); - - error = foreach_directory_file ("/sys/bus/pci/devices", scan_device, vm, - /* scan_dirs */ 0); - - /* Complain and continue. might not be root, etc. */ - if (error) - clib_error_report (error); - - return error; -} - -VLIB_INIT_FUNCTION (linux_pci_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index 36f8109e..f8d5d8f9 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -18,6 +18,7 @@ #include #include +#include #include static u8 * diff --git a/src/vlib/unix/physmem.c b/src/vlib/unix/physmem.c deleted file mode 100644 index d5d5d6c8..00000000 --- a/src/vlib/unix/physmem.c +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
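The VPD loop in scan_device above walks PCI Vital Product Data large-resource records: a one-byte tag (0x82 product name, 0x90 read-only fields, 0x91 read-write fields) followed by a little-endian 16-bit length and the payload. A minimal sketch of that framing; the type and function names are illustrative:

/* Parse one VPD large-resource record header; returns bytes consumed
 * including the 3-byte header, or 0 if the buffer is too short. */
#include <stddef.h>
#include <stdint.h>

typedef struct
{
  uint8_t tag;			/* 0x82 / 0x90 / 0x91 */
  uint16_t len;			/* payload length, little endian on the wire */
} vpd_record_t;

static size_t
vpd_next_record (const uint8_t * p, size_t avail, vpd_record_t * r)
{
  if (avail < 3)
    return 0;
  r->tag = p[0];
  r->len = (uint16_t) ((p[2] << 8) | p[1]);
  if (avail < (size_t) (3 + r->len))
    return 0;
  return 3 + r->len;
}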
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * physmem.c: Unix physical memory - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#ifndef __NR_memfd_create -#if defined __x86_64__ -#define __NR_memfd_create 319 -#elif defined __arm__ -#define __NR_memfd_create 385 -#elif defined __aarch64__ -#define __NR_memfd_create 279 -#else -#error "__NR_memfd_create unknown for this architecture" -#endif -#endif - -static inline int -memfd_create (const char *name, unsigned int flags) -{ - return syscall (__NR_memfd_create, name, flags); -} - -#ifndef F_LINUX_SPECIFIC_BASE -#define F_LINUX_SPECIFIC_BASE 1024 -#endif -#define MFD_ALLOW_SEALING 0x0002U -#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) -#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) - -#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ -#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ -#define F_SEAL_GROW 0x0004 /* prevent file from growing */ -#define F_SEAL_WRITE 0x0008 /* prevent writes */ - -static void * -unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx, - uword n_bytes, uword alignment) -{ - vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); - uword lo_offset, hi_offset; - uword *to_free = 0; - - if (pr->heap == 0) - return 0; - - /* IO memory is always at least cache aligned. */ - alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES); - - while (1) - { - mheap_get_aligned (pr->heap, n_bytes, - /* align */ alignment, - /* align offset */ 0, - &lo_offset); - - /* Allocation failed? */ - if (lo_offset == ~0) - break; - - if (pr->flags & VLIB_PHYSMEM_F_FAKE) - break; - - /* Make sure allocation does not span DMA physical chunk boundary. */ - hi_offset = lo_offset + n_bytes - 1; - - if ((lo_offset >> pr->log2_page_size) == - (hi_offset >> pr->log2_page_size)) - break; - - /* Allocation would span chunk boundary, queue it to be freed as soon as - we find suitable chunk. */ - vec_add1 (to_free, lo_offset); - } - - if (to_free != 0) - { - uword i; - for (i = 0; i < vec_len (to_free); i++) - mheap_put (pr->heap, to_free[i]); - vec_free (to_free); - } - - return lo_offset != ~0 ? 
pr->heap + lo_offset : 0; -} - -static void -unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x) -{ - vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); - /* Return object to region's heap. */ - mheap_put (pr->heap, x - pr->heap); -} - -static u64 -get_page_paddr (int fd, uword addr) -{ - int pagesize = sysconf (_SC_PAGESIZE); - u64 seek, pagemap = 0; - - seek = ((u64) addr / pagesize) * sizeof (u64); - if (lseek (fd, seek, SEEK_SET) != seek) - { - clib_unix_warning ("lseek to 0x%llx", seek); - return 0; - } - if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) - { - clib_unix_warning ("read ptbits"); - return 0; - } - if ((pagemap & (1ULL << 63)) == 0) - return 0; - - pagemap &= pow2_mask (55); - - return pagemap * pagesize; -} - -static clib_error_t * -unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, - u8 numa_node, u32 flags, - vlib_physmem_region_index_t * idx) -{ - vlib_physmem_main_t *vpm = &vm->physmem_main; - vlib_physmem_region_t *pr; - clib_error_t *error = 0; - int pagemap_fd = -1; - u8 *mount_dir = 0; - u8 *filename = 0; - struct stat st; - int old_mpol; - int mmap_flags; - struct bitmask *old_mask = numa_allocate_nodemask (); - - if (geteuid () != 0 && (flags & VLIB_PHYSMEM_F_FAKE) == 0) - return clib_error_return (0, "not allowed"); - - pool_get (vpm->regions, pr); - - if ((pr - vpm->regions) >= 256) - { - error = clib_error_return (0, "maximum number of regions reached"); - goto error; - } - - pr->index = pr - vpm->regions; - pr->fd = -1; - pr->flags = flags; - - if (get_mempolicy (&old_mpol, old_mask->maskp, old_mask->size + 1, NULL, 0) - == -1) - { - error = clib_error_return_unix (0, "get_mempolicy"); - goto error; - } - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - if ((pagemap_fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) - { - error = clib_error_return_unix (0, "open '/proc/self/pagemap'"); - goto error; - } - - mount_dir = format (0, "%s/physmem_region%d%c", - vlib_unix_get_runtime_dir (), pr->index, 0); - filename = format (0, "%s/mem%c", mount_dir, 0); - - unlink ((char *) mount_dir); - - error = vlib_unix_recursive_mkdir ((char *) mount_dir); - if (error) - goto error; - - if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) - { - error = clib_error_return_unix (0, "mount hugetlb directory '%s'", - mount_dir); - goto error; - } - - if ((pr->fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) - { - error = clib_error_return_unix (0, "open"); - goto error; - } - - mmap_flags = MAP_SHARED | MAP_HUGETLB | MAP_LOCKED; - } - else - { - if ((pr->fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1) - return clib_error_return_unix (0, "memfd_create"); - - if ((fcntl (pr->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - { - error = - clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); - goto error; - } - mmap_flags = MAP_SHARED; - } - - if (fstat (pr->fd, &st)) - { - error = clib_error_return_unix (0, "fstat"); - goto error; - } - - pr->log2_page_size = min_log2 (st.st_blksize); - pr->n_pages = ((size - 1) >> pr->log2_page_size) + 1; - size = pr->n_pages * (1 << pr->log2_page_size); - - if ((ftruncate (pr->fd, size)) == -1) - { - error = clib_error_return_unix (0, "ftruncate length: %d", size); - goto error; - } - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - error = vlib_sysfs_prealloc_hugepages (numa_node, - 1 << (pr->log2_page_size - 10), - pr->n_pages); - if (error) - goto error; - } - - numa_set_preferred (numa_node); - - pr->mem = mmap (0, size, (PROT_READ | 
PROT_WRITE), mmap_flags, pr->fd, 0); - - if (pr->mem == MAP_FAILED) - { - pr->mem = 0; - error = clib_error_return_unix (0, "mmap"); - goto error; - } - - if (set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1) - { - error = clib_error_return_unix (0, "set_mempolicy"); - goto error; - } - - pr->size = pr->n_pages << pr->log2_page_size; - pr->page_mask = (1 << pr->log2_page_size) - 1; - pr->numa_node = numa_node; - pr->name = format (0, "%s", name); - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - int i; - for (i = 0; i < pr->n_pages; i++) - { - void *ptr = pr->mem + (i << pr->log2_page_size); - int node; - move_pages (0, 1, &ptr, 0, &node, 0); - if (numa_node != node) - { - clib_warning - ("physmem page for region \'%s\' allocated on the wrong" - " numa node (requested %u actual %u)", pr->name, - pr->numa_node, node, i); - break; - } - } - } - - if (flags & VLIB_PHYSMEM_F_INIT_MHEAP) - { - pr->heap = mheap_alloc_with_flags (pr->mem, pr->size, - /* Don't want mheap mmap/munmap with IO memory. */ - MHEAP_FLAG_DISABLE_VM | - MHEAP_FLAG_THREAD_SAFE); - fformat (stdout, "%U", format_mheap, pr->heap, /* verbose */ 1); - } - - if (flags & VLIB_PHYSMEM_F_HAVE_BUFFERS) - { - vlib_buffer_add_mem_range (vm, pointer_to_uword (pr->mem), pr->size); - } - - *idx = pr->index; - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - int i; - for (i = 0; i < pr->n_pages; i++) - { - uword vaddr = - pointer_to_uword (pr->mem) + (((u64) i) << pr->log2_page_size); - u64 page_paddr = get_page_paddr (pagemap_fd, vaddr); - vec_add1 (pr->page_table, page_paddr); - } - } - - goto done; - -error: - if (pr->fd > -1) - close (pr->fd); - - if (pr->mem) - munmap (pr->mem, size); - - memset (pr, 0, sizeof (*pr)); - pool_put (vpm->regions, pr); - -done: - if (mount_dir) - { - umount2 ((char *) mount_dir, MNT_DETACH); - rmdir ((char *) mount_dir); - vec_free (mount_dir); - } - numa_free_cpumask (old_mask); - vec_free (filename); - if (pagemap_fd > -1) - close (pagemap_fd); - return error; -} - -static void -unix_physmem_region_free (vlib_main_t * vm, vlib_physmem_region_index_t idx) -{ - vlib_physmem_main_t *vpm = &vm->physmem_main; - vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); - - if (pr->fd > 0) - close (pr->fd); - munmap (pr->mem, pr->size); - vec_free (pr->name); - pool_put (vpm->regions, pr); -} - -clib_error_t * -unix_physmem_init (vlib_main_t * vm) -{ - clib_error_t *error = 0; - - /* Avoid multiple calls. 
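get_page_paddr() earlier in this file implements the documented /proc/self/pagemap lookup: each virtual page has a 64-bit entry in which bit 63 means "present" and bits 0..54 hold the physical frame number. A standalone sketch of the same translation, assuming a pagemap file descriptor opened read-only:

/* Translate a virtual address to a physical one via pagemap.
 * Returns 0 if the page is not present or the read fails. */
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

static uint64_t
virt_to_phys (int pagemap_fd, uintptr_t vaddr, long page_size)
{
  uint64_t entry;
  off_t off = (off_t) ((uint64_t) vaddr / page_size * sizeof (uint64_t));

  if (pread (pagemap_fd, &entry, sizeof (entry), off) != sizeof (entry))
    return 0;
  if (!(entry & (1ULL << 63)))	/* bit 63: page present */
    return 0;
  /* bits 0..54: physical frame number */
  return (entry & ((1ULL << 55) - 1)) * page_size;
}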
*/ - if (vm->os_physmem_alloc_aligned) - return error; - - vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned; - vm->os_physmem_free = unix_physmem_free; - vm->os_physmem_region_alloc = unix_physmem_region_alloc; - vm->os_physmem_region_free = unix_physmem_region_free; - - return error; -} - -static clib_error_t * -show_physmem (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - vlib_physmem_main_t *vpm = &vm->physmem_main; - vlib_physmem_region_t *pr; - - /* *INDENT-OFF* */ - pool_foreach (pr, vpm->regions, ( - { - vlib_cli_output (vm, "index %u name '%s' page-size %uKB num-pages %d " - "numa-node %u fd %d\n", - pr->index, pr->name, (1 << (pr->log2_page_size -10)), - pr->n_pages, pr->numa_node, pr->fd); - if (pr->heap) - vlib_cli_output (vm, " %U", format_mheap, pr->heap, /* verbose */ 1); - else - vlib_cli_output (vm, " no heap\n"); - })); - /* *INDENT-ON* */ - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_physmem_command, static) = { - .path = "show physmem", - .short_help = "Show physical memory allocation", - .function = show_physmem, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/unix/unix.h b/src/vlib/unix/unix.h index b5a33427..1b0d8b9d 100644 --- a/src/vlib/unix/unix.h +++ b/src/vlib/unix/unix.h @@ -217,23 +217,6 @@ extern u8 **vlib_thread_stacks; /* utils */ -clib_error_t *vlib_sysfs_write (char *file_name, char *fmt, ...); - -clib_error_t *vlib_sysfs_read (char *file_name, char *fmt, ...); - -u8 *vlib_sysfs_link_to_name (char *link); - -clib_error_t *vlib_sysfs_set_nr_hugepages (unsigned int numa_node, - int page_size, int nr); -clib_error_t *vlib_sysfs_get_nr_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_get_free_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_prealloc_hugepages (unsigned int numa_node, - int page_size, int nr); - clib_error_t *foreach_directory_file (char *dir_name, clib_error_t * (*f) (void *arg, u8 * path_name, diff --git a/src/vlib/unix/util.c b/src/vlib/unix/util.c index 0e252aca..5472751e 100644 --- a/src/vlib/unix/util.c +++ b/src/vlib/unix/util.c @@ -98,225 +98,6 @@ foreach_directory_file (char *dir_name, return error; } -clib_error_t * -vlib_sysfs_write (char *file_name, char *fmt, ...) -{ - u8 *s; - int fd; - clib_error_t *error = 0; - - fd = open (file_name, O_WRONLY); - if (fd < 0) - return clib_error_return_unix (0, "open `%s'", file_name); - - va_list va; - va_start (va, fmt); - s = va_format (0, fmt, &va); - va_end (va); - - if (write (fd, s, vec_len (s)) < 0) - error = clib_error_return_unix (0, "write `%s'", file_name); - - vec_free (s); - close (fd); - return error; -} - -clib_error_t * -vlib_sysfs_read (char *file_name, char *fmt, ...) 
-{ - unformat_input_t input; - u8 *s = 0; - int fd; - ssize_t sz; - uword result; - - fd = open (file_name, O_RDONLY); - if (fd < 0) - return clib_error_return_unix (0, "open `%s'", file_name); - - vec_validate (s, 4095); - - sz = read (fd, s, vec_len (s)); - if (sz < 0) - { - close (fd); - vec_free (s); - return clib_error_return_unix (0, "read `%s'", file_name); - } - - _vec_len (s) = sz; - unformat_init_vector (&input, s); - - va_list va; - va_start (va, fmt); - result = va_unformat (&input, fmt, &va); - va_end (va); - - vec_free (s); - close (fd); - - if (result == 0) - return clib_error_return (0, "unformat error"); - - return 0; -} - -u8 * -vlib_sysfs_link_to_name (char *link) -{ - char *p, buffer[64]; - unformat_input_t in; - u8 *s = 0; - int r; - - r = readlink (link, buffer, sizeof (buffer) - 1); - - if (r < 0) - return 0; - - buffer[r] = 0; - p = strrchr (buffer, '/'); - - if (!p) - return 0; - - unformat_init_string (&in, p + 1, strlen (p + 1)); - if (unformat (&in, "%s", &s) != 1) - clib_unix_warning ("no string?"); - unformat_free (&in); - - return s; -} - -clib_error_t * -vlib_sysfs_set_nr_hugepages (unsigned int numa_node, int page_size, int nr) -{ - clib_error_t *error = 0; - struct stat sb; - u8 *p = 0; - - p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); - - if (stat ((char *) p, &sb) == 0) - { - if (S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' is not directory", p); - goto done; - } - } - else if (numa_node == 0) - { - vec_reset_length (p); - p = format (p, "/sys/kernel/mm%c", 0); - if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' does not exist or it is not " - "directory", p); - goto done; - } - } - else - { - error = clib_error_return (0, "'%s' does not exist", p); - goto done; - } - - _vec_len (p) -= 1; - p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0); - vlib_sysfs_write ((char *) p, "%d", nr); - -done: - vec_free (p); - return error; -} - - -static clib_error_t * -vlib_sysfs_get_xxx_hugepages (char *type, unsigned int numa_node, - int page_size, int *val) -{ - clib_error_t *error = 0; - struct stat sb; - u8 *p = 0; - - p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); - - if (stat ((char *) p, &sb) == 0) - { - if (S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' is not directory", p); - goto done; - } - } - else if (numa_node == 0) - { - vec_reset_length (p); - p = format (p, "/sys/kernel/mm%c", 0); - if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' does not exist or it is not " - "directory", p); - goto done; - } - } - else - { - error = clib_error_return (0, "'%s' does not exist", p); - goto done; - } - - _vec_len (p) -= 1; - p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size, - type, 0); - error = vlib_sysfs_read ((char *) p, "%d", val); - -done: - vec_free (p); - return error; -} - -clib_error_t * -vlib_sysfs_get_free_hugepages (unsigned int numa_node, int page_size, int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_get_nr_hugepages (unsigned int numa_node, int page_size, int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, int page_size, - int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v); -} - -clib_error_t * 
-vlib_sysfs_prealloc_hugepages (unsigned int numa_node, int page_size, int nr) -{ - clib_error_t *error = 0; - int n, needed; - error = vlib_sysfs_get_free_hugepages (numa_node, page_size, &n); - if (error) - return error; - needed = nr - n; - if (needed <= 0) - return 0; - - error = vlib_sysfs_get_nr_hugepages (numa_node, page_size, &n); - if (error) - return error; - clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u", - needed, page_size, numa_node); - return vlib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed); -} - clib_error_t * vlib_unix_recursive_mkdir (char *path) { diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index ea52878d..e7e69214 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -26,6 +26,7 @@ #include #include +#include #include #include -- cgit 1.2.3-korg From 56dd5438b04b869065d8e901c315496bb6777455 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 8 Sep 2017 19:52:02 +0200 Subject: move unix_file_* code to vppinfra This will allow us to use this code in client libraries without vlib. Change-Id: I8557b752496841ba588aa36b6082cbe2cd1867fe Signed-off-by: Damjan Marion --- src/plugins/memif/memif.c | 36 ++++----- src/plugins/memif/private.h | 38 +++++----- src/plugins/memif/socket.c | 56 +++++++------- src/vlib/linux/pci.c | 13 ++-- src/vlib/unix/cli.c | 85 +++++++++++---------- src/vlib/unix/input.c | 13 ++-- src/vlib/unix/main.c | 1 + src/vlib/unix/mc_socket.c | 69 ++++++++--------- src/vlib/unix/mc_socket.h | 2 +- src/vlib/unix/unix.h | 76 +------------------ src/vlibapi/api_common.h | 2 +- src/vlibsocket/api.h | 12 +-- src/vlibsocket/sockclnt_vlib.c | 12 +-- src/vlibsocket/socksvr_vlib.c | 92 +++++++++++----------- src/vnet/devices/af_packet/af_packet.c | 12 +-- src/vnet/devices/af_packet/af_packet.h | 2 +- src/vnet/devices/netmap/netmap.c | 14 ++-- src/vnet/devices/netmap/netmap.h | 2 +- src/vnet/devices/virtio/vhost-user.c | 62 +++++++-------- src/vnet/devices/virtio/vhost-user.h | 2 +- src/vnet/ip/punt.c | 8 +- src/vnet/ip/punt.h | 2 +- src/vnet/unix/tapcli.c | 19 ++--- src/vnet/unix/tuntap.c | 10 +-- src/vppinfra.am | 1 + src/vppinfra/file.h | 134 +++++++++++++++++++++++++++++++++ 26 files changed, 423 insertions(+), 352 deletions(-) create mode 100644 src/vppinfra/file.h (limited to 'src/vnet/devices') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 4c387b92..8fec409a 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -52,10 +52,10 @@ memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) static void memif_queue_intfd_close (memif_queue_t * mq) { - if (mq->int_unix_file_index != ~0) + if (mq->int_clib_file_index != ~0) { - memif_file_del_by_index (mq->int_unix_file_index); - mq->int_unix_file_index = ~0; + memif_file_del_by_index (mq->int_clib_file_index); + mq->int_clib_file_index = ~0; mq->int_fd = -1; } else if (mq->int_fd > -1) @@ -94,13 +94,13 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err) vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); /* close connection socket */ - if (mif->conn_unix_file_index != ~0) + if (mif->conn_clib_file_index != ~0) { memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files, mif->socket_file_index); hash_unset (msf->dev_instance_by_fd, mif->conn_fd); - memif_file_del_by_index (mif->conn_unix_file_index); - mif->conn_unix_file_index = ~0; + memif_file_del_by_index (mif->conn_clib_file_index); + 
mif->conn_clib_file_index = ~0; } else if (mif->conn_fd > -1) close (mif->conn_fd); @@ -145,7 +145,7 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err) } static clib_error_t * -memif_int_fd_read_ready (unix_file_t * uf) +memif_int_fd_read_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; vnet_main_t *vnm = vnet_get_main (); @@ -173,7 +173,7 @@ clib_error_t * memif_connect (memif_if_t * mif) { vnet_main_t *vnm = vnet_get_main (); - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; memif_region_t *mr; int i; @@ -219,7 +219,7 @@ memif_connect (memif_if_t * mif) { template.file_descriptor = mq->int_fd; template.private_data = (mif->dev_instance << 16) | (i & 0xFFFF); - memif_file_add (&mq->int_unix_file_index, &template); + memif_file_add (&mq->int_clib_file_index, &template); } vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, i, ~0); rv = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, i, @@ -330,7 +330,7 @@ memif_init_regions_and_queues (memif_if_t * mif) memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i); if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0) return clib_error_return_unix (0, "eventfd[tx queue %u]", i); - mq->int_unix_file_index = ~0; + mq->int_clib_file_index = ~0; mq->ring = memif_get_ring (mif, MEMIF_RING_S2M, i); mq->log2_ring_size = mif->cfg.log2_ring_size; mq->region = 0; @@ -346,7 +346,7 @@ memif_init_regions_and_queues (memif_if_t * mif) memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i); if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0) return clib_error_return_unix (0, "eventfd[rx queue %u]", i); - mq->int_unix_file_index = ~0; + mq->int_clib_file_index = ~0; mq->ring = memif_get_ring (mif, MEMIF_RING_M2S, i); mq->log2_ring_size = mif->cfg.log2_ring_size; mq->region = 0; @@ -432,7 +432,7 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) (sockfd, (struct sockaddr *) &sun, sizeof (struct sockaddr_un)) == 0) { - unix_file_t t = { 0 }; + clib_file_t t = { 0 }; mif->conn_fd = sockfd; t.read_function = memif_slave_conn_fd_read_ready; @@ -440,7 +440,7 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) t.error_function = memif_slave_conn_fd_error; t.file_descriptor = mif->conn_fd; t.private_data = mif->dev_instance; - memif_file_add (&mif->conn_unix_file_index, &t); + memif_file_add (&mif->conn_clib_file_index, &t); hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance); mif->flags |= MEMIF_IF_FLAG_CONNECTING; @@ -507,7 +507,7 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif) if (msf->is_listener) { uword *x; - memif_file_del_by_index (msf->unix_file_index); + memif_file_del_by_index (msf->clib_file_index); vec_foreach (x, msf->pending_file_indices) { memif_file_del_by_index (*x); @@ -639,7 +639,7 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) mif->socket_file_index = msf - mm->socket_files; mif->id = args->id; mif->sw_if_index = mif->hw_if_index = mif->per_interface_next_index = ~0; - mif->conn_unix_file_index = ~0; + mif->conn_clib_file_index = ~0; mif->conn_fd = -1; mif->mode = args->mode; if (args->secret) @@ -737,12 +737,12 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) goto error; } - msf->unix_file_index = ~0; - unix_file_t template = { 0 }; + msf->clib_file_index = ~0; + clib_file_t template = { 0 }; template.read_function = memif_conn_fd_accept_ready; template.file_descriptor = msf->fd; template.private_data = mif->socket_file_index; - memif_file_add (&msf->unix_file_index, &template); + 
memif_file_add (&msf->clib_file_index, &template); } msf->ref_cnt++; diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h index b5f2f8ff..912ec59a 100644 --- a/src/plugins/memif/private.h +++ b/src/plugins/memif/private.h @@ -41,34 +41,34 @@ #if MEMIF_DEBUG == 1 #define memif_file_add(a, b) do { \ ASSERT (*a == ~0); \ - *a = unix_file_add (&unix_main, b); \ - clib_warning ("unix_file_add fd %d private_data %u idx %u", \ + *a = clib_file_add (&file_main, b); \ + clib_warning ("clib_file_add fd %d private_data %u idx %u", \ (b)->file_descriptor, (b)->private_data, *a); \ } while (0) #define memif_file_del(a) do { \ - clib_warning ("unix_file_del idx %u",a - unix_main.file_pool); \ - unix_file_del (&unix_main, a); \ + clib_warning ("clib_file_del idx %u",a - file_main.file_pool); \ + clib_file_del (&file_main, a); \ } while (0) #define memif_file_del_by_index(a) do { \ - clib_warning ("unix_file_del idx %u", a); \ - unix_file_del_by_index (&unix_main, a); \ + clib_warning ("clib_file_del idx %u", a); \ + clib_file_del_by_index (&file_main, a); \ } while (0) #else #define memif_file_add(a, b) do { \ ASSERT (*a == ~0); \ - *a = unix_file_add (&unix_main, b); \ + *a = clib_file_add (&file_main, b); \ } while (0) -#define memif_file_del(a) unix_file_del(&unix_main, a) -#define memif_file_del_by_index(a) unix_file_del_by_index(&unix_main, a) +#define memif_file_del(a) clib_file_del(&file_main, a) +#define memif_file_del_by_index(a) clib_file_del_by_index(&file_main, a) #endif typedef struct { u8 *filename; int fd; - uword unix_file_index; + uword clib_file_index; uword *pending_file_indices; int ref_cnt; int is_listener; @@ -106,7 +106,7 @@ typedef struct /* interrupts */ int int_fd; - uword int_unix_file_index; + uword int_clib_file_index; u64 int_count; } memif_queue_t; @@ -140,7 +140,7 @@ typedef struct /* socket connection */ uword socket_file_index; int conn_fd; - uword conn_unix_file_index; + uword conn_clib_file_index; memif_msg_fifo_elt_t *msg_queue; u8 *secret; @@ -241,13 +241,13 @@ clib_error_t *memif_connect (memif_if_t * mif); void memif_disconnect (memif_if_t * mif, clib_error_t * err); /* socket.c */ -clib_error_t *memif_conn_fd_accept_ready (unix_file_t * uf); -clib_error_t *memif_master_conn_fd_read_ready (unix_file_t * uf); -clib_error_t *memif_slave_conn_fd_read_ready (unix_file_t * uf); -clib_error_t *memif_master_conn_fd_write_ready (unix_file_t * uf); -clib_error_t *memif_slave_conn_fd_write_ready (unix_file_t * uf); -clib_error_t *memif_master_conn_fd_error (unix_file_t * uf); -clib_error_t *memif_slave_conn_fd_error (unix_file_t * uf); +clib_error_t *memif_conn_fd_accept_ready (clib_file_t * uf); +clib_error_t *memif_master_conn_fd_read_ready (clib_file_t * uf); +clib_error_t *memif_slave_conn_fd_read_ready (clib_file_t * uf); +clib_error_t *memif_master_conn_fd_write_ready (clib_file_t * uf); +clib_error_t *memif_slave_conn_fd_write_ready (clib_file_t * uf); +clib_error_t *memif_master_conn_fd_error (clib_file_t * uf); +clib_error_t *memif_slave_conn_fd_error (clib_file_t * uf); clib_error_t *memif_msg_send_disconnect (memif_if_t * mif, clib_error_t * err); u8 *format_memif_device_name (u8 * s, va_list * args); diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c index 79ae07be..1abc0f11 100644 --- a/src/plugins/memif/socket.c +++ b/src/plugins/memif/socket.c @@ -246,7 +246,7 @@ memif_msg_receive_hello (memif_if_t * mif, memif_msg_t * msg) static clib_error_t * memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg, - unix_file_t * 
uf) + clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_socket_file_t *msf = @@ -258,7 +258,7 @@ memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg, if (i->version != MEMIF_VERSION) { - memif_file_del_by_index (uf - unix_main.file_pool); + memif_file_del_by_index (uf - file_main.file_pool); return clib_error_return (0, "unsupported version"); } @@ -291,7 +291,7 @@ memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg, } mif->conn_fd = uf->file_descriptor; - mif->conn_unix_file_index = uf - unix_main.file_pool; + mif->conn_clib_file_index = uf - file_main.file_pool; hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance); mif->remote_name = memif_str2vec (i->name, sizeof (i->name)); *mifp = mif; @@ -316,7 +316,7 @@ memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg, error: tmp.conn_fd = uf->file_descriptor; memif_msg_send_disconnect (&tmp, err); - memif_file_del_by_index (uf - unix_main.file_pool); + memif_file_del_by_index (uf - file_main.file_pool); return err; } @@ -377,7 +377,7 @@ memif_msg_receive_add_ring (memif_if_t * mif, memif_msg_t * msg, int fd) } mq->int_fd = fd; - mq->int_unix_file_index = ~0; + mq->int_clib_file_index = ~0; mq->log2_ring_size = ar->log2_ring_size; mq->region = ar->region; mq->offset = ar->offset; @@ -422,7 +422,7 @@ memif_msg_receive_disconnect (memif_if_t * mif, memif_msg_t * msg) } static clib_error_t * -memif_msg_receive (memif_if_t ** mifp, unix_file_t * uf) +memif_msg_receive (memif_if_t ** mifp, clib_file_t * uf) { char ctl[CMSG_SPACE (sizeof (int)) + CMSG_SPACE (sizeof (struct ucred))] = { 0 }; @@ -544,20 +544,21 @@ memif_msg_receive (memif_if_t ** mifp, unix_file_t * uf) return err; } - if (clib_fifo_elts (mif->msg_queue) && mif->conn_unix_file_index != ~0) - unix_file_set_data_available_to_write (mif->conn_unix_file_index, 1); + if (clib_fifo_elts (mif->msg_queue) && mif->conn_clib_file_index != ~0) + clib_file_set_data_available_to_write (&file_main, + mif->conn_clib_file_index, 1); return 0; } clib_error_t * -memif_master_conn_fd_read_ready (unix_file_t * uf) +memif_master_conn_fd_read_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_socket_file_t *msf = pool_elt_at_index (mm->socket_files, uf->private_data); uword *p; memif_if_t *mif = 0; - uword conn_unix_file_index = ~0; + uword conn_clib_file_index = ~0; clib_error_t *err = 0; p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor); @@ -570,13 +571,13 @@ memif_master_conn_fd_read_ready (unix_file_t * uf) /* This is new connection, remove index from pending vector */ int i; vec_foreach_index (i, msf->pending_file_indices) - if (msf->pending_file_indices[i] == uf - unix_main.file_pool) + if (msf->pending_file_indices[i] == uf - file_main.file_pool) { - conn_unix_file_index = msf->pending_file_indices[i]; + conn_clib_file_index = msf->pending_file_indices[i]; vec_del1 (msf->pending_file_indices, i); break; } - ASSERT (conn_unix_file_index != ~0); + ASSERT (conn_clib_file_index != ~0); } err = memif_msg_receive (&mif, uf); if (err) @@ -588,7 +589,7 @@ memif_master_conn_fd_read_ready (unix_file_t * uf) } clib_error_t * -memif_slave_conn_fd_read_ready (unix_file_t * uf) +memif_slave_conn_fd_read_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; clib_error_t *err; @@ -603,17 +604,18 @@ memif_slave_conn_fd_read_ready (unix_file_t * uf) } static clib_error_t * -memif_conn_fd_write_ready (unix_file_t * uf, memif_if_t * mif) +memif_conn_fd_write_ready (clib_file_t * uf, memif_if_t * mif) { memif_msg_fifo_elt_t *e; clib_fifo_sub2 
(mif->msg_queue, e); - unix_file_set_data_available_to_write (mif->conn_unix_file_index, 0); + clib_file_set_data_available_to_write (&file_main, + mif->conn_clib_file_index, 0); memif_msg_send (mif->conn_fd, &e->msg, e->fd); return 0; } clib_error_t * -memif_master_conn_fd_write_ready (unix_file_t * uf) +memif_master_conn_fd_write_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_socket_file_t *msf = @@ -630,7 +632,7 @@ memif_master_conn_fd_write_ready (unix_file_t * uf) } clib_error_t * -memif_slave_conn_fd_write_ready (unix_file_t * uf) +memif_slave_conn_fd_write_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); @@ -638,7 +640,7 @@ memif_slave_conn_fd_write_ready (unix_file_t * uf) } clib_error_t * -memif_slave_conn_fd_error (unix_file_t * uf) +memif_slave_conn_fd_error (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); @@ -652,7 +654,7 @@ memif_slave_conn_fd_error (unix_file_t * uf) } clib_error_t * -memif_master_conn_fd_error (unix_file_t * uf) +memif_master_conn_fd_error (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_socket_file_t *msf = @@ -674,7 +676,7 @@ memif_master_conn_fd_error (unix_file_t * uf) { int i; vec_foreach_index (i, msf->pending_file_indices) - if (msf->pending_file_indices[i] == uf - unix_main.file_pool) + if (msf->pending_file_indices[i] == uf - file_main.file_pool) { vec_del1 (msf->pending_file_indices, i); memif_file_del (uf); @@ -689,7 +691,7 @@ memif_master_conn_fd_error (unix_file_t * uf) clib_error_t * -memif_conn_fd_accept_ready (unix_file_t * uf) +memif_conn_fd_accept_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_socket_file_t *msf = @@ -697,8 +699,8 @@ memif_conn_fd_accept_ready (unix_file_t * uf) int addr_len; struct sockaddr_un client; int conn_fd; - unix_file_t template = { 0 }; - uword unix_file_index = ~0; + clib_file_t template = { 0 }; + uword clib_file_index = ~0; clib_error_t *err; @@ -715,16 +717,16 @@ memif_conn_fd_accept_ready (unix_file_t * uf) template.file_descriptor = conn_fd; template.private_data = uf->private_data; - memif_file_add (&unix_file_index, &template); + memif_file_add (&clib_file_index, &template); err = memif_msg_enq_hello (conn_fd); if (err) { clib_error_report (err); - memif_file_del_by_index (unix_file_index); + memif_file_del_by_index (clib_file_index); } else - vec_add1 (msf->pending_file_indices, unix_file_index); + vec_add1 (msf->pending_file_indices, clib_file_index); return 0; } diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c index cd2affdc..4ce19190 100644 --- a/src/vlib/linux/pci.c +++ b/src/vlib/linux/pci.c @@ -68,8 +68,8 @@ typedef struct /* Minor device for uio device. */ u32 uio_minor; - /* Index given by unix_file_add. */ - u32 unix_file_index; + /* Index given by clib_file_add. 
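A sketch of the clib_file_t registration pattern this commit converts memif (and the rest of the tree) to: fill in a zeroed template with callbacks and a descriptor, then hand it to clib_file_add. The callback and descriptor here are placeholders; clib_file_add, clib_file_t and file_main are the names introduced by this commit in vppinfra.

/* Register an fd with the vppinfra file poller; returns the index
 * later usable with clib_file_del_by_index. */
#include <vppinfra/file.h>

static clib_error_t *
my_read_ready (clib_file_t * uf)
{
  /* drain uf->file_descriptor here */
  return 0;
}

static u32
register_my_fd (int fd, uword private_data)
{
  clib_file_t template = { 0 };

  template.read_function = my_read_ready;
  template.file_descriptor = fd;
  template.private_data = private_data;
  return clib_file_add (&file_main, &template);
}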
*/ + u32 clib_file_index; } linux_pci_device_t; @@ -237,7 +237,7 @@ scan_uio_dir (void *arg, u8 * path_name, u8 * file_name) } static clib_error_t * -linux_pci_uio_read_ready (unix_file_t * uf) +linux_pci_uio_read_ready (clib_file_t * uf) { vlib_pci_main_t *pm = &pci_main; vlib_pci_device_t *d; @@ -257,7 +257,7 @@ linux_pci_uio_read_ready (unix_file_t * uf) } static clib_error_t * -linux_pci_uio_error_ready (unix_file_t * uf) +linux_pci_uio_error_ready (clib_file_t * uf) { u32 error_index = (u32) uf->private_data; @@ -294,15 +294,14 @@ add_device (vlib_pci_device_t * dev, linux_pci_device_t * pdev) } { - unix_file_t template = { 0 }; - unix_main_t *um = &unix_main; + clib_file_t template = { 0 }; template.read_function = linux_pci_uio_read_ready; template.file_descriptor = l->uio_fd; template.error_function = linux_pci_uio_error_ready; template.private_data = dev - pm->pci_devs; - l->unix_file_index = unix_file_add (um, &template); + l->clib_file_index = clib_file_add (&file_main, &template); } } diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index 068a4e16..39368823 100644 --- a/src/vlib/unix/cli.c +++ b/src/vlib/unix/cli.c @@ -141,7 +141,7 @@ typedef struct typedef struct { /** The file index held by unix.c */ - u32 unix_file_index; + u32 clib_file_index; /** Vector of output pending write to file descriptor. */ u8 *output_vector; @@ -502,11 +502,11 @@ unix_cli_match_action (unix_cli_parse_actions_t * a, * are available to be sent. */ static void -unix_cli_add_pending_output (unix_file_t * uf, +unix_cli_add_pending_output (clib_file_t * uf, unix_cli_file_t * cf, u8 * buffer, uword buffer_bytes) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vec_add (cf->output_vector, buffer, buffer_bytes); if (vec_len (cf->output_vector) > 0) @@ -514,7 +514,7 @@ unix_cli_add_pending_output (unix_file_t * uf, int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; if (!skip_update) - um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); } } @@ -522,10 +522,10 @@ unix_cli_add_pending_output (unix_file_t * uf, * that no more bytes are available to be sent. 
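unix_cli_add_pending_output and unix_cli_del_pending_output below arm and disarm the write-ready flag only when it actually transitions, so the poller update (which ends in epoll_ctl) is not invoked once per byte. A compact sketch of that idiom, using the flag and update-type names as they appear in these hunks; exact header locations at this stage of the tree are assumed:

/* Arm or disarm write-readiness for a file, touching the poller only
 * on a state change. */
#include <vppinfra/file.h>

static void
set_write_pending (clib_file_main_t * fm, clib_file_t * uf, int pending)
{
  int was = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);

  if (was == pending)
    return;			/* no change, skip the epoll_ctl round trip */
  if (pending)
    uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
  else
    uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
  fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
}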
*/ static void -unix_cli_del_pending_output (unix_file_t * uf, +unix_cli_del_pending_output (clib_file_t * uf, unix_cli_file_t * cf, uword n_bytes) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vec_delete (cf->output_vector, n_bytes, 0); if (vec_len (cf->output_vector) <= 0) @@ -533,7 +533,7 @@ unix_cli_del_pending_output (unix_file_t * uf, int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE; if (!skip_update) - um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); } } @@ -580,7 +580,7 @@ unix_vlib_findchr (u8 chr, u8 * str, word len) */ static void unix_vlib_cli_output_raw (unix_cli_file_t * cf, - unix_file_t * uf, u8 * buffer, uword buffer_bytes) + clib_file_t * uf, u8 * buffer, uword buffer_bytes) { int n = 0; @@ -610,7 +610,7 @@ unix_vlib_cli_output_raw (unix_cli_file_t * cf, */ static void unix_vlib_cli_output_cooked (unix_cli_file_t * cf, - unix_file_t * uf, + clib_file_t * uf, u8 * buffer, uword buffer_bytes) { word end = 0, start = 0; @@ -646,7 +646,7 @@ unix_vlib_cli_output_cooked (unix_cli_file_t * cf, /** @brief Output the CLI prompt */ static void -unix_cli_cli_prompt (unix_cli_file_t * cf, unix_file_t * uf) +unix_cli_cli_prompt (unix_cli_file_t * cf, clib_file_t * uf) { unix_cli_main_t *cm = &unix_cli_main; @@ -655,7 +655,7 @@ unix_cli_cli_prompt (unix_cli_file_t * cf, unix_file_t * uf) /** @brief Output a pager prompt and show number of buffered lines */ static void -unix_cli_pager_prompt (unix_cli_file_t * cf, unix_file_t * uf) +unix_cli_pager_prompt (unix_cli_file_t * cf, clib_file_t * uf) { u8 *prompt; u32 h; @@ -678,7 +678,7 @@ unix_cli_pager_prompt (unix_cli_file_t * cf, unix_file_t * uf) /** @brief Output a pager "skipping" message */ static void -unix_cli_pager_message (unix_cli_file_t * cf, unix_file_t * uf, +unix_cli_pager_message (unix_cli_file_t * cf, clib_file_t * uf, char *message, char *postfix) { u8 *prompt; @@ -694,7 +694,7 @@ unix_cli_pager_message (unix_cli_file_t * cf, unix_file_t * uf, /** @brief Erase the printed pager prompt */ static void -unix_cli_pager_prompt_erase (unix_cli_file_t * cf, unix_file_t * uf) +unix_cli_pager_prompt_erase (unix_cli_file_t * cf, clib_file_t * uf) { if (cf->ansi_capable) { @@ -716,7 +716,7 @@ unix_cli_pager_prompt_erase (unix_cli_file_t * cf, unix_file_t * uf) /** @brief Uses an ANSI escape sequence to move the cursor */ static void -unix_cli_ansi_cursor (unix_cli_file_t * cf, unix_file_t * uf, u16 x, u16 y) +unix_cli_ansi_cursor (unix_cli_file_t * cf, clib_file_t * uf, u16 x, u16 y) { u8 *str; @@ -732,7 +732,7 @@ unix_cli_ansi_cursor (unix_cli_file_t * cf, unix_file_t * uf, u16 x, u16 y) * @param uf Unix file of the CLI session. 
*/ static void -unix_cli_pager_redraw (unix_cli_file_t * cf, unix_file_t * uf) +unix_cli_pager_redraw (unix_cli_file_t * cf, clib_file_t * uf) { unix_cli_pager_index_t *pi = NULL; u8 *line = NULL; @@ -930,12 +930,13 @@ static void unix_vlib_cli_output (uword cli_file_index, u8 * buffer, uword buffer_bytes) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_main_t *cm = &unix_cli_main; unix_cli_file_t *cf; - unix_file_t *uf; + clib_file_t *uf; cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); - uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); if (cf->no_pager || um->cli_pager_buffer_limit == 0 || cf->height == 0) { @@ -1037,7 +1038,8 @@ static void unix_cli_file_welcome (unix_cli_main_t * cm, unix_cli_file_t * cf) { unix_main_t *um = &unix_main; - unix_file_t *uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + clib_file_main_t *fm = &file_main; + clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); unix_cli_banner_t *banner; int i, len; @@ -1104,7 +1106,7 @@ unix_cli_file_welcome_timer (any arg, f64 delay) static i32 unix_cli_process_telnet (unix_main_t * um, unix_cli_file_t * cf, - unix_file_t * uf, u8 * input_vector, uword len) + clib_file_t * uf, u8 * input_vector, uword len) { /* Input_vector starts at IAC byte. * See if we have a complete message; if not, return -1 so we wait for more. @@ -1229,7 +1231,7 @@ static int unix_cli_line_process_one (unix_cli_main_t * cm, unix_main_t * um, unix_cli_file_t * cf, - unix_file_t * uf, + clib_file_t * uf, u8 input, unix_cli_parse_action_t action) { u8 *prev; @@ -2059,10 +2061,10 @@ unix_cli_line_process_one (unix_cli_main_t * cm, /** @brief Process input bytes on a stream to provide line editing and * command history in the CLI. */ static int -unix_cli_line_edit (unix_cli_main_t * cm, - unix_main_t * um, unix_cli_file_t * cf) +unix_cli_line_edit (unix_cli_main_t * cm, unix_main_t * um, + clib_file_main_t * fm, unix_cli_file_t * cf) { - unix_file_t *uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); int i; for (i = 0; i < vec_len (cf->input_vector); i++) @@ -2139,7 +2141,8 @@ static void unix_cli_process_input (unix_cli_main_t * cm, uword cli_file_index) { unix_main_t *um = &unix_main; - unix_file_t *uf; + clib_file_main_t *fm = &file_main; + clib_file_t *uf; unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); unformat_input_t input; int vlib_parse_eval (u8 *); @@ -2157,7 +2160,7 @@ more: else { /* Line edit, echo, etc. */ - if (unix_cli_line_edit (cm, um, cf)) + if (unix_cli_line_edit (cm, um, fm, cf)) /* want more input */ return; } @@ -2196,7 +2199,7 @@ more: /* Re-fetch pointer since pool may have moved. */ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); - uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); done: /* reset vector; we'll re-use it later */ @@ -2240,12 +2243,13 @@ static void unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_file_t *cf; - unix_file_t *uf; + clib_file_t *uf; int i; cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); - uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); /* Quit/EOF on stdin means quit program. 
*/ if (uf->file_descriptor == UNIX_CLI_STDIN_FD) @@ -2259,7 +2263,7 @@ unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index) vec_free (cf->command_history); - unix_file_del (um, uf); + clib_file_del (fm, uf); unix_cli_file_free (cf); pool_put (cm->cli_file_pool, cf); @@ -2311,7 +2315,7 @@ done: /** Called when a CLI session file descriptor can be written to without * blocking. */ static clib_error_t * -unix_cli_write_ready (unix_file_t * uf) +unix_cli_write_ready (clib_file_t * uf) { unix_cli_main_t *cm = &unix_cli_main; unix_cli_file_t *cf; @@ -2334,7 +2338,7 @@ unix_cli_write_ready (unix_file_t * uf) /** Called when a CLI session file descriptor has data to be read. */ static clib_error_t * -unix_cli_read_ready (unix_file_t * uf) +unix_cli_read_ready (clib_file_t * uf) { unix_main_t *um = &unix_main; unix_cli_main_t *cm = &unix_cli_main; @@ -2380,8 +2384,9 @@ static u32 unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_file_t *cf; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; vlib_main_t *vm = um->vlib_main; vlib_node_t *n; @@ -2424,7 +2429,7 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) template.private_data = cf - cm->cli_file_pool; cf->process_node_index = n->index; - cf->unix_file_index = unix_file_add (um, &template); + cf->clib_file_index = clib_file_add (fm, &template); cf->output_vector = 0; cf->input_vector = 0; @@ -2439,9 +2444,10 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) /** Telnet listening socket has a new connection. */ static clib_error_t * -unix_cli_listen_read_ready (unix_file_t * uf) +unix_cli_listen_read_ready (clib_file_t * uf) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_main_t *cm = &unix_cli_main; clib_socket_t *s = &um->cli_listen_socket; clib_socket_t client; @@ -2497,7 +2503,7 @@ unix_cli_listen_read_ready (unix_file_t * uf) /* Setup the pager */ cf->no_pager = um->cli_no_pager; - uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); /* Send the telnet options */ unix_vlib_cli_output_raw (cf, uf, charmode_option, @@ -2517,11 +2523,11 @@ unix_cli_listen_read_ready (unix_file_t * uf) static void unix_cli_resize_interrupt (int signum) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_main_t *cm = &unix_cli_main; unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool, cm->stdin_cli_file_index); - unix_file_t *uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); struct winsize ws; (void) signum; @@ -2548,6 +2554,7 @@ static clib_error_t * unix_cli_config (vlib_main_t * vm, unformat_input_t * input) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_main_t *cm = &unix_cli_main; int flags; clib_error_t *error = 0; @@ -2640,7 +2647,7 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input) if (s->config && s->config[0] != 0) { /* CLI listen. */ - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; /* mkdir of file socketu, only under /run */ if (strncmp (s->config, "/run", 4) == 0) @@ -2667,7 +2674,7 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input) template.read_function = unix_cli_listen_read_ready; template.file_descriptor = s->fd; - unix_file_add (um, &template); + clib_file_add (fm, &template); } /* Set CLI prompt. 
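unix_cli_resize_interrupt above refreshes the stdin session's width and height on SIGWINCH using the TIOCGWINSZ ioctl. A standalone POSIX sketch of the same mechanism, with the work deferred out of the signal handler:

/* Note a terminal resize in the handler, query the new geometry
 * afterwards. */
#include <signal.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

static volatile sig_atomic_t resized;

static void
on_winch (int signum)
{
  (void) signum;
  resized = 1;
}

int
main (void)
{
  struct winsize ws;

  signal (SIGWINCH, on_winch);
  pause ();			/* wait for a resize */
  if (resized && ioctl (STDIN_FILENO, TIOCGWINSZ, &ws) == 0)
    printf ("terminal now %u x %u\n", ws.ws_col, ws.ws_row);
  return 0;
}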
*/ diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index 515dae94..ecd31791 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -62,9 +62,9 @@ typedef struct static linux_epoll_main_t linux_epoll_main; static void -linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) +linux_epoll_file_update (clib_file_t * f, unix_file_update_type_t update_type) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; linux_epoll_main_t *em = &linux_epoll_main; struct epoll_event e; int op; @@ -76,7 +76,7 @@ linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) e.events |= EPOLLOUT; if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED) e.events |= EPOLLET; - e.data.u32 = f - um->file_pool; + e.data.u32 = f - fm->file_pool; op = -1; @@ -108,6 +108,7 @@ linux_epoll_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; linux_epoll_main_t *em = &linux_epoll_main; struct epoll_event *e; int n_fds_ready; @@ -186,7 +187,7 @@ linux_epoll_input (vlib_main_t * vm, for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++) { u32 i = e->data.u32; - unix_file_t *f = pool_elt_at_index (um->file_pool, i); + clib_file_t *f = pool_elt_at_index (fm->file_pool, i); clib_error_t *errors[4]; int n_errors = 0; @@ -236,7 +237,7 @@ clib_error_t * linux_epoll_input_init (vlib_main_t * vm) { linux_epoll_main_t *em = &linux_epoll_main; - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; /* Allocate some events. */ vec_resize (em->epoll_events, VLIB_FRAME_SIZE); @@ -245,7 +246,7 @@ linux_epoll_input_init (vlib_main_t * vm) if (em->epoll_fd < 0) return clib_error_return_unix (0, "epoll_create"); - um->file_update = linux_epoll_file_update; + fm->file_update = linux_epoll_file_update; return 0; } diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index 3a92b2e3..ed0631ec 100644 --- a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -60,6 +60,7 @@ char *vlib_default_runtime_dir __attribute__ ((weak)); char *vlib_default_runtime_dir = "vlib"; unix_main_t unix_main; +clib_file_main_t file_main; static clib_error_t * unix_main_init (vlib_main_t * vm) diff --git a/src/vlib/unix/mc_socket.c b/src/vlib/unix/mc_socket.c index 9c12ad3b..3f1cd99d 100644 --- a/src/vlib/unix/mc_socket.c +++ b/src/vlib/unix/mc_socket.c @@ -243,7 +243,7 @@ recvmsg_helper (mc_socket_main_t * msm, } static clib_error_t * -mastership_socket_read_ready (unix_file_t * uf) +mastership_socket_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; @@ -263,7 +263,7 @@ mastership_socket_read_ready (unix_file_t * uf) } static clib_error_t * -to_relay_socket_read_ready (unix_file_t * uf) +to_relay_socket_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; @@ -297,7 +297,7 @@ to_relay_socket_read_ready (unix_file_t * uf) } static clib_error_t * -from_relay_socket_read_ready (unix_file_t * uf) +from_relay_socket_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; @@ -317,7 +317,7 @@ from_relay_socket_read_ready (unix_file_t * uf) } static clib_error_t * -join_socket_read_ready (unix_file_t * uf) +join_socket_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; @@ -354,7 +354,7 @@ 
join_socket_read_ready (unix_file_t * uf) } static clib_error_t * -ack_socket_read_ready (unix_file_t * uf) +ack_socket_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; @@ -371,10 +371,11 @@ ack_socket_read_ready (unix_file_t * uf) static void catchup_cleanup (mc_socket_main_t * msm, - mc_socket_catchup_t * c, unix_main_t * um, unix_file_t * uf) + mc_socket_catchup_t * c, clib_file_main_t * um, + clib_file_t * uf) { hash_unset (msm->catchup_index_by_file_descriptor, uf->file_descriptor); - unix_file_del (um, uf); + clib_file_del (um, uf); vec_free (c->input_vector); vec_free (c->output_vector); pool_put (msm->catchups, c); @@ -390,9 +391,9 @@ find_catchup_from_file_descriptor (mc_socket_main_t * msm, } static clib_error_t * -catchup_socket_read_ready (unix_file_t * uf, int is_server) +catchup_socket_read_ready (clib_file_t * uf, int is_server) { - unix_main_t *um = &unix_main; + clib_file_main_t *um = &file_main; mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; mc_socket_catchup_t *c = @@ -440,13 +441,13 @@ catchup_socket_read_ready (unix_file_t * uf, int is_server) } static clib_error_t * -catchup_server_read_ready (unix_file_t * uf) +catchup_server_read_ready (clib_file_t * uf) { return catchup_socket_read_ready (uf, /* is_server */ 1); } static clib_error_t * -catchup_client_read_ready (unix_file_t * uf) +catchup_client_read_ready (clib_file_t * uf) { if (MC_EVENT_LOGGING) { @@ -460,9 +461,9 @@ catchup_client_read_ready (unix_file_t * uf) } static clib_error_t * -catchup_socket_write_ready (unix_file_t * uf, int is_server) +catchup_socket_write_ready (clib_file_t * uf, int is_server) { - unix_main_t *um = &unix_main; + clib_file_main_t *um = &file_main; mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_socket_catchup_t *c = find_catchup_from_file_descriptor (msm, uf->file_descriptor); @@ -522,7 +523,7 @@ catchup_socket_write_ready (unix_file_t * uf, int is_server) if (!is_server) { uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE; - unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY); + file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY); /* Send EOF to other side. 
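 * shutdown(SHUT_WR) half-closes the connection: the client sees EOF
 * once the queued catchup data drains, while our receive side stays
 * open so the descriptor can still be polled until catchup_cleanup()
 * releases it.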
*/ shutdown (uf->file_descriptor, SHUT_WR); return error; @@ -537,21 +538,21 @@ catchup_socket_write_ready (unix_file_t * uf, int is_server) } static clib_error_t * -catchup_server_write_ready (unix_file_t * uf) +catchup_server_write_ready (clib_file_t * uf) { return catchup_socket_write_ready (uf, /* is_server */ 1); } static clib_error_t * -catchup_client_write_ready (unix_file_t * uf) +catchup_client_write_ready (clib_file_t * uf) { return catchup_socket_write_ready (uf, /* is_server */ 0); } static clib_error_t * -catchup_socket_error_ready (unix_file_t * uf) +catchup_socket_error_ready (clib_file_t * uf) { - unix_main_t *um = &unix_main; + clib_file_main_t *um = &file_main; mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_socket_catchup_t *c = find_catchup_from_file_descriptor (msm, uf->file_descriptor); @@ -560,13 +561,13 @@ catchup_socket_error_ready (unix_file_t * uf) } static clib_error_t * -catchup_listen_read_ready (unix_file_t * uf) +catchup_listen_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; struct sockaddr_in client_addr; int client_len; mc_socket_catchup_t *c; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; pool_get (msm->catchups, c); memset (c, 0, sizeof (c[0])); @@ -616,7 +617,7 @@ catchup_listen_read_ready (unix_file_t * uf) template.error_function = catchup_socket_error_ready; template.file_descriptor = c->socket; template.private_data = pointer_to_uword (msm); - c->unix_file_index = unix_file_add (&unix_main, &template); + c->clib_file_index = clib_file_add (&file_main, &template); hash_set (msm->catchup_index_by_file_descriptor, c->socket, c - msm->catchups); @@ -772,45 +773,45 @@ socket_setup (mc_socket_main_t * msm) /* epoll setup for multicast mastership socket */ { - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; template.read_function = mastership_socket_read_ready; template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP].socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); /* epoll setup for multicast to_relay socket */ template.read_function = to_relay_socket_read_ready; template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY].socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); /* epoll setup for multicast from_relay socket */ template.read_function = from_relay_socket_read_ready; template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY].socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); template.read_function = join_socket_read_ready; template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_JOIN].socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); /* epoll setup for ack rx socket */ template.read_function = ack_socket_read_ready; template.file_descriptor = msm->ack_socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); /* epoll setup for TCP catchup server */ template.read_function = catchup_listen_read_ready; template.file_descriptor = msm->catchup_server_socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); } return 0; @@ -820,8 +821,8 @@ 
static void * catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes, u8 * set_output_vector) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, - c->unix_file_index); + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, + c->clib_file_index); u8 *result = 0; if (set_output_vector) @@ -833,7 +834,7 @@ catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes, int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; if (!skip_update) - unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY); + file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY); } return result; } @@ -847,7 +848,7 @@ catchup_request_fun (void *transport_main, vlib_main_t *vm = mcm->vlib_main; mc_socket_catchup_t *c; struct sockaddr_in addr; - unix_main_t *um = &unix_main; + clib_file_main_t *um = &file_main; int one = 1; pool_get (msm->catchups, c); @@ -895,14 +896,14 @@ catchup_request_fun (void *transport_main, } { - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; template.read_function = catchup_client_read_ready; template.write_function = catchup_client_write_ready; template.error_function = catchup_socket_error_ready; template.file_descriptor = c->socket; template.private_data = (uword) msm; - c->unix_file_index = unix_file_add (um, &template); + c->clib_file_index = clib_file_add (um, &template); hash_set (msm->catchup_index_by_file_descriptor, c->socket, c - msm->catchups); diff --git a/src/vlib/unix/mc_socket.h b/src/vlib/unix/mc_socket.h index 273c9ad4..3686c824 100644 --- a/src/vlib/unix/mc_socket.h +++ b/src/vlib/unix/mc_socket.h @@ -31,7 +31,7 @@ typedef struct typedef struct { int socket; - u32 unix_file_index; + u32 clib_file_index; u8 *input_vector; u8 *output_vector; diff --git a/src/vlib/unix/unix.h b/src/vlib/unix/unix.h index 1b0d8b9d..4c8566b7 100644 --- a/src/vlib/unix/unix.h +++ b/src/vlib/unix/unix.h @@ -40,42 +40,16 @@ #ifndef included_unix_unix_h #define included_unix_unix_h +#include #include #include - -struct unix_file; -typedef clib_error_t *(unix_file_function_t) (struct unix_file * f); - -typedef struct unix_file -{ - /* Unix file descriptor from open/socket. */ - u32 file_descriptor; - - u32 flags; -#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0) -#define UNIX_FILE_EVENT_EDGE_TRIGGERED (1 << 1) - - /* Data available for function's use. */ - uword private_data; - - /* Functions to be called when read/write data becomes ready. */ - unix_file_function_t *read_function, *write_function, *error_function; -} unix_file_t; - typedef struct { f64 time; clib_error_t *error; } unix_error_history_t; -typedef enum -{ - UNIX_FILE_UPDATE_ADD, - UNIX_FILE_UPDATE_MODIFY, - UNIX_FILE_UPDATE_DELETE, -} unix_file_update_type_t; - typedef struct { /* Back pointer to main structure. */ @@ -86,15 +60,9 @@ typedef struct #define UNIX_FLAG_INTERACTIVE (1 << 0) #define UNIX_FLAG_NODAEMON (1 << 1) - /* Pool of files to poll for input/output. */ - unix_file_t *file_pool; - /* CLI listen socket. */ clib_socket_t cli_listen_socket; - void (*file_update) (unix_file_t * file, - unix_file_update_type_t update_type); - /* Circular buffer of last unix errors. */ unix_error_history_t error_history[128]; u32 error_history_index; @@ -138,47 +106,7 @@ typedef struct /* Global main structure. 
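 * After this change unix_main carries only process-level state (CLI
 * listen socket, error history, flags); the pollable file pool and
 * the file_update callback now live in the vppinfra-level
 * clib_file_main_t, declared below and defined in vlib/unix/main.c.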
*/ extern unix_main_t unix_main; - -always_inline uword -unix_file_add (unix_main_t * um, unix_file_t * template) -{ - unix_file_t *f; - pool_get (um->file_pool, f); - f[0] = template[0]; - um->file_update (f, UNIX_FILE_UPDATE_ADD); - return f - um->file_pool; -} - -always_inline void -unix_file_del (unix_main_t * um, unix_file_t * f) -{ - um->file_update (f, UNIX_FILE_UPDATE_DELETE); - close (f->file_descriptor); - f->file_descriptor = ~0; - pool_put (um->file_pool, f); -} - -always_inline void -unix_file_del_by_index (unix_main_t * um, uword index) -{ - unix_file_t *uf; - uf = pool_elt_at_index (um->file_pool, index); - unix_file_del (um, uf); -} - -always_inline uword -unix_file_set_data_available_to_write (u32 unix_file_index, - uword is_available) -{ - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, unix_file_index); - uword was_available = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); - if ((was_available != 0) != (is_available != 0)) - { - uf->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; - unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY); - } - return was_available != 0; -} +extern clib_file_main_t file_main; always_inline void unix_save_error (unix_main_t * um, clib_error_t * error) diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h index b84d269e..651566ae 100644 --- a/src/vlibapi/api_common.h +++ b/src/vlibapi/api_common.h @@ -51,7 +51,7 @@ typedef struct vl_api_registration_ unix_shared_memory_queue_t *vl_input_queue; /* socket server and client */ - u32 unix_file_index; + u32 clib_file_index; i8 *unprocessed_input; u32 unprocessed_msg_length; u8 *output_vector; diff --git a/src/vlibsocket/api.h b/src/vlibsocket/api.h index 79c0d08a..d7b7055d 100644 --- a/src/vlibsocket/api.h +++ b/src/vlibsocket/api.h @@ -41,27 +41,27 @@ typedef struct * or to a shared-memory connection. 
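 * current_rp/current_uf record the registration and clib_file_t whose
 * message is currently being processed, so reply handlers such as
 * vl_api_sockclnt_delete_reply_t_handler can reach them without an
 * extra lookup.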
*/ vl_api_registration_t *current_rp; - unix_file_t *current_uf; + clib_file_t *current_uf; /* One input buffer, shared across all sockets */ i8 *input_buffer; } socket_main_t; extern socket_main_t socket_main; -void socksvr_add_pending_output (struct unix_file *uf, +void socksvr_add_pending_output (clib_file_t * uf, struct vl_api_registration_ *cf, u8 * buffer, uword buffer_bytes); #define SOCKSVR_DEFAULT_PORT 32741 /* whatever */ void vl_free_socket_registration_index (u32 pool_index); -void vl_socket_process_msg (struct unix_file *uf, +void vl_socket_process_msg (clib_file_t * uf, struct vl_api_registration_ *rp, i8 * input_v); -clib_error_t *vl_socket_read_ready (struct unix_file *uf); -void vl_socket_add_pending_output (struct unix_file *uf, +clib_error_t *vl_socket_read_ready (clib_file_t * uf); +void vl_socket_add_pending_output (clib_file_t * uf, struct vl_api_registration_ *rp, u8 * buffer, uword buffer_bytes); -clib_error_t *vl_socket_write_ready (struct unix_file *uf); +clib_error_t *vl_socket_write_ready (clib_file_t * uf); void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem); void vl_socket_api_send_with_data (vl_api_registration_t * rp, u8 * elem, u8 * data_vector); diff --git a/src/vlibsocket/sockclnt_vlib.c b/src/vlibsocket/sockclnt_vlib.c index e16adfeb..760ad944 100644 --- a/src/vlibsocket/sockclnt_vlib.c +++ b/src/vlibsocket/sockclnt_vlib.c @@ -60,11 +60,11 @@ vl_api_sockclnt_create_reply_t_handler (vl_api_sockclnt_create_reply_t * mp) static void vl_api_sockclnt_delete_reply_t_handler (vl_api_sockclnt_delete_reply_t * mp) { - unix_main_t *um = &unix_main; - unix_file_t *uf = socket_main.current_uf; + clib_file_main_t *fm = &file_main; + clib_file_t *uf = socket_main.current_uf; vl_api_registration_t *rp = socket_main.current_rp; - unix_file_del (um, uf); + clib_file_del (fm, uf); vl_free_socket_registration_index (rp->vl_api_registration_pool_index); } @@ -72,8 +72,8 @@ u32 sockclnt_open_index (char *client_name, char *hostname, int port) { vl_api_registration_t *rp; - unix_main_t *um = &unix_main; - unix_file_t template = { 0 }; + clib_file_main_t *fm = &file_main; + clib_file_t template = { 0 }; int sockfd; int one = 1; int rv; @@ -129,7 +129,7 @@ sockclnt_open_index (char *client_name, char *hostname, int port) template.file_descriptor = sockfd; template.private_data = rp - socket_main.registration_pool; - rp->unix_file_index = unix_file_add (um, &template); + rp->clib_file_index = clib_file_add (fm, &template); rp->name = format (0, "%s:%d", hostname, port); mp = vl_msg_api_alloc (sizeof (*mp)); diff --git a/src/vlibsocket/socksvr_vlib.c b/src/vlibsocket/socksvr_vlib.c index dc8c63eb..31b33df5 100644 --- a/src/vlibsocket/socksvr_vlib.c +++ b/src/vlibsocket/socksvr_vlib.c @@ -53,8 +53,8 @@ dump_socket_clients (vlib_main_t * vm, api_main_t * am) { vl_api_registration_t *reg; socket_main_t *sm = &socket_main; - unix_main_t *um = &unix_main; - unix_file_t *f; + clib_file_main_t *fm = &file_main; + clib_file_t *f; /* * Must have at least one active client, not counting the @@ -69,7 +69,7 @@ dump_socket_clients (vlib_main_t * vm, api_main_t * am) pool_foreach (reg, sm->registration_pool, ({ if (reg->registration_type == REGISTRATION_TYPE_SOCKET_SERVER) { - f = pool_elt_at_index (um->file_pool, reg->unix_file_index); + f = pool_elt_at_index (fm->file_pool, reg->clib_file_index); vlib_cli_output (vm, "%16s %8d", reg->name, f->file_descriptor); } @@ -99,13 +99,13 @@ vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) nbytes += msg_length; tmp = 
clib_host_to_net_u32 (nbytes); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, elem, msg_length); @@ -139,18 +139,18 @@ vl_socket_api_send_with_data (vl_api_registration_t * rp, /* Length in network byte order */ tmp = clib_host_to_net_u32 (nbytes); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, elem, msg_length); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, data_vector, vec_len (data_vector)); @@ -181,13 +181,13 @@ vl_socket_api_send_with_length_internal (vl_api_registration_t * rp, /* Length in network byte order */ tmp = clib_host_to_net_u32 (nbytes); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, elem, msg_length); @@ -231,7 +231,7 @@ vl_free_socket_registration_index (u32 pool_index) } static inline void -socket_process_msg (unix_file_t * uf, vl_api_registration_t * rp, +socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp, i8 * input_v) { u8 *the_msg = (u8 *) (input_v + sizeof (u32)); @@ -243,9 +243,9 @@ socket_process_msg (unix_file_t * uf, vl_api_registration_t * rp, } clib_error_t * -vl_socket_read_ready (unix_file_t * uf) +vl_socket_read_ready (clib_file_t * uf) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vl_api_registration_t *rp; int n; i8 *msg_buffer = 0; @@ -259,7 +259,7 @@ vl_socket_read_ready (unix_file_t * uf) if (n <= 0 && errno != EAGAIN) { - unix_file_del (um, uf); + clib_file_del (fm, uf); if (!pool_is_free (socket_main.registration_pool, rp)) { @@ -352,11 +352,11 @@ turf_it: } void -vl_socket_add_pending_output (unix_file_t * uf, +vl_socket_add_pending_output (clib_file_t * uf, vl_api_registration_t * rp, u8 * buffer, uword buffer_bytes) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vec_add (rp->output_vector, buffer, buffer_bytes); if (vec_len (rp->output_vector) > 0) @@ -364,15 +364,15 @@ vl_socket_add_pending_output (unix_file_t * uf, int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; if (!skip_update) - um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); } } static 
void -socket_del_pending_output (unix_file_t * uf, +socket_del_pending_output (clib_file_t * uf, vl_api_registration_t * rp, uword n_bytes) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vec_delete (rp->output_vector, n_bytes, 0); if (vec_len (rp->output_vector) <= 0) @@ -380,14 +380,14 @@ socket_del_pending_output (unix_file_t * uf, int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE; if (!skip_update) - um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); } } clib_error_t * -vl_socket_write_ready (unix_file_t * uf) +vl_socket_write_ready (clib_file_t * uf) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vl_api_registration_t *rp; int n; @@ -402,7 +402,7 @@ vl_socket_write_ready (unix_file_t * uf) #if DEBUG > 2 clib_warning ("write error, close the file...\n"); #endif - unix_file_del (um, uf); + clib_file_del (fm, uf); vl_free_socket_registration_index (rp - socket_main.registration_pool); return 0; @@ -415,23 +415,23 @@ vl_socket_write_ready (unix_file_t * uf) } clib_error_t * -vl_socket_error_ready (unix_file_t * uf) +vl_socket_error_ready (clib_file_t * uf) { vl_api_registration_t *rp; - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); - unix_file_del (um, uf); + clib_file_del (fm, uf); vl_free_socket_registration_index (rp - socket_main.registration_pool); return 0; } void -socksvr_file_add (unix_main_t * um, int fd) +socksvr_file_add (clib_file_main_t * fm, int fd) { vl_api_registration_t *rp; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; pool_get (socket_main.registration_pool, rp); memset (rp, 0, sizeof (*rp)); @@ -444,13 +444,13 @@ socksvr_file_add (unix_main_t * um, int fd) rp->registration_type = REGISTRATION_TYPE_SOCKET_SERVER; rp->vl_api_registration_pool_index = rp - socket_main.registration_pool; - rp->unix_file_index = unix_file_add (um, &template); + rp->clib_file_index = clib_file_add (fm, &template); } static clib_error_t * -socksvr_accept_ready (unix_file_t * uf) +socksvr_accept_ready (clib_file_t * uf) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; struct sockaddr_in client_addr; int client_fd; int client_len; @@ -468,12 +468,12 @@ socksvr_accept_ready (unix_file_t * uf) if (client_fd < 0) return clib_error_return_unix (0, "socksvr_accept_ready: accept"); - socksvr_file_add (um, client_fd); + socksvr_file_add (fm, client_fd); return 0; } static clib_error_t * -socksvr_bogus_write (unix_file_t * uf) +socksvr_bogus_write (clib_file_t * uf) { clib_warning ("why am I here?"); return 0; @@ -525,7 +525,7 @@ vl_api_sockclnt_delete_t_handler (vl_api_sockclnt_delete_t * mp) vl_msg_api_send (regp, (u8 *) rp); - unix_file_del (&unix_main, unix_main.file_pool + regp->unix_file_index); + clib_file_del (&file_main, file_main.file_pool + regp->clib_file_index); vl_free_socket_registration_index (mp->index); } @@ -542,8 +542,8 @@ _(SOCKCLNT_DELETE, sockclnt_delete) static clib_error_t * socksvr_api_init (vlib_main_t * vm) { - unix_main_t *um = &unix_main; - unix_file_t template = { 0 }; + clib_file_main_t *fm = &file_main; + clib_file_t template = { 0 }; int sockfd; int one = 1; int rv; @@ -625,14 +625,14 @@ socksvr_api_init (vlib_main_t * vm) template.file_descriptor = sockfd; template.private_data = rp - socket_main.registration_pool; - rp->unix_file_index = unix_file_add (um, &template); + 
rp->clib_file_index = clib_file_add (fm, &template); return 0; } static clib_error_t * socket_exit (vlib_main_t * vm) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vl_api_registration_t *rp; /* Defensive driving in case something wipes out early */ @@ -641,7 +641,7 @@ socket_exit (vlib_main_t * vm) u32 index; /* *INDENT-OFF* */ pool_foreach (rp, socket_main.registration_pool, ({ - unix_file_del (um, um->file_pool + rp->unix_file_index); + clib_file_del (fm, fm->file_pool + rp->clib_file_index); index = rp->vl_api_registration_pool_index; vl_free_socket_registration_index (index); })); diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index e7e69214..62bb228f 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -89,7 +89,7 @@ af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, } static clib_error_t * -af_packet_fd_read_ready (unix_file_t * uf) +af_packet_fd_read_ready (clib_file_t * uf) { af_packet_main_t *apm = &af_packet_main; vnet_main_t *vnm = vnet_get_main (); @@ -281,12 +281,12 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, clib_spinlock_init (&apif->lockp); { - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; template.read_function = af_packet_fd_read_ready; template.file_descriptor = fd; template.private_data = if_index; template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED; - apif->unix_file_index = unix_file_add (&unix_main, &template); + apif->clib_file_index = clib_file_add (&file_main, &template); } /*use configured or generate random MAC address */ @@ -371,10 +371,10 @@ af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name) vnet_hw_interface_unassign_rx_thread (vnm, apif->hw_if_index, 0); /* clean up */ - if (apif->unix_file_index != ~0) + if (apif->clib_file_index != ~0) { - unix_file_del (&unix_main, unix_main.file_pool + apif->unix_file_index); - apif->unix_file_index = ~0; + clib_file_del (&file_main, file_main.file_pool + apif->clib_file_index); + apif->clib_file_index = ~0; } else close (apif->fd); diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h index 194977f0..95c7e7cf 100644 --- a/src/vnet/devices/af_packet/af_packet.h +++ b/src/vnet/devices/af_packet/af_packet.h @@ -32,7 +32,7 @@ typedef struct u8 *tx_ring; u32 hw_if_index; u32 sw_if_index; - u32 unix_file_index; + u32 clib_file_index; u32 next_rx_frame; u32 next_tx_frame; diff --git a/src/vnet/devices/netmap/netmap.c b/src/vnet/devices/netmap/netmap.c index 09afc764..fc49ed62 100644 --- a/src/vnet/devices/netmap/netmap.c +++ b/src/vnet/devices/netmap/netmap.c @@ -36,7 +36,7 @@ netmap_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, } static clib_error_t * -netmap_fd_read_ready (unix_file_t * uf) +netmap_fd_read_ready (clib_file_t * uf) { vlib_main_t *vm = vlib_get_main (); netmap_main_t *nm = &netmap_main; @@ -54,10 +54,10 @@ netmap_fd_read_ready (unix_file_t * uf) static void close_netmap_if (netmap_main_t * nm, netmap_if_t * nif) { - if (nif->unix_file_index != ~0) + if (nif->clib_file_index != ~0) { - unix_file_del (&unix_main, unix_main.file_pool + nif->unix_file_index); - nif->unix_file_index = ~0; + clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index); + nif->clib_file_index = ~0; } else if (nif->fd > -1) close (nif->fd); @@ -137,7 +137,7 @@ netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set, pool_get (nm->interfaces, nif); nif->if_index = nif - 
nm->interfaces; nif->fd = fd; - nif->unix_file_index = ~0; + nif->clib_file_index = ~0; vec_validate (req, 0); nif->req = req; @@ -188,11 +188,11 @@ netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set, clib_spinlock_init (&nif->lockp); { - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; template.read_function = netmap_fd_read_ready; template.file_descriptor = nif->fd; template.private_data = nif->if_index; - nif->unix_file_index = unix_file_add (&unix_main, &template); + nif->clib_file_index = clib_file_add (&file_main, &template); } /*use configured or generate random MAC address */ diff --git a/src/vnet/devices/netmap/netmap.h b/src/vnet/devices/netmap/netmap.h index e04f045d..04731890 100644 --- a/src/vnet/devices/netmap/netmap.h +++ b/src/vnet/devices/netmap/netmap.h @@ -50,7 +50,7 @@ typedef struct uword if_index; u32 hw_if_index; u32 sw_if_index; - u32 unix_file_index; + u32 clib_file_index; u32 per_interface_next_index; u8 is_admin_up; diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 5fe378cb..2af96ee7 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -89,7 +89,7 @@ #define UNIX_GET_FD(unixfd_idx) \ (unixfd_idx != ~0) ? \ - pool_elt_at_index (unix_main.file_pool, \ + pool_elt_at_index (file_main.file_pool, \ unixfd_idx)->file_descriptor : -1; #define foreach_virtio_trace_flags \ @@ -477,7 +477,7 @@ vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq) } static clib_error_t * -vhost_user_callfd_read_ready (unix_file_t * uf) +vhost_user_callfd_read_ready (clib_file_t * uf) { __attribute__ ((unused)) int n; u8 buff[8]; @@ -488,7 +488,7 @@ vhost_user_callfd_read_ready (unix_file_t * uf) } static clib_error_t * -vhost_user_kickfd_read_ready (unix_file_t * uf) +vhost_user_kickfd_read_ready (clib_file_t * uf) { __attribute__ ((unused)) int n; u8 buff[8]; @@ -569,16 +569,16 @@ vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid) vhost_user_vring_t *vring = &vui->vrings[qid]; if (vring->kickfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vring->kickfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vring->kickfd_idx = ~0; } if (vring->callfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vring->callfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vring->callfd_idx = ~0; } if (vring->errfd != -1) @@ -597,10 +597,10 @@ vhost_user_if_disconnect (vhost_user_intf_t * vui) vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); - if (vui->unix_file_index != ~0) + if (vui->clib_file_index != ~0) { - unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index); - vui->unix_file_index = ~0; + clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index); + vui->clib_file_index = ~0; } vui->is_up = 0; @@ -654,7 +654,7 @@ vhost_user_log_dirty_pages (vhost_user_intf_t * vui, u64 addr, u64 len) } static clib_error_t * -vhost_user_socket_read (unix_file_t * uf) +vhost_user_socket_read (clib_file_t * uf) { int n, i; int fd, number_of_fds = 0; @@ -666,7 +666,7 @@ vhost_user_socket_read (unix_file_t * uf) vhost_user_intf_t *vui; struct cmsghdr *cmsg; u8 q; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; vnet_main_t *vnm = vnet_get_main (); vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data); @@ 
-927,9 +927,9 @@ vhost_user_socket_read (unix_file_t * uf) /* if there is old fd, delete and close it */ if (vui->vrings[q].callfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vui->vrings[q].callfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vui->vrings[q].callfd_idx = ~0; } @@ -945,7 +945,7 @@ vhost_user_socket_read (unix_file_t * uf) template.file_descriptor = fds[0]; template.private_data = ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q; - vui->vrings[q].callfd_idx = unix_file_add (&unix_main, &template); + vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template); } else vui->vrings[q].callfd_idx = ~0; @@ -959,9 +959,9 @@ vhost_user_socket_read (unix_file_t * uf) if (vui->vrings[q].kickfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vui->vrings[q].kickfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vui->vrings[q].kickfd_idx = ~0; } @@ -978,7 +978,7 @@ vhost_user_socket_read (unix_file_t * uf) template.private_data = (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) + q; - vui->vrings[q].kickfd_idx = unix_file_add (&unix_main, &template); + vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template); } else { @@ -1168,7 +1168,7 @@ close_socket: } static clib_error_t * -vhost_user_socket_error (unix_file_t * uf) +vhost_user_socket_error (clib_file_t * uf) { vlib_main_t *vm = vlib_get_main (); vhost_user_main_t *vum = &vhost_user_main; @@ -1184,11 +1184,11 @@ vhost_user_socket_error (unix_file_t * uf) } static clib_error_t * -vhost_user_socksvr_accept_ready (unix_file_t * uf) +vhost_user_socksvr_accept_ready (clib_file_t * uf) { int client_fd, client_len; struct sockaddr_un client; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui; @@ -1207,7 +1207,7 @@ vhost_user_socksvr_accept_ready (unix_file_t * uf) template.error_function = vhost_user_socket_error; template.file_descriptor = client_fd; template.private_data = vui - vhost_user_main.vhost_user_interfaces; - vui->unix_file_index = unix_file_add (&unix_main, &template); + vui->clib_file_index = clib_file_add (&file_main, &template); return 0; } @@ -2475,7 +2475,7 @@ vhost_user_process (vlib_main_t * vm, vhost_user_intf_t *vui; struct sockaddr_un sun; int sockfd; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; f64 timeout = 3153600000.0 /* 100 years */ ; uword *event_data = 0; @@ -2496,7 +2496,7 @@ vhost_user_process (vlib_main_t * vm, pool_foreach (vui, vum->vhost_user_interfaces, { if (vui->unix_server_index == ~0) { //Nothing to do for server sockets - if (vui->unix_file_index == ~0) + if (vui->clib_file_index == ~0) { if ((sockfd < 0) && ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)) @@ -2534,7 +2534,7 @@ vhost_user_process (vlib_main_t * vm, template.file_descriptor = sockfd; template.private_data = vui - vhost_user_main.vhost_user_interfaces; - vui->unix_file_index = unix_file_add (&unix_main, &template); + vui->clib_file_index = clib_file_add (&file_main, &template); /* This sockfd is considered consumed */ sockfd = -1; @@ -2549,7 +2549,7 @@ vhost_user_process (vlib_main_t * vm, /* check if socket is alive */ int error = 0; socklen_t len = sizeof (error); - int fd = UNIX_GET_FD(vui->unix_file_index); + int fd = UNIX_GET_FD(vui->clib_file_index); int retval = 
getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len); @@ -2596,9 +2596,9 @@ vhost_user_term_if (vhost_user_intf_t * vui) if (vui->unix_server_index != ~0) { //Close server socket - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vui->unix_server_index); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vui->unix_server_index = ~0; unlink (vui->sock_filename); } @@ -2780,11 +2780,11 @@ vhost_user_vui_init (vnet_main_t * vnm, sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); if (server_sock_fd != -1) { - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; template.read_function = vhost_user_socksvr_accept_ready; template.file_descriptor = server_sock_fd; template.private_data = vui - vum->vhost_user_interfaces; //hw index - vui->unix_server_index = unix_file_add (&unix_main, &template); + vui->unix_server_index = clib_file_add (&file_main, &template); } else { @@ -2797,7 +2797,7 @@ vhost_user_vui_init (vnet_main_t * vnm, vui->sock_errno = 0; vui->is_up = 0; vui->feature_mask = feature_mask; - vui->unix_file_index = ~0; + vui->clib_file_index = ~0; vui->log_base_addr = 0; vui->if_index = vui - vum->vhost_user_interfaces; mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename, diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index ae3b88e8..105b92b7 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -223,7 +223,7 @@ typedef struct u32 is_up; u32 admin_up; u32 unix_server_index; - u32 unix_file_index; + u32 clib_file_index; char sock_filename[256]; int sock_errno; uword if_index; diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index 67c54c3c..1ea32fa0 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -550,7 +550,7 @@ VLIB_REGISTER_NODE (punt_socket_rx_node, static) = format_punt_trace,}; static clib_error_t * -punt_socket_read_ready (unix_file_t * uf) +punt_socket_read_ready (clib_file_t * uf) { vlib_main_t *vm = vlib_get_main (); punt_main_t *pm = &punt_main; @@ -790,11 +790,11 @@ punt_config (vlib_main_t * vm, unformat_input_t * input) } /* Register socket */ - unix_main_t *um = &unix_main; - unix_file_t template = { 0 }; + clib_file_main_t *fm = &file_main; + clib_file_t template = { 0 }; template.read_function = punt_socket_read_ready; template.file_descriptor = pm->socket_fd; - pm->unix_file_index = unix_file_add (um, &template); + pm->clib_file_index = clib_file_add (fm, &template); pm->is_configured = true; diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h index 0103249c..9defa881 100644 --- a/src/vnet/ip/punt.h +++ b/src/vnet/ip/punt.h @@ -72,7 +72,7 @@ typedef struct char sun_path[sizeof (struct sockaddr_un)]; punt_client_t *clients_by_dst_port4; punt_client_t *clients_by_dst_port6; - u32 unix_file_index; + u32 clib_file_index; bool is_configured; vlib_node_t *interface_output_node; u32 *ready_fds; diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index 0fc62f6c..13154b3b 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -56,7 +56,7 @@ static void tapcli_nopunt_frame (vlib_main_t * vm, */ typedef struct { u32 unix_fd; - u32 unix_file_index; + u32 clib_file_index; u32 provision_fd; /** For counters */ u32 sw_if_index; @@ -137,8 +137,6 @@ typedef struct { vlib_main_t * vlib_main; /** convenience - vnet_main_t */ vnet_main_t * vnet_main; - /** convenience - unix_main_t */ - unix_main_t * unix_main; } tapcli_main_t; static tapcli_main_t tapcli_main; @@ 
-453,12 +451,12 @@ VLIB_REGISTER_NODE (tapcli_rx_node, static) = { /** * @brief Gets called when file descriptor is ready from epoll. * - * @param *uf - unix_file_t + * @param *uf - clib_file_t * * @return error - clib_error_t * */ -static clib_error_t * tapcli_read_ready (unix_file_t * uf) +static clib_error_t * tapcli_read_ready (clib_file_t * uf) { vlib_main_t * vm = vlib_get_main(); tapcli_main_t * tm = &tapcli_main; @@ -999,10 +997,10 @@ int vnet_tap_connect (vlib_main_t * vm, vnet_tap_connect_args_t *ap) } { - unix_file_t template = {0}; + clib_file_t template = {0}; template.read_function = tapcli_read_ready; template.file_descriptor = dev_net_tun_fd; - ti->unix_file_index = unix_file_add (&unix_main, &template); + ti->clib_file_index = clib_file_add (&file_main, &template); ti->unix_fd = dev_net_tun_fd; ti->provision_fd = dev_tap_fd; clib_memcpy (&ti->ifr, &ifr, sizeof (ifr)); @@ -1079,9 +1077,9 @@ static int tapcli_tap_disconnect (tapcli_interface_t *ti) // bring interface down vnet_sw_interface_set_flags (vnm, sw_if_index, 0); - if (ti->unix_file_index != ~0) { - unix_file_del (&unix_main, unix_main.file_pool + ti->unix_file_index); - ti->unix_file_index = ~0; + if (ti->clib_file_index != ~0) { + clib_file_del (&file_main, file_main.file_pool + ti->clib_file_index); + ti->clib_file_index = ~0; } else close(ti->unix_fd); @@ -1455,7 +1453,6 @@ tapcli_init (vlib_main_t * vm) tm->vlib_main = vm; tm->vnet_main = vnet_get_main(); - tm->unix_main = &unix_main; tm->mtu_bytes = TAP_MTU_DEFAULT; tm->tapcli_interface_index_by_sw_if_index = hash_create (0, sizeof(uword)); tm->tapcli_interface_index_by_unix_fd = hash_create (0, sizeof (uword)); diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c index 2c403679..9616feb2 100644 --- a/src/vnet/unix/tuntap.c +++ b/src/vnet/unix/tuntap.c @@ -104,7 +104,7 @@ typedef struct { mhash_t subif_mhash; /** Unix file index */ - u32 unix_file_index; + u32 clib_file_index; /** For the "normal" interface, if configured */ u32 hw_if_index, sw_if_index; @@ -388,11 +388,11 @@ VLIB_REGISTER_NODE (tuntap_rx_node,static) = { /** * @brief Gets called when file descriptor is ready from epoll. * - * @param *uf - unix_file_t + * @param *uf - clib_file_t * * @return error - clib_error_t */ -static clib_error_t * tuntap_read_ready (unix_file_t * uf) +static clib_error_t * tuntap_read_ready (clib_file_t * uf) { vlib_main_t * vm = vlib_get_main(); vlib_node_set_interrupt_pending (vm, tuntap_rx_node.index); @@ -645,10 +645,10 @@ tuntap_config (vlib_main_t * vm, unformat_input_t * input) } { - unix_file_t template = {0}; + clib_file_t template = {0}; template.read_function = tuntap_read_ready; template.file_descriptor = tm->dev_net_tun_fd; - tm->unix_file_index = unix_file_add (&unix_main, &template); + tm->clib_file_index = clib_file_add (&file_main, &template); } done: diff --git a/src/vppinfra.am b/src/vppinfra.am index 8f01114c..a5769a0d 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -183,6 +183,7 @@ nobase_include_HEADERS = \ vppinfra/error.h \ vppinfra/error_bootstrap.h \ vppinfra/fifo.h \ + vppinfra/file.h \ vppinfra/format.h \ vppinfra/graph.h \ vppinfra/hash.h \ diff --git a/src/vppinfra/file.h b/src/vppinfra/file.h new file mode 100644 index 00000000..69facea9 --- /dev/null +++ b/src/vppinfra/file.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * file.h: unix file handling + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_clib_file_h +#define included_clib_file_h + +#include +#include + + +struct clib_file; +typedef clib_error_t *(clib_file_function_t) (struct clib_file * f); + +typedef struct clib_file +{ + /* Unix file descriptor from open/socket. */ + u32 file_descriptor; + + u32 flags; +#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0) +#define UNIX_FILE_EVENT_EDGE_TRIGGERED (1 << 1) + + /* Data available for function's use. */ + uword private_data; + + /* Functions to be called when read/write data becomes ready. */ + clib_file_function_t *read_function, *write_function, *error_function; +} clib_file_t; + +typedef enum +{ + UNIX_FILE_UPDATE_ADD, + UNIX_FILE_UPDATE_MODIFY, + UNIX_FILE_UPDATE_DELETE, +} unix_file_update_type_t; + +typedef struct +{ + /* Pool of files to poll for input/output. 
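+ * Elements are addressed by the u32 index that clib_file_add()
+ * returns; indices stay valid even when the pool's memory moves
+ * (hence the "Re-fetch pointer since pool may have moved" re-lookup
+ * in cli.c), which is why callers cache a clib_file_index instead of
+ * a clib_file_t pointer.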
*/ + clib_file_t *file_pool; + + void (*file_update) (clib_file_t * file, + unix_file_update_type_t update_type); + +} clib_file_main_t; + +always_inline uword +clib_file_add (clib_file_main_t * um, clib_file_t * template) +{ + clib_file_t *f; + pool_get (um->file_pool, f); + f[0] = template[0]; + um->file_update (f, UNIX_FILE_UPDATE_ADD); + return f - um->file_pool; +} + +always_inline void +clib_file_del (clib_file_main_t * um, clib_file_t * f) +{ + um->file_update (f, UNIX_FILE_UPDATE_DELETE); + close (f->file_descriptor); + f->file_descriptor = ~0; + pool_put (um->file_pool, f); +} + +always_inline void +clib_file_del_by_index (clib_file_main_t * um, uword index) +{ + clib_file_t *uf; + uf = pool_elt_at_index (um->file_pool, index); + clib_file_del (um, uf); +} + +always_inline uword +clib_file_set_data_available_to_write (clib_file_main_t * um, + u32 clib_file_index, + uword is_available) +{ + clib_file_t *uf = pool_elt_at_index (um->file_pool, clib_file_index); + uword was_available = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); + if ((was_available != 0) != (is_available != 0)) + { + uf->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; + um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + } + return was_available != 0; +} + + +#endif /* included_clib_file_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 01914ce45729833cec88c65689de9a0336cd40cc Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 14 Sep 2017 19:04:50 +0200 Subject: vppinfra: add clib_mem_vm_ext_alloc function Change-Id: Iff33694fc42cc3bcc73cf1372339053a6365039c Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/init.c | 6 +- src/plugins/memif/memif.c | 21 ++- src/vlib.am | 5 +- src/vlib/linux/pci.c | 25 ++-- src/vlib/linux/physmem.c | 192 ++++-------------------- src/vlib/linux/syscall.h | 58 -------- src/vlib/linux/sysfs.c | 250 ------------------------------- src/vlib/linux/sysfs.h | 44 ------ src/vlib/threads.c | 6 +- src/vlib/threads_cli.c | 6 +- src/vnet/devices/af_packet/af_packet.c | 4 +- src/vppinfra.am | 5 +- src/vppinfra/linux/mem.c | 260 +++++++++++++++++++++++++++++++++ src/vppinfra/linux/syscall.h | 56 +++++++ src/vppinfra/linux/sysfs.c | 250 +++++++++++++++++++++++++++++++ src/vppinfra/linux/sysfs.h | 46 ++++++ src/vppinfra/mem.h | 94 ++++++++++-- src/vppinfra/vm_linux_kernel.h | 78 ---------- src/vppinfra/vm_standalone.h | 74 ---------- src/vppinfra/vm_unix.h | 106 -------------- 20 files changed, 761 insertions(+), 825 deletions(-) delete mode 100644 src/vlib/linux/syscall.h delete mode 100644 src/vlib/linux/sysfs.c delete mode 100644 src/vlib/linux/sysfs.h create mode 100644 src/vppinfra/linux/mem.c create mode 100644 src/vppinfra/linux/syscall.h create mode 100644 src/vppinfra/linux/sysfs.c create mode 100644 src/vppinfra/linux/sysfs.h delete mode 100644 src/vppinfra/vm_linux_kernel.h delete mode 100644 src/vppinfra/vm_standalone.h delete mode 100644 src/vppinfra/vm_unix.h (limited to 'src/vnet/devices') diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 95176fb8..ee61f94e 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -1040,7 +1040,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) mem = mem_by_socket[c]; page_size = 1024; - e = vlib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); + e = clib_sysfs_get_free_hugepages(c, 
page_size * 1024, &pages_avail); if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) use_1g = 0; @@ -1049,7 +1049,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) clib_error_free (e); page_size = 2; - e = vlib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); + e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) use_2m = 0; diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 8fec409a..6a609a57 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include @@ -267,6 +267,8 @@ memif_init_regions_and_queues (memif_if_t * mif) int i, j; u64 buffer_offset; memif_region_t *r; + clib_mem_vm_alloc_t alloc = { 0 }; + clib_error_t *err; vec_validate_aligned (mif->regions, 0, CLIB_CACHE_LINE_BYTES); r = vec_elt_at_index (mif->regions, 0); @@ -279,18 +281,15 @@ memif_init_regions_and_queues (memif_if_t * mif) mif->run.buffer_size * (1 << mif->run.log2_ring_size) * (mif->run.num_s2m_rings + mif->run.num_m2s_rings); - if ((r->fd = memfd_create ("memif region 0", MFD_ALLOW_SEALING)) == -1) - return clib_error_return_unix (0, "memfd_create"); - - if ((fcntl (r->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - return clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); + alloc.name = "memif region"; + alloc.size = r->region_size; + alloc.flags = CLIB_MEM_VM_F_SHARED; - if ((ftruncate (r->fd, r->region_size)) == -1) - return clib_error_return_unix (0, "ftruncate"); + err = clib_mem_vm_ext_alloc (&alloc); + if (err) + return err; - if ((r->shm = mmap (NULL, r->region_size, PROT_READ | PROT_WRITE, - MAP_SHARED, r->fd, 0)) == MAP_FAILED) - return clib_error_return_unix (0, "mmap"); + r->fd = alloc.fd; for (i = 0; i < mif->run.num_s2m_rings; i++) { diff --git a/src/vlib.am b/src/vlib.am index 41d68690..067e4afc 100644 --- a/src/vlib.am +++ b/src/vlib.am @@ -13,7 +13,7 @@ lib_LTLIBRARIES += libvlib.la -libvlib_la_LIBADD = libvppinfra.la -ldl -lpthread -lnuma +libvlib_la_LIBADD = libvppinfra.la -ldl -lpthread libvlib_la_DEPENDENCIES = libvppinfra.la BUILT_SOURCES += vlib/config.h @@ -34,7 +34,6 @@ libvlib_la_SOURCES = \ vlib/init.c \ vlib/linux/pci.c \ vlib/linux/physmem.c \ - vlib/linux/sysfs.c \ vlib/main.c \ vlib/mc.c \ vlib/node.c \ @@ -60,8 +59,6 @@ nobase_include_HEADERS += \ vlib/global_funcs.h \ vlib/i2c.h \ vlib/init.h \ - vlib/linux/sysfs.h \ - vlib/linux/syscall.h \ vlib/main.h \ vlib/mc.h \ vlib/node_funcs.h \ diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c index 4ce19190..790f168a 100644 --- a/src/vlib/linux/pci.c +++ b/src/vlib/linux/pci.c @@ -37,10 +37,11 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include + #include #include #include -#include #include #include @@ -104,7 +105,7 @@ vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name) format_vlib_pci_addr, &d->bus_address); s = format (s, "%v/driver%c", dev_dir_name, 0); - driver_name = vlib_sysfs_link_to_name ((char *) s); + driver_name = clib_sysfs_link_to_name ((char *) s); vec_reset_length (s); if (driver_name && @@ -183,32 +184,32 @@ vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name) vec_reset_length (s); s = format (s, "%v/driver/unbind%c", dev_dir_name, 0); - vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); + clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); vec_reset_length (s); s = format (s, "%v/driver_override%c", dev_dir_name, 0); if (access ((char *) s, F_OK) == 0) { - vlib_sysfs_write ((char *) s, "%s", uio_driver_name); + clib_sysfs_write ((char *) s, "%s", uio_driver_name); clear_driver_override = 1; } else { vec_reset_length (s); s = format (s, "/sys/bus/pci/drivers/%s/new_id%c", uio_driver_name, 0); - vlib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id, + clib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id, d->device_id); } vec_reset_length (s); s = format (s, "/sys/bus/pci/drivers/%s/bind%c", uio_driver_name, 0); - vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); + clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); vec_reset_length (s); if (clear_driver_override) { s = format (s, "%v/driver_override%c", dev_dir_name, 0); - vlib_sysfs_write ((char *) s, "%c", 0); + clib_sysfs_write ((char *) s, "%c", 0); vec_reset_length (s); } @@ -602,28 +603,28 @@ scan_device (void *arg, u8 * dev_dir_name, u8 * ignored) dev->numa_node = -1; vec_reset_length (f); f = format (f, "%v/numa_node%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "%u", &dev->numa_node); + clib_sysfs_read ((char *) f, "%u", &dev->numa_node); vec_reset_length (f); f = format (f, "%v/class%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); + clib_sysfs_read ((char *) f, "0x%x", &tmp); dev->device_class = tmp >> 8; vec_reset_length (f); f = format (f, "%v/vendor%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); + clib_sysfs_read ((char *) f, "0x%x", &tmp); dev->vendor_id = tmp; vec_reset_length (f); f = format (f, "%v/device%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); + clib_sysfs_read ((char *) f, "0x%x", &tmp); dev->device_id = tmp; error = init_device (vm, dev, &pdev); vec_reset_length (f); f = format (f, "%v/driver%c", dev_dir_name, 0); - dev->driver_name = vlib_sysfs_link_to_name ((char *) f); + dev->driver_name = clib_sysfs_link_to_name ((char *) f); done: vec_free (f); diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c index d8c5dc9b..3cc42a06 100644 --- a/src/vlib/linux/physmem.c +++ b/src/vlib/linux/physmem.c @@ -43,14 +43,12 @@ #include #include #include -#include -#include +#include +#include #include #include #include -#include -#include static void * unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx, @@ -111,31 +109,6 @@ unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x) mheap_put (pr->heap, x - pr->heap); } -static u64 -get_page_paddr (int fd, uword addr) -{ - int pagesize = sysconf (_SC_PAGESIZE); - u64 seek, pagemap = 0; - - seek = ((u64) addr / pagesize) * sizeof (u64); - if (lseek (fd, seek, SEEK_SET) != seek) - { - clib_unix_warning ("lseek to 0x%llx", 
seek); - return 0; - } - if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) - { - clib_unix_warning ("read ptbits"); - return 0; - } - if ((pagemap & (1ULL << 63)) == 0) - return 0; - - pagemap &= pow2_mask (55); - - return pagemap * pagesize; -} - static clib_error_t * unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, u8 numa_node, u32 flags, @@ -144,13 +117,8 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, vlib_physmem_main_t *vpm = &vm->physmem_main; vlib_physmem_region_t *pr; clib_error_t *error = 0; - int pagemap_fd = -1; - u8 *mount_dir = 0; - u8 *filename = 0; - struct stat st; - int old_mpol; - int mmap_flags; - struct bitmask *old_mask = numa_allocate_nodemask (); + clib_mem_vm_alloc_t alloc = { 0 }; + if (geteuid () != 0 && (flags & VLIB_PHYSMEM_F_FAKE) == 0) return clib_error_return (0, "not allowed"); @@ -163,113 +131,32 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, goto error; } - pr->index = pr - vpm->regions; - pr->fd = -1; - pr->flags = flags; - - if (get_mempolicy (&old_mpol, old_mask->maskp, old_mask->size + 1, NULL, 0) - == -1) - { - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - error = clib_error_return_unix (0, "get_mempolicy"); - goto error; - } - else - old_mpol = -1; - } + alloc.name = name; + alloc.size = size; + alloc.numa_node = numa_node; + alloc.flags = CLIB_MEM_VM_F_SHARED; if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) { - if ((pagemap_fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) - { - error = clib_error_return_unix (0, "open '/proc/self/pagemap'"); - goto error; - } - - mount_dir = format (0, "%s/physmem_region%d%c", - vlib_unix_get_runtime_dir (), pr->index, 0); - filename = format (0, "%s/mem%c", mount_dir, 0); - - unlink ((char *) mount_dir); - - error = vlib_unix_recursive_mkdir ((char *) mount_dir); - if (error) - goto error; - - if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) - { - error = clib_error_return_unix (0, "mount hugetlb directory '%s'", - mount_dir); - goto error; - } - - if ((pr->fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) - { - error = clib_error_return_unix (0, "open"); - goto error; - } - - mmap_flags = MAP_SHARED | MAP_HUGETLB | MAP_LOCKED; + alloc.flags |= CLIB_MEM_VM_F_HUGETLB; + alloc.flags |= CLIB_MEM_VM_F_HUGETLB_PREALLOC; + alloc.flags |= CLIB_MEM_VM_F_NUMA_FORCE; } else { - if ((pr->fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1) - return clib_error_return_unix (0, "memfd_create"); - - if ((fcntl (pr->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - { - error = - clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); - goto error; - } - mmap_flags = MAP_SHARED; - } - - if (fstat (pr->fd, &st)) - { - error = clib_error_return_unix (0, "fstat"); - goto error; - } - - pr->log2_page_size = min_log2 (st.st_blksize); - pr->n_pages = ((size - 1) >> pr->log2_page_size) + 1; - size = pr->n_pages * (1 << pr->log2_page_size); - - if ((ftruncate (pr->fd, size)) == -1) - { - error = clib_error_return_unix (0, "ftruncate length: %d", size); - goto error; - } - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - error = vlib_sysfs_prealloc_hugepages (numa_node, - 1 << (pr->log2_page_size - 10), - pr->n_pages); - if (error) - goto error; - } - - if (old_mpol != -1) - numa_set_preferred (numa_node); - - pr->mem = mmap (0, size, (PROT_READ | PROT_WRITE), mmap_flags, pr->fd, 0); - - if (pr->mem == MAP_FAILED) - { - pr->mem = 0; - error = clib_error_return_unix (0, "mmap"); - goto error; + alloc.flags |= CLIB_MEM_VM_F_NUMA_PREFER; 
} - if (old_mpol != -1 && - set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1) - { - error = clib_error_return_unix (0, "set_mempolicy"); - goto error; - } + error = clib_mem_vm_ext_alloc (&alloc); + if (error) + goto error; + pr->index = pr - vpm->regions; + pr->flags = flags; + pr->fd = alloc.fd; + pr->mem = alloc.addr; + pr->log2_page_size = alloc.log2_page_size; + pr->n_pages = alloc.n_pages; pr->size = pr->n_pages << pr->log2_page_size; pr->page_mask = (1 << pr->log2_page_size) - 1; pr->numa_node = numa_node; @@ -285,13 +172,14 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, move_pages (0, 1, &ptr, 0, &node, 0); if (numa_node != node) { - clib_warning - ("physmem page for region \'%s\' allocated on the wrong" - " numa node (requested %u actual %u)", pr->name, - pr->numa_node, node, i); + clib_warning ("physmem page for region \'%s\' allocated on the" + " wrong numa node (requested %u actual %u)", + pr->name, pr->numa_node, node, i); break; } } + pr->page_table = clib_mem_vm_get_paddr (pr->mem, pr->log2_page_size, + pr->n_pages); } if (flags & VLIB_PHYSMEM_F_INIT_MHEAP) @@ -309,41 +197,13 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, *idx = pr->index; - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - int i; - for (i = 0; i < pr->n_pages; i++) - { - uword vaddr = - pointer_to_uword (pr->mem) + (((u64) i) << pr->log2_page_size); - u64 page_paddr = get_page_paddr (pagemap_fd, vaddr); - vec_add1 (pr->page_table, page_paddr); - } - } - goto done; error: - if (pr->fd > -1) - close (pr->fd); - - if (pr->mem) - munmap (pr->mem, size); - memset (pr, 0, sizeof (*pr)); pool_put (vpm->regions, pr); done: - if (mount_dir) - { - umount2 ((char *) mount_dir, MNT_DETACH); - rmdir ((char *) mount_dir); - vec_free (mount_dir); - } - numa_free_cpumask (old_mask); - vec_free (filename); - if (pagemap_fd > -1) - close (pagemap_fd); return error; } diff --git a/src/vlib/linux/syscall.h b/src/vlib/linux/syscall.h deleted file mode 100644 index 9e37997e..00000000 --- a/src/vlib/linux/syscall.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef included_linux_syscall_h -#define included_linux_syscall_h - -#ifndef __NR_memfd_create -#if defined __x86_64__ -#define __NR_memfd_create 319 -#elif defined __arm__ -#define __NR_memfd_create 385 -#elif defined __aarch64__ -#define __NR_memfd_create 279 -#else -#error "__NR_memfd_create unknown for this architecture" -#endif -#endif - -static inline int -memfd_create (const char *name, unsigned int flags) -{ - return syscall (__NR_memfd_create, name, flags); -} - -#ifndef F_LINUX_SPECIFIC_BASE -#define F_LINUX_SPECIFIC_BASE 1024 -#endif -#define MFD_ALLOW_SEALING 0x0002U -#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) -#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) - -#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ -#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ -#define F_SEAL_GROW 0x0004 /* prevent file from growing */ -#define F_SEAL_WRITE 0x0008 /* prevent writes */ - - -#endif /* included_linux_syscall_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/linux/sysfs.c b/src/vlib/linux/sysfs.c deleted file mode 100644 index f92f9ef5..00000000 --- a/src/vlib/linux/sysfs.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include -#include -#include -#include - -clib_error_t * -vlib_sysfs_write (char *file_name, char *fmt, ...) -{ - u8 *s; - int fd; - clib_error_t *error = 0; - - fd = open (file_name, O_WRONLY); - if (fd < 0) - return clib_error_return_unix (0, "open `%s'", file_name); - - va_list va; - va_start (va, fmt); - s = va_format (0, fmt, &va); - va_end (va); - - if (write (fd, s, vec_len (s)) < 0) - error = clib_error_return_unix (0, "write `%s'", file_name); - - vec_free (s); - close (fd); - return error; -} - -clib_error_t * -vlib_sysfs_read (char *file_name, char *fmt, ...) 
-{ - unformat_input_t input; - u8 *s = 0; - int fd; - ssize_t sz; - uword result; - - fd = open (file_name, O_RDONLY); - if (fd < 0) - return clib_error_return_unix (0, "open `%s'", file_name); - - vec_validate (s, 4095); - - sz = read (fd, s, vec_len (s)); - if (sz < 0) - { - close (fd); - vec_free (s); - return clib_error_return_unix (0, "read `%s'", file_name); - } - - _vec_len (s) = sz; - unformat_init_vector (&input, s); - - va_list va; - va_start (va, fmt); - result = va_unformat (&input, fmt, &va); - va_end (va); - - vec_free (s); - close (fd); - - if (result == 0) - return clib_error_return (0, "unformat error"); - - return 0; -} - -u8 * -vlib_sysfs_link_to_name (char *link) -{ - char *p, buffer[64]; - unformat_input_t in; - u8 *s = 0; - int r; - - r = readlink (link, buffer, sizeof (buffer) - 1); - - if (r < 0) - return 0; - - buffer[r] = 0; - p = strrchr (buffer, '/'); - - if (!p) - return 0; - - unformat_init_string (&in, p + 1, strlen (p + 1)); - if (unformat (&in, "%s", &s) != 1) - clib_unix_warning ("no string?"); - unformat_free (&in); - - return s; -} - -clib_error_t * -vlib_sysfs_set_nr_hugepages (unsigned int numa_node, int page_size, int nr) -{ - clib_error_t *error = 0; - struct stat sb; - u8 *p = 0; - - p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); - - if (stat ((char *) p, &sb) == 0) - { - if (S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' is not directory", p); - goto done; - } - } - else if (numa_node == 0) - { - vec_reset_length (p); - p = format (p, "/sys/kernel/mm%c", 0); - if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' does not exist or it is not " - "directory", p); - goto done; - } - } - else - { - error = clib_error_return (0, "'%s' does not exist", p); - goto done; - } - - _vec_len (p) -= 1; - p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0); - vlib_sysfs_write ((char *) p, "%d", nr); - -done: - vec_free (p); - return error; -} - - -static clib_error_t * -vlib_sysfs_get_xxx_hugepages (char *type, unsigned int numa_node, - int page_size, int *val) -{ - clib_error_t *error = 0; - struct stat sb; - u8 *p = 0; - - p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); - - if (stat ((char *) p, &sb) == 0) - { - if (S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' is not directory", p); - goto done; - } - } - else if (numa_node == 0) - { - vec_reset_length (p); - p = format (p, "/sys/kernel/mm%c", 0); - if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' does not exist or it is not " - "directory", p); - goto done; - } - } - else - { - error = clib_error_return (0, "'%s' does not exist", p); - goto done; - } - - _vec_len (p) -= 1; - p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size, - type, 0); - error = vlib_sysfs_read ((char *) p, "%d", val); - -done: - vec_free (p); - return error; -} - -clib_error_t * -vlib_sysfs_get_free_hugepages (unsigned int numa_node, int page_size, int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_get_nr_hugepages (unsigned int numa_node, int page_size, int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, int page_size, - int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v); -} - -clib_error_t * 
-vlib_sysfs_prealloc_hugepages (unsigned int numa_node, int page_size, int nr) -{ - clib_error_t *error = 0; - int n, needed; - error = vlib_sysfs_get_free_hugepages (numa_node, page_size, &n); - if (error) - return error; - needed = nr - n; - if (needed <= 0) - return 0; - - error = vlib_sysfs_get_nr_hugepages (numa_node, page_size, &n); - if (error) - return error; - clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u", - needed, page_size, numa_node); - return vlib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed); -} - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/linux/sysfs.h b/src/vlib/linux/sysfs.h deleted file mode 100644 index 14b71317..00000000 --- a/src/vlib/linux/sysfs.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_linux_sysfs_h -#define included_linux_sysfs_h - -clib_error_t *vlib_sysfs_write (char *file_name, char *fmt, ...); - -clib_error_t *vlib_sysfs_read (char *file_name, char *fmt, ...); - -u8 *vlib_sysfs_link_to_name (char *link); - -clib_error_t *vlib_sysfs_set_nr_hugepages (unsigned int numa_node, - int page_size, int nr); -clib_error_t *vlib_sysfs_get_nr_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_get_free_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_prealloc_hugepages (unsigned int numa_node, - int page_size, int nr); - -#endif /* included_linux_sysfs_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 2d9ce84a..f9c7043c 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -289,7 +289,7 @@ sort_registrations_by_no_clone (void *a0, void *a1) } static uword * -vlib_sysfs_list_to_bitmap (char *filename) +clib_sysfs_list_to_bitmap (char *filename) { FILE *fp; uword *r = 0; @@ -331,9 +331,9 @@ vlib_thread_init (vlib_main_t * vm) /* get bitmaps of active cpu cores and sockets */ tm->cpu_core_bitmap = - vlib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online"); + clib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online"); tm->cpu_socket_bitmap = - vlib_sysfs_list_to_bitmap ("/sys/devices/system/node/online"); + clib_sysfs_list_to_bitmap ("/sys/devices/system/node/online"); avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap); diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index f8d5d8f9..02bdea5c 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -15,10 +15,10 @@ #define _GNU_SOURCE #include +#include #include #include -#include #include static u8 * @@ -98,14 +98,14 @@ show_threads_fn (vlib_main_t * vm, u8 *p = 0; p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, lcore, 0); 
- vlib_sysfs_read ((char *) p, "%d", &core_id); + clib_sysfs_read ((char *) p, "%d", &core_id); vec_reset_length (p); p = format (p, "%s%u/topology/physical_package_id%c", sys_cpu_path, lcore, 0); - vlib_sysfs_read ((char *) p, "%d", &socket_id); + clib_sysfs_read ((char *) p, "%d", &socket_id); vec_free (p); line = format (line, "%-7u%-7u%-7u%", lcore, core_id, socket_id); diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 62bb228f..32696014 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -24,9 +24,9 @@ #include #include +#include #include #include -#include #include #include @@ -75,7 +75,7 @@ af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, { s = format (0, "/sys/class/net/%s/mtu%c", apif->host_if_name, 0); - error = vlib_sysfs_write ((char *) s, "%d", hi->max_packet_bytes); + error = clib_sysfs_write ((char *) s, "%d", hi->max_packet_bytes); vec_free (s); if (error) diff --git a/src/vppinfra.am b/src/vppinfra.am index a5769a0d..daca9954 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -188,6 +188,8 @@ nobase_include_HEADERS = \ vppinfra/graph.h \ vppinfra/hash.h \ vppinfra/heap.h \ + vppinfra/linux/sysfs.h \ + vppinfra/linux/syscall.h \ vppinfra/lock.h \ vppinfra/longjmp.h \ vppinfra/macros.h \ @@ -233,7 +235,6 @@ nobase_include_HEADERS = \ vppinfra/vector_neon.h \ vppinfra/vector_sse2.h \ vppinfra/valgrind.h \ - vppinfra/vm_unix.h \ vppinfra/xxhash.h \ vppinfra/xy.h \ vppinfra/zvec.h @@ -291,6 +292,8 @@ CLIB_CORE = \ libvppinfra_la_SOURCES = \ $(CLIB_CORE) \ vppinfra/elf_clib.c \ + vppinfra/linux/mem.c \ + vppinfra/linux/sysfs.c \ vppinfra/socket.c \ vppinfra/timer.c \ vppinfra/unix-formats.c \ diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c new file mode 100644 index 00000000..665ddf61 --- /dev/null +++ b/src/vppinfra/linux/mem.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_ADD_SEALS +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +int +clib_mem_vm_get_log2_page_size (int fd) +{ + struct stat st = { 0 }; + if (fstat (fd, &st)) + return 0; + return min_log2 (st.st_blksize); +} + +clib_error_t * +clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a) +{ + int fd = -1; + clib_error_t *err = 0; + void *addr = 0; + u8 *filename = 0; + int mmap_flags = MAP_SHARED; + int log2_page_size; + int n_pages; + int old_mpol = -1; + u64 old_mask[16] = { 0 }; + + /* save old numa mem policy if needed */ + if (a->flags & (CLIB_MEM_VM_F_NUMA_PREFER | CLIB_MEM_VM_F_NUMA_FORCE)) + { + int rv; + rv = + get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1, 0, 0); + + if (rv == -1) + { + if ((a->flags & CLIB_MEM_VM_F_NUMA_FORCE) != 0) + { + err = clib_error_return_unix (0, "get_mempolicy"); + goto error; + } + else + old_mpol = -1; + } + } + + /* if we are creating shared segment, we need file descriptor */ + if (a->flags & CLIB_MEM_VM_F_SHARED) + { + /* if hugepages are needed we need to create mount point */ + if (a->flags & CLIB_MEM_VM_F_HUGETLB) + { + char *mount_dir; + char template[] = "/tmp/hugepage_mount.XXXXXX"; + + mount_dir = mkdtemp (template); + if (mount_dir == 0) + return clib_error_return_unix (0, "mkdtemp \'%s\'", template); + + if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) + { + err = clib_error_return_unix (0, "mount hugetlb directory '%s'", + mount_dir); + goto error; + } + + filename = format (0, "%s/%s%c", mount_dir, a->name, 0); + + if ((fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) + { + err = clib_error_return_unix (0, "open"); + goto error; + } + umount2 ((char *) mount_dir, MNT_DETACH); + rmdir ((char *) mount_dir); + mmap_flags |= MAP_LOCKED; + } + else + { + if ((fd = memfd_create (a->name, MFD_ALLOW_SEALING)) == -1) + { + err = clib_error_return_unix (0, "memfd_create"); + goto error; + } + + if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) + { + err = clib_error_return_unix (0, "fcntl (F_ADD_SEALS)"); + goto error; + } + } + log2_page_size = clib_mem_vm_get_log2_page_size (fd); + } + else /* not CLIB_MEM_VM_F_SHARED */ + { + if (a->flags & CLIB_MEM_VM_F_HUGETLB) + { + mmap_flags |= MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS; + log2_page_size = 21; + } + else + { + mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS; + log2_page_size = min_log2 (sysconf (_SC_PAGESIZE)); + } + } + + n_pages = ((a->size - 1) >> log2_page_size) + 1; + + + if (a->flags & CLIB_MEM_VM_F_HUGETLB_PREALLOC) + { + err = clib_sysfs_prealloc_hugepages (a->numa_node, + 1 << (log2_page_size - 10), + n_pages); + if (err) + goto error; + + } + + if (fd != -1) + if ((ftruncate (fd, a->size)) == -1) + { + err = clib_error_return_unix (0, "ftruncate"); + goto error; + } + + if (old_mpol != -1) + { + int rv; + u64 mask[16] = { 0 }; + mask[0] = 1 << a->numa_node; + rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1); + if (rv) + { + err = clib_error_return_unix (0, "set_mempolicy"); + goto 
error; + } + } + + addr = mmap (0, a->size, (PROT_READ | PROT_WRITE), mmap_flags, fd, 0); + if (addr == MAP_FAILED) + { + err = clib_error_return_unix (0, "mmap"); + goto error; + } + + /* re-apply ole numa memory policy */ + if (old_mpol != -1 && + set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1) == -1) + { + err = clib_error_return_unix (0, "set_mempolicy"); + goto error; + } + + a->log2_page_size = log2_page_size; + a->n_pages = n_pages; + a->addr = addr; + a->fd = fd; + goto done; + +error: + if (fd != -1) + close (fd); + +done: + vec_free (filename); + return err; +} + +u64 * +clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages) +{ + int pagesize = sysconf (_SC_PAGESIZE); + int fd; + int i; + u64 *r = 0; + + if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) + return 0; + + for (i = 0; i < n_pages; i++) + { + u64 seek, pagemap = 0; + uword vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size); + seek = ((u64) vaddr / pagesize) * sizeof (u64); + if (lseek (fd, seek, SEEK_SET) != seek) + goto done; + + if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) + goto done; + + if ((pagemap & (1ULL << 63)) == 0) + goto done; + + pagemap &= pow2_mask (55); + vec_add1 (r, pagemap * pagesize); + } + +done: + close (fd); + if (vec_len (r) != n_pages) + { + vec_free (r); + return 0; + } + return r; +} + + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/linux/syscall.h b/src/vppinfra/linux/syscall.h new file mode 100644 index 00000000..f8ec5919 --- /dev/null +++ b/src/vppinfra/linux/syscall.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_linux_syscall_h +#define included_linux_syscall_h + +#include +#include + +static inline long +set_mempolicy (int mode, const unsigned long *nodemask, unsigned long maxnode) +{ + return syscall (__NR_set_mempolicy, mode, nodemask, maxnode); +} + +static inline int +get_mempolicy (int *mode, unsigned long *nodemask, unsigned long maxnode, + void *addr, unsigned long flags) +{ + return syscall (__NR_get_mempolicy, mode, nodemask, maxnode, addr, flags); +} + +static inline long +move_pages (int pid, unsigned long count, void **pages, const int *nodes, + int *status, int flags) +{ + return syscall (__NR_move_pages, pid, count, pages, nodes, status, flags); +} + +static inline int +memfd_create (const char *name, unsigned int flags) +{ + return syscall (__NR_memfd_create, name, flags); +} + +#endif /* included_linux_syscall_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/linux/sysfs.c b/src/vppinfra/linux/sysfs.c new file mode 100644 index 00000000..5f611e6a --- /dev/null +++ b/src/vppinfra/linux/sysfs.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +clib_error_t * +clib_sysfs_write (char *file_name, char *fmt, ...) +{ + u8 *s; + int fd; + clib_error_t *error = 0; + + fd = open (file_name, O_WRONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + va_list va; + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + + if (write (fd, s, vec_len (s)) < 0) + error = clib_error_return_unix (0, "write `%s'", file_name); + + vec_free (s); + close (fd); + return error; +} + +clib_error_t * +clib_sysfs_read (char *file_name, char *fmt, ...) +{ + unformat_input_t input; + u8 *s = 0; + int fd; + ssize_t sz; + uword result; + + fd = open (file_name, O_RDONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + vec_validate (s, 4095); + + sz = read (fd, s, vec_len (s)); + if (sz < 0) + { + close (fd); + vec_free (s); + return clib_error_return_unix (0, "read `%s'", file_name); + } + + _vec_len (s) = sz; + unformat_init_vector (&input, s); + + va_list va; + va_start (va, fmt); + result = va_unformat (&input, fmt, &va); + va_end (va); + + vec_free (s); + close (fd); + + if (result == 0) + return clib_error_return (0, "unformat error"); + + return 0; +} + +u8 * +clib_sysfs_link_to_name (char *link) +{ + char *p, buffer[64]; + unformat_input_t in; + u8 *s = 0; + int r; + + r = readlink (link, buffer, sizeof (buffer) - 1); + + if (r < 0) + return 0; + + buffer[r] = 0; + p = strrchr (buffer, '/'); + + if (!p) + return 0; + + unformat_init_string (&in, p + 1, strlen (p + 1)); + if (unformat (&in, "%s", &s) != 1) + clib_unix_warning ("no string?"); + unformat_free (&in); + + return s; +} + +clib_error_t * +clib_sysfs_set_nr_hugepages (int numa_node, int page_size, int nr) +{ + clib_error_t *error = 0; + struct stat sb; + u8 *p = 0; + + p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); + + if (stat ((char *) p, &sb) == 0) + { + if (S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' is not directory", p); + goto done; + } + } + else if (numa_node == 0) + { + vec_reset_length (p); + p = format (p, "/sys/kernel/mm%c", 0); + if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' does not exist or it is not " + "directory", p); + goto done; + } + } + else + { + error = clib_error_return (0, "'%s' does not exist", p); + goto done; + } + + _vec_len (p) -= 1; + p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0); + clib_sysfs_write ((char *) p, "%d", nr); + +done: + vec_free (p); + return error; +} + + +static clib_error_t * +clib_sysfs_get_xxx_hugepages (char *type, int numa_node, + int page_size, int *val) +{ + clib_error_t *error = 0; + struct stat sb; + u8 *p = 0; + + p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); + + if (stat ((char *) p, &sb) == 0) + { + if (S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' is not 
directory", p); + goto done; + } + } + else if (numa_node == 0) + { + vec_reset_length (p); + p = format (p, "/sys/kernel/mm%c", 0); + if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' does not exist or it is not " + "directory", p); + goto done; + } + } + else + { + error = clib_error_return (0, "'%s' does not exist", p); + goto done; + } + + _vec_len (p) -= 1; + p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size, + type, 0); + error = clib_sysfs_read ((char *) p, "%d", val); + +done: + vec_free (p); + return error; +} + +clib_error_t * +clib_sysfs_get_free_hugepages (int numa_node, int page_size, int *v) +{ + return clib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v); +} + +clib_error_t * +clib_sysfs_get_nr_hugepages (int numa_node, int page_size, int *v) +{ + return clib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v); +} + +clib_error_t * +clib_sysfs_get_surplus_hugepages (int numa_node, int page_size, int *v) +{ + return clib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v); +} + +clib_error_t * +clib_sysfs_prealloc_hugepages (int numa_node, int page_size, int nr) +{ + clib_error_t *error = 0; + int n, needed; + error = clib_sysfs_get_free_hugepages (numa_node, page_size, &n); + if (error) + return error; + needed = nr - n; + if (needed <= 0) + return 0; + + error = clib_sysfs_get_nr_hugepages (numa_node, page_size, &n); + if (error) + return error; + clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u", + needed, page_size, numa_node); + return clib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed); +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/linux/sysfs.h b/src/vppinfra/linux/sysfs.h new file mode 100644 index 00000000..6c80cf95 --- /dev/null +++ b/src/vppinfra/linux/sysfs.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_linux_sysfs_h +#define included_linux_sysfs_h + +#include + +clib_error_t *clib_sysfs_write (char *file_name, char *fmt, ...); + +clib_error_t *clib_sysfs_read (char *file_name, char *fmt, ...); + +u8 *clib_sysfs_link_to_name (char *link); + +clib_error_t *clib_sysfs_set_nr_hugepages (int numa_node, + int page_size, int nr); +clib_error_t *clib_sysfs_get_nr_hugepages (int numa_node, + int page_size, int *v); +clib_error_t *clib_sysfs_get_free_hugepages (int numa_node, + int page_size, int *v); +clib_error_t *clib_sysfs_get_surplus_hugepages (int numa_node, + int page_size, int *v); +clib_error_t *clib_sysfs_prealloc_hugepages (int numa_node, + int page_size, int nr); + +#endif /* included_linux_sysfs_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index 63c5ac16..69ab8803 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -39,8 +39,11 @@ #define _included_clib_mem_h #include +#include +#include #include /* uword, etc */ +#include #include #include #include /* memcpy, memset */ @@ -264,19 +267,90 @@ void clib_mem_usage (clib_mem_usage_t * usage); u8 *format_clib_mem_usage (u8 * s, va_list * args); -/* Include appropriate VM functions depending on whether - we are compiling for linux kernel, for Unix or standalone. */ -#ifdef CLIB_LINUX_KERNEL -#include -#endif +/* Allocate virtual address space. */ +always_inline void * +clib_mem_vm_alloc (uword size) +{ + void *mmap_addr; + uword flags = MAP_PRIVATE; -#ifdef CLIB_UNIX -#include +#ifdef MAP_ANONYMOUS + flags |= MAP_ANONYMOUS; #endif -#ifdef CLIB_STANDALONE -#include -#endif + mmap_addr = mmap (0, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (mmap_addr == (void *) -1) + mmap_addr = 0; + + return mmap_addr; +} + +always_inline void +clib_mem_vm_free (void *addr, uword size) +{ + munmap (addr, size); +} + +always_inline void * +clib_mem_vm_unmap (void *addr, uword size) +{ + void *mmap_addr; + uword flags = MAP_PRIVATE | MAP_FIXED; + + /* To unmap we "map" with no protection. If we actually called + munmap then other callers could steal the address space. By + changing to PROT_NONE the kernel can free up the pages which is + really what we want "unmap" to mean. */ + mmap_addr = mmap (addr, size, PROT_NONE, flags, -1, 0); + if (mmap_addr == (void *) -1) + mmap_addr = 0; + + return mmap_addr; +} + +always_inline void * +clib_mem_vm_map (void *addr, uword size) +{ + void *mmap_addr; + uword flags = MAP_PRIVATE | MAP_FIXED; + + mmap_addr = mmap (addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0); + if (mmap_addr == (void *) -1) + mmap_addr = 0; + + return mmap_addr; +} + +typedef struct +{ +#define CLIB_MEM_VM_F_SHARED (1 << 0) +#define CLIB_MEM_VM_F_HUGETLB (1 << 1) +#define CLIB_MEM_VM_F_NUMA_PREFER (1 << 2) +#define CLIB_MEM_VM_F_NUMA_FORCE (1 << 3) +#define CLIB_MEM_VM_F_HUGETLB_PREALLOC (1 << 4) + u32 flags; /**< vm allocation flags: +
CLIB_MEM_VM_F_SHARED: request shared memory, file + descriptor will be provided on successful allocation. +
CLIB_MEM_VM_F_HUGETLB: request hugepages. +
CLIB_MEM_VM_F_NUMA_PREFER: numa_node field contains valid + numa node preference. +
CLIB_MEM_VM_F_NUMA_FORCE: fail if setting numa policy fails. +
CLIB_MEM_VM_F_HUGETLB_PREALLOC: pre-allocate hugepages if + number of available pages is not sufficient. + */ + char *name; /**< Name for memory allocation, set by caller. */ + uword size; /**< Allocation size, set by caller. */ + int numa_node; /**< numa node preference. Valid if CLIB_MEM_VM_F_NUMA_PREFER set. */ + void *addr; /**< Pointer to allocated memory, set on successful allocation. */ + int fd; /**< File descriptor, set on successful allocation if CLIB_MEM_VM_F_SHARED is set. */ + int log2_page_size; /* Page size in log2 format, set on successful allocation. */ + int n_pages; /* Number of pages. */ +} clib_mem_vm_alloc_t; + +clib_error_t *clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a); +int clib_mem_vm_get_log2_page_size (int fd); +u64 *clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages); + #include /* clib_panic */ diff --git a/src/vppinfra/vm_linux_kernel.h b/src/vppinfra/vm_linux_kernel.h deleted file mode 100644 index fd9e6148..00000000 --- a/src/vppinfra/vm_linux_kernel.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef included_vm_linux_kernel_h -#define included_vm_linux_kernel_h - -#include -#include /* for GFP_* */ -#include /* for PAGE_KERNEL */ - -/* Allocate virtual address space.
*/ -always_inline void * -clib_mem_vm_alloc (uword size) -{ - return vmalloc (size); -} - -always_inline void -clib_mem_vm_free (void *addr, uword size) -{ - vfree (addr); -} - -always_inline void * -clib_mem_vm_unmap (void *addr, uword size) -{ - return 0; -} - -always_inline void * -clib_mem_vm_map (void *addr, uword size) -{ - return addr; -} - -#endif /* included_vm_linux_kernel_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/vm_standalone.h b/src/vppinfra/vm_standalone.h deleted file mode 100644 index 2cd431bc..00000000 --- a/src/vppinfra/vm_standalone.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef included_vm_standalone_h -#define included_vm_standalone_h - -/* Stubs for standalone "system" which has no VM support. */ - -always_inline void * -clib_mem_vm_alloc (uword size) -{ - return 0; -} - -always_inline void -clib_mem_vm_free (void *addr, uword size) -{ -} - -always_inline void * -clib_mem_vm_unmap (void *addr, uword size) -{ - return 0; -} - -always_inline void * -clib_mem_vm_map (void *addr, uword size) -{ - return addr; -} - -#endif /* included_vm_standalone_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/vm_unix.h b/src/vppinfra/vm_unix.h deleted file mode 100644 index 07e86516..00000000 --- a/src/vppinfra/vm_unix.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef included_vm_unix_h -#define included_vm_unix_h - -#include -#include - -/* Allocate virtual address space. */ -always_inline void * -clib_mem_vm_alloc (uword size) -{ - void *mmap_addr; - uword flags = MAP_PRIVATE; - -#ifdef MAP_ANONYMOUS - flags |= MAP_ANONYMOUS; -#endif - - mmap_addr = mmap (0, size, PROT_READ | PROT_WRITE, flags, -1, 0); - if (mmap_addr == (void *) -1) - mmap_addr = 0; - - return mmap_addr; -} - -always_inline void -clib_mem_vm_free (void *addr, uword size) -{ - munmap (addr, size); -} - -always_inline void * -clib_mem_vm_unmap (void *addr, uword size) -{ - void *mmap_addr; - uword flags = MAP_PRIVATE | MAP_FIXED; - - /* To unmap we "map" with no protection. If we actually called - munmap then other callers could steal the address space. By - changing to PROT_NONE the kernel can free up the pages which is - really what we want "unmap" to mean. */ - mmap_addr = mmap (addr, size, PROT_NONE, flags, -1, 0); - if (mmap_addr == (void *) -1) - mmap_addr = 0; - - return mmap_addr; -} - -always_inline void * -clib_mem_vm_map (void *addr, uword size) -{ - void *mmap_addr; - uword flags = MAP_PRIVATE | MAP_FIXED; - - mmap_addr = mmap (addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0); - if (mmap_addr == (void *) -1) - mmap_addr = 0; - - return mmap_addr; -} - -#endif /* included_vm_unix_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ -- cgit 1.2.3-korg From 35df2e1dc9dd639f3a28680689c1d5f3ec6d8dfc Mon Sep 17 00:00:00 2001 From: Yoann Desmouceaux Date: Wed, 20 Sep 2017 11:00:42 +0200 Subject: vhost-user: fix link-up status When changing the admin state of a vhost-user interface, do not put it in link-up mode if the interface is not actually ready. 
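The guard pattern, in essence: link-up must be reported only when the interface is both administratively up and operationally ready. A minimal sketch of that idea follows (illustrative only, not the patch itself; device_ready stands in for the readiness state the driver tracks in vui->is_up):

#include <vnet/vnet.h>

/* Sketch: gate the link flag on both admin state and device readiness.
 * All names except vnet_hw_interface_set_flags() and the flag itself
 * are illustrative. */
static void
example_update_link (vnet_main_t * vnm, u32 hw_if_index,
                     int admin_up, int device_ready)
{
  vnet_hw_interface_set_flags (vnm, hw_if_index,
                               (admin_up && device_ready) ?
                               VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
}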
Change-Id: Idbc631a7126efa79d199909f9e7656d21bd412ca Signed-off-by: Yoann Desmouceaux --- src/vnet/devices/virtio/vhost-user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 2af96ee7..4200ed8d 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2443,7 +2443,7 @@ vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, vui->admin_up = is_up; - if (is_up) + if (is_up && vui->is_up) vnet_hw_interface_set_flags (vnm, vui->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); -- cgit 1.2.3-korg From 535f0bfe0274e86c5d2e00dfd66dd632c6ae20a9 Mon Sep 17 00:00:00 2001 From: Akshaya N Date: Fri, 15 Sep 2017 17:37:53 +0530 Subject: VLAN support on host(af-packet) interface. On a host interface, if a VLAN-tagged packet is received, the Linux kernel removes the VLAN header from the packet byte stream and adds metadata in tpacket2_hdr. This patch explicitly checks for the presence of VLAN metadata and adds it to the VPP packet. Change-Id: I0ba35c1e98dbc008ce18d032f22f2717d610c1aa Signed-off-by: Akshaya N --- src/vnet/devices/af_packet/node.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index d3af41b5..99c91f38 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -173,12 +173,35 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, /* copy data */ u32 bytes_to_copy = data_len > n_buffer_bytes ? n_buffer_bytes : data_len; + u32 vlan_len = 0; + u32 bytes_copied = 0; b0->current_data = 0; - clib_memcpy (vlib_buffer_get_current (b0), - (u8 *) tph + tph->tp_mac + offset, bytes_to_copy); + /* Kernel removes VLAN headers, so reconstruct VLAN */ + if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID)) + { + if (PREDICT_TRUE (offset == 0)) + { + clib_memcpy (vlib_buffer_get_current (b0), + (u8 *) tph + tph->tp_mac, + sizeof (ethernet_header_t)); + ethernet_header_t *eth = vlib_buffer_get_current (b0); + ethernet_vlan_header_t *vlan = + (ethernet_vlan_header_t *) (eth + 1); + vlan->priority_cfi_and_id = + clib_host_to_net_u16 (tph->tp_vlan_tci); + vlan->type = eth->type; + eth->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + vlan_len = sizeof (ethernet_vlan_header_t); + bytes_copied = sizeof (ethernet_header_t); + } + } + clib_memcpy (((u8 *) vlib_buffer_get_current (b0)) + + bytes_copied + vlan_len, + (u8 *) tph + tph->tp_mac + offset + bytes_copied, + (bytes_to_copy - bytes_copied)); /* fill buffer header */ - b0->current_length = bytes_to_copy; + b0->current_length = bytes_to_copy + vlan_len; if (offset == 0) { -- cgit 1.2.3-korg From 67700d41169ac37d21c400949a316750eabad969 Mon Sep 17 00:00:00 2001 From: Pierre Pfister Date: Thu, 5 Oct 2017 14:24:05 +0200 Subject: fix buffer allocation for sparse jumbo frames in vhost A bug was reported where a jumbo packet would stay in the vhost queue forever, or until a large enough number of other packets arrived in the queue. This is due to a bug in the vhost input node's buffer allocation. The fix is to make sure that vhost always allocates at least enough buffers for a single big packet. '40' is used to account for 65kB frames.
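As a quick sanity check on that constant (a back-of-the-envelope sketch; the 2048-byte default buffer size is an assumption here, not something the patch states): a worst-case 64 kB frame split across 2048-byte buffers needs 65536 / 2048 = 32 buffers, so demanding at least 40 rx buffers leaves headroom.

#include <stdio.h>

/* Hypothetical arithmetic check, assuming a 2048-byte buffer size. */
int
main (void)
{
  unsigned frame_bytes = 65536;   /* worst-case jumbo frame */
  unsigned buffer_bytes = 2048;   /* assumed default buffer size */
  unsigned needed = (frame_bytes + buffer_bytes - 1) / buffer_bytes;
  printf ("buffers per 64k frame: %u (patch requires >= 40)\n", needed);
  return 0;
}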
Change-Id: I1d293028854165083e30cd798fab9d4140230b78 Signed-off-by: Pierre Pfister --- src/vnet/devices/virtio/vhost-user.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 4200ed8d..55faf4f5 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -1545,8 +1545,10 @@ vhost_user_if_input (vlib_main_t * vm, * per packet. In case packets are bigger, we will just yield at some point * in the loop and come back later. This is not an issue as for big packets, * processing cost really comes from the memory copy. + * The assumption is that big packets will fit in 40 buffers. */ - if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1)) + if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1 || + vum->cpus[thread_index].rx_buffers_len < 40)) { u32 curr_len = vum->cpus[thread_index].rx_buffers_len; vum->cpus[thread_index].rx_buffers_len += -- cgit 1.2.3-korg From 4ca58265a00c106c8dc3e12229c3bd23876a7a58 Mon Sep 17 00:00:00 2001 From: Steven Date: Fri, 9 Jun 2017 18:49:17 -0700 Subject: vhost: crash under heavy traffic condition due to memory corruption (VPP-1016) With heavy traffic, the tx code path may crash due to memory corruption: Thread 5 "vpp_wk_2" received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x7fff3995c700 (LWP 2505)] 0x00007ffff73675e8 in vhost_user_if_input (vm=0x7fffb5f5bf9c, vum=0x7ffff7882a40 , vui=0x7fffb65570c4, qid=0, node=0x7fffb6577dac, mode=VNET_HW_INTERFACE_RX_MODE_POLLING) at /home/sluong/vpp-master/vpp/build-data/../src/vnet/devices/virtio/vhost-user.c:1610 1610 bi_current = (vum->cpus[thread_index].rx_buffers) [vum->cpus[thread_index].rx_buffers_len]; (gdb) p vum->cpus[thread_index].rx_buffers_len $2 = 793212607 (gdb) Apparently, some code accidentally wrote a bad value into rx_buffers_len. rx_buffers_len should never be greater than 1024, since that is how many buffers we request each time. After many hours of debugging, I discovered that the memory corruption happens in the tx code path, right here on line 2176. { vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len]; copy_len++; cpy->len = bytes_left; cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len; cpy->dst = buffer_map_addr; cpy->src = (uword) vlib_buffer_get_current (current_b0) + current_b0->current_length - bytes_left; (gdb) p cpy $3 = (vhost_copy_t *) 0x7fffb554077c (gdb) p copy_len $4 = 1025 (gdb) p &vum->cpus[3].rx_buffers_len $8 = (u32 *) 0x7fffb5540784 copy_len picks up index entry 1024 before it is incremented. The copy array has only 1024 members (0 - 1023 are valid). The assignment through cpy here surely causes memory corruption; it is only discovered later, when the corrupted memory location is used. The condition for the crash is transmitting jumbo frames under heavy volume. Since the ring size is 1024, and one packet takes up a single index for frame sizes below 2048 bytes, ordinary traffic does not cause overflow. With jumbo frames, one packet requires multiple indices, which can cause the overflow under heavy traffic. The fix is to do the copy out when we have 1000 entries in the array, to avoid overflow.
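The pattern behind the fix is a bounded staging array that is flushed before it can overflow. A condensed sketch follows (all names are illustrative, not the actual VPP symbols; the real code also hands buffers back to the vring after each flush):

#include <string.h>

#define ARRAY_N 1024
#define THRESHOLD (ARRAY_N - 40)  /* headroom for one worst-case packet */

typedef struct { void *src, *dst; unsigned len; } copy_t;

static copy_t copies[ARRAY_N];
static unsigned copy_len;

/* Execute and drain all staged copies. */
static void
flush_copies (void)
{
  unsigned i;
  for (i = 0; i < copy_len; i++)
    memcpy (copies[i].dst, copies[i].src, copies[i].len);
  copy_len = 0;
}

/* Stage one copy, flushing first if the array is near capacity;
 * writing at index ARRAY_N or beyond would scribble over whatever
 * follows the array in memory -- exactly the bug described above. */
static void
stage_copy (void *dst, void *src, unsigned len)
{
  if (copy_len >= THRESHOLD)
    flush_copies ();
  copies[copy_len].dst = dst;
  copies[copy_len].src = src;
  copies[copy_len].len = len;
  copy_len++;
}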
Change-Id: Iefbc739b8e80470f1cf13123113f8331ffcd0eb2 Signed-off-by: Steven (cherry picked from commit aa5df48cb233b377b5910694e2440a16e5973864) --- src/vnet/devices/virtio/vhost-user.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'src/vnet/devices') diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 55faf4f5..19ad9ab1 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -86,6 +86,16 @@ * The value 64 was obtained by testing (48 and 128 were not as good). */ #define VHOST_USER_RX_COPY_THRESHOLD 64 +/* + * On the transmit side, we keep processing the buffers from vlib in the while + * loop and prepare the copy order to be executed later. However, the static + * array in which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N + * entries. In order not to corrupt memory, we have to do the copy when the + * static array reaches the copy threshold. We subtract 40 in case the code + * goes into the inner loop for a maximum-size 64k frame, which may require + * more array entries. + */ +#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 40) #define UNIX_GET_FD(unixfd_idx) \ (unixfd_idx != ~0) ? \ @@ -2000,7 +2010,7 @@ vhost_user_tx (vlib_main_t * vm, qid = VHOST_VRING_IDX_RX (*vec_elt_at_index - (vui->per_cpu_tx_qid, vlib_get_thread_index ())); + (vui->per_cpu_tx_qid, thread_index)); rxvq = &vui->vrings[qid]; if (PREDICT_FALSE (vui->use_tx_spinlock)) vhost_user_vring_lock (vui, qid); @@ -2212,6 +2222,27 @@ retry: } n_left--; //At the end for error counting when 'goto done' is invoked + + /* + * Do the copy periodically to prevent + * vum->cpus[thread_index].copy array overflow and memory corruption + */ + if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD)) + { + if (PREDICT_FALSE + (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy, + copy_len, &map_hint))) + { + vlib_error_count (vm, node->node_index, + VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1); + } + copy_len = 0; + + /* give buffers back to driver */ + CLIB_MEMORY_BARRIER (); + rxvq->used->idx = rxvq->last_used_idx; + vhost_user_log_dirty_ring (vui, rxvq, idx); + } buffers++; } @@ -2266,7 +2297,7 @@ done3: vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + VNET_INTERFACE_COUNTER_DROP, - vlib_get_thread_index (), vui->sw_if_index, n_left); + thread_index, vui->sw_if_index, n_left); } vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); -- cgit 1.2.3-korg
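Stepping back to the clib_mem_vm_ext_alloc() API introduced at the top of this series: a plausible caller, modeled on how unix_physmem_region_alloc() drives it in the first patch (a sketch with reduced error handling; the region name and size are made up for illustration):

#include <vppinfra/mem.h>

/* Sketch: allocate a 64 MB shared, hugepage-backed region on numa
 * node 0, pre-allocating hugepages and failing hard if the numa
 * policy cannot be applied. */
static clib_error_t *
example_alloc_region (void)
{
  clib_mem_vm_alloc_t alloc = { 0 };
  clib_error_t *err;

  alloc.name = "example_region";  /* hypothetical name */
  alloc.size = 64 << 20;          /* hypothetical size */
  alloc.numa_node = 0;
  alloc.flags = CLIB_MEM_VM_F_SHARED | CLIB_MEM_VM_F_HUGETLB |
    CLIB_MEM_VM_F_HUGETLB_PREALLOC | CLIB_MEM_VM_F_NUMA_FORCE;

  if ((err = clib_mem_vm_ext_alloc (&alloc)))
    return err;

  /* On success alloc.addr, alloc.fd, alloc.log2_page_size and
   * alloc.n_pages are filled in; physical addresses, if needed,
   * come from clib_mem_vm_get_paddr (alloc.addr,
   * alloc.log2_page_size, alloc.n_pages). */
  return 0;
}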