diff options
Diffstat (limited to 'drivers/net/bonding/rte_eth_bond_pmd.c')
-rw-r--r-- | drivers/net/bonding/rte_eth_bond_pmd.c | 673 |
1 files changed, 413 insertions, 260 deletions
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c index fe232895..c34c3251 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c @@ -1,41 +1,12 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation */ #include <stdlib.h> #include <netinet/in.h> #include <rte_mbuf.h> #include <rte_malloc.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_ethdev_vdev.h> #include <rte_tcp.h> #include <rte_udp.h> @@ -309,87 +280,114 @@ bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, static uint16_t bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) + uint16_t nb_bufs) { - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; - uint16_t num_of_slaves; - uint16_t slaves[RTE_MAX_ETHPORTS]; - /* positions in slaves, not ID */ - uint8_t distributing_offsets[RTE_MAX_ETHPORTS]; - uint8_t distributing_count; + uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t slave_count; - uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0; - uint16_t i, op_slave_idx; + uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t dist_slave_count; - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; + /* 2-D array to sort mbufs for transmission on each slave into */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; + /* Number of mbufs for transmission on each slave */ + uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; + /* Mapping array generated by hash function to map mbufs to slaves */ + uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 }; - /* Total amount of packets in slave_bufs */ - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; - /* Slow packets placed in each slave */ + uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t total_tx_count = 0, total_tx_fail_count = 0; - if (unlikely(nb_pkts == 0)) - return 0; + uint16_t i, j; - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; + if (unlikely(nb_bufs == 0)) + return 0; /* Copy slave list to protect against slave up/down changes during tx * bursting */ - num_of_slaves = internals->active_slave_count; - if (num_of_slaves < 1) - return num_tx_total; + slave_count = internals->active_slave_count; + if (unlikely(slave_count < 1)) + return 0; - memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * - num_of_slaves); + memcpy(slave_port_ids, internals->active_slaves, + sizeof(slave_port_ids[0]) * slave_count); + + + dist_slave_count = 0; + for (i = 0; i < slave_count; i++) { + struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; - distributing_count = 0; - for (i = 0; i < num_of_slaves; i++) { - struct port *port = &mode_8023ad_ports[slaves[i]]; if (ACTOR_STATE(port, DISTRIBUTING)) - distributing_offsets[distributing_count++] = i; + dist_slave_port_ids[dist_slave_count++] = + slave_port_ids[i]; } - if (likely(distributing_count > 0)) { - /* Populate slaves mbuf with the packets which are to be sent */ - for (i = 0; i < nb_pkts; i++) { - /* Select output slave using hash based on xmit policy */ - op_slave_idx = internals->xmit_hash(bufs[i], - distributing_count); + if (unlikely(dist_slave_count < 1)) + return 0; - /* Populate slave mbuf arrays with mbufs for that slave. - * Use only slaves that are currently distributing. - */ - uint8_t slave_offset = - distributing_offsets[op_slave_idx]; - slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = - bufs[i]; - slave_nb_pkts[slave_offset]++; - } + /* + * Populate slaves mbuf with the packets which are to be sent on it + * selecting output slave using hash based on xmit policy + */ + internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count, + bufs_slave_port_idxs); + + for (i = 0; i < nb_bufs; i++) { + /* Populate slave mbuf arrays with mbufs for that slave. */ + uint8_t slave_idx = bufs_slave_port_idxs[i]; + + slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; } + /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] == 0) + for (i = 0; i < dist_slave_count; i++) { + if (slave_nb_bufs[i] == 0) continue; - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); + slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i], + bd_tx_q->queue_id, slave_bufs[i], + slave_nb_bufs[i]); - num_tx_total += num_tx_slave; - num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave; + total_tx_count += slave_tx_count; /* If tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - uint16_t j = nb_pkts - num_tx_fail_total; - for ( ; num_tx_slave < slave_nb_pkts[i]; j++, - num_tx_slave++) - bufs[j] = slave_bufs[i][num_tx_slave]; + if (unlikely(slave_tx_count < slave_nb_bufs[i])) { + slave_tx_fail_count[i] = slave_nb_bufs[i] - + slave_tx_count; + total_tx_fail_count += slave_tx_fail_count[i]; + + /* + * Shift bufs to beginning of array to allow reordering + * later + */ + for (j = 0; j < slave_tx_fail_count[i]; j++) { + slave_bufs[i][j] = + slave_bufs[i][(slave_tx_count - 1) + j]; + } } } - return num_tx_total; + /* + * If there are tx burst failures we move packets to end of bufs to + * preserve expected PMD behaviour of all failed transmitted being + * at the end of the input mbuf array + */ + if (unlikely(total_tx_fail_count > 0)) { + int bufs_idx = nb_bufs - total_tx_fail_count - 1; + + for (i = 0; i < slave_count; i++) { + if (slave_tx_fail_count[i] > 0) { + for (j = 0; j < slave_tx_fail_count[i]; j++) + bufs[bufs_idx++] = slave_bufs[i][j]; + } + } + } + + return total_tx_count; } @@ -788,96 +786,129 @@ ipv6_hash(struct ipv6_hdr *ipv6_hdr) (word_src_addr[3] ^ word_dst_addr[3]); } -uint16_t -xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count) + +void +burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint8_t slave_count, uint16_t *slaves) { - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); + struct ether_hdr *eth_hdr; + uint32_t hash; + int i; - uint32_t hash = ether_hash(eth_hdr); + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); + + hash = ether_hash(eth_hdr); - return (hash ^= hash >> 8) % slave_count; + slaves[i] = (hash ^= hash >> 8) % slave_count; + } } -uint16_t -xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count) +void +burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint8_t slave_count, uint16_t *slaves) { - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); - uint16_t proto = eth_hdr->ether_type; - size_t vlan_offset = get_vlan_offset(eth_hdr, &proto); - uint32_t hash, l3hash = 0; + uint16_t i; + struct ether_hdr *eth_hdr; + uint16_t proto; + size_t vlan_offset; + uint32_t hash, l3hash; - hash = ether_hash(eth_hdr); + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); + l3hash = 0; - if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { - struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv4_hash(ipv4_hdr); + proto = eth_hdr->ether_type; + hash = ether_hash(eth_hdr); - } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { - struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv6_hash(ipv6_hdr); - } + vlan_offset = get_vlan_offset(eth_hdr, &proto); - hash = hash ^ l3hash; - hash ^= hash >> 16; - hash ^= hash >> 8; + if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { + struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv4_hash(ipv4_hdr); - return hash % slave_count; -} + } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { + struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv6_hash(ipv6_hdr); + } -uint16_t -xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count) -{ - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *); - uint16_t proto = eth_hdr->ether_type; - size_t vlan_offset = get_vlan_offset(eth_hdr, &proto); + hash = hash ^ l3hash; + hash ^= hash >> 16; + hash ^= hash >> 8; - struct udp_hdr *udp_hdr = NULL; - struct tcp_hdr *tcp_hdr = NULL; - uint32_t hash, l3hash = 0, l4hash = 0; + slaves[i] = hash % slave_count; + } +} - if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { - struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - size_t ip_hdr_offset; +void +burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint8_t slave_count, uint16_t *slaves) +{ + struct ether_hdr *eth_hdr; + uint16_t proto; + size_t vlan_offset; + int i; - l3hash = ipv4_hash(ipv4_hdr); + struct udp_hdr *udp_hdr; + struct tcp_hdr *tcp_hdr; + uint32_t hash, l3hash, l4hash; - /* there is no L4 header in fragmented packet */ - if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) { - ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) * + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *); + proto = eth_hdr->ether_type; + vlan_offset = get_vlan_offset(eth_hdr, &proto); + l3hash = 0; + l4hash = 0; + + if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) { + struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + size_t ip_hdr_offset; + + l3hash = ipv4_hash(ipv4_hdr); + + /* there is no L4 header in fragmented packet */ + if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) + == 0)) { + ip_hdr_offset = (ipv4_hdr->version_ihl + & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER; - if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { - tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + - ip_hdr_offset); + if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { + tcp_hdr = (struct tcp_hdr *) + ((char *)ipv4_hdr + + ip_hdr_offset); + l4hash = HASH_L4_PORTS(tcp_hdr); + } else if (ipv4_hdr->next_proto_id == + IPPROTO_UDP) { + udp_hdr = (struct udp_hdr *) + ((char *)ipv4_hdr + + ip_hdr_offset); + l4hash = HASH_L4_PORTS(udp_hdr); + } + } + } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { + struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv6_hash(ipv6_hdr); + + if (ipv6_hdr->proto == IPPROTO_TCP) { + tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1); l4hash = HASH_L4_PORTS(tcp_hdr); - } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) { - udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + - ip_hdr_offset); + } else if (ipv6_hdr->proto == IPPROTO_UDP) { + udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1); l4hash = HASH_L4_PORTS(udp_hdr); } } - } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) { - struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) - ((char *)(eth_hdr + 1) + vlan_offset); - l3hash = ipv6_hash(ipv6_hdr); - if (ipv6_hdr->proto == IPPROTO_TCP) { - tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1); - l4hash = HASH_L4_PORTS(tcp_hdr); - } else if (ipv6_hdr->proto == IPPROTO_UDP) { - udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1); - l4hash = HASH_L4_PORTS(udp_hdr); - } - } + hash = l3hash ^ l4hash; + hash ^= hash >> 16; + hash ^= hash >> 8; - hash = l3hash ^ l4hash; - hash ^= hash >> 16; - hash ^= hash >> 8; - - return hash % slave_count; + slaves[i] = hash % slave_count; + } } struct bwg_slave { @@ -1185,156 +1216,239 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) static uint16_t bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) + uint16_t nb_bufs) { - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; - uint16_t num_of_slaves; - uint16_t slaves[RTE_MAX_ETHPORTS]; + uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t slave_count; - uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0; + /* Array to sort mbufs for transmission on each slave into */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; + /* Number of mbufs for transmission on each slave */ + uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; + /* Mapping array generated by hash function to map mbufs to slaves */ + uint16_t bufs_slave_port_idxs[nb_bufs]; - int i, op_slave_id; + uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t total_tx_count = 0, total_tx_fail_count = 0; - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t i, j; - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; + if (unlikely(nb_bufs == 0)) + return 0; /* Copy slave list to protect against slave up/down changes during tx * bursting */ - num_of_slaves = internals->active_slave_count; - memcpy(slaves, internals->active_slaves, - sizeof(internals->active_slaves[0]) * num_of_slaves); + slave_count = internals->active_slave_count; + if (unlikely(slave_count < 1)) + return 0; - if (num_of_slaves < 1) - return num_tx_total; + memcpy(slave_port_ids, internals->active_slaves, + sizeof(slave_port_ids[0]) * slave_count); - /* Populate slaves mbuf with the packets which are to be sent on it */ - for (i = 0; i < nb_pkts; i++) { - /* Select output slave using hash based on xmit policy */ - op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves); + /* + * Populate slaves mbuf with the packets which are to be sent on it + * selecting output slave using hash based on xmit policy + */ + internals->burst_xmit_hash(bufs, nb_bufs, slave_count, + bufs_slave_port_idxs); + + for (i = 0; i < nb_bufs; i++) { + /* Populate slave mbuf arrays with mbufs for that slave. */ + uint8_t slave_idx = bufs_slave_port_idxs[i]; - /* Populate slave mbuf arrays with mbufs for that slave */ - slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i]; + slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; } /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] > 0) { - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); + for (i = 0; i < slave_count; i++) { + if (slave_nb_bufs[i] == 0) + continue; - /* if tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave; + slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], + bd_tx_q->queue_id, slave_bufs[i], + slave_nb_bufs[i]); - tx_fail_total += slave_tx_fail_count; - memcpy(&bufs[nb_pkts - tx_fail_total], - &slave_bufs[i][num_tx_slave], - slave_tx_fail_count * sizeof(bufs[0])); + total_tx_count += slave_tx_count; + + /* If tx burst fails move packets to end of bufs */ + if (unlikely(slave_tx_count < slave_nb_bufs[i])) { + slave_tx_fail_count[i] = slave_nb_bufs[i] - + slave_tx_count; + total_tx_fail_count += slave_tx_fail_count[i]; + + /* + * Shift bufs to beginning of array to allow reordering + * later + */ + for (j = 0; j < slave_tx_fail_count[i]; j++) { + slave_bufs[i][j] = + slave_bufs[i][(slave_tx_count - 1) + j]; } + } + } - num_tx_total += num_tx_slave; + /* + * If there are tx burst failures we move packets to end of bufs to + * preserve expected PMD behaviour of all failed transmitted being + * at the end of the input mbuf array + */ + if (unlikely(total_tx_fail_count > 0)) { + int bufs_idx = nb_bufs - total_tx_fail_count - 1; + + for (i = 0; i < slave_count; i++) { + if (slave_tx_fail_count[i] > 0) { + for (j = 0; j < slave_tx_fail_count[i]; j++) + bufs[bufs_idx++] = slave_bufs[i][j]; + } } } - return num_tx_total; + return total_tx_count; } static uint16_t bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, - uint16_t nb_pkts) + uint16_t nb_bufs) { - struct bond_dev_private *internals; - struct bond_tx_queue *bd_tx_q; + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; - uint16_t num_of_slaves; - uint16_t slaves[RTE_MAX_ETHPORTS]; - /* positions in slaves, not ID */ - uint8_t distributing_offsets[RTE_MAX_ETHPORTS]; - uint8_t distributing_count; + uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t slave_count; - uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0; - uint16_t i, j, op_slave_idx; - const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1; + uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t dist_slave_count; - /* Allocate additional packets in case 8023AD mode. */ - struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size]; - void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL }; + /* 2-D array to sort mbufs for transmission on each slave into */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; + /* Number of mbufs for transmission on each slave */ + uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; + /* Mapping array generated by hash function to map mbufs to slaves */ + uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 }; - /* Total amount of packets in slave_bufs */ - uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; - /* Slow packets placed in each slave */ - uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t total_tx_count = 0, total_tx_fail_count = 0; - bd_tx_q = (struct bond_tx_queue *)queue; - internals = bd_tx_q->dev_private; + uint16_t i, j; + + if (unlikely(nb_bufs == 0)) + return 0; /* Copy slave list to protect against slave up/down changes during tx * bursting */ - num_of_slaves = internals->active_slave_count; - if (num_of_slaves < 1) - return num_tx_total; - - memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves); - - distributing_count = 0; - for (i = 0; i < num_of_slaves; i++) { - struct port *port = &mode_8023ad_ports[slaves[i]]; + slave_count = internals->active_slave_count; + if (unlikely(slave_count < 1)) + return 0; - slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring, - slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS, - NULL); - slave_nb_pkts[i] = slave_slow_nb_pkts[i]; + memcpy(slave_port_ids, internals->active_slaves, + sizeof(slave_port_ids[0]) * slave_count); - for (j = 0; j < slave_slow_nb_pkts[i]; j++) - slave_bufs[i][j] = slow_pkts[j]; + dist_slave_count = 0; + for (i = 0; i < slave_count; i++) { + struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; if (ACTOR_STATE(port, DISTRIBUTING)) - distributing_offsets[distributing_count++] = i; + dist_slave_port_ids[dist_slave_count++] = + slave_port_ids[i]; } - if (likely(distributing_count > 0)) { - /* Populate slaves mbuf with the packets which are to be sent on it */ - for (i = 0; i < nb_pkts; i++) { - /* Select output slave using hash based on xmit policy */ - op_slave_idx = internals->xmit_hash(bufs[i], distributing_count); + if (likely(dist_slave_count > 1)) { - /* Populate slave mbuf arrays with mbufs for that slave. Use only - * slaves that are currently distributing. */ - uint8_t slave_offset = distributing_offsets[op_slave_idx]; - slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i]; - slave_nb_pkts[slave_offset]++; + /* + * Populate slaves mbuf with the packets which are to be sent + * on it, selecting output slave using hash based on xmit policy + */ + internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count, + bufs_slave_port_idxs); + + for (i = 0; i < nb_bufs; i++) { + /* + * Populate slave mbuf arrays with mbufs for that + * slave + */ + uint8_t slave_idx = bufs_slave_port_idxs[i]; + + slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = + bufs[i]; } - } - /* Send packet burst on each slave device */ - for (i = 0; i < num_of_slaves; i++) { - if (slave_nb_pkts[i] == 0) - continue; - num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, - slave_bufs[i], slave_nb_pkts[i]); + /* Send packet burst on each slave device */ + for (i = 0; i < dist_slave_count; i++) { + if (slave_nb_bufs[i] == 0) + continue; - /* If tx burst fails drop slow packets */ - for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++) - rte_pktmbuf_free(slave_bufs[i][num_tx_slave]); + slave_tx_count = rte_eth_tx_burst( + dist_slave_port_ids[i], + bd_tx_q->queue_id, slave_bufs[i], + slave_nb_bufs[i]); + + total_tx_count += slave_tx_count; + + /* If tx burst fails move packets to end of bufs */ + if (unlikely(slave_tx_count < slave_nb_bufs[i])) { + slave_tx_fail_count[i] = slave_nb_bufs[i] - + slave_tx_count; + total_tx_fail_count += slave_tx_fail_count[i]; + + /* + * Shift bufs to beginning of array to allow + * reordering later + */ + for (j = 0; j < slave_tx_fail_count[i]; j++) + slave_bufs[i][j] = + slave_bufs[i] + [(slave_tx_count - 1) + + j]; + } + } - num_tx_total += num_tx_slave - slave_slow_nb_pkts[i]; - num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave; + /* + * If there are tx burst failures we move packets to end of + * bufs to preserve expected PMD behaviour of all failed + * transmitted being at the end of the input mbuf array + */ + if (unlikely(total_tx_fail_count > 0)) { + int bufs_idx = nb_bufs - total_tx_fail_count - 1; + + for (i = 0; i < slave_count; i++) { + if (slave_tx_fail_count[i] > 0) { + for (j = 0; + j < slave_tx_fail_count[i]; + j++) { + bufs[bufs_idx++] = + slave_bufs[i][j]; + } + } + } + } + } - /* If tx burst fails move packets to end of bufs */ - if (unlikely(num_tx_slave < slave_nb_pkts[i])) { - uint16_t j = nb_pkts - num_tx_fail_total; - for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++) - bufs[j] = slave_bufs[i][num_tx_slave]; + /* Check for LACP control packets and send if available */ + for (i = 0; i < slave_count; i++) { + struct port *port = &mode_8023ad_ports[slave_port_ids[i]]; + struct rte_mbuf *ctrl_pkt = NULL; + + if (likely(rte_ring_empty(port->tx_ring))) + continue; + + if (rte_ring_dequeue(port->tx_ring, + (void **)&ctrl_pkt) != -ENOENT) { + slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], + bd_tx_q->queue_id, &ctrl_pkt, 1); + /* + * re-enqueue LAG control plane packets to buffering + * ring if transmission fails so the packet isn't lost. + */ + if (slave_tx_count != 1) + rte_ring_enqueue(port->tx_ring, ctrl_pkt); } } - return num_tx_total; + return total_tx_count; } static uint16_t @@ -1500,7 +1614,8 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) case BONDING_MODE_BALANCE: case BONDING_MODE_BROADCAST: for (i = 0; i < internals->slave_count; i++) { - if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id], + if (rte_eth_dev_default_mac_addr_set( + internals->slaves[i].port_id, bonded_eth_dev->data->mac_addrs)) { RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", internals->slaves[i].port_id); @@ -1518,15 +1633,16 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) for (i = 0; i < internals->slave_count; i++) { if (internals->slaves[i].port_id == internals->current_primary_port) { - if (mac_address_set(&rte_eth_devices[internals->primary_port], + if (rte_eth_dev_default_mac_addr_set( + internals->primary_port, bonded_eth_dev->data->mac_addrs)) { RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", internals->current_primary_port); return -1; } } else { - if (mac_address_set( - &rte_eth_devices[internals->slaves[i].port_id], + if (rte_eth_dev_default_mac_addr_set( + internals->slaves[i].port_id, &internals->slaves[i].persisted_mac_addr)) { RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", internals->slaves[i].port_id); @@ -2476,7 +2592,7 @@ bond_ethdev_delayed_lsc_propagation(void *arg) return; _rte_eth_dev_callback_process((struct rte_eth_dev *)arg, - RTE_ETH_EVENT_INTR_LSC, NULL, NULL); + RTE_ETH_EVENT_INTR_LSC, NULL); } int @@ -2584,7 +2700,7 @@ bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type, else _rte_eth_dev_callback_process(bonded_eth_dev, RTE_ETH_EVENT_INTR_LSC, - NULL, NULL); + NULL); } else { if (internals->link_down_delay_ms > 0) @@ -2594,7 +2710,7 @@ bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type, else _rte_eth_dev_callback_process(bonded_eth_dev, RTE_ETH_EVENT_INTR_LSC, - NULL, NULL); + NULL); } } return 0; @@ -2707,6 +2823,41 @@ bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev, return 0; } +static int +bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct rte_eth_dev *slave_eth_dev; + struct bond_dev_private *internals = dev->data->dev_private; + int ret, i; + + rte_spinlock_lock(&internals->lock); + + for (i = 0; i < internals->slave_count; i++) { + slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id]; + if (*slave_eth_dev->dev_ops->mtu_set == NULL) { + rte_spinlock_unlock(&internals->lock); + return -ENOTSUP; + } + } + for (i = 0; i < internals->slave_count; i++) { + ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu); + if (ret < 0) { + rte_spinlock_unlock(&internals->lock); + return ret; + } + } + + rte_spinlock_unlock(&internals->lock); + return 0; +} + +static void +bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr) +{ + if (mac_address_set(dev, addr)) + RTE_BOND_LOG(ERR, "Failed to update MAC address"); +} + const struct eth_dev_ops default_dev_ops = { .dev_start = bond_ethdev_start, .dev_stop = bond_ethdev_stop, @@ -2726,7 +2877,9 @@ const struct eth_dev_ops default_dev_ops = { .reta_update = bond_ethdev_rss_reta_update, .reta_query = bond_ethdev_rss_reta_query, .rss_hash_update = bond_ethdev_rss_hash_update, - .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get + .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get, + .mtu_set = bond_ethdev_mtu_set, + .mac_addr_set = bond_ethdev_mac_address_set }; static int @@ -2769,7 +2922,7 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode) internals->mode = BONDING_MODE_INVALID; internals->current_primary_port = RTE_MAX_ETHPORTS + 1; internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; - internals->xmit_hash = xmit_l2_hash; + internals->burst_xmit_hash = burst_xmit_l2_hash; internals->user_defined_mac = 0; internals->link_status_polling_enabled = 0; |