diff options
Diffstat (limited to 'src')
98 files changed, 4785 insertions, 3240 deletions
diff --git a/src/cmake/platform/octeon9.cmake b/src/cmake/platform/octeon9.cmake new file mode 100644 index 00000000000..46ca7dfa64a --- /dev/null +++ b/src/cmake/platform/octeon9.cmake @@ -0,0 +1,4 @@ + +set(VPP_PLATFORM_CACHE_LINE_SIZE 128) +set(VPP_PLATFORM_MARCH_FLAGS -march=armv8.2-a+crc+crypto) +set(VPP_PLATFORM_BUFFER_ALIGN 128) diff --git a/src/plugins/builtinurl/builtinurl.api b/src/plugins/builtinurl/builtinurl.api index f292fd77a8e..80efa73c725 100644 --- a/src/plugins/builtinurl/builtinurl.api +++ b/src/plugins/builtinurl/builtinurl.api @@ -35,6 +35,7 @@ option version = "1.0.0"; */ autoreply define builtinurl_enable { + option deprecated="incorporated in http_static plugin"; /* Client identifier, set from api_main.my_client_index */ u32 client_index; diff --git a/src/plugins/dev_octeon/CMakeLists.txt b/src/plugins/dev_octeon/CMakeLists.txt index e8abf1a3389..c6271ecdfba 100644 --- a/src/plugins/dev_octeon/CMakeLists.txt +++ b/src/plugins/dev_octeon/CMakeLists.txt @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright(c) 2022 Cisco Systems, Inc. -if (NOT VPP_PLATFORM_NAME STREQUAL "octeon10") +if (NOT VPP_PLATFORM_NAME STREQUAL "octeon10" AND NOT VPP_PLATFORM_NAME STREQUAL "octeon9") return() endif() @@ -21,6 +21,10 @@ endif() include_directories (${OCTEON_ROC_DIR}/) +if (VPP_PLATFORM_NAME STREQUAL "octeon9") + add_compile_definitions(PLATFORM_OCTEON9) +endif() + add_vpp_plugin(dev_octeon SOURCES init.c @@ -31,6 +35,7 @@ add_vpp_plugin(dev_octeon rx_node.c tx_node.c flow.c + counter.c MULTIARCH_SOURCES rx_node.c diff --git a/src/plugins/dev_octeon/counter.c b/src/plugins/dev_octeon/counter.c new file mode 100644 index 00000000000..dd73684c386 --- /dev/null +++ b/src/plugins/dev_octeon/counter.c @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2024 Marvell. 
+ * SPDX-License-Identifier: Apache-2.0 + * https://spdx.org/licenses/Apache-2.0.html + */ + +#include <vnet/vnet.h> +#include <vnet/dev/dev.h> +#include <vnet/dev/pci.h> +#include <vnet/dev/counters.h> +#include <dev_octeon/octeon.h> +#include <dev_octeon/common.h> + +VLIB_REGISTER_LOG_CLASS (oct_log, static) = { + .class_name = "oct", + .subclass_name = "counters", +}; + +typedef enum +{ + OCT_PORT_CTR_RX_BYTES, + OCT_PORT_CTR_TX_BYTES, + OCT_PORT_CTR_RX_PACKETS, + OCT_PORT_CTR_TX_PACKETS, + OCT_PORT_CTR_RX_DROPS, + OCT_PORT_CTR_TX_DROPS, + OCT_PORT_CTR_RX_DROP_BYTES, + OCT_PORT_CTR_RX_UCAST, + OCT_PORT_CTR_TX_UCAST, + OCT_PORT_CTR_RX_MCAST, + OCT_PORT_CTR_TX_MCAST, + OCT_PORT_CTR_RX_BCAST, + OCT_PORT_CTR_TX_BCAST, + OCT_PORT_CTR_RX_FCS, + OCT_PORT_CTR_RX_ERR, + OCT_PORT_CTR_RX_DROP_MCAST, + OCT_PORT_CTR_RX_DROP_BCAST, + OCT_PORT_CTR_RX_DROP_L3_MCAST, + OCT_PORT_CTR_RX_DROP_L3_BCAST, +} oct_port_counter_id_t; + +vnet_dev_counter_t oct_port_counters[] = { + VNET_DEV_CTR_RX_BYTES (OCT_PORT_CTR_RX_BYTES), + VNET_DEV_CTR_RX_PACKETS (OCT_PORT_CTR_RX_PACKETS), + VNET_DEV_CTR_RX_DROPS (OCT_PORT_CTR_RX_DROPS), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_BYTES, RX, BYTES, "drop bytes"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_UCAST, RX, PACKETS, "unicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_MCAST, RX, PACKETS, "multicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_BCAST, RX, PACKETS, "broadcast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_FCS, RX, PACKETS, "fcs"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_ERR, RX, PACKETS, "error"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_MCAST, RX, PACKETS, + "drop multicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_BCAST, RX, PACKETS, + "drop broadcast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_L3_MCAST, RX, PACKETS, + "drop L3 multicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_L3_BCAST, RX, PACKETS, + "drop L3 broadcast"), + + VNET_DEV_CTR_TX_BYTES (OCT_PORT_CTR_TX_BYTES), + VNET_DEV_CTR_TX_PACKETS 
(OCT_PORT_CTR_TX_PACKETS), + VNET_DEV_CTR_TX_DROPS (OCT_PORT_CTR_TX_DROPS), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_UCAST, TX, PACKETS, "unicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_MCAST, TX, PACKETS, "multicast"), + VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_BCAST, TX, PACKETS, "broadcast"), +}; + +typedef enum +{ + OCT_RXQ_CTR_BYTES, + OCT_RXQ_CTR_PKTS, + OCT_RXQ_CTR_DROPS, + OCT_RXQ_CTR_DROP_BYTES, + OCT_RXQ_CTR_ERR, +} oct_rxq_counter_id_t; + +vnet_dev_counter_t oct_rxq_counters[] = { + VNET_DEV_CTR_RX_BYTES (OCT_RXQ_CTR_BYTES), + VNET_DEV_CTR_RX_PACKETS (OCT_RXQ_CTR_PKTS), + VNET_DEV_CTR_RX_DROPS (OCT_RXQ_CTR_DROPS), + VNET_DEV_CTR_VENDOR (OCT_RXQ_CTR_DROP_BYTES, RX, BYTES, "drop bytes"), + VNET_DEV_CTR_VENDOR (OCT_RXQ_CTR_ERR, RX, PACKETS, "error"), +}; + +typedef enum +{ + OCT_TXQ_CTR_BYTES, + OCT_TXQ_CTR_PKTS, + OCT_TXQ_CTR_DROPS, + OCT_TXQ_CTR_DROP_BYTES, +} oct_txq_counter_id_t; + +vnet_dev_counter_t oct_txq_counters[] = { + VNET_DEV_CTR_TX_BYTES (OCT_TXQ_CTR_BYTES), + VNET_DEV_CTR_TX_PACKETS (OCT_TXQ_CTR_PKTS), + VNET_DEV_CTR_TX_DROPS (OCT_TXQ_CTR_DROPS), + VNET_DEV_CTR_VENDOR (OCT_TXQ_CTR_DROP_BYTES, TX, BYTES, "drop bytes"), +}; + +static vnet_dev_rv_t +oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...) 
+{ + u8 *s = 0; + va_list va; + + va_start (va, fmt); + s = va_format (s, fmt, &va); + va_end (va); + + log_err (dev, "%v - ROC error %s (%d)", s, roc_error_msg_get (rv), rv); + + vec_free (s); + return VNET_DEV_ERR_INTERNAL; +} + +void +oct_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_port_add_counters (vm, port, oct_port_counters, + ARRAY_LEN (oct_port_counters)); + + foreach_vnet_dev_port_rx_queue (rxq, port) + { + vnet_dev_rx_queue_add_counters (vm, rxq, oct_rxq_counters, + ARRAY_LEN (oct_rxq_counters)); + } + + foreach_vnet_dev_port_tx_queue (txq, port) + { + vnet_dev_tx_queue_add_counters (vm, txq, oct_txq_counters, + ARRAY_LEN (oct_txq_counters)); + } +} + +vnet_dev_rv_t +oct_port_get_stats (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + struct roc_nix_stats stats; + + if ((rrv = roc_nix_stats_get (nix, &stats))) + return oct_roc_err (dev, rrv, "roc_nix_stats_get() failed"); + + foreach_vnet_dev_counter (c, port->counter_main) + { + switch (c->user_data) + { + case OCT_PORT_CTR_RX_BYTES: + vnet_dev_counter_value_update (vm, c, stats.rx_octs); + break; + case OCT_PORT_CTR_TX_BYTES: + vnet_dev_counter_value_update (vm, c, stats.tx_octs); + break; + case OCT_PORT_CTR_RX_PACKETS: + vnet_dev_counter_value_update ( + vm, c, stats.rx_ucast + stats.rx_bcast + stats.rx_mcast); + break; + case OCT_PORT_CTR_TX_PACKETS: + vnet_dev_counter_value_update ( + vm, c, stats.tx_ucast + stats.tx_bcast + stats.tx_mcast); + break; + case OCT_PORT_CTR_RX_DROPS: + vnet_dev_counter_value_update (vm, c, stats.rx_drop); + break; + case OCT_PORT_CTR_TX_DROPS: + vnet_dev_counter_value_update (vm, c, stats.tx_drop); + break; + case OCT_PORT_CTR_RX_DROP_BYTES: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_octs); + break; + case OCT_PORT_CTR_RX_UCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_ucast); + break; + case 
OCT_PORT_CTR_TX_UCAST: + vnet_dev_counter_value_update (vm, c, stats.tx_ucast); + break; + case OCT_PORT_CTR_RX_MCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_mcast); + break; + case OCT_PORT_CTR_TX_MCAST: + vnet_dev_counter_value_update (vm, c, stats.tx_mcast); + break; + case OCT_PORT_CTR_RX_BCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_bcast); + break; + case OCT_PORT_CTR_TX_BCAST: + vnet_dev_counter_value_update (vm, c, stats.tx_bcast); + break; + case OCT_PORT_CTR_RX_FCS: + vnet_dev_counter_value_update (vm, c, stats.rx_fcs); + break; + case OCT_PORT_CTR_RX_ERR: + vnet_dev_counter_value_update (vm, c, stats.rx_err); + break; + case OCT_PORT_CTR_RX_DROP_MCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_mcast); + break; + case OCT_PORT_CTR_RX_DROP_BCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_bcast); + break; + case OCT_PORT_CTR_RX_DROP_L3_MCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_l3_mcast); + break; + case OCT_PORT_CTR_RX_DROP_L3_BCAST: + vnet_dev_counter_value_update (vm, c, stats.rx_drop_l3_bcast); + break; + default: + ASSERT (0); + } + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +oct_rxq_get_stats (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_rx_queue_t *rxq) +{ + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + struct roc_nix_stats_queue qstats; + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_get (nix, crq->rq.qid, 1, &qstats))) + return oct_roc_err (dev, rrv, "roc_nix_stats_queue_get() failed"); + + foreach_vnet_dev_counter (c, rxq->counter_main) + { + switch (c->user_data) + { + case OCT_RXQ_CTR_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.rx_octs); + break; + case OCT_RXQ_CTR_PKTS: + vnet_dev_counter_value_update (vm, c, qstats.rx_pkts); + break; + case OCT_RXQ_CTR_DROPS: + vnet_dev_counter_value_update (vm, c, qstats.rx_drop_pkts); + break; + case 
OCT_RXQ_CTR_DROP_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.rx_drop_octs); + break; + case OCT_RXQ_CTR_ERR: + vnet_dev_counter_value_update (vm, c, qstats.rx_error_pkts); + break; + default: + ASSERT (0); + } + } + + return VNET_DEV_OK; +} + +vnet_dev_rv_t +oct_txq_get_stats (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_tx_queue_t *txq) +{ + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + struct roc_nix_stats_queue qstats; + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_get (nix, ctq->sq.qid, 0, &qstats))) + return oct_roc_err (dev, rrv, "roc_nix_stats_queue_get() failed"); + + foreach_vnet_dev_counter (c, txq->counter_main) + { + switch (c->user_data) + { + case OCT_TXQ_CTR_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.tx_octs); + break; + case OCT_TXQ_CTR_PKTS: + vnet_dev_counter_value_update (vm, c, qstats.tx_pkts); + break; + case OCT_TXQ_CTR_DROPS: + vnet_dev_counter_value_update (vm, c, qstats.tx_drop_pkts); + break; + case OCT_TXQ_CTR_DROP_BYTES: + vnet_dev_counter_value_update (vm, c, qstats.tx_drop_octs); + break; + default: + ASSERT (0); + } + } + + return VNET_DEV_OK; +} + +void +oct_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_reset (nix))) + oct_roc_err (dev, rrv, "roc_nix_stats_reset() failed"); +} + +void +oct_rxq_clear_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) +{ + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + vnet_dev_t *dev = rxq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_reset (nix, crq->rq.qid, 1))) + oct_roc_err (dev, rrv, + "roc_nix_stats_queue_reset() failed for rx queue %u", + rxq->queue_id); +} + +void +oct_txq_clear_counters 
(vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_t *dev = txq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + int rrv; + + if ((rrv = roc_nix_stats_queue_reset (nix, ctq->sq.qid, 0))) + oct_roc_err (dev, rrv, + "roc_nix_stats_queue_reset() failed for tx queue %u", + txq->queue_id); +} diff --git a/src/plugins/dev_octeon/flow.c b/src/plugins/dev_octeon/flow.c index 1c367a036ab..35aabde76a7 100644 --- a/src/plugins/dev_octeon/flow.c +++ b/src/plugins/dev_octeon/flow.c @@ -46,6 +46,8 @@ VLIB_REGISTER_LOG_CLASS (oct_log, static) = { (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \ (f->type == VNET_FLOW_TYPE_IP4_GTPU)) +#define FLOW_IS_GENERIC_TYPE(f) (f->type == VNET_FLOW_TYPE_GENERIC) + #define OCT_FLOW_UNSUPPORTED_ACTIONS(f) \ ((f->actions == VNET_FLOW_ACTION_BUFFER_ADVANCE) || \ (f->actions == VNET_FLOW_ACTION_REDIRECT_TO_NODE)) @@ -71,6 +73,9 @@ VLIB_REGISTER_LOG_CLASS (oct_log, static) = { _ (62, FLOW_KEY_TYPE_L3_DST, "l3-dst-only") \ _ (63, FLOW_KEY_TYPE_L3_SRC, "l3-src-only") +#define GTPU_PORT 2152 +#define VXLAN_PORT 4789 + typedef struct { u16 src_port; @@ -87,6 +92,27 @@ typedef struct u32 teid; } gtpu_header_t; +typedef struct +{ + u8 layer; + u16 nxt_proto; + vnet_dev_port_t *port; + struct roc_npc_item_info *items; + struct + { + u8 *spec; + u8 *mask; + u16 off; + } oct_drv; + struct + { + u8 *spec; + u8 *mask; + u16 off; + u16 len; + } generic; +} oct_flow_parse_state; + static void oct_flow_convert_rss_types (u64 *key, u64 rss_types) { @@ -183,6 +209,320 @@ oct_flow_rule_create (vnet_dev_port_t *port, struct roc_npc_action *actions, return VNET_DEV_OK; } +static int +oct_parse_l2 (oct_flow_parse_state *pst) +{ + struct roc_npc_flow_item_eth *eth_spec = + (struct roc_npc_flow_item_eth *) &pst->oct_drv.spec[pst->oct_drv.off]; + struct roc_npc_flow_item_eth *eth_mask = + (struct roc_npc_flow_item_eth *) &pst->oct_drv.mask[pst->oct_drv.off]; + 
ethernet_header_t *eth_hdr_mask = + (ethernet_header_t *) &pst->generic.mask[pst->generic.off]; + ethernet_header_t *eth_hdr = + (ethernet_header_t *) &pst->generic.spec[pst->generic.off]; + u16 tpid, etype; + + tpid = etype = clib_net_to_host_u16 (eth_hdr->type); + clib_memcpy_fast (eth_spec, eth_hdr, sizeof (ethernet_header_t)); + clib_memcpy_fast (eth_mask, eth_hdr_mask, sizeof (ethernet_header_t)); + eth_spec->has_vlan = 0; + + pst->items[pst->layer].spec = (void *) eth_spec; + pst->items[pst->layer].mask = (void *) eth_mask; + pst->items[pst->layer].size = sizeof (ethernet_header_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_ETH; + pst->generic.off += sizeof (ethernet_header_t); + pst->oct_drv.off += sizeof (struct roc_npc_flow_item_eth); + pst->layer++; + + /* Parse VLAN Tags if any */ + struct roc_npc_flow_item_vlan *vlan_spec = + (struct roc_npc_flow_item_vlan *) &pst->oct_drv.spec[pst->oct_drv.off]; + struct roc_npc_flow_item_vlan *vlan_mask = + (struct roc_npc_flow_item_vlan *) &pst->oct_drv.mask[pst->oct_drv.off]; + ethernet_vlan_header_t *vlan_hdr, *vlan_hdr_mask; + u8 vlan_cnt = 0; + + while (tpid == ETHERNET_TYPE_DOT1AD || tpid == ETHERNET_TYPE_VLAN) + { + if (pst->generic.off >= pst->generic.len) + break; + + vlan_hdr = + (ethernet_vlan_header_t *) &pst->generic.spec[pst->generic.off]; + vlan_hdr_mask = + (ethernet_vlan_header_t *) &pst->generic.mask[pst->generic.off]; + tpid = etype = clib_net_to_host_u16 (vlan_hdr->type); + clib_memcpy (&vlan_spec[vlan_cnt], vlan_hdr, + sizeof (ethernet_vlan_header_t)); + clib_memcpy (&vlan_mask[vlan_cnt], vlan_hdr_mask, + sizeof (ethernet_vlan_header_t)); + pst->items[pst->layer].spec = (void *) &vlan_spec[vlan_cnt]; + pst->items[pst->layer].mask = (void *) &vlan_mask[vlan_cnt]; + pst->items[pst->layer].size = sizeof (ethernet_vlan_header_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_VLAN; + pst->generic.off += sizeof (ethernet_vlan_header_t); + pst->oct_drv.off += sizeof (struct 
roc_npc_flow_item_vlan); + pst->layer++; + vlan_cnt++; + } + + /* Inner most vlan tag */ + if (vlan_cnt) + vlan_spec[vlan_cnt - 1].has_more_vlan = 0; + + pst->nxt_proto = etype; + return 0; +} + +static int +oct_parse_l3 (oct_flow_parse_state *pst) +{ + + if (pst->generic.off >= pst->generic.len || pst->nxt_proto == 0) + return 0; + + if (pst->nxt_proto == ETHERNET_TYPE_MPLS) + { + int label_stack_bottom = 0; + do + { + + u8 *mpls_spec = &pst->generic.spec[pst->generic.off]; + u8 *mpls_mask = &pst->generic.mask[pst->generic.off]; + + label_stack_bottom = mpls_spec[2] & 1; + pst->items[pst->layer].spec = (void *) mpls_spec; + pst->items[pst->layer].mask = (void *) mpls_mask; + pst->items[pst->layer].size = sizeof (u32); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_MPLS; + pst->generic.off += sizeof (u32); + pst->layer++; + } + while (!label_stack_bottom && pst->generic.off < pst->generic.len); + + pst->nxt_proto = 0; + return 0; + } + else if (pst->nxt_proto == ETHERNET_TYPE_IP4) + { + ip4_header_t *ip4_spec = + (ip4_header_t *) &pst->generic.spec[pst->generic.off]; + ip4_header_t *ip4_mask = + (ip4_header_t *) &pst->generic.mask[pst->generic.off]; + pst->items[pst->layer].spec = (void *) ip4_spec; + pst->items[pst->layer].mask = (void *) ip4_mask; + pst->items[pst->layer].size = sizeof (ip4_header_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV4; + pst->generic.off += sizeof (ip4_header_t); + pst->layer++; + pst->nxt_proto = ip4_spec->protocol; + } + else if (pst->nxt_proto == ETHERNET_TYPE_IP6) + { + struct roc_npc_flow_item_ipv6 *ip6_spec = + (struct roc_npc_flow_item_ipv6 *) &pst->oct_drv.spec[pst->oct_drv.off]; + struct roc_npc_flow_item_ipv6 *ip6_mask = + (struct roc_npc_flow_item_ipv6 *) &pst->oct_drv.mask[pst->oct_drv.off]; + ip6_header_t *ip6_hdr_mask = + (ip6_header_t *) &pst->generic.mask[pst->generic.off]; + ip6_header_t *ip6_hdr = + (ip6_header_t *) &pst->generic.spec[pst->generic.off]; + u8 nxt_hdr = ip6_hdr->protocol; + + clib_memcpy (ip6_spec, ip6_hdr, sizeof 
(ip6_header_t)); + clib_memcpy (ip6_mask, ip6_hdr_mask, sizeof (ip6_header_t)); + pst->items[pst->layer].spec = (void *) ip6_spec; + pst->items[pst->layer].mask = (void *) ip6_mask; + pst->items[pst->layer].size = sizeof (ip6_header_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6; + pst->generic.off += sizeof (ip6_header_t); + pst->oct_drv.off += sizeof (struct roc_npc_flow_item_ipv6); + pst->layer++; + + while (nxt_hdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS || + nxt_hdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS || + nxt_hdr == IP_PROTOCOL_IPV6_ROUTE) + { + if (pst->generic.off >= pst->generic.len) + return 0; + + ip6_ext_header_t *ip6_ext_spec = + (ip6_ext_header_t *) &pst->generic.spec[pst->generic.off]; + ip6_ext_header_t *ip6_ext_mask = + (ip6_ext_header_t *) &pst->generic.mask[pst->generic.off]; + nxt_hdr = ip6_ext_spec->next_hdr; + + pst->items[pst->layer].spec = (void *) ip6_ext_spec; + pst->items[pst->layer].mask = (void *) ip6_ext_mask; + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6_EXT; + pst->generic.off += ip6_ext_header_len (ip6_ext_spec); + pst->layer++; + } + + if (pst->generic.off >= pst->generic.len) + return 0; + + if (nxt_hdr == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + ip6_frag_hdr_t *ip6_ext_frag_spec = + (ip6_frag_hdr_t *) &pst->generic.spec[pst->generic.off]; + ip6_frag_hdr_t *ip6_ext_frag_mask = + (ip6_frag_hdr_t *) &pst->generic.mask[pst->generic.off]; + + pst->items[pst->layer].spec = (void *) ip6_ext_frag_spec; + pst->items[pst->layer].mask = (void *) ip6_ext_frag_mask; + pst->items[pst->layer].size = sizeof (ip6_frag_hdr_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6_FRAG_EXT; + pst->generic.off += sizeof (ip6_frag_hdr_t); + pst->layer++; + } + + pst->nxt_proto = nxt_hdr; + } + /* Unsupported L3. 
*/ + else + return -1; + + return 0; +} + +static int +oct_parse_l4 (oct_flow_parse_state *pst) +{ + + if (pst->generic.off >= pst->generic.len || pst->nxt_proto == 0) + return 0; + +#define _(protocol_t, protocol_value, ltype) \ + if (pst->nxt_proto == protocol_value) \ + \ + { \ + \ + protocol_t *spec = (protocol_t *) &pst->generic.spec[pst->generic.off]; \ + protocol_t *mask = (protocol_t *) &pst->generic.mask[pst->generic.off]; \ + pst->items[pst->layer].spec = spec; \ + pst->items[pst->layer].mask = mask; \ + \ + pst->items[pst->layer].size = sizeof (protocol_t); \ + \ + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_##ltype; \ + pst->generic.off += sizeof (protocol_t); \ + pst->layer++; \ + return 0; \ + } + + _ (esp_header_t, IP_PROTOCOL_IPSEC_ESP, ESP) + _ (udp_header_t, IP_PROTOCOL_UDP, UDP) + _ (tcp_header_t, IP_PROTOCOL_TCP, TCP) + _ (sctp_header_t, IP_PROTOCOL_SCTP, SCTP) + _ (icmp46_header_t, IP_PROTOCOL_ICMP, ICMP) + _ (icmp46_header_t, IP_PROTOCOL_ICMP6, ICMP) + _ (igmp_header_t, IP_PROTOCOL_IGMP, IGMP) + _ (gre_header_t, IP_PROTOCOL_GRE, GRE) + + /* Unsupported L4. 
*/ + return -1; +} + +static int +oct_parse_tunnel (oct_flow_parse_state *pst) +{ + if (pst->generic.off >= pst->generic.len) + return 0; + + if (pst->items[pst->layer - 1].type == ROC_NPC_ITEM_TYPE_GRE) + { + gre_header_t *gre_hdr = (gre_header_t *) pst->items[pst->layer - 1].spec; + pst->nxt_proto = clib_net_to_host_u16 (gre_hdr->protocol); + goto parse_l3; + } + + else if (pst->items[pst->layer - 1].type == ROC_NPC_ITEM_TYPE_UDP) + { + udp_header_t *udp_h = (udp_header_t *) pst->items[pst->layer - 1].spec; + u16 dport = clib_net_to_host_u16 (udp_h->dst_port); + + if (dport == GTPU_PORT) + { + gtpu_header_t *gtpu_spec = + (gtpu_header_t *) &pst->generic.spec[pst->generic.off]; + gtpu_header_t *gtpu_mask = + (gtpu_header_t *) &pst->generic.mask[pst->generic.off]; + pst->items[pst->layer].spec = (void *) gtpu_spec; + pst->items[pst->layer].mask = (void *) gtpu_mask; + pst->items[pst->layer].size = sizeof (gtpu_header_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_GTPU; + pst->generic.off += sizeof (gtpu_header_t); + pst->layer++; + pst->nxt_proto = 0; + return 0; + } + else if (dport == VXLAN_PORT) + { + vxlan_header_t *vxlan_spec = + (vxlan_header_t *) &pst->generic.spec[pst->generic.off]; + vxlan_header_t *vxlan_mask = + (vxlan_header_t *) &pst->generic.mask[pst->generic.off]; + pst->items[pst->layer].spec = (void *) vxlan_spec; + pst->items[pst->layer].mask = (void *) vxlan_mask; + pst->items[pst->layer].size = sizeof (vxlan_header_t); + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_VXLAN; + pst->generic.off += sizeof (vxlan_header_t); + pst->layer++; + pst->nxt_proto = 0; + goto parse_l2; + } + } + /* No supported Tunnel detected. 
*/ + else + { + log_err (pst->port->dev, + "Partially parsed till offset %u, not able to parse further", + pst->generic.off); + return 0; + } +parse_l2: + if (oct_parse_l2 (pst)) + return -1; +parse_l3: + if (oct_parse_l3 (pst)) + return -1; + + return oct_parse_l4 (pst); +} + +static vnet_dev_rv_t +oct_flow_generic_pattern_parse (oct_flow_parse_state *pst) +{ + + if (oct_parse_l2 (pst)) + goto err; + + if (oct_parse_l3 (pst)) + goto err; + + if (oct_parse_l4 (pst)) + goto err; + + if (oct_parse_tunnel (pst)) + goto err; + + if (pst->generic.off < pst->generic.len) + { + log_err (pst->port->dev, + "Partially parsed till offset %u, not able to parse further", + pst->generic.off); + goto err; + } + + pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_END; + return VNET_DEV_OK; + +err: + return VNET_DEV_ERR_NOT_SUPPORTED; +} + static vnet_dev_rv_t oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, uword *private_data) @@ -196,6 +536,7 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, struct roc_npc_action_queue conf = {}; struct roc_npc_action_mark mark = {}; struct roc_npc *npc = &oct_port->npc; + u8 *flow_spec = 0, *flow_mask = 0; vnet_dev_rv_t rv = VNET_DEV_OK; int layer = 0, index = 0; u16 *queues = NULL; @@ -203,6 +544,45 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, u8 proto = 0; u16 action = 0; + if (FLOW_IS_GENERIC_TYPE (flow)) + { + u8 drv_item_spec[1024] = { 0 }, drv_item_mask[1024] = { 0 }; + unformat_input_t input; + int rc; + + unformat_init_string ( + &input, (const char *) flow->generic.pattern.spec, + strlen ((const char *) flow->generic.pattern.spec)); + unformat_user (&input, unformat_hex_string, &flow_spec); + unformat_free (&input); + + unformat_init_string ( + &input, (const char *) flow->generic.pattern.mask, + strlen ((const char *) flow->generic.pattern.mask)); + unformat_user (&input, unformat_hex_string, &flow_mask); + unformat_free (&input); + + oct_flow_parse_state 
pst = { + .nxt_proto = 0, + .port = port, + .items = item_info, + .oct_drv = { .spec = drv_item_spec, .mask = drv_item_mask }, + .generic = { .spec = flow_spec, + .mask = flow_mask, + .len = vec_len (flow_spec) }, + }; + + rc = oct_flow_generic_pattern_parse (&pst); + if (rc) + { + vec_free (flow_spec); + vec_free (flow_mask); + return VNET_DEV_ERR_NOT_SUPPORTED; + } + + goto parse_flow_actions; + } + if (FLOW_IS_ETHERNET_CLASS (flow)) { ethernet_header_t eth_spec = { .type = clib_host_to_net_u16 ( @@ -357,6 +737,7 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow, end_item_info: item_info[layer].type = ROC_NPC_ITEM_TYPE_END; +parse_flow_actions: if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE) { conf.index = flow->redirect_queue; @@ -422,6 +803,11 @@ end_item_info: if (queues) clib_mem_free (queues); + if (flow_spec) + vec_free (flow_spec); + if (flow_mask) + vec_free (flow_mask); + return rv; } diff --git a/src/plugins/dev_octeon/format.c b/src/plugins/dev_octeon/format.c index e624b84f54e..d0f53013d99 100644 --- a/src/plugins/dev_octeon/format.c +++ b/src/plugins/dev_octeon/format.c @@ -25,7 +25,7 @@ format_oct_nix_rx_cqe_desc (u8 *s, va_list *args) typeof (d->sg0) *sg0 = &d->sg0; typeof (d->sg0) *sg1 = &d->sg1; - s = format (s, "hdr: cqe_type %u nude %u q %u tag 0x%x", h->cqe_type, + s = format (s, "hdr: cqe_type %u node %u qid %u tag 0x%x", h->cqe_type, h->node, h->q, h->tag); s = format (s, "\n%Uparse:", format_white_space, indent); #define _(n, f) s = format (s, " " #n " " f, p->n) diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c index 97a11e0d0d7..87ed8317277 100644 --- a/src/plugins/dev_octeon/init.c +++ b/src/plugins/dev_octeon/init.c @@ -141,6 +141,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) .config_change_validate = oct_port_cfg_change_validate, .format_status = format_oct_port_status, .format_flow = format_oct_port_flow, + .clear_counters = oct_port_clear_counters, }, .data_size = 
sizeof (oct_port_t), .initial_data = &oct_port, @@ -159,6 +160,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) .alloc = oct_rx_queue_alloc, .free = oct_rx_queue_free, .format_info = format_oct_rxq_info, + .clear_counters = oct_rxq_clear_counters, }, }, .tx_queue = { @@ -173,6 +175,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) .alloc = oct_tx_queue_alloc, .free = oct_tx_queue_free, .format_info = format_oct_txq_info, + .clear_counters = oct_txq_clear_counters, }, }, }; @@ -245,6 +248,7 @@ oct_init (vlib_main_t *vm, vnet_dev_t *dev) { case OCT_DEVICE_TYPE_RVU_PF: case OCT_DEVICE_TYPE_RVU_VF: + case OCT_DEVICE_TYPE_LBK_VF: case OCT_DEVICE_TYPE_SDP_VF: return oct_init_nix (vm, dev); diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h index e43cde0a35f..a87a5e3e1ed 100644 --- a/src/plugins/dev_octeon/octeon.h +++ b/src/plugins/dev_octeon/octeon.h @@ -12,6 +12,12 @@ #include <vnet/flow/flow.h> #include <vnet/udp/udp.h> #include <vnet/ipsec/esp.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/icmp46_packet.h> +#include <vnet/ip/igmp_packet.h> +#include <vnet/gre/packet.h> +#include <vxlan/vxlan.h> #include <base/roc_api.h> #include <dev_octeon/hw_defs.h> @@ -141,6 +147,17 @@ vnet_dev_rv_t oct_flow_validate_params (vlib_main_t *, vnet_dev_port_t *, vnet_dev_rv_t oct_flow_query (vlib_main_t *, vnet_dev_port_t *, u32, uword, u64 *); +/* counter.c */ +void oct_port_add_counters (vlib_main_t *, vnet_dev_port_t *); +void oct_port_clear_counters (vlib_main_t *, vnet_dev_port_t *); +void oct_rxq_clear_counters (vlib_main_t *, vnet_dev_rx_queue_t *); +void oct_txq_clear_counters (vlib_main_t *, vnet_dev_tx_queue_t *); +vnet_dev_rv_t oct_port_get_stats (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_rv_t oct_rxq_get_stats (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_rx_queue_t *); +vnet_dev_rv_t oct_txq_get_stats (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_tx_queue_t *); + #define log_debug(dev, f, ...) 
\ vlib_log (VLIB_LOG_LEVEL_DEBUG, oct_log.class, "%U: " f, \ format_vnet_dev_addr, (dev), ##__VA_ARGS__) diff --git a/src/plugins/dev_octeon/port.c b/src/plugins/dev_octeon/port.c index d5f78301adf..a53fa256884 100644 --- a/src/plugins/dev_octeon/port.c +++ b/src/plugins/dev_octeon/port.c @@ -124,6 +124,8 @@ oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port) return rv; } + oct_port_add_counters (vm, port); + return VNET_DEV_OK; } @@ -172,6 +174,21 @@ oct_port_poll (vlib_main_t *vm, vnet_dev_port_t *port) vnet_dev_port_state_changes_t changes = {}; int rrv; + if (oct_port_get_stats (vm, port)) + return; + + foreach_vnet_dev_port_rx_queue (q, port) + { + if (oct_rxq_get_stats (vm, port, q)) + return; + } + + foreach_vnet_dev_port_tx_queue (q, port) + { + if (oct_txq_get_stats (vm, port, q)) + return; + } + if (roc_nix_is_lbk (nix)) { link_info.status = 1; @@ -203,7 +220,8 @@ oct_port_poll (vlib_main_t *vm, vnet_dev_port_t *port) if (cd->speed != link_info.speed) { changes.change.link_speed = 1; - changes.link_speed = link_info.speed; + /* Convert to Kbps */ + changes.link_speed = link_info.speed * 1000; cd->speed = link_info.speed; } @@ -385,7 +403,7 @@ oct_validate_config_promisc_mode (vnet_dev_port_t *port, int enable) oct_device_t *cd = vnet_dev_get_data (dev); struct roc_nix *nix = cd->nix; - if (roc_nix_is_vf_or_sdp (nix)) + if (roc_nix_is_sdp (nix) || roc_nix_is_lbk (nix)) return VNET_DEV_ERR_UNSUPPORTED_DEVICE; return VNET_DEV_OK; @@ -405,6 +423,9 @@ oct_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enable) return oct_roc_err (dev, rv, "roc_nix_npc_promisc_ena_dis failed"); } + if (!roc_nix_is_pf (nix)) + return VNET_DEV_OK; + rv = roc_nix_mac_promisc_mode_enable (nix, enable); if (rv) { @@ -416,6 +437,44 @@ oct_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enable) return VNET_DEV_OK; } +static vnet_dev_rv_t +oct_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_hw_addr_t *addr, int 
is_add, + int is_primary) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + vnet_dev_rv_t rv = VNET_DEV_OK; + + i32 rrv; + + if (is_primary) + { + if (is_add) + { + /* Update mac address at NPC */ + rrv = roc_nix_npc_mac_addr_set (nix, (u8 *) addr); + if (rrv) + rv = oct_roc_err (dev, rrv, "roc_nix_npc_mac_addr_set() failed"); + + /* Update mac address at CGX for PFs only */ + if (!roc_nix_is_vf_or_sdp (nix)) + { + rrv = roc_nix_mac_addr_set (nix, (u8 *) addr); + if (rrv) + { + /* Rollback to previous mac address */ + roc_nix_npc_mac_addr_set (nix, + (u8 *) &port->primary_hw_addr); + rv = oct_roc_err (dev, rrv, "roc_nix_mac_addr_set() failed"); + } + } + } + } + return rv; +} + vnet_dev_rv_t oct_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_port_cfg_change_req_t *req) @@ -465,6 +524,9 @@ oct_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, break; case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: + rv = oct_port_add_del_eth_addr (vm, port, &req->addr, + /* is_add */ 1, + /* is_primary */ 1); break; case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: diff --git a/src/plugins/dev_octeon/roc_helper.c b/src/plugins/dev_octeon/roc_helper.c index f10c2cb578b..16e0a871a9d 100644 --- a/src/plugins/dev_octeon/roc_helper.c +++ b/src/plugins/dev_octeon/roc_helper.c @@ -49,6 +49,12 @@ oct_plt_get_thread_index (void) return __os_thread_index; } +static u64 +oct_plt_get_cache_line_size (void) +{ + return CLIB_CACHE_LINE_BYTES; +} + static void oct_drv_physmem_free (vlib_main_t *vm, void *mem) { @@ -178,4 +184,5 @@ oct_plt_init_param_t oct_plt_init_param = { .oct_plt_spinlock_unlock = oct_plt_spinlock_unlock, .oct_plt_spinlock_trylock = oct_plt_spinlock_trylock, .oct_plt_get_thread_index = oct_plt_get_thread_index, + .oct_plt_get_cache_line_size = oct_plt_get_cache_line_size, }; diff --git a/src/plugins/dev_octeon/rx_node.c b/src/plugins/dev_octeon/rx_node.c index 997f1356199..1f8d5d93fa3 
100644 --- a/src/plugins/dev_octeon/rx_node.c +++ b/src/plugins/dev_octeon/rx_node.c @@ -165,6 +165,38 @@ oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, return n; } +#ifdef PLATFORM_OCTEON9 +static_always_inline u32 +oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill) +{ + u32 n_alloc, n_free; + u32 buffer_indices[n_refill]; + vlib_buffer_t *buffers[n_refill]; + u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq); + oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq); + u64 aura = roc_npa_aura_handle_to_aura (crq->aura_handle); + const uint64_t addr = + roc_npa_aura_handle_to_base (crq->aura_handle) + NPA_LF_AURA_OP_FREE0; + + if (n_refill < 256) + return 0; + + n_alloc = vlib_buffer_alloc (vm, buffer_indices, n_refill); + if (PREDICT_FALSE (n_alloc < n_refill)) + goto alloc_fail; + + vlib_get_buffers (vm, buffer_indices, (vlib_buffer_t **) buffers, n_alloc); + + for (n_free = 0; n_free < n_alloc; n_free++) + roc_store_pair ((u64) buffers[n_free], aura, addr); + + return n_alloc; + +alloc_fail: + vlib_buffer_unalloc_to_pool (vm, buffer_indices, n_alloc, bpi); + return 0; +} +#else static_always_inline void oct_rxq_refill_batch (vlib_main_t *vm, u64 lmt_id, u64 addr, oct_npa_lf_aura_batch_free_line_t *lines, u32 *bi, @@ -260,6 +292,7 @@ oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill) return n_enq; } +#endif static_always_inline void oct_rx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c index a2e4b07de8a..0907493814d 100644 --- a/src/plugins/dev_octeon/tx_node.c +++ b/src/plugins/dev_octeon/tx_node.c @@ -32,6 +32,44 @@ typedef struct lmt_line_t *lmt_lines; } oct_tx_ctx_t; +#ifdef PLATFORM_OCTEON9 +static_always_inline u32 +oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) +{ + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + u16 off = ctq->hdr_off; + u64 ah = ctq->aura_handle; + u32 n_freed = 0, n; 
+ + ah = ctq->aura_handle; + + if ((n = roc_npa_aura_op_available (ah)) >= 32) + { + u64 buffers[n]; + u32 bi[n]; + + n_freed = roc_npa_aura_op_bulk_alloc (ah, buffers, n, 0, 1); + vlib_get_buffer_indices_with_offset (vm, (void **) &buffers, bi, n_freed, + off); + vlib_buffer_free_no_next (vm, bi, n_freed); + } + + return n_freed; +} + +static_always_inline void +oct_lmt_copy (void *lmt_addr, u64 io_addr, void *desc, u64 dwords) +{ + u64 lmt_status; + + do + { + roc_lmt_mov_seg (lmt_addr, desc, dwords); + lmt_status = roc_lmt_submit_ldeor (io_addr); + } + while (lmt_status == 0); +} +#else static_always_inline u32 oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) { @@ -133,6 +171,7 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) return n_freed; } +#endif static_always_inline u8 oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, @@ -158,6 +197,11 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, return 0; } +#ifdef PLATFORM_OCTEON9 + /* Override line for Octeon9 */ + line = ctx->lmt_lines; +#endif + if (!simple && flags & VLIB_BUFFER_NEXT_PRESENT) { u8 n_tail_segs = 0; @@ -238,8 +282,12 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, t->sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX]; } +#ifdef PLATFORM_OCTEON9 + oct_lmt_copy (line, ctx->lmt_ioaddr, &d, n_dwords); +#else for (u32 i = 0; i < n_dwords; i++) line->dwords[i] = d.as_u128[i]; +#endif *dpl = n_dwords; *n = *n + 1; @@ -252,7 +300,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, vlib_buffer_t **b, u32 n_pkts, int trace) { u8 dwords_per_line[16], *dpl = dwords_per_line; - u64 lmt_arg, ioaddr, n_lines; + u64 __attribute__ ((unused)) lmt_arg, ioaddr, n_lines; u32 n_left, or_flags_16 = 0, n = 0; const u32 not_simple_flags = VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD; @@ -331,6 +379,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, 
vnet_dev_tx_queue_t *txq, if (PREDICT_FALSE (!n_lines)) return n_pkts; +#ifndef PLATFORM_OCTEON9 if (PREDICT_FALSE (or_flags_16 & VLIB_BUFFER_NEXT_PRESENT)) { dpl = dwords_per_line; @@ -359,6 +408,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, } roc_lmt_submit_steorl (lmt_arg, ioaddr); +#endif return n_pkts; } @@ -375,7 +425,11 @@ VNET_DEV_NODE_FN (oct_tx_node) u32 *from = vlib_frame_vector_args (frame); u32 n, n_enq, n_left, n_pkts = frame->n_vectors; vlib_buffer_t *buffers[VLIB_FRAME_SIZE + 8], **b = buffers; +#ifdef PLATFORM_OCTEON9 + u64 lmt_id = 0; +#else u64 lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2; +#endif oct_tx_ctx_t ctx = { .node = node, diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 421f662efa2..e416efe2e4d 100644 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -1045,12 +1045,14 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) dpdk_main_t *dm = &dpdk_main; clib_error_t *error = 0; dpdk_config_main_t *conf = &dpdk_config_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); dpdk_device_config_t *devconf; vlib_pci_addr_t pci_addr = { 0 }; vlib_vmbus_addr_t vmbus_addr = { 0 }; unformat_input_t sub_input; +#ifdef __linux + vlib_thread_main_t *tm = vlib_get_thread_main (); uword default_hugepage_sz, x; +#endif /* __linux__ */ u8 *s, *tmp = 0; int ret, i; int num_whitelisted = 0; @@ -1258,6 +1260,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) { vec_add1 (conf->eal_init_args, (u8 *) "--in-memory"); +#ifdef __linux__ + /* + * FreeBSD performs huge page prealloc through a dedicated kernel mode + * this process is only required on Linux. 
+ */ default_hugepage_sz = clib_mem_get_default_hugepage_size (); clib_bitmap_foreach (x, tm->cpu_socket_bitmap) @@ -1272,6 +1279,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) if ((e = clib_sysfs_prealloc_hugepages(x, 0, n_pages))) clib_error_report (e); } +#endif /* __linux__ */ } /* on/off dpdk's telemetry thread */ diff --git a/src/plugins/hs_apps/http_cli.c b/src/plugins/hs_apps/http_cli.c index 5d4d49c0fba..f42f65342c3 100644 --- a/src/plugins/hs_apps/http_cli.c +++ b/src/plugins/hs_apps/http_cli.c @@ -323,6 +323,13 @@ hcs_ts_rx_callback (session_t *ts) return 0; } + if (msg.data.len == 0) + { + hs->tx_buf = 0; + start_send_data (hs, HTTP_STATUS_BAD_REQUEST); + return 0; + } + /* send the command to a new/recycled vlib process */ vec_validate (args.buf, msg.data.len - 1); rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, args.buf); diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c index 1a321bf44a8..a99169bafea 100644 --- a/src/plugins/hs_apps/http_client_cli.c +++ b/src/plugins/hs_apps/http_client_cli.c @@ -67,6 +67,7 @@ typedef enum { HCC_REPLY_RECEIVED = 100, HCC_TRANSPORT_CLOSED, + HCC_CONNECT_FAILED, } hcc_cli_signal_t; static hcc_main_t hcc_main; @@ -135,6 +136,8 @@ hcc_ts_connected_callback (u32 app_index, u32 hc_index, session_t *as, { clib_warning ("connected error: hc_index(%d): %U", hc_index, format_session_error, err); + vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index, + HCC_CONNECT_FAILED, 0); return -1; } @@ -425,6 +428,9 @@ hcc_run (vlib_main_t *vm, int print_output) case HCC_TRANSPORT_CLOSED: err = clib_error_return (0, "error, transport closed"); break; + case HCC_CONNECT_FAILED: + err = clib_error_return (0, "failed to connect"); + break; default: err = clib_error_return (0, "unexpected event %d", event_type); break; diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c index 855ab8deb3b..893dd877c29 100644 --- a/src/plugins/http/http.c +++ 
b/src/plugins/http/http.c @@ -267,17 +267,21 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, app_worker_t *app_wrk; int rv; + ho_hc = http_conn_get_w_thread (ho_hc_index, 0); + ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING); + if (err) { - clib_warning ("ERROR: %d", err); + clib_warning ("half-open hc index %d, error: %U", ho_hc_index, + format_session_error, err); + app_wrk = app_worker_get_if_valid (ho_hc->h_pa_wrk_index); + if (app_wrk) + app_worker_connect_notify (app_wrk, 0, err, ho_hc->h_pa_app_api_ctx); return 0; } new_hc_index = http_conn_alloc_w_thread (ts->thread_index); hc = http_conn_get_w_thread (new_hc_index, ts->thread_index); - ho_hc = http_conn_get_w_thread (ho_hc_index, 0); - - ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING); clib_memcpy_fast (hc, ho_hc, sizeof (*hc)); @@ -378,7 +382,7 @@ static const char *http_response_template = "HTTP/1.1 %s\r\n" "Content-Length: %lu\r\n\r\n"; static const char *http_request_template = "GET %s HTTP/1.1\r\n" - "User-Agent: VPP HTTP client\r\n" + "User-Agent: %s\r\n" "Accept: */*\r\n"; static u32 @@ -521,17 +525,19 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp) http_msg_t msg = {}; app_worker_t *app_wrk; session_t *as; - http_status_code_t ec; rv = http_read_message (hc); /* Nothing yet, wait for data or timer expire */ if (rv) - return HTTP_SM_STOP; + { + HTTP_DBG (1, "no data to deq"); + return HTTP_SM_STOP; + } if (vec_len (hc->rx_buf) < 8) { - ec = HTTP_STATUS_BAD_REQUEST; + clib_warning ("response buffer too short"); goto error; } @@ -547,9 +553,7 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp) if (rv) { clib_warning ("failed to parse http reply"); - session_transport_closing_notify (&hc->connection); - http_disconnect_transport (hc); - return -1; + goto error; } msg.data.len = content_length; u32 dlen = vec_len (hc->rx_buf) - hc->rx_buf_offset; @@ -592,16 +596,14 @@ http_state_wait_server_reply 
(http_conn_t *hc, transport_send_params_t *sp) } else { - HTTP_DBG (0, "Unknown http method %v", hc->rx_buf); - ec = HTTP_STATUS_METHOD_NOT_ALLOWED; + clib_warning ("Unknown http method %v", hc->rx_buf); goto error; } error: - http_send_error (hc, ec); session_transport_closing_notify (&hc->connection); + session_transport_closed_notify (&hc->connection); http_disconnect_transport (hc); - return HTTP_SM_ERROR; } @@ -742,6 +744,10 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) switch (msg.code) { + case HTTP_STATUS_NOT_FOUND: + case HTTP_STATUS_METHOD_NOT_ALLOWED: + case HTTP_STATUS_BAD_REQUEST: + case HTTP_STATUS_INTERNAL_ERROR: case HTTP_STATUS_OK: header = format (0, http_response_template, http_status_code_str[msg.code], @@ -762,6 +768,7 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) /* Location: http(s)://new-place already queued up as data */ break; default: + clib_warning ("unsupported status code: %d", msg.code); return HTTP_SM_ERROR; } @@ -817,11 +824,18 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) goto error; } + /* currently we support only GET method */ + if (msg.method_type != HTTP_REQ_GET) + { + clib_warning ("unsupported method %d", msg.method_type); + goto error; + } + vec_validate (buf, msg.data.len - 1); rv = svm_fifo_dequeue (as->tx_fifo, msg.data.len, buf); ASSERT (rv == msg.data.len); - request = format (0, http_request_template, buf); + request = format (0, http_request_template, buf, hc->app_name); offset = http_send_data (hc, request, vec_len (request), 0); if (offset != vec_len (request)) { @@ -1159,6 +1173,11 @@ http_transport_connect (transport_endpoint_cfg_t *tep) hc->state = HTTP_CONN_STATE_CONNECTING; cargs->api_context = hc_index; + if (vec_len (app->name)) + hc->app_name = vec_dup (app->name); + else + hc->app_name = format (0, "VPP HTTP client"); + HTTP_DBG (1, "hc ho_index %x", hc_index); if ((error = vnet_connect (cargs))) diff --git 
a/src/plugins/http/http.h b/src/plugins/http/http.h index c9912dd6db8..7fbefd667f4 100644 --- a/src/plugins/http/http.h +++ b/src/plugins/http/http.h @@ -277,6 +277,74 @@ http_state_is_tx_valid (http_conn_t *hc) state == HTTP_STATE_WAIT_APP_METHOD); } +/** + * Remove dot segments from path (RFC3986 section 5.2.4) + * + * @param path Path to sanitize. + * + * @return New vector with sanitized path. + * + * The caller is always responsible to free the returned vector. + */ +always_inline u8 * +http_path_remove_dot_segments (u8 *path) +{ + u32 *segments = 0, *segments_len = 0, segment_len; + u8 *new_path = 0; + int i, ii; + + if (!path) + return vec_new (u8, 0); + + segments = vec_new (u32, 1); + /* first segment */ + segments[0] = 0; + /* find all segments */ + for (i = 1; i < (vec_len (path) - 1); i++) + { + if (path[i] == '/') + vec_add1 (segments, i + 1); + } + /* dummy tail */ + vec_add1 (segments, vec_len (path)); + + /* scan all segments for "." and ".." */ + segments_len = vec_new (u32, vec_len (segments) - 1); + for (i = 0; i < vec_len (segments_len); i++) + { + segment_len = segments[i + 1] - segments[i]; + if (segment_len == 2 && path[segments[i]] == '.') + segment_len = 0; + else if (segment_len == 3 && path[segments[i]] == '.' 
&& + path[segments[i] + 1] == '.') + { + segment_len = 0; + /* remove parent (if any) */ + for (ii = i - 1; ii >= 0; ii--) + { + if (segments_len[ii]) + { + segments_len[ii] = 0; + break; + } + } + } + segments_len[i] = segment_len; + } + + /* we might end with empty path, so return at least empty vector */ + new_path = vec_new (u8, 0); + /* append all valid segments */ + for (i = 0; i < vec_len (segments_len); i++) + { + if (segments_len[i]) + vec_add (new_path, path + segments[i], segments_len[i]); + } + vec_free (segments); + vec_free (segments_len); + return new_path; +} + #endif /* SRC_PLUGINS_HTTP_HTTP_H_ */ /* diff --git a/src/plugins/http_static/static_server.c b/src/plugins/http_static/static_server.c index 040cdca9d7a..f433238dcb1 100644 --- a/src/plugins/http_static/static_server.c +++ b/src/plugins/http_static/static_server.c @@ -357,7 +357,7 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, u8 *request) { http_status_code_t sc = HTTP_STATUS_OK; - u8 *path; + u8 *path, *sanitized_path; u32 ce_index; http_content_type_t type; @@ -367,6 +367,9 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, type = content_type_from_request (request); + /* Remove dot segments to prevent path traversal */ + sanitized_path = http_path_remove_dot_segments (request); + /* * Construct the file to open * Browsers are capable of sporadically including a leading '/' @@ -374,9 +377,9 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, if (!request) path = format (0, "%s%c", hsm->www_root, 0); else if (request[0] == '/') - path = format (0, "%s%s%c", hsm->www_root, request, 0); + path = format (0, "%s%s%c", hsm->www_root, sanitized_path, 0); else - path = format (0, "%s/%s%c", hsm->www_root, request, 0); + path = format (0, "%s/%s%c", hsm->www_root, sanitized_path, 0); if (hsm->debug_level > 0) clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? 
"GET" : "POST", path); @@ -419,7 +422,7 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, hs->cache_pool_index = ce_index; done: - + vec_free (sanitized_path); hs->content_type = type; start_send_data (hs, sc); if (!hs->data) diff --git a/src/plugins/marvell/pp2/cli.c b/src/plugins/marvell/pp2/cli.c index f4ecb1873c9..5072a3c035b 100644 --- a/src/plugins/marvell/pp2/cli.c +++ b/src/plugins/marvell/pp2/cli.c @@ -31,7 +31,7 @@ mrvl_pp2_create_command_fn (vlib_main_t * vm, unformat_input_t * input, { unformat_input_t _line_input, *line_input = &_line_input; mrvl_pp2_create_if_args_t args = { 0 }; - uint val; + unsigned int val; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c index 01b333a5234..3b981d69986 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c +++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c @@ -859,7 +859,7 @@ nat44_ei_icmp_in2out (vlib_buffer_t *b0, ip4_header_t *ip0, nat44_ei_main_t *nm = &nat44_ei_main; vlib_main_t *vm = vlib_get_main (); ip4_address_t addr; - u16 port; + u16 port = 0; u32 fib_index; nat_protocol_t proto; icmp_echo_header_t *echo0, *inner_echo0 = 0; diff --git a/src/plugins/netmap/CMakeLists.txt b/src/plugins/netmap/CMakeLists.txt new file mode 100644 index 00000000000..d53a9e0911a --- /dev/null +++ b/src/plugins/netmap/CMakeLists.txt @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2024 Tom Jones <thj@freebsd.org> +# +# This software was developed by Tom Jones <thj@freebsd.org> under sponsorship +# from the FreeBSD Foundation. 
+# + +if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") + message(WARNING "Netmap is only currently support on FreeBSD - netmap plugin disabled") + return() +endif() + +add_vpp_plugin(netmap + SOURCES + plugin.c + netmap.c + node.c + device.c + cli.c + netmap_api.c + + MULTIARCH_SOURCES + node.c + device.c + + INSTALL_HEADERS + netmap.h + net_netmap.h + + API_FILES + netmap.api +) diff --git a/src/plugins/netmap/FEATURE.yaml b/src/plugins/netmap/FEATURE.yaml new file mode 100644 index 00000000000..a9dfb2163e4 --- /dev/null +++ b/src/plugins/netmap/FEATURE.yaml @@ -0,0 +1,12 @@ +--- +name: Netmap Device +maintainer: Tom Jones <thj@freebsd.org> +features: + - L4 checksum offload +description: "Create a netmap interface, which is a high speed user-space + interface that allows VPP to patch to a physical or virtual NIC + without the use of DPDK" +missing: + - API dump +state: production +properties: [API, CLI, STATS, MULTITHREAD] diff --git a/src/plugins/netmap/cli.c b/src/plugins/netmap/cli.c new file mode 100644 index 00000000000..b54d397ecbe --- /dev/null +++ b/src/plugins/netmap/cli.c @@ -0,0 +1,236 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> + +#include <netmap/net_netmap.h> +#include <netmap/netmap.h> + +static clib_error_t * +netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 *host_if_name = NULL; + u8 hwaddr[6]; + u8 *hw_addr_ptr = 0; + int r; + u8 is_pipe = 0; + u8 is_master = 0; + u32 sw_if_index = ~0; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &host_if_name)) + ; + else + if (unformat + (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr)) + hw_addr_ptr = hwaddr; + else if (unformat (line_input, "pipe")) + is_pipe = 1; + else if (unformat (line_input, "master")) + is_master = 1; + else if (unformat (line_input, "slave")) + is_master = 0; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (host_if_name == NULL) + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } + + r = + netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master, + &sw_if_index); + + if (r == VNET_API_ERROR_SYSCALL_ERROR_1) + { + error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + goto done; + } + + if (r == VNET_API_ERROR_INVALID_INTERFACE) + { + error = clib_error_return (0, "Invalid interface name"); + goto done; + } + + if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) + { + error = clib_error_return (0, "Interface already exists"); + goto done; + } + + vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), + 
sw_if_index); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * '<em>netmap</em>' is a framework for very fast packet I/O from userspace. + * '<em>VALE</em>' is an equally fast in-kernel software switch using the + * netmap API. '<em>netmap</em>' includes '<em>netmap pipes</em>', a shared + * memory packet transport channel. Together, they provide a high speed + * user-space interface that allows VPP to patch into a linux namespace, a + * linux container, or a physical NIC without the use of DPDK. Netmap/VALE + * generates the '<em>netmap.ko</em>' kernel module that needs to be loaded + * before netmap interfaces can be created. + * - https://github.com/luigirizzo/netmap - Netmap/VALE repo. + * - https://github.com/vpp-dev/netmap - VPP development package for Netmap/VALE, + * which is a snapshot of the Netmap/VALE repo with minor changes to work + * with containers and modified kernel drivers to work with NICs. + * + * Create a netmap interface that will attach to a linux interface. + * The interface must already exist. Once created, a new netmap interface + * will exist in VPP with the name '<em>netmap-<ifname></em>', where + * '<em><ifname></em>' takes one of two forms: + * - <b>ifname</b> - Linux interface to bind to. + * - <b>valeXXX:YYY</b> - + * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE + * interface that must start with '<em>vale</em>' and is less + * than 16 characters. + * - Where '<em>YYY</em>' is an existing linux namespace. + * + * This command has the following optional parameters: + * + * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either + * X:X:X:X:X:X unix or X.X.X cisco format. + * + * - <b>pipe</b> - Optional flag to indicate that a '<em>netmap pipe</em>' + * instance should be created. + * + * - <b>master | slave</b> - Optional flag to indicate whether VPP should + * be the master or slave of the '<em>netmap pipe</em>'. Only considered + * if '<em>pipe</em>' is entered.
Defaults to '<em>slave</em>' if not entered. + * + * @cliexpar + * Example of how to create a netmap interface tied to the linux + * namespace '<em>vpp1</em>': + * @cliexstart{create netmap name vale00:vpp1 hw-addr 02:FE:3F:34:15:9B pipe master} + * netmap-vale00:vpp1 + * @cliexend + * Once the netmap interface is created, enable the interface using: + * @cliexcmd{set interface state netmap-vale00:vpp1 up} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (netmap_create_command, static) = { + .path = "create netmap", + .short_help = "create netmap name <ifname>|valeXXX:YYY " + "[hw-addr <mac-addr>] [pipe] [master|slave]", + .function = netmap_create_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 *host_if_name = NULL; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &host_if_name)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (host_if_name == NULL) + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } + + netmap_delete_if (vm, host_if_name); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * Delete a netmap interface. Use the '<em><ifname></em>' to identify + * the netmap interface to be deleted. In VPP, netmap interfaces are + * named as '<em>netmap-<ifname></em>', where '<em><ifname></em>' + * takes one of two forms: + * - <b>ifname</b> - Linux interface to bind to. + * - <b>valeXXX:YYY</b> - + * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE + * interface that must start with '<em>vale</em>' and is less + * than 16 characters.
+ * - Where '<em>YYY</em>' is an existing linux namespace. + * + * @cliexpar + * Example of how to delete a netmap interface named '<em>netmap-vale00:vpp1</em>': + * @cliexcmd{delete netmap name vale00:vpp1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (netmap_delete_command, static) = { + .path = "delete netmap", + .short_help = "delete netmap name <ifname>|valeXXX:YYY", + .function = netmap_delete_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +netmap_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (netmap_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/device.c b/src/plugins/netmap/device.c new file mode 100644 index 00000000000..505deb988c4 --- /dev/null +++ b/src/plugins/netmap/device.c @@ -0,0 +1,252 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> + +#include <netmap/net_netmap.h> +#include <netmap/netmap.h> + +#define foreach_netmap_tx_func_error \ +_(NO_FREE_SLOTS, "no free tx slots") \ +_(PENDING_MSGS, "pending msgs in tx ring") + +typedef enum +{ +#define _(f,s) NETMAP_TX_ERROR_##f, + foreach_netmap_tx_func_error +#undef _ + NETMAP_TX_N_ERROR, +} netmap_tx_func_error_t; + +static char *netmap_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_netmap_tx_func_error +#undef _ +}; + + +static u8 * +format_netmap_device_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + netmap_main_t *apm = &netmap_main; + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, i); + + s = format (s, "netmap-%s", nif->host_if_name); + return s; +} + +static u8 * +format_netmap_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + netmap_main_t *nm = &netmap_main; + netmap_if_t *nif = vec_elt_at_index (nm->interfaces, dev_instance); + u32 indent = format_get_indent (s); + + s = format (s, "NETMAP interface"); + if (verbose) + { + s = format (s, "\n%U version %d flags 0x%x" + "\n%U region %u memsize 0x%x offset 0x%x" + "\n%U tx_slots %u rx_slots %u tx_rings %u rx_rings %u", + format_white_space, indent + 2, + nif->req->nr_version, + nif->req->nr_flags, + format_white_space, indent + 2, + nif->mem_region, + nif->req->nr_memsize, + nif->req->nr_offset, + format_white_space, indent + 2, + nif->req->nr_tx_slots, + nif->req->nr_rx_slots, + nif->req->nr_tx_rings, nif->req->nr_rx_rings); + } + return s; +} + +static u8 * +format_netmap_tx_trace (u8 * s, va_list * args) +{ + s = format (s, "Unimplemented..."); + return s; +} + +VNET_DEVICE_CLASS_TX_FN (netmap_device_class) (vlib_main_t * vm, + vlib_node_runtime_t * node, + 
vlib_frame_t * frame) +{ + netmap_main_t *nm = &netmap_main; + u32 *buffers = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + f64 const time_constant = 1e3; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance); + int cur_ring; + + clib_spinlock_lock_if_init (&nif->lockp); + + cur_ring = nif->first_tx_ring; + + while (n_left && cur_ring <= nif->last_tx_ring) + { + struct netmap_ring *ring = NETMAP_TXRING (nif->nifp, cur_ring); + int n_free_slots = nm_ring_space (ring); + uint cur = ring->cur; + + if (nm_tx_pending (ring)) + { + if (ioctl (nif->fd, NIOCTXSYNC, NULL) < 0) + clib_unix_warning ("NIOCTXSYNC"); + clib_cpu_time_wait (time_constant); + + if (nm_tx_pending (ring) && !n_free_slots) + { + cur_ring++; + continue; + } + } + + while (n_left && n_free_slots) + { + vlib_buffer_t *b0 = 0; + u32 bi = buffers[0]; + u32 len; + u32 offset = 0; + buffers++; + + struct netmap_slot *slot = &ring->slot[cur]; + + do + { + b0 = vlib_get_buffer (vm, bi); + len = b0->current_length; + /* memcpy */ + clib_memcpy_fast ((u8 *) NETMAP_BUF (ring, slot->buf_idx) + + offset, vlib_buffer_get_current (b0), len); + offset += len; + } + while ((bi = b0->next_buffer)); + + slot->len = offset; + cur = (cur + 1) % ring->num_slots; + n_free_slots--; + n_left--; + } + CLIB_MEMORY_BARRIER (); + ring->head = ring->cur = cur; + } + + if (n_left < frame->n_vectors) + ioctl (nif->fd, NIOCTXSYNC, NULL); + + clib_spinlock_unlock_if_init (&nif->lockp); + + if (n_left) + vlib_error_count (vm, node->node_index, + (n_left == + frame->n_vectors ? 
NETMAP_TX_ERROR_PENDING_MSGS : + NETMAP_TX_ERROR_NO_FREE_SLOTS), n_left); + + vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors); + return frame->n_vectors; +} + +static void +netmap_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + netmap_main_t *apm = &netmap_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + nif->per_interface_next_index = node_index; + return; + } + + nif->per_interface_next_index = + vlib_node_add_next (vlib_get_main (), netmap_input_node.index, + node_index); +} + +static void +netmap_clear_hw_interface_counters (u32 instance) +{ + /* Nothing for now */ +} + +static clib_error_t * +netmap_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + netmap_main_t *apm = &netmap_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + u32 hw_flags; + + nif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + + if (nif->is_admin_up) + hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP; + else + hw_flags = 0; + + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + + return 0; +} + +static clib_error_t * +netmap_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + /* Nothing for now */ + return 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (netmap_device_class) = { + .name = "netmap", + .format_device_name = format_netmap_device_name, + .format_device = format_netmap_device, + .format_tx_trace = format_netmap_tx_trace, + .tx_function_n_errors = NETMAP_TX_N_ERROR, + .tx_function_error_strings = netmap_tx_func_error_strings, + .rx_redirect_to_node = netmap_set_interface_next_node, + .clear_counters = netmap_clear_hw_interface_counters, + 
.admin_up_down_function = netmap_interface_admin_up_down, + .subif_add_del_function = netmap_subif_add_del_function, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/net_netmap.h b/src/plugins/netmap/net_netmap.h new file mode 100644 index 00000000000..ecccedd4484 --- /dev/null +++ b/src/plugins/netmap/net_netmap.h @@ -0,0 +1,650 @@ +/* + * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE.
+ */ + +/* + * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $ + * + * Definitions of constants and the structures used by the netmap + * framework, for the part visible to both kernel and userspace. + * Detailed info on netmap is available with "man netmap" or at + * + * http://info.iet.unipi.it/~luigi/netmap/ + * + * This API is also used to communicate with the VALE software switch + */ + +#ifndef _NET_NETMAP_H_ +#define _NET_NETMAP_H_ + +#define NETMAP_API 14 /* current API version */ + +#define NETMAP_MIN_API 14 /* min and max versions accepted */ +#define NETMAP_MAX_API 15 +/* + * Some fields should be cache-aligned to reduce contention. + * The alignment is architecture and OS dependent, but rather than + * digging into OS headers to find the exact value we use an estimate + * that should cover most architectures. + */ +#define NM_CACHE_ALIGN 128 + +/* + * --- Netmap data structures --- + * + * The userspace data structures used by netmap are shown below. + * They are allocated by the kernel and mmap()ed by userspace threads. + * Pointers are implemented as memory offsets or indexes, + * so that they can be easily dereferenced in kernel and userspace. 
+ + KERNEL (opaque, obviously) + + ==================================================================== + | + USERSPACE | struct netmap_ring + +---->+---------------+ + / | head,cur,tail | + struct netmap_if (nifp, 1 per fd) / | buf_ofs | + +---------------+ / | other fields | + | ni_tx_rings | / +===============+ + | ni_rx_rings | / | buf_idx, len | slot[0] + | | / | flags, ptr | + | | / +---------------+ + +===============+ / | buf_idx, len | slot[1] + | txring_ofs[0] | (rel.to nifp)--' | flags, ptr | + | txring_ofs[1] | +---------------+ + (tx+1 entries) (num_slots entries) + | txring_ofs[t] | | buf_idx, len | slot[n-1] + +---------------+ | flags, ptr | + | rxring_ofs[0] | +---------------+ + | rxring_ofs[1] | + (rx+1 entries) + | rxring_ofs[r] | + +---------------+ + + * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to + * a file descriptor, the mmap()ed region contains a (logically readonly) + * struct netmap_if pointing to struct netmap_ring's. + * + * There is one netmap_ring per physical NIC ring, plus one tx/rx ring + * pair attached to the host stack (this pair is unused for non-NIC ports). + * + * All physical/host stack ports share the same memory region, + * so that zero-copy can be implemented between them. + * VALE switch ports instead have separate memory regions. + * + * The netmap_ring is the userspace-visible replica of the NIC ring. + * Each slot has the index of a buffer (MTU-sized and residing in the + * mmapped region), its length and some flags. An extra 64-bit pointer + * is provided for user-supplied buffers in the tx path. + * + * In user space, the buffer address is computed as + * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE + * + * Added in NETMAP_API 11: + * + * + NIOCREGIF can request the allocation of extra spare buffers from + * the same memory pool. The desired number of buffers must be in + * nr_arg3. The ioctl may return fewer buffers, depending on memory + * availability. 
nr_arg3 will return the actual value, and, once + * mapped, nifp->ni_bufs_head will be the index of the first buffer. + * + * The buffers are linked to each other using the first uint32_t + * as the index. On close, ni_bufs_head must point to the list of + * buffers to be released. + * + * + NIOCREGIF can request space for extra rings (and buffers) + * allocated in the same memory space. The number of extra rings + * is in nr_arg1, and is advisory. This is a no-op on NICs where + * the size of the memory space is fixed. + * + * + NIOCREGIF can attach to PIPE rings sharing the same memory + * space with a parent device. The ifname indicates the parent device, + * which must already exist. Flags in nr_flags indicate if we want to + * bind the master or slave side, the index (from nr_ringid) + * is just a cookie and does not need to be sequential. + * + * + NIOCREGIF can also attach to 'monitor' rings that replicate + * the content of specific rings, also from the same memory space. + * + * Extra flags in nr_flags support the above functions. + * Application libraries may use the following naming scheme: + * netmap:foo all NIC ring pairs + * netmap:foo^ only host ring pair + * netmap:foo+ all NIC ring + host ring pairs + * netmap:foo-k the k-th NIC ring pair + * netmap:foo{k PIPE ring pair k, master side + * netmap:foo}k PIPE ring pair k, slave side + */ + +/* + * struct netmap_slot is a buffer descriptor + */ +struct netmap_slot { + uint32_t buf_idx; /* buffer index */ + uint16_t len; /* length for this slot */ + uint16_t flags; /* buf changed, etc. */ + uint64_t ptr; /* pointer for indirect buffers */ +}; + +/* + * The following flags control how the slot is used + */ + +#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */ + /* + * must be set whenever buf_idx is changed (as it might be + * necessary to recompute the physical address and mapping) + * + * It is also set by the kernel whenever the buf_idx is + * changed internally (e.g., by pipes). 
Applications may + * use this information to know when they can reuse the + * contents of previously prepared buffers. + */ + +#define NS_REPORT 0x0002 /* ask the hardware to report results */ + /* + * Request notification when slot is used by the hardware. + * Normally transmit completions are handled lazily and + * may be unreported. This flag lets us know when a slot + * has been sent (e.g. to terminate the sender). + */ + +#define NS_FORWARD 0x0004 /* pass packet 'forward' */ + /* + * (Only for physical ports, rx rings with NR_FORWARD set). + * Slot released to the kernel (i.e. before ring->head) with + * this flag set are passed to the peer ring (host/NIC), + * thus restoring the host-NIC connection for these slots. + * This supports efficient traffic monitoring or firewalling. + */ + +#define NS_NO_LEARN 0x0008 /* disable bridge learning */ + /* + * On a VALE switch, do not 'learn' the source port for + * this buffer. + */ + +#define NS_INDIRECT 0x0010 /* userspace buffer */ + /* + * (VALE tx rings only) data is in a userspace buffer, + * whose address is in the 'ptr' field in the slot. + */ + +#define NS_MOREFRAG 0x0020 /* packet has more fragments */ + /* + * (VALE ports only) + * Set on all but the last slot of a multi-segment packet. + * The 'len' field refers to the individual fragment. + */ + +#define NS_PORT_SHIFT 8 +#define NS_PORT_MASK (0xff << NS_PORT_SHIFT) + /* + * The high 8 bits of the flag, if not zero, indicate the + * destination port for the VALE switch, overriding + * the lookup table. + */ + +#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff) + /* + * (VALE rx rings only) the high 8 bits + * are the number of fragments. + */ + + +/* + * struct netmap_ring + * + * Netmap representation of a TX or RX ring (also known as "queue"). + * This is a queue implemented as a fixed-size circular array. + * At the software level the important fields are: head, cur, tail. + * + * In TX rings: + * + * head first slot available for transmission. 
+ * cur wakeup point. select() and poll() will unblock + * when 'tail' moves past 'cur' + * tail (readonly) first slot reserved to the kernel + * + * [head .. tail-1] can be used for new packets to send; + * 'head' and 'cur' must be incremented as slots are filled + * with new packets to be sent; + * 'cur' can be moved further ahead if we need more space + * for new transmissions. XXX todo (2014-03-12) + * + * In RX rings: + * + * head first valid received packet + * cur wakeup point. select() and poll() will unblock + * when 'tail' moves past 'cur' + * tail (readonly) first slot reserved to the kernel + * + * [head .. tail-1] contain received packets; + * 'head' and 'cur' must be incremented as slots are consumed + * and can be returned to the kernel; + * 'cur' can be moved further ahead if we want to wait for + * new packets without returning the previous ones. + * + * DATA OWNERSHIP/LOCKING: + * The netmap_ring, and all slots and buffers in the range + * [head .. tail-1] are owned by the user program; + * the kernel only accesses them during a netmap system call + * and in the user thread context. + * + * Other slots and buffers are reserved for use by the kernel + */ +struct netmap_ring { + /* + * buf_ofs is meant to be used through macros. + * It contains the offset of the buffer region from this + * descriptor. + */ + const int64_t buf_ofs; + const uint32_t num_slots; /* number of slots in the ring. */ + const uint32_t nr_buf_size; + const uint16_t ringid; + const uint16_t dir; /* 0: tx, 1: rx */ + + uint32_t head; /* (u) first user slot */ + uint32_t cur; /* (u) wakeup point */ + uint32_t tail; /* (k) first kernel slot */ + + uint32_t flags; + + struct timeval ts; /* (k) time of last *sync() */ + + /* opaque room for a mutex or similar object */ +#if !defined(_WIN32) || defined(__CYGWIN__) + uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128]; +#else + uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128]; +#endif + + /* the slots follow. 
This struct has variable size */ + struct netmap_slot slot[0]; /* array of slots. */ +}; + + +/* + * RING FLAGS + */ +#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ + /* + * updates the 'ts' field on each netmap syscall. This + * saves a separate gettimeofday(), and is not much worse than + * software timestamps generated in the interrupt handler. + */ + +#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */ + /* + * Enables the NS_FORWARD slot flag for the ring. + */ + + +/* + * Netmap representation of an interface and its queue(s). + * This is initialized by the kernel when binding a file + * descriptor to a port, and should be considered as readonly + * by user programs. The kernel never uses it. + * + * There is one netmap_if for each file descriptor on which we want + * to select/poll. + * select/poll operates on one or all pairs depending on the value of + * nmr_queueid passed on the ioctl. + */ +struct netmap_if { + char ni_name[IFNAMSIZ]; /* name of the interface. */ + const uint32_t ni_version; /* API version, currently unused */ + const uint32_t ni_flags; /* properties */ +#define NI_PRIV_MEM 0x1 /* private memory region */ + + /* + * The number of packet rings available in netmap mode. + * Physical NICs can have different numbers of tx and rx rings. + * Physical NICs also have a 'host' ring pair. + * Additionally, clients can request additional ring pairs to + * be used for internal communication. + */ + const uint32_t ni_tx_rings; /* number of HW tx rings */ + const uint32_t ni_rx_rings; /* number of HW rx rings */ + + uint32_t ni_bufs_head; /* head index for extra bufs */ + uint32_t ni_spare1[5]; + /* + * The following array contains the offset of each netmap ring + * from this structure, in the following order: + * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings; + * NIC rx rings (ni_rx_rings); host rx ring (1); extra rx rings.
+ * + * The area is filled up by the kernel on NIOCREGIF, + * and then only read by userspace code. + */ + const ssize_t ring_ofs[0]; +}; + + +#ifndef NIOCREGIF +/* + * ioctl names and related fields + * + * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues, + * whose identity is set in NIOCREGIF through nr_ringid. + * These are non blocking and take no argument. + * + * NIOCGINFO takes a struct ifreq, the interface name is the input, + * the outputs are number of queues and number of descriptors + * for each queue (useful to set number of threads etc.). + * The info returned is only advisory and may change before + * the interface is bound to a file descriptor. + * + * NIOCREGIF takes an interface name within a struct nmreq, + * and activates netmap mode on the interface (if possible). + * + * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we + * can pass it down to other NIC-related ioctls. + * + * The actual argument (struct nmreq) has a number of options to request + * different functions. + * The following are used in NIOCREGIF when nr_cmd == 0: + * + * nr_name (in) + * The name of the port (em0, valeXXX:YYY, etc.) + * limited to IFNAMSIZ for backward compatibility. + * + * nr_version (in/out) + * Must match NETMAP_API as used in the kernel, error otherwise. + * Always returns the desired value on output. + * + * nr_tx_slots, nr_rx_slots, nr_tx_rings, nr_rx_rings (in/out) + * On input, non-zero values may be used to reconfigure the port + * according to the requested values, but this is not guaranteed. + * On output the actual values in use are reported. + * + * nr_ringid (in) + * Indicates how rings should be bound to the file descriptors. + * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK) + * are used to indicate the ring number, and nr_flags specifies + * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected.
+ * + * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED: + * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control + * the binding as follows: + * 0 (default) binds all physical rings + * NETMAP_HW_RING | ring number binds a single ring pair + * NETMAP_SW_RING binds only the host tx/rx rings + * + * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push + * packets on tx rings only if POLLOUT is set. + * The default is to push any pending packet. + * + * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release + * packets on rx rings also when POLLIN is NOT set. + * The default is to touch the rx ring only with POLLIN. + * Note that this is the opposite of TX because it + * reflects the common usage. + * + * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead. + * NETMAP_PRIV_MEM is set on return for ports that do not use + * the global memory allocator. + * This information is not significant and applications + * should look at the region id in nr_arg2 + * + * nr_flags is the recommended mode to indicate which rings should + * be bound to a file descriptor. Values are NR_REG_* + * + * nr_arg1 (in) The number of extra rings to be reserved. + * Especially when allocating a VALE port the system only + * allocates the amount of memory needed for the port. + * If more shared memory rings are desired (e.g. for pipes), + * the first invocation for the same basename/allocator + * should specify a suitable number. Memory cannot be + * extended after the first allocation without closing + * all ports on the same region. + * + * nr_arg2 (in/out) The identity of the memory region used. + * On input, 0 means the system decides autonomously, + * other values may try to select a specific region. + * On return the actual value is reported. + * Region '1' is the global allocator, normally shared + * by all interfaces. Other values are private regions. + * If two ports use the same region, zero-copy is possible.
+ * + * nr_arg3 (in/out) number of extra buffers to be allocated. + * + * + * + * nr_cmd (in) if non-zero indicates a special command: + * NETMAP_BDG_ATTACH and nr_name = vale*:ifname + * attaches the NIC to the switch; nr_ringid specifies + * which rings to use. Used by vale-ctl -a ... + * nr_arg1 = NETMAP_BDG_HOST also attaches the host port + * as in vale-ctl -h ... + * + * NETMAP_BDG_DETACH and nr_name = vale*:ifname + * disconnects a previously attached NIC. + * Used by vale-ctl -d ... + * + * NETMAP_BDG_LIST + * list the configuration of VALE switches. + * + * NETMAP_BDG_VNET_HDR + * Set the virtio-net header length used by the client + * of a VALE switch port. + * + * NETMAP_BDG_NEWIF + * create a persistent VALE port with name nr_name. + * Used by vale-ctl -n ... + * + * NETMAP_BDG_DELIF + * delete a persistent VALE port. Used by vale-ctl -d ... + * + * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific + * + * + * + */ + + +/* + * struct nmreq overlays a struct ifreq (just the name) + */ +struct nmreq { + char nr_name[IFNAMSIZ]; + uint32_t nr_version; /* API version */ + uint32_t nr_offset; /* nifp offset in the shared region */ + uint32_t nr_memsize; /* size of the shared region */ + uint32_t nr_tx_slots; /* slots in tx rings */ + uint32_t nr_rx_slots; /* slots in rx rings */ + uint16_t nr_tx_rings; /* number of tx rings */ + uint16_t nr_rx_rings; /* number of rx rings */ + + uint16_t nr_ringid; /* ring(s) we care about */ +#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */ +#define NETMAP_SW_RING 0x2000 /* only host ring pair */ + +#define NETMAP_RING_MASK 0x0fff /* the ring number */ + +#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */ + +#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */ + + uint16_t nr_cmd; +#define NETMAP_BDG_ATTACH 1 /* attach the NIC */ +#define NETMAP_BDG_DETACH 2 /* detach the NIC */ +#define NETMAP_BDG_REGOPS 3 /* register bridge callbacks */ +#define NETMAP_BDG_LIST 4 /* get bridge's info 
*/ +#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */ +#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */ +#define NETMAP_BDG_NEWIF 6 /* create a virtual port */ +#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */ +#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */ +#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */ +#define NETMAP_BDG_POLLING_ON 10 /* delete polling kthread */ +#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */ +#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */ + uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ +#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ + + uint16_t nr_arg2; + uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */ + uint32_t nr_flags; + /* various modes, extends nr_ringid */ + uint32_t spare2[1]; +}; + +#define NR_REG_MASK 0xf /* values for nr_flags */ +enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */ + NR_REG_ALL_NIC = 1, + NR_REG_SW = 2, + NR_REG_NIC_SW = 3, + NR_REG_ONE_NIC = 4, + NR_REG_PIPE_MASTER = 5, + NR_REG_PIPE_SLAVE = 6, +}; +/* monitor uses the NR_REG to select the rings to monitor */ +#define NR_MONITOR_TX 0x100 +#define NR_MONITOR_RX 0x200 +#define NR_ZCOPY_MON 0x400 +/* request exclusive access to the selected rings */ +#define NR_EXCLUSIVE 0x800 +/* request ptnetmap host support */ +#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */ +#define NR_PTNETMAP_HOST 0x1000 +#define NR_RX_RINGS_ONLY 0x2000 +#define NR_TX_RINGS_ONLY 0x4000 +/* Applications set this flag if they are able to deal with virtio-net headers, + * that is send/receive frames that start with a virtio-net header. + * If not set, NIOCREGIF will fail with netmap ports that require applications + * to use those headers. If the flag is set, the application can use the + * NETMAP_VNET_HDR_GET command to figure out the header length. 
*/ +#define NR_ACCEPT_VNET_HDR 0x8000 + + +/* + * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined + * in ws2def.h but not sure if they are in the form we need. + * XXX so we redefine them + * in a convenient way to use for DeviceIoControl signatures + */ +#ifdef _WIN32 +#undef _IO // ws2def.h +#define _WIN_NM_IOCTL_TYPE 40000 +#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_BUFFERED, FILE_ANY_ACCESS ) +#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_OUT_DIRECT, FILE_ANY_ACCESS ) + +#define _IOWR(_c, _n, _s) _IO(_c, _n) + +/* We have some internal sysctl in addition to the externally visible ones */ +#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT +#define NETMAP_POLL _IO('i', 162) + +/* and also two setsockopt for sysctl emulation */ +#define NETMAP_SETSOCKOPT _IO('i', 140) +#define NETMAP_GETSOCKOPT _IO('i', 141) + + +//These linknames are for the Netmap Core Driver +#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP" +#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap" + +//Definition of a structure used to pass a virtual address within an IOCTL +typedef struct _MEMORY_ENTRY { + PVOID pUsermodeVirtualAddress; +} MEMORY_ENTRY, *PMEMORY_ENTRY; + +typedef struct _POLL_REQUEST_DATA { + int events; + int timeout; + int revents; +} POLL_REQUEST_DATA; + +#endif /* _WIN32 */ + +/* + * FreeBSD uses the size value embedded in the _IOWR to determine + * how much to copy in/out. So we need it to match the actual + * data structure we pass. We put some spares in the structure + * to ease compatibility with other versions + */ +#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */ +#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */ +#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */ +#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */ +#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext.
modules */ +#endif /* !NIOCREGIF */ + + +/* + * Helper functions for kernel and userspace + */ + +/* + * check if space is available in the ring. + */ +static inline int +nm_ring_empty(struct netmap_ring *ring) +{ + return (ring->cur == ring->tail); +} + +/* + * Opaque structure that is passed to an external kernel + * module via ioctl(fd, NIOCCONFIG, req) for a user-owned + * bridge port (at this point ephemeral VALE interface). + */ +#define NM_IFRDATA_LEN 256 +struct nm_ifreq { + char nifr_name[IFNAMSIZ]; + char data[NM_IFRDATA_LEN]; +}; + +/* + * netmap kernel thread configuration + */ +/* bhyve/vmm.ko MSIX parameters for IOCTL */ +struct ptn_vmm_ioctl_msix { + uint64_t msg; + uint64_t addr; +}; + +/* IOCTL parameters */ +struct nm_kth_ioctl { + u_long com; + /* TODO: use union */ + union { + struct ptn_vmm_ioctl_msix msix; + } data; +}; + +/* Configuration of a ptnetmap ring */ +struct ptnet_ring_cfg { + uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */ + uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */ + struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */ +}; +#endif /* _NET_NETMAP_H_ */ diff --git a/src/plugins/netmap/netmap.api b/src/plugins/netmap/netmap.api new file mode 100644 index 00000000000..a14753cad9c --- /dev/null +++ b/src/plugins/netmap/netmap.api @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +option version = "1.0.0"; + +/** \brief Create netmap + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param netmap_if_name - interface name + @param hw_addr - interface MAC + @param use_random_hw_addr - use random generated MAC + @param is_pipe - is pipe + @param is_master - 0=slave, 1=master +*/ +autoreply define netmap_create +{ + u32 client_index; + u32 context; + + u8 netmap_if_name[64]; + u8 hw_addr[6]; + u8 use_random_hw_addr; + u8 is_pipe; + u8 is_master; +}; + +/** \brief Delete netmap + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param netmap_if_name - interface name +*/ +autoreply define netmap_delete +{ + u32 client_index; + u32 context; + + u8 netmap_if_name[64]; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap.c b/src/plugins/netmap/netmap.c new file mode 100644 index 00000000000..ebef215eb3b --- /dev/null +++ b/src/plugins/netmap/netmap.c @@ -0,0 +1,334 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <netmap/net_netmap.h>
+#include <netmap/netmap.h>
+#include <netmap/netmap.api_enum.h>
+#include <netmap/netmap.api_types.h>
+
+netmap_main_t netmap_main;
+
+/*
+ * Read-ready callback installed on the netmap file descriptor by
+ * netmap_create_if(). uf->private_data carries the interface index;
+ * the interface is flagged in pending_input_bitmap and the input node
+ * is marked interrupt-pending. Always returns 0 (no error).
+ */
+static clib_error_t *
+netmap_fd_read_ready (clib_file_t * uf)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  netmap_main_t *nm = &netmap_main;
+  u32 idx = uf->private_data;
+
+  nm->pending_input_bitmap =
+    clib_bitmap_set (nm->pending_input_bitmap, idx, 1);
+
+  /* Schedule the rx node */
+  vlib_node_set_interrupt_pending (vm, netmap_input_node.index);
+
+  return 0;
+}
+
+/*
+ * Release everything a netmap_if_t holds and return it to the pool.
+ *
+ * Used both for regular deletion and as the error path of
+ * netmap_create_if(), so it must cope with a partially initialised
+ * interface: the name is only unhashed when it was actually stored, and
+ * a memory region reference is only dropped when mem_region is non-zero.
+ */
+static void
+close_netmap_if (netmap_main_t * nm, netmap_if_t * nif)
+{
+  if (nif->clib_file_index != ~0)
+    {
+      clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index);
+      nif->clib_file_index = ~0;
+    }
+  else if (nif->fd > -1)
+    close (nif->fd);
+
+  if (nif->mem_region)
+    {
+      netmap_mem_region_t *reg = &nm->mem_regions[nif->mem_region];
+      /* last user of the shared region unmaps it */
+      if (--reg->refcnt == 0)
+        {
+          munmap (reg->mem, reg->region_size);
+          reg->region_size = 0;
+        }
+    }
+
+  /* host_if_name is assigned right before the name is hashed, so a NULL
+   * name means there is no hash entry to remove */
+  if (nif->host_if_name)
+    mhash_unset (&nm->if_index_by_host_if_name, nif->host_if_name,
+                 &nif->if_index);
+  vec_free (nif->host_if_name);
+  vec_free (nif->req);
+
+  clib_memset (nif, 0, sizeof (*nif));
+  pool_put (nm->interfaces, nif);
+}
+
+/*
+ * Put the netmap input node into polling state on every vlib main.
+ * Always returns 0.
+ */
+int
+netmap_worker_thread_enable (void)
+{
+  /* if worker threads are enabled, switch to polling mode */
+  foreach_vlib_main ()
+  {
+    vlib_node_set_state (this_vlib_main, netmap_input_node.index,
+                         VLIB_NODE_STATE_POLLING);
+  }
+
+  return 0;
+}
+
+/*
+ * Put the netmap input node back into interrupt state on every vlib main.
+ * Always returns 0.
+ */
+int
+netmap_worker_thread_disable (void)
+{
+  foreach_vlib_main ()
+  {
+    vlib_node_set_state (this_vlib_main, netmap_input_node.index,
+                         VLIB_NODE_STATE_INTERRUPT);
+  }
+
+  return 0;
+}
+
+/*
+ * Create a netmap interface and register it as an ethernet interface.
+ *
+ * vm           - vlib main, used for the time base of the random MAC
+ * if_name      - name vector; on success the interface keeps this pointer
+ *                (it is freed later by close_netmap_if())
+ * hw_addr_set  - 6-byte MAC to use, or NULL to generate a random one
+ * is_pipe      - non-zero to bind a PIPE ring pair instead of all NIC rings
+ * is_master    - for pipes: non-zero selects the master side
+ * sw_if_index  - optional out parameter for the new sw_if_index
+ *
+ * Returns 0 on success, VNET_API_ERROR_SUBIF_ALREADY_EXISTS if the name
+ * is already registered, VNET_API_ERROR_SYSCALL_ERROR_1 if /dev/netmap
+ * cannot be opened, and VNET_API_ERROR_NOT_CONNECTED if NIOCREGIF or the
+ * mmap of the shared region fails.
+ */
+int
+netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set,
+                  u8 is_pipe, u8 is_master, u32 * sw_if_index)
+{
+  netmap_main_t *nm = &netmap_main;
+  int ret = 0;
+  uint32_t nr_reg;
+  netmap_if_t *nif = 0;
+  u8 hw_addr[6];
+  vnet_sw_interface_t *sw;
+  vnet_main_t *vnm = vnet_get_main ();
+  uword *p;
+  struct nmreq *req = 0;
+  netmap_mem_region_t *reg;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  int fd;
+
+  p = mhash_get (&nm->if_index_by_host_if_name, if_name);
+  if (p)
+    return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+
+  fd = open ("/dev/netmap", O_RDWR);
+  if (fd < 0)
+    /* a failed open() is a syscall failure, not a duplicate interface */
+    return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+  pool_get (nm->interfaces, nif);
+  /* start from a known state so the error path (close_netmap_if) never
+   * sees stale host_if_name / mem_region / req values */
+  clib_memset (nif, 0, sizeof (*nif));
+  nif->if_index = nif - nm->interfaces;
+  nif->fd = fd;
+  nif->clib_file_index = ~0;
+
+  vec_validate (req, 0);
+  nif->req = req;
+  req->nr_version = NETMAP_API;
+
+  if (is_pipe)
+    req->nr_flags = is_master ? NR_REG_PIPE_MASTER : NR_REG_PIPE_SLAVE;
+  else
+    req->nr_flags = NR_REG_ALL_NIC;
+
+  req->nr_flags |= NR_ACCEPT_VNET_HDR;
+  snprintf (req->nr_name, IFNAMSIZ, "%s", if_name);
+  req->nr_name[IFNAMSIZ - 1] = 0;
+
+  if (ioctl (nif->fd, NIOCREGIF, req))
+    {
+      ret = VNET_API_ERROR_NOT_CONNECTED;
+      goto error;
+    }
+
+  nif->mem_region = req->nr_arg2;
+  vec_validate (nm->mem_regions, nif->mem_region);
+  reg = &nm->mem_regions[nif->mem_region];
+  if (reg->region_size == 0)
+    {
+      /* first user of this region maps it */
+      reg->mem = mmap (NULL, req->nr_memsize, PROT_READ | PROT_WRITE,
+                       MAP_SHARED, fd, 0);
+      clib_warning ("mem %p", reg->mem);
+      if (reg->mem == MAP_FAILED)
+        {
+          reg->mem = 0;
+          /* no reference was taken on the region yet: make sure
+           * close_netmap_if() does not drop one */
+          nif->mem_region = 0;
+          ret = VNET_API_ERROR_NOT_CONNECTED;
+          goto error;
+        }
+      reg->region_size = req->nr_memsize;
+    }
+  reg->refcnt++;
+
+  nif->nifp = NETMAP_IF (reg->mem, req->nr_offset);
+  nr_reg = nif->req->nr_flags & NR_REG_MASK;
+
+  /* translate the registration mode into an inclusive ring range */
+  switch (nr_reg)
+    {
+    case NR_REG_SW:             /* host stack */
+      nif->first_tx_ring = nif->last_tx_ring = nif->req->nr_tx_rings;
+      nif->first_rx_ring = nif->last_rx_ring = nif->req->nr_rx_rings;
+      break;
+
+    case NR_REG_ALL_NIC:        /* only nic */
+      nif->first_tx_ring = 0;
+      nif->first_rx_ring = 0;
+      nif->last_tx_ring = nif->req->nr_tx_rings - 1;
+      nif->last_rx_ring = nif->req->nr_rx_rings - 1;
+      break;
+
+    case NR_REG_NIC_SW:
+      nif->first_tx_ring = 0;
+      nif->first_rx_ring = 0;
+      nif->last_tx_ring = nif->req->nr_tx_rings;
+      nif->last_rx_ring = nif->req->nr_rx_rings;
+      break;
+
+    case NR_REG_ONE_NIC:
+      /* XXX check validity */
+      nif->first_tx_ring = nif->last_tx_ring = nif->first_rx_ring =
+        nif->last_rx_ring = nif->req->nr_ringid & NETMAP_RING_MASK;
+      break;
+
+    default:                    /* pipes */
+      nif->first_tx_ring = nif->last_tx_ring = 0;
+      nif->first_rx_ring = nif->last_rx_ring = 0;
+      break;
+    }
+
+  nif->host_if_name = if_name;
+  nif->per_interface_next_index = ~0;
+
+  if (tm->n_vlib_mains > 1)
+    clib_spinlock_init (&nif->lockp);
+
+  {
+    clib_file_t template = { 0 };
+    template.read_function = netmap_fd_read_ready;
+    template.file_descriptor = nif->fd;
+    template.private_data = nif->if_index;
+    template.description = format (0, "netmap socket");
+    nif->clib_file_index = clib_file_add (&file_main, &template);
+  }
+
+  /*use configured or generate random MAC address */
+  if (hw_addr_set)
+    memcpy (hw_addr, hw_addr_set, 6);
+  else
+    {
+      f64 now = vlib_time_now (vm);
+      u32 rnd;
+      rnd = (u32) (now * 1e6);
+      rnd = random_u32 (&rnd);
+
+      memcpy (hw_addr + 2, &rnd, sizeof (rnd));
+      hw_addr[0] = 2;
+      hw_addr[1] = 0xfe;
+    }
+
+  vnet_eth_interface_registration_t eir = {};
+
+  eir.dev_class_index = netmap_device_class.index;
+  eir.dev_instance = nif->if_index;
+  eir.address = hw_addr;
+  eir.cb.set_max_frame_size = NULL;
+
+  nif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+
+  sw = vnet_get_hw_sw_interface (vnm, nif->hw_if_index);
+  nif->sw_if_index = sw->sw_if_index;
+
+  mhash_set_mem (&nm->if_index_by_host_if_name, if_name, &nif->if_index, 0);
+
+  if (sw_if_index)
+    *sw_if_index = nif->sw_if_index;
+
+  /* first interface with workers present: move the input node to polling */
+  if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 1)
+    netmap_worker_thread_enable ();
+
+  return 0;
+
+error:
+  close_netmap_if (nm, nif);
+  return ret;
+}
+ +int +netmap_delete_if (vlib_main_t * vm, u8 * host_if_name) +{ + vnet_main_t *vnm = vnet_get_main (); + netmap_main_t *nm = &netmap_main; + netmap_if_t *nif; + uword *p; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + p = mhash_get (&nm->if_index_by_host_if_name, host_if_name); + if (p == NULL) + { + clib_warning ("Host interface %s does not exist", host_if_name); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + nif = pool_elt_at_index (nm->interfaces, p[0]); + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, nif->hw_if_index, 0); + + ethernet_delete_interface (vnm, nif->hw_if_index); + + close_netmap_if (nm, nif); + + if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 0) + netmap_worker_thread_disable (); + + return 0; +} + +static clib_error_t * +netmap_init (vlib_main_t * vm) +{ + netmap_main_t *nm = &netmap_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + uword *p; + + clib_memset (nm, 0, sizeof (netmap_main_t)); + + nm->input_cpu_first_index = 0; + nm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? 
(vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + nm->input_cpu_first_index = tr->first_index; + nm->input_cpu_count = tr->count; + } + + mhash_init_vec_string (&nm->if_index_by_host_if_name, sizeof (uword)); + + vec_validate_aligned (nm->rx_buffers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + return 0; +} + +VLIB_INIT_FUNCTION (netmap_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap.h b/src/plugins/netmap/netmap.h new file mode 100644 index 00000000000..29f855fda8e --- /dev/null +++ b/src/plugins/netmap/netmap.h @@ -0,0 +1,166 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +/* + * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <vppinfra/lock.h> + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + clib_spinlock_t lockp; + u8 *host_if_name; + uword if_index; + u32 hw_if_index; + u32 sw_if_index; + u32 clib_file_index; + + u32 per_interface_next_index; + u8 is_admin_up; + + /* netmap */ + struct nmreq *req; + u16 mem_region; + int fd; + struct netmap_if *nifp; + u16 first_tx_ring; + u16 last_tx_ring; + u16 first_rx_ring; + u16 last_rx_ring; + +} netmap_if_t; + +typedef struct +{ + char *mem; + u32 region_size; + int refcnt; +} netmap_mem_region_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + netmap_if_t *interfaces; + + /* bitmap of pending rx interfaces */ + uword *pending_input_bitmap; + + /* rx buffer cache */ + u32 **rx_buffers; + + /* hash of host interface names */ + mhash_t if_index_by_host_if_name; + + /* vector of memory regions */ + netmap_mem_region_t *mem_regions; + + /* first cpu index */ + u32 input_cpu_first_index; + + /* total cpu count */ + u32 
input_cpu_count; +} netmap_main_t; + +extern netmap_main_t netmap_main; +extern vnet_device_class_t netmap_device_class; +extern vlib_node_registration_t netmap_input_node; + +int netmap_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, + u8 is_pipe, u8 is_master, u32 * sw_if_index); +int netmap_delete_if (vlib_main_t * vm, u8 * host_if_name); + + +/* Macros and helper functions from sys/net/netmap_user.h */ + +#ifdef _NET_NETMAP_H_ + +#define _NETMAP_OFFSET(type, ptr, offset) \ + ((type)(void *)((char *)(ptr) + (offset))) + +#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) + +#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ + nifp, (nifp)->ring_ofs[index] ) + +#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ + nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) + +#define NETMAP_BUF(ring, index) \ + ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) + +#define NETMAP_BUF_IDX(ring, buf) \ + ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ + (ring)->nr_buf_size ) + +static inline uint32_t +nm_ring_next (struct netmap_ring *ring, uint32_t i) +{ + return (PREDICT_FALSE (i + 1 == ring->num_slots) ? 0 : i + 1); +} + + +/* + * Return 1 if we have pending transmissions in the tx ring. 
+ * When everything is complete ring->head = ring->tail + 1 (modulo ring size) + */ +static inline int +nm_tx_pending (struct netmap_ring *ring) +{ + return nm_ring_next (ring, ring->tail) != ring->head; +} + +static inline uint32_t +nm_ring_space (struct netmap_ring *ring) +{ + int ret = ring->tail - ring->cur; + if (ret < 0) + ret += ring->num_slots; + return ret; +} +#endif + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap_api.c b/src/plugins/netmap/netmap_api.c new file mode 100644 index 00000000000..51f572a23e6 --- /dev/null +++ b/src/plugins/netmap/netmap_api.c @@ -0,0 +1,95 @@ +/* + *------------------------------------------------------------------ + * netmap_api.c - netmap api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <netmap/netmap.h> + +#include <vnet/format_fns.h> +#include <netmap/netmap.api_enum.h> +#include <netmap/netmap.api_types.h> + +#include <vlibapi/api_helper_macros.h> + +#define foreach_vpe_api_msg \ +_(NETMAP_CREATE, netmap_create) \ +_(NETMAP_DELETE, netmap_delete) \ + +static void +vl_api_netmap_create_t_handler (vl_api_netmap_create_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_netmap_create_reply_t *rmp; + int rv = 0; + u8 *if_name = NULL; + + if_name = format (0, "%s", mp->netmap_if_name); + vec_add1 (if_name, 0); + + rv = + netmap_create_if (vm, if_name, mp->use_random_hw_addr ? 0 : mp->hw_addr, + mp->is_pipe, mp->is_master, 0); + + vec_free (if_name); + + REPLY_MACRO (VL_API_NETMAP_CREATE_REPLY); +} + +static void +vl_api_netmap_delete_t_handler (vl_api_netmap_delete_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_netmap_delete_reply_t *rmp; + int rv = 0; + u8 *if_name = NULL; + + if_name = format (0, "%s", mp->netmap_if_name); + vec_add1 (if_name, 0); + + rv = netmap_delete_if (vm, if_name); + + vec_free (if_name); + + REPLY_MACRO (VL_API_NETMAP_DELETE_REPLY); +} + +#include <netmap/netmap.api.c> +static clib_error_t * +netmap_api_hookup (vlib_main_t * vm) +{ + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (); + + return 0; +} + +VLIB_API_INIT_FUNCTION (netmap_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/node.c b/src/plugins/netmap/node.c new file mode 100644 index 00000000000..6169847fa79 --- /dev/null +++ b/src/plugins/netmap/node.c @@ -0,0 +1,295 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/devices.h> +#include <vnet/feature/feature.h> + +#include <netmap/net_netmap.h> +#include <netmap/netmap.h> + +#define foreach_netmap_input_error + +typedef enum +{ +#define _(f,s) NETMAP_INPUT_ERROR_##f, + foreach_netmap_input_error +#undef _ + NETMAP_INPUT_N_ERROR, +} netmap_input_error_t; + +static char *netmap_input_error_strings[] = { +#define _(n,s) s, + foreach_netmap_input_error +#undef _ +}; + +typedef struct +{ + u32 next_index; + u32 hw_if_index; + struct netmap_slot slot; +} netmap_input_trace_t; + +static u8 * +format_netmap_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + netmap_input_trace_t *t = va_arg (*args, netmap_input_trace_t *); + u32 indent = format_get_indent (s); + + s = format (s, "netmap: hw_if_index %d next-index %d", + t->hw_if_index, t->next_index); + s = format (s, "\n%Uslot: flags 0x%x len %u buf_idx %u", + format_white_space, indent + 2, + t->slot.flags, t->slot.len, t->slot.buf_idx); + return s; +} + +always_inline void +buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi) +{ + 
vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi); + vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi); + + /* update first buffer */ + first_b->total_length_not_including_first_buffer += b->current_length; + + /* update previous buffer */ + prev_b->next_buffer = bi; + prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT; + + /* update current buffer */ + b->next_buffer = 0; +} + +always_inline uword +netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, netmap_if_t * nif) +{ + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + uword n_trace = vlib_get_trace_count (vm, node); + netmap_main_t *nm = &netmap_main; + u32 n_rx_packets = 0; + u32 n_rx_bytes = 0; + u32 *to_next = 0; + u32 n_free_bufs; + struct netmap_ring *ring; + int cur_ring; + u32 thread_index = vm->thread_index; + u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); + + if (nif->per_interface_next_index != ~0) + next_index = nif->per_interface_next_index; + + n_free_bufs = vec_len (nm->rx_buffers[thread_index]); + if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) + { + vec_validate (nm->rx_buffers[thread_index], + VLIB_FRAME_SIZE + n_free_bufs - 1); + n_free_bufs += + vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs], + VLIB_FRAME_SIZE); + vec_set_len (nm->rx_buffers[thread_index], n_free_bufs); + } + + cur_ring = nif->first_rx_ring; + while (cur_ring <= nif->last_rx_ring && n_free_bufs) + { + int r = 0; + u32 cur_slot_index; + ring = NETMAP_RXRING (nif->nifp, cur_ring); + r = nm_ring_space (ring); + + if (!r) + { + cur_ring++; + continue; + } + + if (r > n_free_bufs) + r = n_free_bufs; + + cur_slot_index = ring->cur; + while (r) + { + u32 n_left_to_next; + u32 next0 = next_index; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (r && n_left_to_next) + { + vlib_buffer_t *first_b0 = 0; + u32 offset = 0; + u32 bi0 = 0, first_bi0 = 0, prev_bi0; + u32 
next_slot_index = (cur_slot_index + 1) % ring->num_slots; + u32 next2_slot_index = (cur_slot_index + 2) % ring->num_slots; + struct netmap_slot *slot = &ring->slot[cur_slot_index]; + u32 data_len = slot->len; + + /* prefetch 2 slots in advance */ + CLIB_PREFETCH (&ring->slot[next2_slot_index], + CLIB_CACHE_LINE_BYTES, LOAD); + /* prefetch start of next packet */ + CLIB_PREFETCH (NETMAP_BUF + (ring, ring->slot[next_slot_index].buf_idx), + CLIB_CACHE_LINE_BYTES, LOAD); + + while (data_len && n_free_bufs) + { + vlib_buffer_t *b0; + /* grab free buffer */ + u32 last_empty_buffer = + vec_len (nm->rx_buffers[thread_index]) - 1; + prev_bi0 = bi0; + bi0 = nm->rx_buffers[thread_index][last_empty_buffer]; + b0 = vlib_get_buffer (vm, bi0); + vec_set_len (nm->rx_buffers[thread_index], + last_empty_buffer); + n_free_bufs--; + + /* copy data */ + u32 bytes_to_copy = + data_len > n_buffer_bytes ? n_buffer_bytes : data_len; + b0->current_data = 0; + clib_memcpy_fast (vlib_buffer_get_current (b0), + (u8 *) NETMAP_BUF (ring, slot->buf_idx) + + offset, bytes_to_copy); + + /* fill buffer header */ + b0->current_length = bytes_to_copy; + + if (offset == 0) + { + b0->total_length_not_including_first_buffer = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + nif->sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + first_bi0 = bi0; + first_b0 = vlib_get_buffer (vm, first_bi0); + } + else + buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0); + + offset += bytes_to_copy; + data_len -= bytes_to_copy; + } + + /* trace */ + if (PREDICT_FALSE (n_trace > 0)) + { + if (PREDICT_TRUE (first_b0 != 0) && + vlib_trace_buffer (vm, node, next0, first_b0, + /* follow_chain */ 0)) + { + netmap_input_trace_t *tr; + + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = nif->hw_if_index; + memcpy (&tr->slot, slot, sizeof (struct netmap_slot)); + } + } + 
+ /* enque and take next packet */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, first_bi0, + next0); + + /* next packet */ + n_rx_packets++; + n_rx_bytes += slot->len; + to_next[0] = first_bi0; + to_next += 1; + n_left_to_next--; + cur_slot_index = next_slot_index; + + r--; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + ring->head = ring->cur = cur_slot_index; + cur_ring++; + } + + if (n_rx_packets) + ioctl (nif->fd, NIOCRXSYNC, NULL); + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes); + + vnet_device_increment_rx_packets (thread_index, n_rx_packets); + + return n_rx_packets; +} + +VLIB_NODE_FN (netmap_input_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + int i; + u32 n_rx_packets = 0; + u32 thread_index = vm->thread_index; + netmap_main_t *nm = &netmap_main; + netmap_if_t *nmi; + + for (i = 0; i < vec_len (nm->interfaces); i++) + { + nmi = vec_elt_at_index (nm->interfaces, i); + if (nmi->is_admin_up && + (i % nm->input_cpu_count) == + (thread_index - nm->input_cpu_first_index)) + n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi); + } + + return n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (netmap_input_node) = { + .name = "netmap-input", + .sibling_of = "device-input", + .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, + .format_trace = format_netmap_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + /* default state is INTERRUPT mode, switch to POLLING if worker threads are enabled */ + .state = VLIB_NODE_STATE_INTERRUPT, + .n_errors = NETMAP_INPUT_N_ERROR, + .error_strings = netmap_input_error_strings, +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/plugin.c 
b/src/plugins/netmap/plugin.c new file mode 100644 index 00000000000..1673225b683 --- /dev/null +++ b/src/plugins/netmap/plugin.c @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Tom Jones <thj@freebsd.org> + * + * This software was developed by Tom Jones <thj@freebsd.org> under sponsorship + * from the FreeBSD Foundation. + * + */ + +#include <vlib/vlib.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "netmap", +}; diff --git a/src/plugins/quic/quic.c b/src/plugins/quic/quic.c index 60d4ac21c19..3f7a3426069 100644 --- a/src/plugins/quic/quic.c +++ b/src/plugins/quic/quic.c @@ -1058,6 +1058,8 @@ quic_on_stream_open (quicly_stream_open_t * self, quicly_stream_t * stream) svm_fifo_add_want_deq_ntf (stream_session->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL | SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY); + svm_fifo_init_ooo_lookup (stream_session->rx_fifo, 0 /* ooo enq */); + svm_fifo_init_ooo_lookup (stream_session->tx_fifo, 1 /* ooo deq */); stream_session->session_state = SESSION_STATE_ACCEPTING; if ((rv = app_worker_accept_notify (app_wrk, stream_session))) @@ -1302,6 +1304,8 @@ quic_connect_stream (session_t * quic_session, session_endpoint_cfg_t * sep) return app_worker_connect_notify (app_wrk, NULL, rv, sep->opaque); } + svm_fifo_init_ooo_lookup (stream_session->rx_fifo, 0 /* ooo enq */); + svm_fifo_init_ooo_lookup (stream_session->tx_fifo, 1 /* ooo deq */); svm_fifo_add_want_deq_ntf (stream_session->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL | SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY); @@ -1679,6 +1683,9 @@ quic_on_quic_session_connected (quic_ctx_t * ctx) return; } + svm_fifo_init_ooo_lookup (quic_session->rx_fifo, 0 /* ooo enq */); + svm_fifo_init_ooo_lookup (quic_session->tx_fifo, 1 /* ooo deq */); + quic_session->session_state = SESSION_STATE_CONNECTING; if ((rv = app_worker_connect_notify (app_wrk, quic_session, SESSION_E_NONE, ctx->client_opaque))) @@ 
-2137,6 +2144,9 @@ quic_accept_connection (quic_rx_packet_ctx_t * pctx) return; } + svm_fifo_init_ooo_lookup (quic_session->rx_fifo, 0 /* ooo enq */); + svm_fifo_init_ooo_lookup (quic_session->tx_fifo, 1 /* ooo deq */); + app_wrk = app_worker_get (quic_session->app_wrk_index); quic_session->session_state = SESSION_STATE_ACCEPTING; if ((rv = app_worker_accept_notify (app_wrk, quic_session))) diff --git a/src/plugins/srmpls/CMakeLists.txt b/src/plugins/srmpls/CMakeLists.txt new file mode 100644 index 00000000000..25905d31e1b --- /dev/null +++ b/src/plugins/srmpls/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) 2024 Cisco and/or its affiliates +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_vpp_plugin(srmpls + SOURCES + sr_mpls_policy.c + sr_mpls_steering.c + sr_mpls_api.c + plugin.c + + INSTALL_HEADERS + sr_mpls.h + + API_FILES + sr_mpls.api + + # This might need to be VAT_AUTO_TEST? 
Not documented + API_TEST_SOURCES + sr_mpls_test.c +) diff --git a/src/vnet/srmpls/FEATURE.yaml b/src/plugins/srmpls/FEATURE.yaml index c5b958224c7..c5b958224c7 100644 --- a/src/vnet/srmpls/FEATURE.yaml +++ b/src/plugins/srmpls/FEATURE.yaml diff --git a/src/vnet/srmpls/dir.dox b/src/plugins/srmpls/dir.dox index 76ec1d6a41b..76ec1d6a41b 100644 --- a/src/vnet/srmpls/dir.dox +++ b/src/plugins/srmpls/dir.dox diff --git a/src/plugins/srmpls/plugin.c b/src/plugins/srmpls/plugin.c new file mode 100644 index 00000000000..af87607764f --- /dev/null +++ b/src/plugins/srmpls/plugin.c @@ -0,0 +1,26 @@ +/* + * plugin.c: srmpls + * + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> + +// register a plugin +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Segment Routing for MPLS plugin", +}; diff --git a/src/vnet/srmpls/sr_doc.rst b/src/plugins/srmpls/sr_doc.rst index ed847fa0d42..ed847fa0d42 100644 --- a/src/vnet/srmpls/sr_doc.rst +++ b/src/plugins/srmpls/sr_doc.rst diff --git a/src/vnet/srmpls/sr_mpls.api b/src/plugins/srmpls/sr_mpls.api index 742f135d493..742f135d493 100644 --- a/src/vnet/srmpls/sr_mpls.api +++ b/src/plugins/srmpls/sr_mpls.api diff --git a/src/vnet/srmpls/sr_mpls.h b/src/plugins/srmpls/sr_mpls.h index a8f9494428f..a8f9494428f 100644 --- a/src/vnet/srmpls/sr_mpls.h +++ b/src/plugins/srmpls/sr_mpls.h diff --git a/src/vnet/srmpls/sr_mpls_api.c b/src/plugins/srmpls/sr_mpls_api.c index 920856acff6..3e89017dbc1 100644 --- a/src/vnet/srmpls/sr_mpls_api.c +++ b/src/plugins/srmpls/sr_mpls_api.c @@ -17,7 +17,7 @@ */ #include <vnet/vnet.h> -#include <vnet/srmpls/sr_mpls.h> +#include "sr_mpls.h" #include <vlibmemory/api.h> #include <vnet/interface.h> @@ -26,28 +26,27 @@ #include <vnet/ip/ip_types_api.h> #include <vnet/format_fns.h> -#include <vnet/srmpls/sr_mpls.api_enum.h> -#include <vnet/srmpls/sr_mpls.api_types.h> - +#include <plugins/srmpls/sr_mpls.api_enum.h> +#include <plugins/srmpls/sr_mpls.api_types.h> #define vl_api_version(n, v) static u32 api_version = v; -#include <vnet/srmpls/sr_mpls.api.h> +#include <plugins/srmpls/sr_mpls.api.h> #undef vl_api_version #define vl_endianfun -#include <vnet/srmpls/sr_mpls.api.h> +#include <plugins/srmpls/sr_mpls.api.h> #undef vl_endianfun #define vl_calcsizefun -#include <vnet/srmpls/sr_mpls.api.h> +#include <plugins/srmpls/sr_mpls.api.h> #undef vl_calcsizefun #define vl_printfun -#include <vnet/srmpls/sr_mpls.api.h> +#include <plugins/srmpls/sr_mpls.api.h> #undef vl_printfun #define vl_msg_name_crc_list -#include <vnet/srmpls/sr_mpls.api.h> +#include 
<plugins/srmpls/sr_mpls.api.h> #undef vl_msg_name_crc_list #define REPLY_MSG_ID_BASE msg_id_base diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/plugins/srmpls/sr_mpls_policy.c index 41cb71601e9..af24acd8cf6 100644 --- a/src/vnet/srmpls/sr_mpls_policy.c +++ b/src/plugins/srmpls/sr_mpls_policy.c @@ -31,7 +31,7 @@ #include <vlib/vlib.h> #include <vnet/vnet.h> -#include <vnet/srmpls/sr_mpls.h> +#include "sr_mpls.h" #include <vnet/fib/mpls_fib.h> #include <vnet/dpo/dpo.h> #include <vnet/ip/ip.h> diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/plugins/srmpls/sr_mpls_steering.c index e8920df542b..24c8b0e2d9f 100644 --- a/src/vnet/srmpls/sr_mpls_steering.c +++ b/src/plugins/srmpls/sr_mpls_steering.c @@ -31,7 +31,7 @@ #include <vlib/vlib.h> #include <vnet/vnet.h> -#include <vnet/srmpls/sr_mpls.h> +#include "sr_mpls.h" #include <vnet/ip/ip4_packet.h> #include <vnet/ip/ip6_packet.h> #include <vnet/fib/mpls_fib.h> diff --git a/src/vnet/srmpls/sr_mpls_test.c b/src/plugins/srmpls/sr_mpls_test.c index e5d68462443..7aff4c32b06 100644 --- a/src/vnet/srmpls/sr_mpls_test.c +++ b/src/plugins/srmpls/sr_mpls_test.c @@ -25,11 +25,11 @@ /* Declare message IDs */ #include <vnet/format_fns.h> -#include <vnet/srmpls/sr_mpls.api_enum.h> -#include <vnet/srmpls/sr_mpls.api_types.h> +#include <plugins/srmpls/sr_mpls.api_enum.h> +#include <plugins/srmpls/sr_mpls.api_types.h> #define vl_endianfun /* define message structures */ -#include <vnet/srmpls/sr_mpls.api.h> +#include <plugins/srmpls/sr_mpls.api.h> #undef vl_endianfun typedef struct @@ -163,7 +163,7 @@ api_sr_mpls_policy_del (vat_main_t *vam) return ret; } -#include <vnet/srmpls/sr_mpls.api_test.c> +#include <plugins/srmpls/sr_mpls.api_test.c> /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/urpf/CMakeLists.txt b/src/plugins/urpf/CMakeLists.txt index 2f44e3b2344..f665d30b0bb 100644 --- a/src/plugins/urpf/CMakeLists.txt +++ b/src/plugins/urpf/CMakeLists.txt @@ -22,6 +22,10 @@ add_vpp_plugin(urpf ip4_urpf.c 
ip6_urpf.c + INSTALL_HEADERS + urpf_dp.h + urpf.h + API_FILES urpf.api ) diff --git a/src/plugins/urpf/urpf.c b/src/plugins/urpf/urpf.c index e5209caafb4..1e7d6c0fb91 100644 --- a/src/plugins/urpf/urpf.c +++ b/src/plugins/urpf/urpf.c @@ -60,7 +60,17 @@ static const char *urpf_feats[N_AF][VLIB_N_DIR][URPF_N_MODES] = urpf_data_t *urpf_cfgs[N_AF][VLIB_N_DIR]; u8 * -format_urpf_mode (u8 * s, va_list * a) +format_urpf_trace (u8 *s, va_list *va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + urpf_trace_t *t = va_arg (*va, urpf_trace_t *); + + return format (s, "uRPF:%d fib:%d", t->urpf, t->fib_index); +} + +__clib_export u8 * +format_urpf_mode (u8 *s, va_list *a) { urpf_mode_t mode = va_arg (*a, int); @@ -76,8 +86,8 @@ format_urpf_mode (u8 * s, va_list * a) return (format (s, "unknown")); } -static uword -unformat_urpf_mode (unformat_input_t * input, va_list * args) +__clib_export uword +unformat_urpf_mode (unformat_input_t *input, va_list *args) { urpf_mode_t *mode = va_arg (*args, urpf_mode_t *); @@ -94,7 +104,16 @@ unformat_urpf_mode (unformat_input_t * input, va_list * args) return 0; } -int +__clib_export int +urpf_feature_enable_disable (ip_address_family_t af, vlib_dir_t dir, + urpf_mode_t mode, u32 sw_if_index, int enable) +{ + return vnet_feature_enable_disable (urpf_feat_arcs[af][dir], + urpf_feats[af][dir][mode], sw_if_index, + enable, 0, 0); +} + +__clib_export int urpf_update (urpf_mode_t mode, u32 sw_if_index, ip_address_family_t af, vlib_dir_t dir, u32 table_id) { diff --git a/src/plugins/urpf/urpf.h b/src/plugins/urpf/urpf.h index 6983a2b440c..a40a25df16b 100644 --- a/src/plugins/urpf/urpf.h +++ b/src/plugins/urpf/urpf.h @@ -32,7 +32,15 @@ typedef enum urpf_mode_t_ #define URPF_N_MODES (URPF_MODE_STRICT+1) -extern u8 *format_urpf_mode (u8 * s, va_list * a); +typedef struct +{ + index_t urpf; + u32 fib_index; +} urpf_trace_t; + +u8 *format_urpf_trace (u8 *s, va_list 
*va); +u8 *format_urpf_mode (u8 *s, va_list *a); +uword unformat_urpf_mode (unformat_input_t *input, va_list *args); typedef struct { @@ -43,8 +51,8 @@ typedef struct extern urpf_data_t *urpf_cfgs[N_AF][VLIB_N_DIR]; -extern int urpf_update (urpf_mode_t mode, u32 sw_if_index, - ip_address_family_t af, vlib_dir_t dir, u32 table_id); +int urpf_update (urpf_mode_t mode, u32 sw_if_index, ip_address_family_t af, + vlib_dir_t dir, u32 table_id); #endif diff --git a/src/plugins/urpf/urpf_dp.h b/src/plugins/urpf/urpf_dp.h index 816d8b70b90..b17fed7e04b 100644 --- a/src/plugins/urpf/urpf_dp.h +++ b/src/plugins/urpf/urpf_dp.h @@ -53,22 +53,6 @@ * * This file contains the interface unicast source check. */ -typedef struct -{ - index_t urpf; -} urpf_trace_t; - -static u8 * -format_urpf_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - urpf_trace_t *t = va_arg (*va, urpf_trace_t *); - - s = format (s, "uRPF:%d", t->urpf); - - return s; -} #define foreach_urpf_error \ _(DROP, "uRPF Drop") \ @@ -87,10 +71,157 @@ typedef enum URPF_N_NEXT, } urpf_next_t; +static_always_inline u32 +urpf_get_fib_index (vlib_buffer_t *b, ip_address_family_t af, vlib_dir_t dir) +{ + u32 sw_if_index = vnet_buffer (b)->sw_if_index[dir]; + return vec_elt (urpf_cfgs[af][dir], sw_if_index).fib_index; +} + +static_always_inline void +urpf_perform_check_x1 (ip_address_family_t af, vlib_dir_t dir, + urpf_mode_t mode, vlib_buffer_t *b, const u8 *h, + u32 fib_index, load_balance_t **lb, u32 *pass) +{ + load_balance_t *llb; + u32 lpass; + u32 lb_index; + + ASSERT (fib_index != ~0); + + if (AF_IP4 == af) + { + const ip4_header_t *ip; + + ip = (ip4_header_t *) h; + + lb_index = ip4_fib_forwarding_lookup (fib_index, &ip->src_address); + + /* Pass multicast. 
*/ + lpass = (ip4_address_is_multicast (&ip->src_address) || + ip4_address_is_global_broadcast (&ip->src_address)); + } + else + { + const ip6_header_t *ip; + + ip = (ip6_header_t *) h; + + lb_index = ip6_fib_table_fwding_lookup (fib_index, &ip->src_address); + lpass = ip6_address_is_multicast (&ip->src_address); + } + + llb = load_balance_get (lb_index); + + if (URPF_MODE_STRICT == mode) + { + int res; + + res = fib_urpf_check (llb->lb_urpf, vnet_buffer (b)->sw_if_index[dir]); + if (VLIB_RX == dir) + lpass |= res; + else + { + lpass |= !res && fib_urpf_check_size (llb->lb_urpf); + lpass |= b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + } + } + else + lpass |= fib_urpf_check_size (llb->lb_urpf); + + *lb = llb; + *pass = lpass; +} + +static_always_inline void +urpf_perform_check_x2 (ip_address_family_t af, vlib_dir_t dir, + urpf_mode_t mode, vlib_buffer_t *b0, vlib_buffer_t *b1, + const u8 *h0, const u8 *h1, u32 fib_index0, + u32 fib_index1, load_balance_t **lb0, + load_balance_t **lb1, u32 *pass0, u32 *pass1) +{ + load_balance_t *llb0, *llb1; + u32 lpass0, lpass1; + u32 lb_index0, lb_index1; + + ASSERT (fib_index0 != ~0); + ASSERT (fib_index1 != ~0); + + if (AF_IP4 == af) + { + const ip4_header_t *ip0, *ip1; + + ip0 = (ip4_header_t *) h0; + ip1 = (ip4_header_t *) h1; + + ip4_fib_forwarding_lookup_x2 (fib_index0, fib_index1, &ip0->src_address, + &ip1->src_address, &lb_index0, &lb_index1); + /* Pass multicast. 
*/ + lpass0 = (ip4_address_is_multicast (&ip0->src_address) || + ip4_address_is_global_broadcast (&ip0->src_address)); + lpass1 = (ip4_address_is_multicast (&ip1->src_address) || + ip4_address_is_global_broadcast (&ip1->src_address)); + } + else + { + const ip6_header_t *ip0, *ip1; + + ip0 = (ip6_header_t *) h0; + ip1 = (ip6_header_t *) h1; + + lb_index0 = ip6_fib_table_fwding_lookup (fib_index0, &ip0->src_address); + lb_index1 = ip6_fib_table_fwding_lookup (fib_index1, &ip1->src_address); + lpass0 = ip6_address_is_multicast (&ip0->src_address); + lpass1 = ip6_address_is_multicast (&ip1->src_address); + } + + llb0 = load_balance_get (lb_index0); + llb1 = load_balance_get (lb_index1); + + if (URPF_MODE_STRICT == mode) + { + /* for RX the check is: would this source adddress be + * forwarded out of the interface on which it was recieved, + * if yes allow. For TX it's; would this source address be + * forwarded out of the interface through which it is being + * sent, if yes drop. + */ + int res0, res1; + + res0 = + fib_urpf_check (llb0->lb_urpf, vnet_buffer (b0)->sw_if_index[dir]); + res1 = + fib_urpf_check (llb1->lb_urpf, vnet_buffer (b1)->sw_if_index[dir]); + + if (VLIB_RX == dir) + { + lpass0 |= res0; + lpass1 |= res1; + } + else + { + lpass0 |= !res0 && fib_urpf_check_size (llb0->lb_urpf); + lpass1 |= !res1 && fib_urpf_check_size (llb1->lb_urpf); + + /* allow locally generated */ + lpass0 |= b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + lpass1 |= b1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + } + } + else + { + lpass0 |= fib_urpf_check_size (llb0->lb_urpf); + lpass1 |= fib_urpf_check_size (llb1->lb_urpf); + } + + *lb0 = llb0; + *lb1 = llb1; + *pass0 = lpass0; + *pass1 = lpass1; +} + static_always_inline uword -urpf_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, +urpf_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, ip_address_family_t af, vlib_dir_t dir, urpf_mode_t mode) { vlib_buffer_t 
*bufs[VLIB_FRAME_SIZE], **b; @@ -106,8 +237,8 @@ urpf_inline (vlib_main_t * vm, while (n_left >= 4) { - u32 pass0, lb_index0, pass1, lb_index1; - const load_balance_t *lb0, *lb1; + u32 pass0, pass1; + load_balance_t *lb0 = 0, *lb1 = 0; u32 fib_index0, fib_index1; const u8 *h0, *h1; @@ -121,87 +252,32 @@ urpf_inline (vlib_main_t * vm, h0 = (u8 *) vlib_buffer_get_current (b[0]); h1 = (u8 *) vlib_buffer_get_current (b[1]); - if (VLIB_TX == dir) { h0 += vnet_buffer (b[0])->ip.save_rewrite_length; h1 += vnet_buffer (b[1])->ip.save_rewrite_length; } - fib_index0 = - urpf_cfgs[af][dir][vnet_buffer (b[0])->sw_if_index[dir]].fib_index; - fib_index1 = - urpf_cfgs[af][dir][vnet_buffer (b[1])->sw_if_index[dir]].fib_index; + fib_index0 = urpf_get_fib_index (b[0], af, dir); + fib_index1 = urpf_get_fib_index (b[1], af, dir); + urpf_perform_check_x2 (af, dir, mode, b[0], b[1], h0, h1, fib_index0, + fib_index1, &lb0, &lb1, &pass0, &pass1); - if (AF_IP4 == af) - { - const ip4_header_t *ip0, *ip1; - - ip0 = (ip4_header_t *) h0; - ip1 = (ip4_header_t *) h1; - - ip4_fib_forwarding_lookup_x2 (fib_index0, - fib_index1, - &ip0->src_address, - &ip1->src_address, - &lb_index0, &lb_index1); - /* Pass multicast. 
*/ - pass0 = (ip4_address_is_multicast (&ip0->src_address) || - ip4_address_is_global_broadcast (&ip0->src_address)); - pass1 = (ip4_address_is_multicast (&ip1->src_address) || - ip4_address_is_global_broadcast (&ip1->src_address)); - } - else + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) { - const ip6_header_t *ip0, *ip1; - - ip0 = (ip6_header_t *) h0; - ip1 = (ip6_header_t *) h1; - - lb_index0 = ip6_fib_table_fwding_lookup (fib_index0, - &ip0->src_address); - lb_index1 = ip6_fib_table_fwding_lookup (fib_index1, - &ip1->src_address); - pass0 = ip6_address_is_multicast (&ip0->src_address); - pass1 = ip6_address_is_multicast (&ip1->src_address); - } - - lb0 = load_balance_get (lb_index0); - lb1 = load_balance_get (lb_index1); + urpf_trace_t *t; - if (URPF_MODE_STRICT == mode) - { - /* for RX the check is: would this source adddress be forwarded - * out of the interface on which it was recieved, if yes allow. - * For TX it's; would this source address be forwarded out of the - * interface through which it is being sent, if yes drop. - */ - int res0, res1; - - res0 = fib_urpf_check (lb0->lb_urpf, - vnet_buffer (b[0])->sw_if_index[dir]); - res1 = fib_urpf_check (lb1->lb_urpf, - vnet_buffer (b[1])->sw_if_index[dir]); - - if (VLIB_RX == dir) - { - pass0 |= res0; - pass1 |= res1; - } - else - { - pass0 |= !res0 && fib_urpf_check_size (lb0->lb_urpf); - pass1 |= !res1 && fib_urpf_check_size (lb1->lb_urpf); - - /* allow locally generated */ - pass0 |= b[0]->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; - pass1 |= b[1]->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; - } + t = vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->urpf = lb0 ? lb0->lb_urpf : ~0; + t->fib_index = fib_index0; } - else + if (b[1]->flags & VLIB_BUFFER_IS_TRACED) { - pass0 |= fib_urpf_check_size (lb0->lb_urpf); - pass1 |= fib_urpf_check_size (lb1->lb_urpf); + urpf_trace_t *t; + + t = vlib_add_trace (vm, node, b[1], sizeof (*t)); + t->urpf = lb1 ? 
lb1->lb_urpf : ~0; + t->fib_index = fib_index1; } if (PREDICT_TRUE (pass0)) @@ -218,22 +294,6 @@ urpf_inline (vlib_main_t * vm, next[1] = URPF_NEXT_DROP; b[1]->error = node->errors[URPF_ERROR_DROP]; } - - if (b[0]->flags & VLIB_BUFFER_IS_TRACED) - { - urpf_trace_t *t; - - t = vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->urpf = lb0->lb_urpf; - } - if (b[1]->flags & VLIB_BUFFER_IS_TRACED) - { - urpf_trace_t *t; - - t = vlib_add_trace (vm, node, b[1], sizeof (*t)); - t->urpf = lb1->lb_urpf; - } - b += 2; next += 2; n_left -= 2; @@ -241,8 +301,8 @@ urpf_inline (vlib_main_t * vm, while (n_left) { - u32 pass0, lb_index0, fib_index0; - const load_balance_t *lb0; + u32 pass0, fib_index0; + load_balance_t *lb0 = 0; const u8 *h0; h0 = (u8 *) vlib_buffer_get_current (b[0]); @@ -250,51 +310,18 @@ urpf_inline (vlib_main_t * vm, if (VLIB_TX == dir) h0 += vnet_buffer (b[0])->ip.save_rewrite_length; - fib_index0 = - urpf_cfgs[af][dir][vnet_buffer (b[0])->sw_if_index[dir]].fib_index; - - if (AF_IP4 == af) - { - const ip4_header_t *ip0; - - ip0 = (ip4_header_t *) h0; - - lb_index0 = ip4_fib_forwarding_lookup (fib_index0, - &ip0->src_address); + fib_index0 = urpf_get_fib_index (b[0], af, dir); + urpf_perform_check_x1 (af, dir, mode, b[0], h0, fib_index0, &lb0, + &pass0); - /* Pass multicast. 
*/ - pass0 = (ip4_address_is_multicast (&ip0->src_address) || - ip4_address_is_global_broadcast (&ip0->src_address)); - } - else + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) { - const ip6_header_t *ip0; - - ip0 = (ip6_header_t *) h0; - - lb_index0 = ip6_fib_table_fwding_lookup (fib_index0, - &ip0->src_address); - pass0 = ip6_address_is_multicast (&ip0->src_address); - } - - lb0 = load_balance_get (lb_index0); + urpf_trace_t *t; - if (URPF_MODE_STRICT == mode) - { - int res0; - - res0 = fib_urpf_check (lb0->lb_urpf, - vnet_buffer (b[0])->sw_if_index[dir]); - if (VLIB_RX == dir) - pass0 |= res0; - else - { - pass0 |= !res0 && fib_urpf_check_size (lb0->lb_urpf); - pass0 |= b[0]->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; - } + t = vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->urpf = lb0 ? lb0->lb_urpf : ~0; + t->fib_index = fib_index0; } - else - pass0 |= fib_urpf_check_size (lb0->lb_urpf); if (PREDICT_TRUE (pass0)) vnet_feature_next_u16 (&next[0], b[0]); @@ -303,14 +330,6 @@ urpf_inline (vlib_main_t * vm, next[0] = URPF_NEXT_DROP; b[0]->error = node->errors[URPF_ERROR_DROP]; } - - if (b[0]->flags & VLIB_BUFFER_IS_TRACED) - { - urpf_trace_t *t; - - t = vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->urpf = lb0->lb_urpf; - } b++; next++; n_left--; diff --git a/src/plugins/wireguard/wireguard_chachapoly.c b/src/plugins/wireguard/wireguard_chachapoly.c index 0dd7908d2e2..ad644ff6cb8 100644 --- a/src/plugins/wireguard/wireguard_chachapoly.c +++ b/src/plugins/wireguard/wireguard_chachapoly.c @@ -72,11 +72,11 @@ wg_xchacha20poly1305_encrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst, u64 h_nonce; clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce)); - h_nonce = le64toh (h_nonce); + h_nonce = clib_little_to_host_u64 (h_nonce); hchacha20 (derived_key, nonce, key); for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++) - (derived_key[i]) = htole32 ((derived_key[i])); + (derived_key[i]) = clib_host_to_little_u32 ((derived_key[i])); uint32_t key_idx; @@ 
-102,11 +102,11 @@ wg_xchacha20poly1305_decrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst, u64 h_nonce; clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce)); - h_nonce = le64toh (h_nonce); + h_nonce = clib_little_to_host_u64 (h_nonce); hchacha20 (derived_key, nonce, key); for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++) - (derived_key[i]) = htole32 ((derived_key[i])); + (derived_key[i]) = clib_host_to_little_u32 ((derived_key[i])); uint32_t key_idx; diff --git a/src/plugins/wireguard/wireguard_noise.c b/src/plugins/wireguard/wireguard_noise.c index 5fe2e44b03b..c3f28f442f5 100644 --- a/src/plugins/wireguard/wireguard_noise.c +++ b/src/plugins/wireguard/wireguard_noise.c @@ -751,8 +751,8 @@ noise_tai64n_now (uint8_t output[NOISE_TIMESTAMP_LEN]) unix_nanosec &= REJECT_INTERVAL_MASK; /* https://cr.yp.to/libtai/tai64.html */ - sec = htobe64 (0x400000000000000aULL + unix_sec); - nsec = htobe32 (unix_nanosec); + sec = clib_host_to_big_u64 (0x400000000000000aULL + unix_sec); + nsec = clib_host_to_big_u32 (unix_nanosec); /* memcpy to output buffer, assuming output could be unaligned. */ clib_memcpy (output, &sec, sizeof (sec)); diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 674f15d5dc6..5ba42270b1a 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -663,7 +663,7 @@ vlib_buffer_main_init_numa_alloc (struct vlib_main_t *vm, u32 numa_node, u8 unpriv) { vlib_buffer_main_t *bm = vm->buffer_main; - u32 buffers_per_numa = bm->buffers_per_numa; + u32 buffers_per_numa = bm->buffers_per_numa[numa_node]; clib_error_t *error; u32 buffer_size; uword n_pages, pagesize; @@ -680,6 +680,9 @@ vlib_buffer_main_init_numa_alloc (struct vlib_main_t *vm, u32 numa_node, "size (%llu)", buffer_size, pagesize); if (buffers_per_numa == 0) + buffers_per_numa = bm->default_buffers_per_numa; + + if (buffers_per_numa == 0) buffers_per_numa = unpriv ? 
VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA_UNPRIV : VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA; @@ -906,18 +909,48 @@ done: } static clib_error_t * +vlib_buffers_numa_configure (vlib_buffer_main_t *bm, u32 numa_node, + unformat_input_t *input) +{ + u32 buffers = 0; + + if (numa_node >= VLIB_BUFFER_MAX_NUMA_NODES) + return clib_error_return (0, "invalid numa node"); + + if (!input) + return 0; + + unformat_skip_white_space (input); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "buffers %u", &buffers)) + ; + else + return unformat_parse_error (input); + } + + bm->buffers_per_numa[numa_node] = buffers; + return 0; +} + +static clib_error_t * vlib_buffers_configure (vlib_main_t * vm, unformat_input_t * input) { vlib_buffer_main_t *bm; + u32 numa_node; + unformat_input_t sub_input; + clib_error_t *error = 0; vlib_buffer_main_alloc (vm); bm = vm->buffer_main; bm->log2_page_size = CLIB_MEM_PAGE_SZ_UNKNOWN; + memset (bm->buffers_per_numa, 0, sizeof (bm->buffers_per_numa)); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "buffers-per-numa %u", &bm->buffers_per_numa)) + if (unformat (input, "buffers-per-numa %u", + &bm->default_buffers_per_numa)) ; else if (unformat (input, "page-size %U", unformat_log2_page_size, &bm->log2_page_size)) @@ -925,6 +958,15 @@ vlib_buffers_configure (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "default data-size %u", &bm->default_data_size)) ; + else if (unformat (input, "numa %u %U", &numa_node, + unformat_vlib_cli_sub_input, &sub_input)) + { + error = vlib_buffers_numa_configure (bm, numa_node, &sub_input); + unformat_free (&sub_input); + + if (error) + return error; + } else return unformat_parse_error (input); } diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 7d45689ed19..aad9701080e 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -508,7 +508,8 @@ typedef struct u8 
default_buffer_pool_index_for_numa[VLIB_BUFFER_MAX_NUMA_NODES]; /* config */ - u32 buffers_per_numa; + u32 default_buffers_per_numa; + u32 buffers_per_numa[VLIB_BUFFER_MAX_NUMA_NODES]; u16 ext_hdr_size; u32 default_data_size; clib_mem_page_sz_t log2_page_size; diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c index f7c63bc3607..29ca3d97523 100644 --- a/src/vlib/linux/pci.c +++ b/src/vlib/linux/pci.c @@ -1561,14 +1561,17 @@ linux_pci_init (vlib_main_t * vm) ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32)); - addrs = vlib_pci_get_all_dev_addrs (); - vec_foreach (addr, addrs) + if (pm->pci_device_registrations) { - vlib_pci_device_info_t *d; - if ((d = vlib_pci_get_device_info (vm, addr, 0))) + addrs = vlib_pci_get_all_dev_addrs (); + vec_foreach (addr, addrs) { - init_device_from_registered (vm, d); - vlib_pci_free_device_info (d); + vlib_pci_device_info_t *d; + if ((d = vlib_pci_get_device_info (vm, addr, 0))) + { + init_device_from_registered (vm, d); + vlib_pci_free_device_info (d); + } } } diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 87b71adc2bc..ef2c5616f21 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -205,6 +205,10 @@ vlib_thread_init (vlib_main_t * vm) avail_cpu = clib_bitmap_set (avail_cpu, c, 0); } + /* if main thread affinity is unspecified, set to current running cpu */ + if (tm->main_lcore == ~0) + tm->main_lcore = sched_getcpu (); + /* grab cpu for main thread */ if (tm->main_lcore != ~0) { @@ -370,6 +374,8 @@ void vlib_worker_thread_init (vlib_worker_thread_t * w) { vlib_thread_main_t *tm = vlib_get_thread_main (); + sigset_t signals; + int rv; /* * Note: disabling signals in worker threads as follows @@ -379,7 +385,17 @@ vlib_worker_thread_init (vlib_worker_thread_t * w) * sigfillset (&s); * pthread_sigmask (SIG_SETMASK, &s, 0); * } + * We can still disable signals for SIGINT,SIGHUP and SIGTERM as they don't + * trigger post-dump handlers anyway. 
*/ + sigemptyset (&signals); + sigaddset (&signals, SIGINT); + sigaddset (&signals, SIGHUP); + sigaddset (&signals, SIGTERM); + rv = pthread_sigmask (SIG_BLOCK, &signals, NULL); + + if (rv) + clib_warning ("Failed to set the worker signal mask"); clib_mem_set_heap (w->thread_mheap); @@ -1122,6 +1138,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) u8 *name; uword *bitmap; u32 count; + int use_corelist = 0; tm->thread_registrations_by_name = hash_create_string (0, sizeof (uword)); @@ -1173,6 +1190,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) tr->coremask = bitmap; tr->count = clib_bitmap_count_set_bits (tr->coremask); + use_corelist = 1; } else if (unformat @@ -1202,6 +1220,9 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) break; } + if (use_corelist && tm->main_lcore == ~0) + return clib_error_return (0, "main-core must be specified when using " + "corelist-* or coremask-* attribute"); if (tm->sched_priority != ~0) { if (tm->sched_policy == SCHED_FIFO || tm->sched_policy == SCHED_RR) diff --git a/src/vlib/threads.h b/src/vlib/threads.h index ac0c1d5d266..c671aa78c39 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -200,7 +200,7 @@ vlib_smp_unsafe_warning (void) } always_inline int -__foreach_vlib_main_helper (vlib_main_t *ii, vlib_main_t **p) +__foreach_vlib_main_helper (vlib_main_t *ii, vlib_main_t **p, int checks) { vlib_main_t *vm; u32 index = ii - (vlib_main_t *) 0; @@ -209,15 +209,17 @@ __foreach_vlib_main_helper (vlib_main_t *ii, vlib_main_t **p) return 0; *p = vm = vlib_global_main.vlib_mains[index]; - ASSERT (index == 0 || vm->parked_at_barrier == 1); + ASSERT (!checks || index == 0 || vm->parked_at_barrier == 1); return 1; } -#define foreach_vlib_main() \ +#define foreach_vlib_main__(checks) \ for (vlib_main_t *ii = 0, *this_vlib_main; \ - __foreach_vlib_main_helper (ii, &this_vlib_main); ii++) \ + __foreach_vlib_main_helper (ii, &this_vlib_main, checks); ii++) \ if (this_vlib_main) +#define 
foreach_vlib_main() foreach_vlib_main__ (1) + #define foreach_sched_policy_posix \ _ (SCHED_OTHER, OTHER, "other") \ _ (SCHED_FIFO, FIFO, "fifo") \ diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index ee28ca8f1aa..11d0cb1160c 100644 --- a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -40,6 +40,8 @@ #include <vlib/unix/unix.h> #include <vlib/unix/plugin.h> #include <vppinfra/unix.h> +#include <vppinfra/stack.h> +#include <vppinfra/format_ansi.h> #include <limits.h> #include <signal.h> @@ -97,20 +99,42 @@ int vlib_last_signum = 0; uword vlib_last_faulting_address = 0; static void +log_one_line () +{ + vec_terminate_c_string (syslog_msg); + if (unix_main.flags & (UNIX_FLAG_INTERACTIVE | UNIX_FLAG_NOSYSLOG)) + fprintf (stderr, "%s\n", syslog_msg); + else + syslog (LOG_ERR | LOG_DAEMON, "%s", syslog_msg); + vec_reset_length (syslog_msg); +} + +static void unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc) { uword fatal = 0; + int color = + (unix_main.flags & (UNIX_FLAG_INTERACTIVE | UNIX_FLAG_NOSYSLOG)) && + (unix_main.flags & UNIX_FLAG_NOCOLOR) == 0; /* These come in handy when looking at core files from optimized images */ vlib_last_signum = signum; vlib_last_faulting_address = (uword) si->si_addr; + if (color) + syslog_msg = format (syslog_msg, ANSI_FG_BR_RED); + syslog_msg = format (syslog_msg, "received signal %U, PC %U", format_signal, signum, format_ucontext_pc, uc); - if (signum == SIGSEGV) + if (signum == SIGSEGV || signum == SIGBUS) syslog_msg = format (syslog_msg, ", faulting address %p", si->si_addr); + if (color) + syslog_msg = format (syslog_msg, ANSI_FG_DEFAULT); + + log_one_line (); + switch (signum) { /* these (caught) signals cause the application to exit */ @@ -120,11 +144,17 @@ unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc) */ if (unix_main.vlib_main && unix_main.vlib_main->main_loop_exit_set) { - syslog (LOG_ERR | LOG_DAEMON, "received SIGTERM, exiting..."); + syslog_msg = format ( + syslog_msg, 
"received SIGTERM from PID %d UID %d, exiting...", + si->si_pid, si->si_uid); + log_one_line (); unix_main.vlib_main->main_loop_exit_now = 1; } else - syslog (LOG_ERR | LOG_DAEMON, "IGNORE early SIGTERM..."); + { + syslog_msg = format (syslog_msg, "IGNORE early SIGTERM..."); + log_one_line (); + } break; /* fall through */ case SIGQUIT: @@ -144,26 +174,75 @@ unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc) break; } - /* Null terminate. */ - vec_add1 (syslog_msg, 0); if (fatal) { - syslog (LOG_ERR | LOG_DAEMON, "%s", syslog_msg); + int skip = 1, index = 0; - /* Address of callers: outer first, inner last. */ - uword callers[15]; - uword n_callers = clib_backtrace (callers, ARRAY_LEN (callers), 0); - int i; - for (i = 0; i < n_callers; i++) + foreach_clib_stack_frame (sf) { - vec_reset_length (syslog_msg); + if (sf->is_signal_frame) + { + int pipefd[2]; + const int n_bytes = 20; + u8 *ip = (void *) sf->ip; + + if (pipe (pipefd) == 0) + { + /* check PC points to valid memory */ + if (write (pipefd[1], ip, n_bytes) == n_bytes) + { + syslog_msg = format (syslog_msg, "Code: "); + if (color) + syslog_msg = format (syslog_msg, ANSI_FG_CYAN); + for (int i = 0; i < n_bytes; i++) + syslog_msg = format (syslog_msg, " %02x", ip[i]); + if (color) + syslog_msg = format (syslog_msg, ANSI_FG_DEFAULT); + } + else + { + syslog_msg = format ( + syslog_msg, "PC contains invalid memory address"); + } + log_one_line (); + foreach_int (i, 0, 1) + close (pipefd[i]); + } + skip = 0; + } + + if (skip) + continue; + + syslog_msg = format (syslog_msg, "#%-2d ", index++); + if (color) + syslog_msg = format (syslog_msg, ANSI_FG_BLUE); + syslog_msg = format (syslog_msg, "0x%016lx", sf->ip); + if (color) + syslog_msg = format (syslog_msg, ANSI_FG_DEFAULT); + + if (sf->name[0]) + { + if (color) + syslog_msg = format (syslog_msg, ANSI_FG_YELLOW); + syslog_msg = + format (syslog_msg, " %s + 0x%x", sf->name, sf->offset); + if (color) + syslog_msg = format (syslog_msg, 
ANSI_FG_DEFAULT); + } - syslog_msg = - format (syslog_msg, "#%-2d 0x%016lx %U%c", i, callers[i], - format_clib_elf_symbol_with_address, callers[i], 0); + log_one_line (); - syslog (LOG_ERR | LOG_DAEMON, "%s", syslog_msg); + if (sf->file_name) + { + if (color) + syslog_msg = format (syslog_msg, ANSI_FG_GREEN); + syslog_msg = format (syslog_msg, " from %s", sf->file_name); + if (color) + syslog_msg = format (syslog_msg, ANSI_FG_DEFAULT); + log_one_line (); + } } /* have to remove SIGABRT to avoid recursive - os_exit calling abort() */ @@ -175,9 +254,6 @@ unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc) else os_exit (1); } - else - clib_warning ("%s", syslog_msg); - } static clib_error_t * diff --git a/src/vlib/unix/plugin.c b/src/vlib/unix/plugin.c index 5cac9abc8fe..77e4633e14a 100644 --- a/src/vlib/unix/plugin.c +++ b/src/vlib/unix/plugin.c @@ -88,19 +88,14 @@ extract (u8 * sp, u8 * ep) */ static clib_error_t * -r2_to_reg (elf_main_t * em, vlib_plugin_r2_t * r2, - vlib_plugin_registration_t * reg) +r2_to_reg (elf_main_t *em, vlib_plugin_r2_t *r2, + vlib_plugin_registration_t *reg, elf_section_t *data_section) { - clib_error_t *error; - elf_section_t *section; uword data_segment_offset; u8 *data; /* It turns out that the strings land in the ".data" section */ - error = elf_get_section_by_name (em, ".data", §ion); - if (error) - return error; - data = elf_get_section_contents (em, section->index, 1); + data = elf_get_section_contents (em, data_section->index, 1); /* * Offsets in the ".vlib_plugin_r2" section @@ -177,13 +172,52 @@ load_one_plugin (plugin_main_t * pm, plugin_info_t * pi, int from_early_init) error = elf_get_section_by_name (&em, ".vlib_plugin_r2", §ion); if (error == 0) { + elf_section_t *data_section; + elf_relocation_table_t *rt; + elf_relocation_with_addend_t *r; + elf_symbol_table_t *st; + elf64_symbol_t *sym, *symok = 0; + data = elf_get_section_contents (&em, section->index, 1); r2 = (vlib_plugin_r2_t *) data; + + 
elf_get_section_by_name (&em, ".data", &data_section); + + // Find first symbol in .vlib_plugin_r2 section. + vec_foreach (st, em.symbol_tables) + { + vec_foreach (sym, st->symbols) + { + if (sym->section_index == section->index) + { + symok = sym; + break; + } + } + } + + // Relocate section data as per relocation tables. + if (symok != 0) + { + vec_foreach (rt, em.relocation_tables) + { + vec_foreach (r, rt->relocations) + { + if (r->address >= symok->value && + r->address < symok->value + symok->size) + { + *(uword *) ((void *) data + r->address - symok->value) += + r->addend - data_section->header.exec_address; + } + } + } + } + reg = clib_mem_alloc (sizeof (*reg)); memset (reg, 0, sizeof (*reg)); reg->default_disabled = r2->default_disabled != 0; - error = r2_to_reg (&em, r2, reg); + error = r2_to_reg (&em, r2, reg, data_section); if (error) { PLUGIN_LOG_ERR ("Bad r2 registration: %s\n", (char *) pi->name); diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index fb8d294009d..eb74f5de84c 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -813,24 +813,6 @@ list(APPEND VNET_API_FILES ) ############################################################################## -# mpls segment routing -############################################################################## - -list(APPEND VNET_SOURCES - srmpls/sr_mpls_policy.c - srmpls/sr_mpls_steering.c - srmpls/sr_mpls_api.c -) - -list(APPEND VNET_HEADERS - srmpls/sr_mpls.h -) - -list(APPEND VNET_API_FILES - srmpls/sr_mpls.api -) - -############################################################################## # IPFIX / netflow v10 ############################################################################## list(APPEND VNET_SOURCES @@ -1162,6 +1144,7 @@ list(APPEND VNET_HEADERS fib/fib_path_list.h fib/fib_sas.h fib/fib_source.h + fib/fib_urpf_list.h ) list(APPEND VNET_API_FILES @@ -1460,7 +1443,6 @@ add_vat_test_library(vnet ip/ip_test.c arp/arp_test.c ip6-nd/ip6_nd_test.c - 
srmpls/sr_mpls_test.c session/session_test.c l2/l2_test.c ipsec/ipsec_test.c diff --git a/src/vnet/dev/counters.c b/src/vnet/dev/counters.c index 0a1e0a7419d..d02839d664f 100644 --- a/src/vnet/dev/counters.c +++ b/src/vnet/dev/counters.c @@ -54,7 +54,7 @@ vnet_dev_counters_clear (vlib_main_t *vm, vnet_dev_counter_main_t *cm) { for (int i = 0; i < cm->n_counters; i++) { - cm->counter_start[i] = cm->counter_data[i]; + cm->counter_start[i] += cm->counter_data[i]; cm->counter_data[i] = 0; } } diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h index bbf2f9dff21..eb06eeba34e 100644 --- a/src/vnet/dev/dev.h +++ b/src/vnet/dev/dev.h @@ -115,6 +115,7 @@ typedef struct vnet_dev_rx_queue_op_t *start; vnet_dev_rx_queue_op_no_rv_t *stop; vnet_dev_rx_queue_op_no_rv_t *free; + vnet_dev_rx_queue_op_no_rv_t *clear_counters; format_function_t *format_info; } vnet_dev_rx_queue_ops_t; @@ -124,6 +125,7 @@ typedef struct vnet_dev_tx_queue_op_t *start; vnet_dev_tx_queue_op_no_rv_t *stop; vnet_dev_tx_queue_op_no_rv_t *free; + vnet_dev_tx_queue_op_no_rv_t *clear_counters; format_function_t *format_info; } vnet_dev_tx_queue_ops_t; @@ -245,6 +247,7 @@ typedef struct vnet_dev_port_op_no_rv_t *stop; vnet_dev_port_op_no_rv_t *deinit; vnet_dev_port_op_no_rv_t *free; + vnet_dev_port_op_no_rv_t *clear_counters; format_function_t *format_status; format_function_t *format_flow; } vnet_dev_port_ops_t; diff --git a/src/vnet/dev/format.c b/src/vnet/dev/format.c index ed83a0eba95..f599c0f8b85 100644 --- a/src/vnet/dev/format.c +++ b/src/vnet/dev/format.c @@ -101,7 +101,7 @@ format_vnet_dev_port_info (u8 *s, va_list *args) u32 indent = format_get_indent (s); s = format (s, "Hardware Address is %U", format_vnet_dev_hw_addr, - &port->attr.hw_addr); + &port->primary_hw_addr); s = format (s, ", %u RX queues (max %u), %u TX queues (max %u)", pool_elts (port->rx_queues), port->attr.max_rx_queues, pool_elts (port->tx_queues), port->attr.max_tx_queues); diff --git a/src/vnet/dev/port.c b/src/vnet/dev/port.c 
index 8a6df54cbc8..5b4b8cdc7b8 100644 --- a/src/vnet/dev/port.c +++ b/src/vnet/dev/port.c @@ -733,16 +733,26 @@ vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port) void vnet_dev_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port) { - if (port->counter_main) + if (port->port_ops.clear_counters) + port->port_ops.clear_counters (vm, port); + else if (port->counter_main) vnet_dev_counters_clear (vm, port->counter_main); foreach_vnet_dev_port_rx_queue (q, port) - if (q->counter_main) - vnet_dev_counters_clear (vm, q->counter_main); + { + if (port->rx_queue_ops.clear_counters) + port->rx_queue_ops.clear_counters (vm, q); + else if (q->counter_main) + vnet_dev_counters_clear (vm, q->counter_main); + } foreach_vnet_dev_port_tx_queue (q, port) - if (q->counter_main) - vnet_dev_counters_clear (vm, q->counter_main); + { + if (port->tx_queue_ops.clear_counters) + port->tx_queue_ops.clear_counters (vm, q); + else if (q->counter_main) + vnet_dev_counters_clear (vm, q->counter_main); + } log_notice (port->dev, "counters cleared on port %u", port->port_id); } diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c index 8c837575cf8..027e1ed4e74 100644 --- a/src/vnet/devices/virtio/node.c +++ b/src/vnet/devices/virtio/node.c @@ -282,6 +282,16 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (n_left == 0) return 0; + if (PREDICT_FALSE (n_left == vring->queue_size)) + { + /* + * Informational error logging when VPP is not pulling packets fast + * enough. 
+ */ + vlib_error_count (vm, node->node_index, VIRTIO_INPUT_ERROR_FULL_RX_QUEUE, + 1); + } + if (type == VIRTIO_IF_TYPE_TUN) { next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT; diff --git a/src/vnet/devices/virtio/virtio_inline.h b/src/vnet/devices/virtio/virtio_inline.h index 179f319aa4c..41bba755934 100644 --- a/src/vnet/devices/virtio/virtio_inline.h +++ b/src/vnet/devices/virtio/virtio_inline.h @@ -17,6 +17,7 @@ #define foreach_virtio_input_error \ _ (BUFFER_ALLOC, "buffer alloc error") \ + _ (FULL_RX_QUEUE, "full rx queue (driver tx drop)") \ _ (UNKNOWN, "unknown") typedef enum diff --git a/src/vnet/fib/fib_api.c b/src/vnet/fib/fib_api.c index 07d6699d87a..1b1c0d113c0 100644 --- a/src/vnet/fib/fib_api.c +++ b/src/vnet/fib/fib_api.c @@ -190,6 +190,7 @@ fib_api_path_decode (vl_api_fib_path_t *in, break; case FIB_API_PATH_TYPE_DROP: out->frp_flags |= FIB_ROUTE_PATH_DROP; + out->frp_sw_if_index = ntohl(in->sw_if_index); break; case FIB_API_PATH_TYPE_LOCAL: out->frp_flags |= FIB_ROUTE_PATH_LOCAL; diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c index c4472c7122d..9abb89bc6a0 100644 --- a/src/vnet/fib/fib_types.c +++ b/src/vnet/fib/fib_types.c @@ -715,6 +715,10 @@ unformat_fib_route_path (unformat_input_t * input, va_list * args) rpath->frp_proto = DPO_PROTO_IP6; rpath->frp_flags = FIB_ROUTE_PATH_INTF_RX; } + else if (unformat (input, "drop")) + { + rpath->frp_flags = FIB_ROUTE_PATH_DROP; + } else if (unformat (input, "local")) { clib_memset (&rpath->frp_addr, 0, sizeof (rpath->frp_addr)); diff --git a/src/vnet/ip-neighbor/ip_neighbor.c b/src/vnet/ip-neighbor/ip_neighbor.c index d340037a15d..614b78489cd 100644 --- a/src/vnet/ip-neighbor/ip_neighbor.c +++ b/src/vnet/ip-neighbor/ip_neighbor.c @@ -460,6 +460,7 @@ ip_neighbor_destroy (ip_neighbor_t * ipn) af = ip_neighbor_get_af (ipn); IP_NEIGHBOR_DBG ("free: %U", format_ip_neighbor, + vlib_time_now (vlib_get_main ()), ip_neighbor_get_index (ipn)); ip_neighbor_publish (ip_neighbor_get_index (ipn), @@ 
-944,20 +945,20 @@ ip_neighbor_show_sorted_i (vlib_main_t * vm, vlib_cli_command_t * cmd, ip_address_family_t af) { ip_neighbor_elt_t *elt, *head; + f64 now; head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]); + now = vlib_time_now (vm); - - vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP", - "Flags", "Ethernet", "Interface"); + vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Age", "IP", "Flags", + "Ethernet", "Interface"); /* the list is time sorted, newest first, so start from the back * and work forwards. Stop when we get to one that is alive */ - clib_llist_foreach_reverse(ip_neighbor_elt_pool, - ipne_anchor, head, elt, - ({ - vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index); - })); + clib_llist_foreach_reverse (ip_neighbor_elt_pool, ipne_anchor, head, elt, ({ + vlib_cli_output (vm, "%U", format_ip_neighbor, + now, elt->ipne_index); + })); return (NULL); } @@ -969,6 +970,7 @@ ip_neighbor_show_i (vlib_main_t * vm, { index_t *ipni, *ipnis = NULL; u32 sw_if_index; + f64 now; /* Filter entries by interface if given. 
*/ sw_if_index = ~0; @@ -976,14 +978,15 @@ ip_neighbor_show_i (vlib_main_t * vm, &sw_if_index); ipnis = ip_neighbor_entries (sw_if_index, af); + now = vlib_time_now (vm); if (ipnis) - vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP", - "Flags", "Ethernet", "Interface"); + vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Age", "IP", "Flags", + "Ethernet", "Interface"); vec_foreach (ipni, ipnis) { - vlib_cli_output (vm, "%U", format_ip_neighbor, *ipni); + vlib_cli_output (vm, "%U", format_ip_neighbor, now, *ipni); } vec_free (ipnis); @@ -1573,13 +1576,12 @@ ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait) if (ttl > ipndb_age) { - IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d", - format_ip_neighbor, ipni, now, - ipn->ipn_time_last_updated, ipndb_age); + IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d", format_ip_neighbor, now, ipni, + now, ipn->ipn_time_last_updated, ipndb_age); if (ipn->ipn_n_probes > 2) { /* 3 strikes and yea-re out */ - IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, ipni); + IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, now, ipni); *wait = 1; return (IP_NEIGHBOR_AGE_DEAD); } diff --git a/src/vnet/ip-neighbor/ip_neighbor_types.c b/src/vnet/ip-neighbor/ip_neighbor_types.c index 39039a48249..a6f3c26d42f 100644 --- a/src/vnet/ip-neighbor/ip_neighbor_types.c +++ b/src/vnet/ip-neighbor/ip_neighbor_types.c @@ -68,19 +68,18 @@ format_ip_neighbor_watcher (u8 * s, va_list * va) u8 * format_ip_neighbor (u8 * s, va_list * va) { + f64 now = va_arg (*va, f64); index_t ipni = va_arg (*va, index_t); ip_neighbor_t *ipn; ipn = ip_neighbor_get (ipni); - return (format (s, "%=12U%=40U%=6U%=20U%U", - format_vlib_time, vlib_get_main (), - ipn->ipn_time_last_updated, - format_ip_address, &ipn->ipn_key->ipnk_ip, - format_ip_neighbor_flags, ipn->ipn_flags, - format_mac_address_t, &ipn->ipn_mac, - format_vnet_sw_if_index_name, vnet_get_main (), - ipn->ipn_key->ipnk_sw_if_index)); + return ( + format (s, "%=12U%=40U%=6U%=20U%U", format_vlib_time, vlib_get_main 
(), + now - ipn->ipn_time_last_updated, format_ip_address, + &ipn->ipn_key->ipnk_ip, format_ip_neighbor_flags, ipn->ipn_flags, + format_mac_address_t, &ipn->ipn_mac, format_vnet_sw_if_index_name, + vnet_get_main (), ipn->ipn_key->ipnk_sw_if_index)); } static void diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c index b095f679cc8..f93ebce4bf1 100644 --- a/src/vnet/ip/icmp6.c +++ b/src/vnet/ip/icmp6.c @@ -338,7 +338,7 @@ ip6_icmp_error (vlib_main_t * vm, if (throttle_check (&icmp_throttle, thread_index, r0, seed)) { - vlib_error_count (vm, node->node_index, ICMP4_ERROR_DROP, 1); + vlib_error_count (vm, node->node_index, ICMP6_ERROR_DROP, 1); from += 1; n_left_from -= 1; continue; diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index c225c222a38..c0fa430e0aa 100644 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -603,6 +603,8 @@ VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = { * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.2 GigabitEthernet2/0/0 weight 3} * To add a route to a particular FIB table (VRF), use: * @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0} + * To add a route to drop the traffic: + * @cliexcmd{ip route add 172.16.24.0/24 table 100 via 127.0.0.1 drop} ?*/ VLIB_CLI_COMMAND (ip_route_command, static) = { .path = "ip route", @@ -612,7 +614,7 @@ VLIB_CLI_COMMAND (ip_route_command, static) = { "<value>] [udp-encap <value>] [ip4-lookup-in-table <value>] " "[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] " "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 " - "<interface>] [out-labels <value value value>]", + "<interface>] [out-labels <value value value>] [drop]", .function = vnet_ip_route_cmd, .is_mp_safe = 1, }; diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c index 918ebf03f67..ec4db0fed57 100644 --- a/src/vnet/ipsec/ah_decrypt.c +++ b/src/vnet/ipsec/ah_decrypt.c @@ -500,10 +500,10 @@ ah_decrypt_init (vlib_main_t *vm) { ipsec_main_t *im = &ipsec_main; - 
im->ah4_dec_fq_index = - vlib_frame_queue_main_init (ah4_decrypt_node.index, 0); - im->ah6_dec_fq_index = - vlib_frame_queue_main_init (ah6_decrypt_node.index, 0); + im->ah4_dec_fq_index = vlib_frame_queue_main_init (ah4_decrypt_node.index, + im->handoff_queue_size); + im->ah6_dec_fq_index = vlib_frame_queue_main_init (ah6_decrypt_node.index, + im->handoff_queue_size); return 0; } diff --git a/src/vnet/ipsec/ah_encrypt.c b/src/vnet/ipsec/ah_encrypt.c index 960327f071d..86694660878 100644 --- a/src/vnet/ipsec/ah_encrypt.c +++ b/src/vnet/ipsec/ah_encrypt.c @@ -490,10 +490,10 @@ ah_encrypt_init (vlib_main_t *vm) { ipsec_main_t *im = &ipsec_main; - im->ah4_enc_fq_index = - vlib_frame_queue_main_init (ah4_encrypt_node.index, 0); - im->ah6_enc_fq_index = - vlib_frame_queue_main_init (ah6_encrypt_node.index, 0); + im->ah4_enc_fq_index = vlib_frame_queue_main_init (ah4_encrypt_node.index, + im->handoff_queue_size); + im->ah6_enc_fq_index = vlib_frame_queue_main_init (ah6_encrypt_node.index, + im->handoff_queue_size); return 0; } diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c index 26d8ca1deee..01b2d2971b0 100644 --- a/src/vnet/ipsec/esp_decrypt.c +++ b/src/vnet/ipsec/esp_decrypt.c @@ -1675,14 +1675,14 @@ esp_decrypt_init (vlib_main_t *vm) { ipsec_main_t *im = &ipsec_main; - im->esp4_dec_fq_index = - vlib_frame_queue_main_init (esp4_decrypt_node.index, 0); - im->esp6_dec_fq_index = - vlib_frame_queue_main_init (esp6_decrypt_node.index, 0); - im->esp4_dec_tun_fq_index = - vlib_frame_queue_main_init (esp4_decrypt_tun_node.index, 0); - im->esp6_dec_tun_fq_index = - vlib_frame_queue_main_init (esp6_decrypt_tun_node.index, 0); + im->esp4_dec_fq_index = vlib_frame_queue_main_init (esp4_decrypt_node.index, + im->handoff_queue_size); + im->esp6_dec_fq_index = vlib_frame_queue_main_init (esp6_decrypt_node.index, + im->handoff_queue_size); + im->esp4_dec_tun_fq_index = vlib_frame_queue_main_init ( + esp4_decrypt_tun_node.index, im->handoff_queue_size); + 
im->esp6_dec_tun_fq_index = vlib_frame_queue_main_init ( + esp6_decrypt_tun_node.index, im->handoff_queue_size); return 0; } diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c index dd47053874c..544ff7b6aa8 100644 --- a/src/vnet/ipsec/esp_encrypt.c +++ b/src/vnet/ipsec/esp_encrypt.c @@ -1445,16 +1445,16 @@ esp_encrypt_init (vlib_main_t *vm) { ipsec_main_t *im = &ipsec_main; - im->esp4_enc_fq_index = - vlib_frame_queue_main_init (esp4_encrypt_node.index, 0); - im->esp6_enc_fq_index = - vlib_frame_queue_main_init (esp6_encrypt_node.index, 0); - im->esp4_enc_tun_fq_index = - vlib_frame_queue_main_init (esp4_encrypt_tun_node.index, 0); - im->esp6_enc_tun_fq_index = - vlib_frame_queue_main_init (esp6_encrypt_tun_node.index, 0); - im->esp_mpls_enc_tun_fq_index = - vlib_frame_queue_main_init (esp_mpls_encrypt_tun_node.index, 0); + im->esp4_enc_fq_index = vlib_frame_queue_main_init (esp4_encrypt_node.index, + im->handoff_queue_size); + im->esp6_enc_fq_index = vlib_frame_queue_main_init (esp6_encrypt_node.index, + im->handoff_queue_size); + im->esp4_enc_tun_fq_index = vlib_frame_queue_main_init ( + esp4_encrypt_tun_node.index, im->handoff_queue_size); + im->esp6_enc_tun_fq_index = vlib_frame_queue_main_init ( + esp6_encrypt_tun_node.index, im->handoff_queue_size); + im->esp_mpls_enc_tun_fq_index = vlib_frame_queue_main_init ( + esp_mpls_encrypt_tun_node.index, im->handoff_queue_size); return 0; } diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c index f8c39c327ed..8b43dd23cc8 100644 --- a/src/vnet/ipsec/ipsec.c +++ b/src/vnet/ipsec/ipsec.c @@ -663,6 +663,7 @@ ipsec_config (vlib_main_t *vm, unformat_input_t *input) u32 ipsec_spd_fp_num_buckets; bool fp_spd_ip4_enabled = false; bool fp_spd_ip6_enabled = false; + u32 handoff_queue_size; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -758,6 +759,11 @@ ipsec_config (vlib_main_t *vm, unformat_input_t *input) ipsec_tun_table_init (AF_IP6, table_size, n_buckets); } + else if 
(unformat (input, "async-handoff-queue-size %d", + &handoff_queue_size)) + { + im->handoff_queue_size = handoff_queue_size; + } else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h index 4aa09d7560e..9ab054cf2a9 100644 --- a/src/vnet/ipsec/ipsec.h +++ b/src/vnet/ipsec/ipsec.h @@ -248,6 +248,8 @@ typedef struct u32 esp4_dec_tun_fq_index; u32 esp6_dec_tun_fq_index; + u32 handoff_queue_size; + /* Number of buckets for flow cache */ u32 ipsec4_out_spd_hash_num_buckets; u32 ipsec4_out_spd_flow_cache_entries; diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c index 6ccc0be2622..48f7deadda3 100644 --- a/src/vnet/ipsec/ipsec_input.c +++ b/src/vnet/ipsec/ipsec_input.c @@ -274,6 +274,159 @@ ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la, } always_inline void +ipsec_collect_ah_trace (vlib_buffer_t **b, vlib_node_runtime_t *node, + vlib_main_t *vm, ip4_header_t *ip0, ah_header_t *ah0, + u8 has_space0, ipsec_spd_t *spd0, ipsec_policy_t *p0, + u32 pi0) +{ + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) + { + ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr)); + + tr->proto = ip0->protocol; + tr->sa_id = p0 ? p0->sa_id : ~0; + tr->spi = has_space0 ? clib_net_to_host_u32 (ah0->spi) : ~0; + tr->seq = has_space0 ? 
clib_net_to_host_u32 (ah0->seq_no) : ~0; + tr->spd = spd0->id; + tr->policy_index = pi0; + } +} + +always_inline void +ipsec_ah_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, + ah_header_t *ah0, u32 thread_index, ipsec_spd_t *spd0, + vlib_buffer_t **b, vlib_node_runtime_t *node, + u64 *ipsec_bypassed, u64 *ipsec_dropped, + u64 *ipsec_matched, u64 *ipsec_unprocessed, u16 *next) + +{ + ipsec_policy_t *p0 = NULL; + u32 pi0 = ~0; + u8 has_space0; + /* if flow cache is enabled, first search through flow cache for a + * policy match and revert back to linear search on failure */ + bool search_flow_cache = im->input_flow_cache_flag; + + while (1) + { + if (search_flow_cache) + { + p0 = ipsec4_input_spd_find_flow_cache_entry ( + im, ip0->src_address.as_u32, ip0->dst_address.as_u32, + IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT); + } + else + { + p0 = ipsec_input_protect_policy_match ( + spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), + clib_net_to_host_u32 (ip0->dst_address.as_u32), + clib_net_to_host_u32 (ah0->spi)); + } + + has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (ah0 + 1) - + (clib_address_t) ip0); + + if (PREDICT_TRUE ((p0 != NULL) & (has_space0))) + { + *ipsec_matched += 1; + pi0 = p0 - im->policies; + vlib_increment_combined_counter (&ipsec_spd_policy_counters, + thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->length)); + + vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index; + next[0] = im->ah4_decrypt_next_index; + ipsec_collect_ah_trace (b, node, vm, ip0, ah0, has_space0, spd0, p0, + pi0); + return; + } + else + { + p0 = 0; + pi0 = ~0; + } + if (search_flow_cache) + { + p0 = ipsec4_input_spd_find_flow_cache_entry ( + im, ip0->src_address.as_u32, ip0->dst_address.as_u32, + IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS); + } + + else + { + p0 = ipsec_input_policy_match ( + spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), + clib_net_to_host_u32 (ip0->dst_address.as_u32), + IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS); + } + + if 
(PREDICT_TRUE ((p0 != NULL))) + { + *ipsec_bypassed += 1; + pi0 = p0 - im->policies; + vlib_increment_combined_counter (&ipsec_spd_policy_counters, + thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->length)); + ipsec_collect_ah_trace (b, node, vm, ip0, ah0, has_space0, spd0, p0, + pi0); + return; + } + else + { + p0 = 0; + pi0 = ~0; + }; + + if (search_flow_cache) + { + p0 = ipsec4_input_spd_find_flow_cache_entry ( + im, ip0->src_address.as_u32, ip0->dst_address.as_u32, + IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD); + } + + else + { + p0 = ipsec_input_policy_match ( + spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), + clib_net_to_host_u32 (ip0->dst_address.as_u32), + IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD); + } + + if (PREDICT_TRUE ((p0 != NULL))) + { + *ipsec_dropped += 1; + pi0 = p0 - im->policies; + vlib_increment_combined_counter (&ipsec_spd_policy_counters, + thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->length)); + + next[0] = IPSEC_INPUT_NEXT_DROP; + ipsec_collect_ah_trace (b, node, vm, ip0, ah0, has_space0, spd0, p0, + pi0); + return; + } + else + { + p0 = 0; + pi0 = ~0; + }; + /* flow cache search failed, retry with linear search */ + if (search_flow_cache && p0 == NULL) + { + search_flow_cache = false; + } + else if (search_flow_cache == false && p0 == NULL) + { + /* Drop by default if no match on PROTECT, BYPASS or DISCARD */ + *ipsec_unprocessed += 1; + next[0] = IPSEC_INPUT_NEXT_DROP; + return; + } + } +} + +always_inline void ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, esp_header_t *esp0, u32 thread_index, ipsec_spd_t *spd0, vlib_buffer_t **b, @@ -299,10 +452,11 @@ ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0, search_flow_cache = im->input_flow_cache_flag; udp_or_esp: - /* SPI ID field in the ESP header MUST NOT be a zero value */ if (esp0->spi == 0) { - /* Drop the packet if SPI ID is zero */ + /* RFC 4303, section 2.1: The SPI value of zero (0 is reserved for + * local, 
implementation-specific use and MUST NOT be sent on the wire. + */ *ipsec_unprocessed += 1; next[0] = IPSEC_INPUT_NEXT_DROP; return; @@ -523,15 +677,12 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, while (n_left_from > 0) { - u32 next32, pi0; + u32 next32; ip4_header_t *ip0; esp_header_t *esp0 = NULL; ah_header_t *ah0; ip4_ipsec_config_t *c0; ipsec_spd_t *spd0; - ipsec_policy_t *p0 = NULL; - u8 has_space0; - bool search_flow_cache = false; if (n_left_from > 2) { @@ -552,12 +703,10 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, udp_header_t *udp0 = NULL; udp0 = (udp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); - /* As per rfc3948 in UDP Encapsulated Header, UDP checksum must be - * Zero, and receivers must not depen upon UPD checksum. - * inside ESP header , SPI ID value MUST NOT be a zero value - * */ - - if (udp0->checksum == 0) + /* RFC5996 Section 2.23 "Port 4500 is reserved for + * UDP-encapsulated ESP and IKE." + */ + if (clib_host_to_net_u16 (4500) == udp0->dst_port) { esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t)); @@ -582,140 +731,9 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, { ah0 = (ah_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); - // if flow cache is enabled, first search through flow cache for a - // policy match and revert back to linear search on failure - search_flow_cache = im->input_flow_cache_flag; - - ah: - if (search_flow_cache) - { - p0 = ipsec4_input_spd_find_flow_cache_entry ( - im, ip0->src_address.as_u32, ip0->dst_address.as_u32, - IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT); - } - - else - { - p0 = ipsec_input_protect_policy_match ( - spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), - clib_net_to_host_u32 (ip0->dst_address.as_u32), - clib_net_to_host_u32 (ah0->spi)); - } - - has_space0 = - vlib_buffer_has_space (b[0], - (clib_address_t) (ah0 + 1) - - (clib_address_t) ip0); - - if (PREDICT_TRUE ((p0 != NULL) & (has_space0))) - { - ipsec_matched += 1; - - pi0 = p0 - im->policies; - 
vlib_increment_combined_counter - (&ipsec_spd_policy_counters, - thread_index, pi0, 1, clib_net_to_host_u16 (ip0->length)); - - vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index; - next[0] = im->ah4_decrypt_next_index; - goto trace1; - } - else - { - p0 = 0; - pi0 = ~0; - } - - if (search_flow_cache) - { - p0 = ipsec4_input_spd_find_flow_cache_entry ( - im, ip0->src_address.as_u32, ip0->dst_address.as_u32, - IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS); - } - - else - { - p0 = ipsec_input_policy_match ( - spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), - clib_net_to_host_u32 (ip0->dst_address.as_u32), - IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS); - } - - if (PREDICT_TRUE ((p0 != NULL))) - { - ipsec_bypassed += 1; - - pi0 = p0 - im->policies; - vlib_increment_combined_counter ( - &ipsec_spd_policy_counters, thread_index, pi0, 1, - clib_net_to_host_u16 (ip0->length)); - - goto trace1; - } - else - { - p0 = 0; - pi0 = ~0; - }; - - if (search_flow_cache) - { - p0 = ipsec4_input_spd_find_flow_cache_entry ( - im, ip0->src_address.as_u32, ip0->dst_address.as_u32, - IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD); - } - - else - { - p0 = ipsec_input_policy_match ( - spd0, clib_net_to_host_u32 (ip0->src_address.as_u32), - clib_net_to_host_u32 (ip0->dst_address.as_u32), - IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD); - } - - if (PREDICT_TRUE ((p0 != NULL))) - { - ipsec_dropped += 1; - - pi0 = p0 - im->policies; - vlib_increment_combined_counter ( - &ipsec_spd_policy_counters, thread_index, pi0, 1, - clib_net_to_host_u16 (ip0->length)); - - next[0] = IPSEC_INPUT_NEXT_DROP; - goto trace1; - } - else - { - p0 = 0; - pi0 = ~0; - }; - - // flow cache search failed, retry with linear search - if (search_flow_cache && p0 == NULL) - { - search_flow_cache = false; - goto ah; - } - - /* Drop by default if no match on PROTECT, BYPASS or DISCARD */ - ipsec_unprocessed += 1; - next[0] = IPSEC_INPUT_NEXT_DROP; - - trace1: - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && - PREDICT_FALSE (b[0]->flags & 
VLIB_BUFFER_IS_TRACED)) - { - ipsec_input_trace_t *tr = - vlib_add_trace (vm, node, b[0], sizeof (*tr)); - - tr->proto = ip0->protocol; - tr->sa_id = p0 ? p0->sa_id : ~0; - tr->spi = has_space0 ? clib_net_to_host_u32 (ah0->spi) : ~0; - tr->seq = has_space0 ? clib_net_to_host_u32 (ah0->seq_no) : ~0; - tr->spd = spd0->id; - tr->policy_index = pi0; - } + ipsec_ah_packet_process (vm, im, ip0, ah0, thread_index, spd0, b, + node, &ipsec_bypassed, &ipsec_dropped, + &ipsec_matched, &ipsec_unprocessed, next); } else { diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 67e7ee39001..2ca867c166f 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -1659,8 +1659,10 @@ session_transport_close (session_t * s) { if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED) session_set_state (s, SESSION_STATE_CLOSED); - /* If transport is already deleted, just free the session */ - else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED) + /* If transport is already deleted, just free the session. 
Half-opens + * expected to be already cleaning up at this point */ + else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED && + !(s->flags & SESSION_F_HALF_OPEN)) session_program_cleanup (s); return; } @@ -1687,7 +1689,8 @@ session_transport_reset (session_t * s) { if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED) session_set_state (s, SESSION_STATE_CLOSED); - else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED) + else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED && + !(s->flags & SESSION_F_HALF_OPEN)) session_program_cleanup (s); return; } diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c index 9d028dbb28c..ff20bc2d835 100644 --- a/src/vnet/session/session_lookup.c +++ b/src/vnet/session/session_lookup.c @@ -1184,7 +1184,6 @@ session_lookup_connection_wt6 (u32 fib_index, ip6_address_t * lcl, rv = clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6); if (rv == 0) { - ASSERT ((u32) (kv6.value >> 32) == thread_index); if (PREDICT_FALSE ((u32) (kv6.value >> 32) != thread_index)) { *result = SESSION_LOOKUP_RESULT_WRONG_THREAD; diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c index b3c02510232..9c1121f7cfb 100644 --- a/src/vnet/udp/udp.c +++ b/src/vnet/udp/udp.c @@ -232,18 +232,43 @@ udp_session_get_listener (u32 listener_index) return &us->connection; } +always_inline u16 +udp_compute_checksum (vlib_main_t *vm, vlib_buffer_t *b, u8 csum_offload, + u8 is_ip4) +{ + u16 csum = 0; + + if (csum_offload) + vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM); + else + { + if (is_ip4) + csum = + ip4_tcp_udp_compute_checksum (vm, b, vlib_buffer_get_current (b)); + else + { + int bogus = 0; + csum = ip6_tcp_udp_icmp_compute_checksum ( + vm, b, vlib_buffer_get_current (b), &bogus); + } + } + + return csum; +} + always_inline u32 udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b, u8 is_cless) { + udp_header_t *uh; + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; /* 
reuse tcp medatada for now */ vnet_buffer (b)->tcp.connection_index = uc->c_c_index; if (!is_cless) { - vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port, - udp_csum_offload (uc)); + uh = vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port); if (uc->c_is_ip4) vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4, @@ -263,8 +288,7 @@ udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b, hdr = *(session_dgram_hdr_t *) (data - sizeof (hdr)); /* Local port assumed to be bound, not overwriting it */ - vlib_buffer_push_udp (b, uc->c_lcl_port, hdr.rmt_port, - udp_csum_offload (uc)); + uh = vlib_buffer_push_udp (b, uc->c_lcl_port, hdr.rmt_port); if (uc->c_is_ip4) vlib_buffer_push_ip4_custom (vm, b, &hdr.lcl_ip.ip4, &hdr.rmt_ip.ip4, @@ -279,6 +303,9 @@ udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b, vnet_buffer (b)->tcp.flags |= UDP_CONN_F_LISTEN; } + uh->checksum = + udp_compute_checksum (vm, b, udp_csum_offload (uc), uc->c_is_ip4); + return 0; } diff --git a/src/vnet/udp/udp_inlines.h b/src/vnet/udp/udp_inlines.h index f0dd44f48b5..ceec0b191b1 100644 --- a/src/vnet/udp/udp_inlines.h +++ b/src/vnet/udp/udp_inlines.h @@ -26,7 +26,7 @@ #include <vnet/udp/udp_encap.h> always_inline void * -vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum) +vlib_buffer_push_udp (vlib_buffer_t *b, u16 sp, u16 dp) { udp_header_t *uh; u16 udp_len = sizeof (udp_header_t) + b->current_length; @@ -38,8 +38,6 @@ vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum) uh->dst_port = dp; uh->checksum = 0; uh->length = clib_host_to_net_u16 (udp_len); - if (offload_csum) - vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM); vnet_buffer (b)->l4_hdr_offset = (u8 *) uh - b->data; b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID; return uh; diff --git a/src/vnet/unix/gdb_funcs.c b/src/vnet/unix/gdb_funcs.c index d6fdc985bd9..a89b7202400 100644 --- a/src/vnet/unix/gdb_funcs.c +++ 
b/src/vnet/unix/gdb_funcs.c @@ -238,44 +238,44 @@ gdb_show_traces () /* Get active traces from pool. */ - foreach_vlib_main () - { - fmt = "------------------- Start of thread %d %s -------------------\n"; - s = format (s, fmt, index, vlib_worker_threads[index].name); + foreach_vlib_main__ (0 /* no checks */) + { + fmt = "------------------- Start of thread %d %s -------------------\n"; + s = format (s, fmt, index, vlib_worker_threads[index].name); - tm = &this_vlib_main->trace_main; + tm = &this_vlib_main->trace_main; - trace_apply_filter (this_vlib_main); + trace_apply_filter (this_vlib_main); - traces = 0; - pool_foreach (h, tm->trace_buffer_pool) - { - vec_add1 (traces, h[0]); - } + traces = 0; + pool_foreach (h, tm->trace_buffer_pool) + { + vec_add1 (traces, h[0]); + } - if (vec_len (traces) == 0) - { - s = format (s, "No packets in trace buffer\n"); - goto done; - } + if (vec_len (traces) == 0) + { + s = format (s, "No packets in trace buffer\n"); + goto done; + } - /* Sort them by increasing time. */ - vec_sort_with_function (traces, trace_cmp); + /* Sort them by increasing time. */ + vec_sort_with_function (traces, trace_cmp); - for (i = 0; i < vec_len (traces); i++) - { - if (i == max) - { - fformat (stderr, - "Limiting display to %d packets." - " To display more specify max.", - max); - goto done; - } - - s = format (s, "Packet %d\n%U\n\n", i + 1, format_vlib_trace, - vlib_get_first_main (), traces[i]); - } + for (i = 0; i < vec_len (traces); i++) + { + if (i == max) + { + fformat (stderr, + "Limiting display to %d packets." 
+ " To display more specify max.", + max); + goto done; + } + + s = format (s, "Packet %d\n%U\n\n", i + 1, format_vlib_trace, + vlib_get_first_main (), traces[i]); + } done: vec_free (traces); diff --git a/src/vpp-api/python/vpp_papi/vpp_serializer.py b/src/vpp-api/python/vpp_papi/vpp_serializer.py index d724cb33ce9..707bb03b790 100644 --- a/src/vpp-api/python/vpp_papi/vpp_serializer.py +++ b/src/vpp-api/python/vpp_papi/vpp_serializer.py @@ -644,10 +644,15 @@ class VPPType(Packer): else: arg = data[a] kwarg = kwargs[a] if a in kwargs else None - if isinstance(self.packers[i], VPPType): - b += self.packers[i].pack(arg, kwarg) - else: - b += self.packers[i].pack(arg, kwargs) + try: + if isinstance(self.packers[i], VPPType): + b += self.packers[i].pack(arg, kwarg) + else: + b += self.packers[i].pack(arg, kwargs) + except Exception as e: + raise VPPSerializerValueError( + f"Exception while packing {data} for {self.name}.{a}." + ) from e return bytes(b) diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c index c57efd59a62..dd4f4cc3353 100644 --- a/src/vpp/vnet/main.c +++ b/src/vpp/vnet/main.c @@ -329,6 +329,10 @@ defaulted: unformat_free (&input); + /* if main thread affinity is unspecified, set to current running cpu */ + if (main_core == ~0) + main_core = sched_getcpu (); + /* set process affinity for main thread */ if (main_core != ~0) { diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt index 5878f0612f0..233e75d6e2a 100644 --- a/src/vppinfra/CMakeLists.txt +++ b/src/vppinfra/CMakeLists.txt @@ -14,6 +14,34 @@ enable_language(ASM) ############################################################################## +# find libdl +############################################################################## +vpp_find_path(LIBDL_INCLUDE_DIR dlfcn.h) +vpp_find_library(LIBDL_LIB NAMES dl) + +if (LIBDL_INCLUDE_DIR AND LIBDL_LIB) + message(STATUS "libdl found at ${LIBDL_LIB}") + list(APPEND VPPINFRA_LIBS ${LIBDL_LIB}) +else() + message(FATAL_ERROR "libdl 
not found") +endif() + +############################################################################## +# find libunwind +############################################################################## +vpp_find_path(LIBUNWIND_INCLUDE_DIR unwind.h) +vpp_find_library(LIBUNWIND_LIB NAMES unwind libunwind) + +if (LIBUNWIND_INCLUDE_DIR AND LIBUNWIND_LIB) + message(STATUS "libunwind found at ${LIBUNWIND_LIB}") + list(APPEND VPPINFRA_LIBS ${LIBUNWIND_LIB}) + add_definitions(-DHAVE_LIBUNWIND=1) +else() + message(WARNING "libunwind not found - stack traces disabled") + add_definitions(-DHAVE_LIBUNWIND=0) +endif() + +############################################################################## # Generate vppinfra/config.h ############################################################################## set(LOG2_CACHE_LINE_BYTES ${VPP_LOG2_CACHE_LINE_SIZE}) @@ -42,12 +70,10 @@ add_definitions(-fvisibility=hidden) set_source_files_properties( cJSON.c jsonformat.c PROPERTIES COMPILE_DEFINITIONS " CJSON_API_VISIBILITY " ) - ############################################################################## # vppinfra sources ############################################################################## set(VPPINFRA_SRCS - backtrace.c bitmap.c bihash_all_vector.c cpu.c @@ -80,6 +106,7 @@ set(VPPINFRA_SRCS rbtree.c serialize.c socket.c + stack.c std-formats.c string.c time.c @@ -142,6 +169,7 @@ set(VPPINFRA_HEADERS fifo.h file.h format.h + format_ansi.h format_table.h hash.h heap.h @@ -175,6 +203,7 @@ set(VPPINFRA_HEADERS smp.h socket.h sparse_vec.h + stack.h string.h time.h time_range.h @@ -229,18 +258,9 @@ elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") ) endif() -if("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") - option(VPP_USE_EXTERNAL_LIBEXECINFO "Use external libexecinfo (useful for non-glibc targets)." ON) -else() - option(VPP_USE_EXTERNAL_LIBEXECINFO "Use external libexecinfo (useful for non-glibc targets)." 
OFF) -endif() - -if(VPP_USE_EXTERNAL_LIBEXECINFO) - set(EXECINFO_LIB execinfo) -endif() add_vpp_library(vppinfra SOURCES ${VPPINFRA_SRCS} - LINK_LIBRARIES m ${EXECINFO_LIB} + LINK_LIBRARIES m ${VPPINFRA_LIBS} INSTALL_HEADERS ${VPPINFRA_HEADERS} COMPONENT libvppinfra LTO @@ -265,6 +285,7 @@ if(VPP_BUILD_VPPINFRA_TESTS) longjmp macros maplog + mhash pmalloc pool_alloc pool_iterate diff --git a/src/vppinfra/asm_mips.h b/src/vppinfra/asm_mips.h deleted file mode 100644 index 7c9e69586f4..00000000000 --- a/src/vppinfra/asm_mips.h +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - Copyright (c) 2004 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef included_asm_mips_h -#define included_asm_mips_h - -/* Encoding of MIPS instructions. */ -/* Encoding of opcode field (op). */ -#define mips_foreach_opcode \ - _(SPECIAL) _(REGIMM) _(j) _(jal) _(beq) _(bne) _(blez) _(bgtz) \ - _(addi) _(addiu) _(slti) _(sltiu) _(andi) _(ori) _(xori) _(lui) \ - _(COP0) _(COP1) _(COP2) _(COP1X) _(beql) _(bnel) _(blezl) _(bgtzl) \ - _(daddi) _(daddiu) _(ldl) _(ldr) _(SPECIAL2) _(jalx) _(MDMX) _(O37) \ - _(lb) _(lh) _(lwl) _(lw) _(lbu) _(lhu) _(lwr) _(lwu) \ - _(sb) _(sh) _(swl) _(sw) _(sdl) _(sdr) _(swr) _(cache) \ - _(ll) _(lwc1) _(lwc2) _(pref) _(lld) _(ldc1) _(ldc2) _(ld) \ - _(sc) _(swc1) _(swc2) _(o73) _(scd) _(sdc1) _(sdc2) _(sd) - -/* Encoding of funct field. */ -#define mips_foreach_special_funct \ - _(sll) _(MOVCI) _(srl) _(sra) _(sllv) _(o05) _(srlv) _(srav) \ - _(jr) _(jalr) _(movz) _(movn) _(syscall) _(break) _(o16) _(sync) \ - _(mfhi) _(mthi) _(mflo) _(mtlo) _(dsllv) _(o25) _(dsrlv) _(dsrav) \ - _(mult) _(multu) _(div) _(divu) _(dmult) _(dmultu) _(ddiv) _(ddivu) \ - _(add) _(addu) _(sub) _(subu) _(and) _(or) _(xor) _(nor) \ - _(o50) _(o51) _(slt) _(sltu) _(dadd) _(daddu) _(dsub) _(dsubu) \ - _(tge) _(tgeu) _(tlt) _(tltu) _(teq) _(o65) _(tne) _(o67) \ - _(dsll) _(o71) _(dsrl) _(dsra) _(dsll32) _(o75) _(dsrl32) _(dsra32) - -/* SPECIAL2 encoding of funct field. 
*/ -#define mips_foreach_special2_funct \ - _(madd) _(maddu) _(mul) _(o03) _(msub) _(msubu) _(o06) _(o07) \ - _(o10) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \ - _(o20) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \ - _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) \ - _(clz) _(clo) _(o42) _(o43) _(dclz) _(dclo) _(o46) _(o47) \ - _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \ - _(o60) _(o61) _(o62) _(o63) _(o64) _(o65) _(o66) _(o67) \ - _(o70) _(o71) _(o72) _(o73) _(o74) _(o75) _(o76) _(sdbbp) - -/* REGIMM encoding of rt field. */ -#define mips_foreach_regimm_rt \ - _(bltz) _(bgez) _(bltzl) _(bgezl) _(o04) _(o05) _(o06) _(o07) \ - _(tgei) _(tgeiu) _(tltiu) _(teqi) _(o14) _(tnei) _(o16) _(o17) \ - _(bltzal) _(bgezal) _(bltzall) _(bgezall) _(o24) _(o25) _(o26) _(o27) \ - _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) - -/* COP0 encoding of rs field. */ -#define mips_foreach_cop0_rs \ - _(mfc0) _(dmfc0) _(o02) _(o03) _(mtc0) _(dmtc0) _(o06) _(o07) \ - _(o10) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \ - _(C0) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \ - _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) - -/* COP0 encoding of funct when rs == RS_CO */ -#define mips_foreach_cop0_funct \ - _(o00) _(tlbr) _(tlbwi) _(o03) _(o04) _(o05) _(tlbwr) _(o07) \ - _(tlbp) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \ - _(o20) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \ - _(eret) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(deret) \ - _(wait) _(o41) _(o42) _(o43) _(o44) _(o45) _(o46) _(o47) \ - _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \ - _(o60) _(o61) _(o62) _(o63) _(o64) _(o65) _(o66) _(o67) \ - _(o70) _(o71) _(o72) _(o73) _(o74) _(o75) _(o76) _(o77) - -/* COP1 encoding of rs field. 
*/ -#define mips_foreach_cop1_rs \ - _(mfc1) _(dmfc1) _(cfc1) _(o03) _(mtc1) _(dmtc1) _(ctc1) _(o07) \ - _(BC1) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \ - _(S) _(D) _(o22) _(o23) _(W) _(L) _(o26) _(o27) \ - _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) - -/* COP1 encoding of funct for S and D */ -#define mips_foreach_cop1_funct \ - _(add) _(sub) _(mul) _(div) _(sqrt) _(abs) _(mov) _(neg) \ - _(roundl) _(truncl) _(ceill) _(floorl) _(roundw) _(truncw) _(ceilw) _(floorw) \ - _(o20) _(MOVCF) _(movz) _(movn) _(o24) _(recip) _(rsqrt) _(o27) \ - _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) \ - _(cvts) _(cvtd) _(o42) _(o43) _(cvtw) _(cvtl) _(o46) _(o47) \ - _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \ - _(cf) _(cun) _(ceq) _(cueq) _(colt) _(cult) _(cole) _(cule) \ - _(csf) _(cngle) _(cseq) _(cngl) _(clt) _(cnge) _(cle) _(cngt) - -/* COP1X encoding of funct */ -#define mips_foreach_cop1x_funct \ - _(lwxc1) _(ldxc1) _(o02) _(o03) _(o04) _(luxc1) _(o06) _(o07) \ - _(swxc1) _(sdxc1) _(o12) _(o13) _(o14) _(suxc1) _(o16) _(prefx) \ - _(o20) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \ - _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) \ - _(madds) _(maddd) _(o42) _(o43) _(o44) _(o45) _(o46) _(o47) \ - _(msubs) _(msubd) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \ - _(nmadds) _(nmaddd) _(o62) _(o63) _(o64) _(o65) _(o66) _(o67) \ - _(nmsubs) _(nmsubd) _(o72) _(o73) _(o74) _(o75) _(o76) _(o77) - -#define mips_foreach_mdmx_funct \ - _(msgn) _(ceq) _(pickf) _(pickt) _(clt) _(cle) _(min) _(max) \ - _(o10) _(o11) _(sub) _(add) _(and) _(xor) _(or) _(nor) \ - _(sll) _(o21) _(srl) _(sra) _(o24) _(o25) _(o26) _(o27) \ - _(alniob) _(alnvob) _(alniqh) _(alnvqh) _(o34) _(o35) _(o36) _(shfl) \ - _(rzu) _(rnau) _(rneu) _(o43) _(rzs) _(rnas) _(rnes) _(o47) \ - _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \ - _(mul) _(o61) _(muls) _(mula) _(o64) _(o65) _(suba) _(adda) \ - _(o70) _(o71) _(o72) _(o73) _(o74) _(o75) _(wac) _(rac) - 
-#define _(f) MIPS_OPCODE_##f, -typedef enum -{ - mips_foreach_opcode -} mips_insn_opcode_t; -#undef _ - -#define _(f) MIPS_SPECIAL_FUNCT_##f, -typedef enum -{ - mips_foreach_special_funct -} mips_insn_special_funct_t; -#undef _ - -#define _(f) MIPS_SPECIAL2_FUNCT_##f, -typedef enum -{ - mips_foreach_special2_funct -} mips_insn_special2_funct_t; -#undef _ - -#define _(f) MIPS_REGIMM_RT_##f, -typedef enum -{ - mips_foreach_regimm_rt -} mips_insn_regimm_rt_t; -#undef _ - -#define _(f) MIPS_COP0_RS_##f, -typedef enum -{ - mips_foreach_cop0_rs -} mips_insn_cop0_rs_t; -#undef _ - -#define _(f) MIPS_COP0_FUNCT_##f, -typedef enum -{ - mips_foreach_cop0_funct -} mips_insn_cop0_funct_t; -#undef _ - -#define _(f) MIPS_COP1_RS_##f, -typedef enum -{ - mips_foreach_cop1_rs -} mips_insn_cop1_rs_t; -#undef _ - -#define _(f) MIPS_COP1_FUNCT_##f, -typedef enum -{ - mips_foreach_cop1_funct -} mips_insn_cop1_funct_t; -#undef _ - -#define _(f) MIPS_COP1X_FUNCT_##f, -typedef enum -{ - mips_foreach_cop1x_funct -} mips_insn_cop1x_funct_t; -#undef _ - -#define _(f) MIPS_MDMX_FUNCT_##f, -typedef enum -{ - mips_foreach_mdmx_funct -} mips_insn_mdmx_funct_t; -#undef _ - -always_inline mips_insn_opcode_t -mips_insn_get_op (u32 insn) -{ - return (insn >> 26) & 0x3f; -} - -always_inline u32 -mips_insn_get_rs (u32 insn) -{ - return (insn >> 21) & 0x1f; -} - -always_inline u32 -mips_insn_get_rt (u32 insn) -{ - return (insn >> 16) & 0x1f; -} - -always_inline u32 -mips_insn_get_rd (u32 insn) -{ - return (insn >> 11) & 0x1f; -} - -always_inline u32 -mips_insn_get_sa (u32 insn) -{ - return (insn >> 6) & 0x1f; -} - -always_inline u32 -mips_insn_get_funct (u32 insn) -{ - return (insn >> 0) & 0x3f; -} - -always_inline i32 -mips_insn_get_immediate (u32 insn) -{ - return (((i32) insn) << 16) >> 16; -} - -always_inline u32 -mips_insn_encode_i_type (int op, int rs, int rt, int immediate) -{ - u32 insn; - insn = immediate; - insn |= rt << 16; - insn |= rs << 21; - insn |= op << 26; - - ASSERT 
(mips_insn_get_immediate (insn) == immediate); - ASSERT (mips_insn_get_rt (insn) == rt); - ASSERT (mips_insn_get_rs (insn) == rt); - ASSERT (mips_insn_get_op (insn) == op); - - return insn; -} - -always_inline u32 -mips_insn_encode_j_type (int op, u32 addr) -{ - u32 insn; - - insn = (addr & ((1 << 28) - 1)) / 4; - insn |= op << 26; - - return insn; -} - -always_inline u32 -mips_insn_encode_r_type (int op, int rs, int rt, int rd, int sa, int funct) -{ - u32 insn; - insn = funct; - insn |= sa << 6; - insn |= rd << 11; - insn |= rt << 16; - insn |= rs << 21; - insn |= op << 26; - - ASSERT (mips_insn_get_funct (insn) == funct); - ASSERT (mips_insn_get_sa (insn) == sa); - ASSERT (mips_insn_get_rd (insn) == rd); - ASSERT (mips_insn_get_rt (insn) == rt); - ASSERT (mips_insn_get_rs (insn) == rt); - ASSERT (mips_insn_get_op (insn) == op); - - return insn; -} - -#define mips_insn_r(op,funct,rd,rs,rt,sa) \ - mips_insn_encode_r_type (MIPS_OPCODE_##op, \ - (rs), (rt), (rd), (sa), \ - MIPS_##op##_FUNCT_##funct) - -#define mips_insn_i(op,rs,rt,imm) \ - mips_insn_encode_i_type (MIPS_OPCODE_##op, (rs), (rt), (imm)) - -#define mips_insn_j(op,target) \ - mips_insn_encode_i_type (MIPS_OPCODE_##op, (rs), (rt), (imm)) - -/* Generate unsigned load instructions of data of various sizes. 
*/ -always_inline u32 -mips_insn_load (u32 rd, i32 offset, u32 base, u32 log2_bytes) -{ - int op; - - ASSERT (log2_bytes < 4); - switch (log2_bytes) - { - case 0: - op = MIPS_OPCODE_lbu; - break; - case 1: - op = MIPS_OPCODE_lhu; - break; - case 2: - op = MIPS_OPCODE_lwu; - break; - case 3: - op = MIPS_OPCODE_ld; - break; - } - - return mips_insn_encode_i_type (op, base, rd, offset); -} - -typedef enum -{ - MIPS_REG_SP = 29, - MIPS_REG_RA = 31, -} mips_reg_t; - -#endif /* included_asm_mips_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/asm_x86.c b/src/vppinfra/asm_x86.c deleted file mode 100644 index e6e00ce5543..00000000000 --- a/src/vppinfra/asm_x86.c +++ /dev/null @@ -1,1947 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/* FIXME - opcode name remove to save table space; enum - x87 - 3dnow - cbw naming -*/ - -#include <vppinfra/error.h> -#include <vppinfra/byte_order.h> -#include <vppinfra/asm_x86.h> - -#define foreach_x86_gp_register \ - _ (AX) _ (CX) _ (DX) _ (BX) \ - _ (SP) _ (BP) _ (SI) _ (DI) - -typedef enum { -#define _(r) X86_INSN_GP_REG_##r, - foreach_x86_gp_register -#undef _ -} x86_insn_gp_register_t; - -typedef union { - struct { - u8 rm : 3; - u8 reg : 3; - u8 mode : 2; - }; - u8 byte; -} x86_insn_modrm_byte_t; - -typedef union { - struct { - u8 base : 3; - u8 index : 3; - u8 log2_scale : 2; - }; - u8 byte; -} x86_insn_sib_byte_t; - -always_inline uword -x86_insn_has_modrm_byte (x86_insn_t * insn) -{ - int i; - for (i = 0; i < ARRAY_LEN (insn->operands); i++) - switch (insn->operands[i].code) - { - case 'G': case 'E': case 'M': case 'R': - return 1; - } - return 0; -} - -always_inline uword -x86_insn_immediate_type (x86_insn_t * insn) -{ - int i; - for (i = 0; i < ARRAY_LEN (insn->operands); i++) - switch (insn->operands[i].code) - { - case 'J': - case 'I': - case 'O': - return insn->operands[i].type; - } - return 0; -} - -/* Opcode extension in modrm byte reg field. 
*/ -#define foreach_x86_insn_modrm_reg_group \ - _ (1) _ (1a) _ (2) _ (3) _ (4) _ (5) _ (6) _ (7) \ - _ (8) _ (9) _ (10) _ (11) _ (12) _ (13) _ (14) \ - _ (15) _ (16) _ (p) - -#define foreach_x86_insn_sse_group \ - _ (10) _ (28) _ (50) _ (58) _ (60) _ (68) _ (70) _ (78) \ - _ (c0) _ (d0) _ (d8) _ (e0) _ (e8) _ (f0) _ (f8) - -enum { -#define _(x) X86_INSN_MODRM_REG_GROUP_##x, - foreach_x86_insn_modrm_reg_group -#undef _ -#define _(x) X86_INSN_SSE_GROUP_##x, - foreach_x86_insn_sse_group -#undef _ -}; - -enum { -#define _(x) \ - X86_INSN_FLAG_MODRM_REG_GROUP_##x \ - = X86_INSN_FLAG_SET_MODRM_REG_GROUP (1 + X86_INSN_MODRM_REG_GROUP_##x), - foreach_x86_insn_modrm_reg_group -#undef _ - -#define _(x) \ - X86_INSN_FLAG_SSE_GROUP_##x \ - = X86_INSN_FLAG_SET_SSE_GROUP (1 + X86_INSN_SSE_GROUP_##x), - foreach_x86_insn_sse_group -#undef _ -}; - -#define foreach_x86_gp_reg \ - _ (AX) _ (CX) _ (DX) _ (BX) \ - _ (SP) _ (BP) _ (SI) _ (DI) - -#define foreach_x86_condition \ - _ (o) _ (no) _ (b) _ (nb) \ - _ (z) _ (nz) _ (be) _ (nbe) \ - _ (s) _ (ns) _ (p) _ (np) \ - _ (l) _ (nl) _ (le) _ (nle) - -#define _3f(x,f,o0,o1,o2) \ -{ \ - .name = #x, \ - .flags = (f), \ - .operands[0] = { .data = #o0 }, \ - .operands[1] = { .data = #o1 }, \ - .operands[2] = { .data = #o2 }, \ -} - -#define _2f(x,f,o0,o1) _3f(x,f,o0,o1,__) -#define _1f(x,f,o0) _2f(x,f,o0,__) -#define _0f(x,f) _1f(x,f,__) - -#define _3(x,o0,o1,o2) _3f(x,0,o0,o1,o2) -#define _2(x,o0,o1) _2f(x,0,o0,o1) -#define _1(x,o0) _1f(x,0,o0) -#define _0(x) _0f(x,0) - -static x86_insn_t x86_insns_one_byte[256] = { - -#define _(x) \ - _2 (x, Eb, Gb), \ - _2 (x, Ev, Gv), \ - _2 (x, Gb, Eb), \ - _2 (x, Gv, Ev), \ - _2 (x, AL, Ib), \ - _2 (x, AX, Iz) - - /* 0x00 */ - _ (add), - _0 (push_es), - _0 (pop_es), - _ (or), - _0 (push_cs), - _0 (escape_two_byte), - - /* 0x10 */ - _ (adc), - _0 (push_ss), - _0 (pop_ss), - _ (sbb), - _0 (push_ds), - _0 (pop_ds), - - /* 0x20 */ - _ (and), - _0 (segment_es), - _0 (daa), - _ (sub), - _0 (segment_cs), - _0 
(das), - - /* 0x30 */ - _ (xor), - _0 (segment_ss), - _0 (aaa), - _ (cmp), - _0 (segment_ds), - _0 (aas), - -#undef _ - - /* 0x40 */ -#define _(r) _1 (inc, r), - foreach_x86_gp_reg -#undef _ -#define _(r) _1 (dec, r), - foreach_x86_gp_reg -#undef _ - - /* 0x50 */ -#define _(r) _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, r), - foreach_x86_gp_reg -#undef _ -#define _(r) _1f (pop, X86_INSN_FLAG_DEFAULT_64_BIT, r), - foreach_x86_gp_reg -#undef _ - - /* 0x60 */ - _0 (pusha), - _0 (popa), - _2 (bound, Gv, Ma), - _2 (movsxd, Gv, Ed), - _0 (segment_fs), - _0 (segment_gs), - _0 (operand_type), - _0 (address_size), - _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, Iz), - _3 (imul, Gv, Ev, Iz), - _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, Ib), - _3 (imul, Gv, Ev, Ib), - _1 (insb, DX), - _1 (insw, DX), - _1 (outsb, DX), - _1 (outsw, DX), - - /* 0x70 */ -#define _(x) _1 (j##x, Jb), - foreach_x86_condition -#undef _ - - /* 0x80 */ - _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Eb, Ib), - _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Ev, Iz), - _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Eb, Ib), - _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Ev, Ib), - _2 (test, Eb, Gb), - _2 (test, Ev, Gv), - _2 (xchg, Eb, Gb), - _2 (xchg, Ev, Gv), - _2 (mov, Eb, Gb), - _2 (mov, Ev, Gv), - _2 (mov, Gb, Eb), - _2 (mov, Gv, Ev), - _2 (mov, Ev, Sw), - _2 (lea, Gv, Ev), - _2 (mov, Sw, Ew), - _1f (modrm_group_1a, X86_INSN_FLAG_MODRM_REG_GROUP_1a, Ev), - - /* 0x90 */ - _0 (nop), - _1 (xchg, CX), - _1 (xchg, DX), - _1 (xchg, BX), - _1 (xchg, SP), - _1 (xchg, BP), - _1 (xchg, SI), - _1 (xchg, DI), - _0 (cbw), - _0 (cwd), - _1 (call, Ap), - _0 (wait), - _0 (pushf), - _0 (popf), - _0 (sahf), - _0 (lahf), - - /* 0xa0 */ - _2 (mov, AL, Ob), - _2 (mov, AX, Ov), - _2 (mov, Ob, AL), - _2 (mov, Ov, AX), - _0 (movsb), - _0 (movsw), - _0 (cmpsb), - _0 (cmpsw), - _2 (test, AL, Ib), - _2 (test, AX, Iz), - _1 (stosb, AL), - _1 (stosw, AX), - _1 (lodsb, AL), - _1 (lodsw, AX), - _1 (scasb, AL), - 
_1 (scasw, AX), - - /* 0xb0 */ - _2 (mov, AL, Ib), - _2 (mov, CL, Ib), - _2 (mov, DL, Ib), - _2 (mov, BL, Ib), - _2 (mov, AH, Ib), - _2 (mov, CH, Ib), - _2 (mov, DH, Ib), - _2 (mov, BH, Ib), -#define _(r) _2 (mov, r, Iv), - foreach_x86_gp_reg -#undef _ - - /* 0xc0 */ - _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Eb, Ib), - _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Ev, Ib), - _1 (ret, Iw), - _0 (ret), - _2 (les, Gz, Mp), - _2 (lds, Gz, Mp), - _2f (modrm_group_11, X86_INSN_FLAG_MODRM_REG_GROUP_11, Eb, Ib), - _2f (modrm_group_11, X86_INSN_FLAG_MODRM_REG_GROUP_11, Ev, Iz), - _2 (enter, Iw, Ib), - _0 (leave), - _1 (ret, Iw), - _0 (ret), - _0 (int3), - _1 (int, Ib), - _0 (into), - _0 (iret), - - /* 0xd0 */ - _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Eb, 1b), - _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Ev, 1b), - _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Eb, CL), - _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Ev, CL), - _0 (aam), - _0 (aad), - _0 (salc), - _0 (xlat), - /* FIXME x87 */ - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - - /* 0xe0 */ - _1 (loopnz, Jb), - _1 (loopz, Jb), - _1 (loop, Jb), - _1 (jcxz, Jb), - _2 (in, AL, Ib), - _2 (in, AX, Ib), - _2 (out, Ib, AL), - _2 (out, Ib, AX), - _1f (call, X86_INSN_FLAG_DEFAULT_64_BIT, Jz), - _1f ( jmp, X86_INSN_FLAG_DEFAULT_64_BIT, Jz), - _1 (jmp, Ap), - _1 (jmp, Jb), - _2 (in, AL, DX), - _2 (in, AX, DX), - _2 (out, DX, AL), - _2 (out, DX, AX), - - /* 0xf0 */ - _0 (lock), - _0 (int1), - _0 (repne), - _0 (rep), - _0 (hlt), - _0 (cmc), - _0f (modrm_group_3, X86_INSN_FLAG_MODRM_REG_GROUP_3), - _0f (modrm_group_3, X86_INSN_FLAG_MODRM_REG_GROUP_3), - _0 (clc), - _0 (stc), - _0 (cli), - _0 (sti), - _0 (cld), - _0 (std), - _1f (modrm_group_4, X86_INSN_FLAG_MODRM_REG_GROUP_4, Eb), - _0f (modrm_group_5, X86_INSN_FLAG_MODRM_REG_GROUP_5), -}; - -static x86_insn_t x86_insns_two_byte[256] = { - /* 0x00 */ - _0f (modrm_group_6, 
X86_INSN_FLAG_MODRM_REG_GROUP_6), - _0f (modrm_group_7, X86_INSN_FLAG_MODRM_REG_GROUP_7), - _2 (lar, Gv, Ew), - _2 (lsl, Gv, Ew), - _0 (bad), - _0 (syscall), - _0 (clts), - _0 (sysret), - _0 (invd), - _0 (wbinvd), - _0 (bad), - _0 (ud2), - _0 (bad), - _0f (modrm_group_p, X86_INSN_FLAG_MODRM_REG_GROUP_p), - _0 (femms), - _0 (escape_3dnow), - - /* 0x10 */ - _2f (movups, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex), - _2f (movups, X86_INSN_FLAG_SSE_GROUP_10, Ex, Gx), - _2f (movlps, X86_INSN_FLAG_SSE_GROUP_10, Ex, Gx), - _2f (movlps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex), - _2f (unpcklps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex), - _2f (unpckhps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex), - _2f (movhps, X86_INSN_FLAG_SSE_GROUP_10, Ex, Gx), - _2f (movhps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex), - _0f (modrm_group_16, X86_INSN_FLAG_MODRM_REG_GROUP_16), - _0 (nop), - _0 (nop), - _0 (nop), - _0 (nop), - _0 (nop), - _0 (nop), - _0 (nop), - - /* 0x20 */ - _2 (mov, Rv, Cv), - _2 (mov, Rv, Dv), - _2 (mov, Cv, Rv), - _2 (mov, Dv, Rv), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _2f (movaps, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex), - _2f (movaps, X86_INSN_FLAG_SSE_GROUP_28, Ex, Gx), - _2f (cvtpi2ps, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex), - _2f (movntps, X86_INSN_FLAG_SSE_GROUP_28, Mx, Gx), - _2f (cvttps2pi, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex), - _2f (cvtps2pi, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex), - _2f (ucomiss, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex), - _2f (comiss, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex), - - /* 0x30 */ - _0 (wrmsr), - _0 (rdtsc), - _0 (rdmsr), - _0 (rdpmc), - _0 (sysenter), - _0 (sysexit), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - - /* 0x40 */ -#define _(x) _2 (cmov##x, Gv, Ev), - foreach_x86_condition -#undef _ - - /* 0x50 */ - _2f (movmskps, X86_INSN_FLAG_SSE_GROUP_50, Gd, Rx), - _2f (sqrtps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex), - _2f (rsqrtps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex), - _2f (rcpps, 
X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex), - _2f (andps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex), - _2f (andnps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex), - _2f (orps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex), - _2f (xorps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex), - _2f (addps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex), - _2f (mulps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex), - _2f (cvtps2pd, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex), - _2f (cvtdq2ps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex), - _2f (subps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex), - _2f (minps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex), - _2f (divps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex), - _2f (maxps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex), - - /* 0x60 */ - _2f (punpcklbw, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em), - _2f (punpcklwd, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em), - _2f (punpckldq, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em), - _2f (packsswb, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em), - _2f (pcmpgtb, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em), - _2f (pcmpgtw, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em), - _2f (pcmpgtd, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em), - _2f (packuswb, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em), - _2f (punpckhbw, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em), - _2f (punpckhwd, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em), - _2f (punpckhdq, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em), - _2f (packssdw, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em), - _0f (bad, X86_INSN_FLAG_SSE_GROUP_68), - _0f (bad, X86_INSN_FLAG_SSE_GROUP_68), - _2f (movd, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em), - _2f (movq, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em), - - /* 0x70 */ - _3f (pshufw, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em, Ib), - _0f (modrm_group_12, X86_INSN_FLAG_MODRM_REG_GROUP_12), - _0f (modrm_group_13, X86_INSN_FLAG_MODRM_REG_GROUP_13), - _0f (modrm_group_14, X86_INSN_FLAG_MODRM_REG_GROUP_14), - _2f (pcmpeqb, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em), - _2f (pcmpeqw, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em), - _2f (pcmpeqd, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em), - _0f (emms, X86_INSN_FLAG_SSE_GROUP_70), - _0f (bad, X86_INSN_FLAG_SSE_GROUP_78), - _0f 
(bad, X86_INSN_FLAG_SSE_GROUP_78), - _0f (bad, X86_INSN_FLAG_SSE_GROUP_78), - _0f (bad, X86_INSN_FLAG_SSE_GROUP_78), - _0f (bad, X86_INSN_FLAG_SSE_GROUP_78), - _0f (bad, X86_INSN_FLAG_SSE_GROUP_78), - _2f (movd, X86_INSN_FLAG_SSE_GROUP_78, Em, Gm), - _2f (movq, X86_INSN_FLAG_SSE_GROUP_78, Em, Gm), - - /* 0x80 */ -#define _(x) _1 (jmp##x, Jz), - foreach_x86_condition -#undef _ - - /* 0x90 */ -#define _(x) _1 (set##x, Eb), - foreach_x86_condition -#undef _ - - /* 0xa0 */ - _0 (push_fs), - _0 (pop_fs), - _0 (cpuid), - _2 (bt, Ev, Gv), - _3 (shld, Ev, Gv, Ib), - _3 (shld, Ev, Gv, CL), - _0 (bad), - _0 (bad), - _0 (push_gs), - _0 (pop_gs), - _0 (rsm), - _2 (bts, Ev, Gv), - _3 (shrd, Ev, Gv, Ib), - _3 (shrd, Ev, Gv, CL), - _0f (modrm_group_15, X86_INSN_FLAG_MODRM_REG_GROUP_15), - _2 (imul, Gv, Ev), - - /* 0xb0 */ - _2 (cmpxchg, Eb, Gb), - _2 (cmpxchg, Ev, Gv), - _2 (lss, Gz, Mp), - _2 (btr, Ev, Gv), - _2 (lfs, Gz, Mp), - _2 (lgs, Gz, Mp), - _2 (movzbl, Gv, Eb), - _2 (movzwl, Gv, Ew), - _0 (bad), - _0f (modrm_group_10, X86_INSN_FLAG_MODRM_REG_GROUP_10), - _2f (modrm_group_8, X86_INSN_FLAG_MODRM_REG_GROUP_8, Ev, Ib), - _2 (btc, Ev, Gv), - _2 (bsf, Gv, Ev), - _2 (bsr, Gv, Ev), - _2 (movsx, Gv, Eb), - _2 (movsx, Gv, Ew), - - /* 0xc0 */ - _2 (xadd, Eb, Gb), - _2 (xadd, Ev, Gv), - _3f (cmpps, X86_INSN_FLAG_SSE_GROUP_c0, Gx, Ex, Ib), - _2 (movnti, Mv, Gv), - _3f (pinsrw, X86_INSN_FLAG_SSE_GROUP_c0, Gm, Ew, Ib), - _3f (pextrw, X86_INSN_FLAG_SSE_GROUP_c0, Gd, Rm, Ib), - _3f (shufps, X86_INSN_FLAG_SSE_GROUP_c0, Gx, Ex, Ib), - _1f (modrm_group_9, X86_INSN_FLAG_MODRM_REG_GROUP_9, Mx), -#define _(r) _1 (bswap, r), - foreach_x86_gp_reg -#undef _ - - /* 0xd0 */ - _0f (bad, X86_INSN_FLAG_SSE_GROUP_d0), - _2f (psrlw, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em), - _2f (psrld, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em), - _2f (psrlq, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em), - _2f (paddq, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em), - _2f (pmullw, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em), - _0f (bad, 
X86_INSN_FLAG_SSE_GROUP_d0), - _2f (pmovmskb, X86_INSN_FLAG_SSE_GROUP_d0, Gd, Rm), - _2f (psubusb, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em), - _2f (psubusw, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em), - _2f (pminub, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em), - _2f (pand, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em), - _2f (paddusb, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em), - _2f (paddusw, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em), - _2f (pmaxub, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em), - _2f (pandn, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em), - - /* 0xe0 */ - _2f (pavgb, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em), - _2f (psraw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em), - _2f (psrad, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em), - _2f (pavgw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em), - _2f (pmulhuw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em), - _2f (pmulhw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em), - _2f (bad, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em), - _2f (movntq, X86_INSN_FLAG_SSE_GROUP_e0, Mm, Gm), - _2f (psubsb, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em), - _2f (psubsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em), - _2f (pminsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em), - _2f (por, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em), - _2f (paddsb, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em), - _2f (paddsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em), - _2f (pmaxsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em), - _2f (pxor, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em), - - /* 0xf0 */ - _0f (bad, X86_INSN_FLAG_SSE_GROUP_f0), - _2f (psllw, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em), - _2f (pslld, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em), - _2f (psllq, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em), - _2f (pmuludq, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em), - _2f (pmaddwd, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em), - _2f (psadbw, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em), - _2f (maskmovq, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em), - _2f (psubb, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em), - _2f (psubw, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em), - _2f (psubd, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em), - _2f (psubq, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em), - _2f (paddb, 
X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em), - _2f (paddw, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em), - _2f (paddd, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em), - _0f (bad, X86_INSN_FLAG_SSE_GROUP_f8), -}; - -typedef struct { - x86_insn_t insns[8]; -} x86_insn_group8_t; - -/* Escape groups are indexed by modrm reg field. */ -static x86_insn_group8_t x86_insn_modrm_reg_groups[] = { - [X86_INSN_MODRM_REG_GROUP_1].insns = { - _0 (add), _0 ( or), _0 (adc), _0 (sbb), - _0 (and), _0 (sub), _0 (xor), _0 (cmp), - }, - - [X86_INSN_MODRM_REG_GROUP_1a].insns = { - _0f (pop, X86_INSN_FLAG_DEFAULT_64_BIT), - _0 (bad), _0 (bad), _0 (bad), - _0 (bad), _0 (bad), _0 (bad), _0 (bad), - }, - - [X86_INSN_MODRM_REG_GROUP_2].insns = { - _0 (rol), _0 (ror), _0 (rcl), _0 (rcr), - _0 (shl), _0 (shr), _0 (sal), _0 (sar), - }, - - [X86_INSN_MODRM_REG_GROUP_3].insns = { - _0 (test), _0 (test), _0 (not), _0 (neg), - _0 (mul), _0 (imul), _0 (div), _0 (idiv), - }, - - [X86_INSN_MODRM_REG_GROUP_4].insns = { - _0 (inc), _0 (dec), _0 (bad), _0 (bad), - _0 (bad), _0 (bad), _0 (bad), _0 (bad), - }, - - [X86_INSN_MODRM_REG_GROUP_5].insns = { - _1 (inc, Ev), - _1 (dec, Ev), - _1f (call, X86_INSN_FLAG_DEFAULT_64_BIT, Ev), - _1 (call, Mp), - _1f (jmp, X86_INSN_FLAG_DEFAULT_64_BIT, Ev), - _1 (jmp, Mp), - _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, Ev), - _0 (bad), - }, - - [X86_INSN_MODRM_REG_GROUP_6].insns = { - _1 (sldt, Ev), - _1 (str, Ev), - _1 (lldt, Ev), - _1 (ltr, Ev), - _1 (verr, Ev), - _1 (verw, Ev), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_MODRM_REG_GROUP_7].insns = { - _1 (sgdt, Mv), - _1 (sidt, Mv), - _1 (lgdt, Mv), - _1 (lidt, Mv), - _1 (smsw, Ev), - _0 (bad), - _1 (lmsw, Ew), - _1 (invlpg, Mv), - }, - - [X86_INSN_MODRM_REG_GROUP_8].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _2 (bt, Ev, Ib), - _2 (bts, Ev, Ib), - _2 (btr, Ev, Ib), - _2 (btc, Ev, Ib), - }, - - [X86_INSN_MODRM_REG_GROUP_9].insns = { - _0 (bad), - _1 (cmpxchg, Mx), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 
(bad), - }, - - [X86_INSN_MODRM_REG_GROUP_10].insns = { - _0 (bad), _0 (bad), _0 (bad), _0 (bad), - _0 (bad), _0 (bad), _0 (bad), _0 (bad), - }, - - [X86_INSN_MODRM_REG_GROUP_11].insns = { - _0 (mov), _0 (bad), _0 (bad), _0 (bad), - _0 (bad), _0 (bad), _0 (bad), _0 (bad), - }, - - [X86_INSN_MODRM_REG_GROUP_12].insns = { - _0 (bad), - _0 (bad), - _2 (psrlw, Rm, Ib), - _0 (bad), - _2 (psraw, Rm, Ib), - _0 (bad), - _2 (psllw, Rm, Ib), - _0 (bad), - }, - - [X86_INSN_MODRM_REG_GROUP_13].insns = { - _0 (bad), - _0 (bad), - _2 (psrld, Rm, Ib), - _0 (bad), - _2 (psrad, Rm, Ib), - _0 (bad), - _2 (pslld, Rm, Ib), - _0 (bad), - }, - - [X86_INSN_MODRM_REG_GROUP_14].insns = { - _0 (bad), - _0 (bad), - _2 (psrlq, Rm, Ib), - _0f (bad, 0), - _0 (bad), - _0 (bad), - _2 (psllq, Rm, Ib), - _0f (bad, 0), - }, - - [X86_INSN_MODRM_REG_GROUP_15].insns = { - _1 (fxsave, Mv), - _1 (fxrstor, Mv), - _1 (ldmxcsr, Mv), - _1 (stmxcsr, Mv), - _0 (bad), - _1 (lfence, Mv), - _1 (mfence, Mv), - _1 (sfence, Mv), - }, - - [X86_INSN_MODRM_REG_GROUP_16].insns = { - _1 (prefetch_nta, Mv), - _1 (prefetch_t0, Mv), - _1 (prefetch_t1, Mv), - _1 (prefetch_t2, Mv), - _1 (prefetch_nop, Mv), - _1 (prefetch_nop, Mv), - _1 (prefetch_nop, Mv), - _1 (prefetch_nop, Mv), - }, - - [X86_INSN_MODRM_REG_GROUP_p].insns = { - _1 (prefetch_exclusive, Mv), - _1 (prefetch_modified, Mv), - _1 (prefetch_nop, Mv), - _1 (prefetch_modified, Mv), - _1 (prefetch_nop, Mv), - _1 (prefetch_nop, Mv), - _1 (prefetch_nop, Mv), - _1 (prefetch_nop, Mv), - }, -}; - -static x86_insn_group8_t x86_insn_sse_groups_repz[] = { - [X86_INSN_SSE_GROUP_10].insns = { - _2 (movss, Gx, Ex), - _2 (movss, Ex, Gx), - _2 (movsldup, Gx, Ex), - _0 (bad), - _0 (bad), - _0 (bad), - _2 (movshdup, Gx, Ex), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_28].insns = { - _0 (bad), - _0 (bad), - _2 (cvtsi2ss, Gx, Ev), - _0 (bad), - _2 (cvttss2si, Gv, Ex), - _2 (cvtss2si, Gv, Ex), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_50].insns = { - _0 (bad), - _2 (sqrtss, 
Gx, Ex), - _2 (rsqrtps, Gx, Ex), - _2 (rcpss, Gx, Ex), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_58].insns = { - _2 (addss, Gx, Ex), - _2 (mulss, Gx, Ex), - _2 (cvtss2sd, Gx, Ex), - _2 (cvttps2dq, Gx, Ex), - _2 (subss, Gx, Ex), - _2 (minss, Gx, Ex), - _2 (divss, Gx, Ex), - _2 (maxss, Gx, Ex), - }, - - [X86_INSN_SSE_GROUP_60].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_68].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _2 (movdqu, Gx, Ex), - }, - - [X86_INSN_SSE_GROUP_70].insns = { - _3 (pshufhw, Gx, Ex, Ib), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_78].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _2 (movq, Gx, Ex), - _2 (movdqu, Ex, Gx), - }, - - [X86_INSN_SSE_GROUP_c0].insns = { - _0 (bad), - _0 (bad), - _3 (cmpss, Gx, Ex, Ib), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_d0].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _2 (movq2dq, Gx, Em), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_d8].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_e0].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _2 (cvtdq2pd, Gx, Ex), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_e8].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_f0].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_f8].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, -}; - -static x86_insn_group8_t 
x86_insn_sse_groups_operand_size[] = { - [X86_INSN_SSE_GROUP_10].insns = { - _2 (movupd, Gx, Ex), - _2 (movupd, Ex, Gx), - _2 (movlpd, Gx, Ex), - _2 (movlpd, Ex, Gx), - _2 (unpcklpd, Gx, Ex), - _2 (unpckhpd, Gx, Ex), - _2 (movhpd, Gx, Mx), - _2 (movhpd, Mx, Gx), - }, - - [X86_INSN_SSE_GROUP_28].insns = { - _2 (movapd, Gx, Ex), - _2 (movapd, Ex, Gx), - _2 (cvtpi2pd, Gx, Ex), - _2 (movntpd, Mx, Gx), - _2 (cvttpd2pi, Gx, Mx), - _2 (cvtpd2pi, Gx, Mx), - _2 (ucomisd, Gx, Ex), - _2 (comisd, Gx, Ex), - }, - - [X86_INSN_SSE_GROUP_50].insns = { - _2 (movmskpd, Gd, Rx), - _2 (sqrtpd, Gx, Ex), - _0 (bad), - _0 (bad), - _2 (andpd, Gx, Ex), - _2 (andnpd, Gx, Ex), - _2 (orpd, Gx, Ex), - _2 (xorpd, Gx, Ex), - }, - - [X86_INSN_SSE_GROUP_58].insns = { - _2 (addpd, Gx, Ex), - _2 (mulpd, Gx, Ex), - _2 (cvtpd2ps, Gx, Ex), - _2 (cvtps2dq, Gx, Ex), - _2 (subpd, Gx, Ex), - _2 (minpd, Gx, Ex), - _2 (divpd, Gx, Ex), - _2 (maxpd, Gx, Ex), - }, - - [X86_INSN_SSE_GROUP_60].insns = { - _2 (punpcklbw, Gx, Ex), - _2 (punpcklwd, Gx, Ex), - _2 (punpckldq, Gx, Ex), - _2 (packsswb, Gx, Ex), - _2 (pcmpgtb, Gx, Ex), - _2 (pcmpgtw, Gx, Ex), - _2 (pcmpgtd, Gx, Ex), - _2 (packuswb, Gx, Ex), - }, - - [X86_INSN_SSE_GROUP_68].insns = { - _2 (punpckhbw, Gx, Ex), - _2 (punpckhwd, Gx, Ex), - _2 (punpckhdq, Gx, Ex), - _2 (packssdw, Gx, Ex), - _2 (punpcklqdq, Gx, Ex), - _2 (punpckhqdq, Gx, Ex), - _2 (movd, Gx, Ev), - _2 (movdqa, Gx, Ex), - }, - - [X86_INSN_SSE_GROUP_70].insns = { - _3 (pshufd, Gx, Ex, Ib), - _0f (modrm_group_12, X86_INSN_FLAG_MODRM_REG_GROUP_12), - _0f (modrm_group_13, X86_INSN_FLAG_MODRM_REG_GROUP_13), - _0f (modrm_group_14, X86_INSN_FLAG_MODRM_REG_GROUP_14), - _2 (pcmpeqb, Gx, Ex), - _2 (pcmpeqw, Gx, Ex), - _2 (pcmpeqd, Gx, Ex), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_78].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _2 (haddpd, Gx, Ex), - _2 (hsubpd, Gx, Ex), - _2 (movd, Ev, Gx), - _2 (movdqa, Ex, Gx), - }, - - [X86_INSN_SSE_GROUP_c0].insns = { - _0 (bad), - _0 (bad), - _3 
(cmppd, Gx, Ex, Ib), - _0 (bad), - _3 (pinsrw, Gx, Ew, Ib), - _3 (pextrw, Gd, Gx, Ib), - _3 (shufpd, Gx, Ex, Ib), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_d0].insns = { - _2 (addsubpd, Gx, Ex), - _2 (psrlw, Gx, Ex), - _2 (psrld, Gx, Ex), - _2 (psrlq, Gx, Ex), - _2 (paddq, Gx, Ex), - _2 (pmullw, Gx, Ex), - _2 (movq, Ex, Gx), - _2 (pmovmskb, Gd, Rx), - }, - - [X86_INSN_SSE_GROUP_d8].insns = { - _2 (psubusb, Gx, Ex), - _2 (psubusw, Gx, Ex), - _2 (pminub, Gx, Ex), - _2 (pand, Gx, Ex), - _2 (paddusb, Gx, Ex), - _2 (paddusw, Gx, Ex), - _2 (pmaxub, Gx, Ex), - _2 (pandn, Gx, Ex), - }, - - [X86_INSN_SSE_GROUP_e0].insns = { - _2 (pavgb, Gx, Ex), - _2 (psraw, Gx, Ex), - _2 (psrad, Gx, Ex), - _2 (pavgw, Gx, Ex), - _2 (pmulhuw, Gx, Ex), - _2 (pmulhw, Gx, Ex), - _2 (cvttpd2dq, Gx, Ex), - _2 (movntdq, Mx, Gx), - }, - - [X86_INSN_SSE_GROUP_e8].insns = { - _2 (psubsb, Gx, Ex), - _2 (psubsw, Gx, Ex), - _2 (pminsw, Gx, Ex), - _2 (por, Gx, Ex), - _2 (paddsb, Gx, Ex), - _2 (paddsw, Gx, Ex), - _2 (pmaxsw, Gx, Ex), - _2 (pxor, Gx, Ex), - }, - - [X86_INSN_SSE_GROUP_f0].insns = { - _0 (bad), - _2 (psllw, Gx, Ex), - _2 (pslld, Gx, Ex), - _2 (psllq, Gx, Ex), - _2 (pmuludq, Gx, Ex), - _2 (pmaddwd, Gx, Ex), - _2 (psadbw, Gx, Ex), - _2 (maskmovdqu, Gx, Ex), - }, - - [X86_INSN_SSE_GROUP_f8].insns = { - _2 (psubb, Gx, Ex), - _2 (psubw, Gx, Ex), - _2 (psubd, Gx, Ex), - _2 (psubq, Gx, Ex), - _2 (paddb, Gx, Ex), - _2 (paddw, Gx, Ex), - _2 (paddd, Gx, Ex), - _0 (bad), - }, -}; - -static x86_insn_group8_t x86_insn_sse_groups_repnz[] = { - [X86_INSN_SSE_GROUP_10].insns = { - _2 (movsd, Gx, Ex), - _2 (movsd, Ex, Gx), - _2 (movddup, Gx, Ex), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_28].insns = { - _0 (bad), - _0 (bad), - _2 (cvtsi2sd, Gx, Ev), - _0 (bad), - _2 (cvttsd2si, Gv, Ex), - _2 (cvtsd2si, Gv, Ex), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_50].insns = { - _0 (bad), - _2 (sqrtsd, Gx, Ex), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 
(bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_58].insns = { - _2 (addsd, Gx, Ex), - _2 (mulsd, Gx, Ex), - _2 (cvtsd2ss, Gx, Ex), - _0 (bad), - _2 (subsd, Gx, Ex), - _2 (minsd, Gx, Ex), - _2 (divsd, Gx, Ex), - _2 (maxsd, Gx, Ex), - }, - - [X86_INSN_SSE_GROUP_60].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_68].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_70].insns = { - _3 (pshuflw, Gx, Ex, Ib), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_78].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _2 (haddps, Gx, Ex), - _2 (hsubps, Gx, Ex), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_c0].insns = { - _0 (bad), - _0 (bad), - _3 (cmpsd, Gx, Ex, Ib), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_d0].insns = { - _2 (addsubps, Gx, Ex), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _2 (movdq2q, Gm, Ex), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_d8].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_e0].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _2 (cvtpd2dq, Gx, Ex), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_e8].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_f0].insns = { - _2 (lddqu, Gx, Mx), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, - - [X86_INSN_SSE_GROUP_f8].insns = { - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - _0 (bad), - }, -}; - -#undef _ - -/* Parses memory displacements and immediates. 
*/ -static u8 * x86_insn_parse_number (u32 log2_n_bytes, - u8 * code, u8 * code_end, - i64 * result) -{ - i64 x = 0; - - if (code + (1 << log2_n_bytes) > code_end) - return 0; - - switch (log2_n_bytes) - { - case 3: - x = clib_little_to_host_unaligned_mem_u64 ((u64 *) code); - break; - - case 2: - x = (i32) clib_little_to_host_unaligned_mem_u32 ((u32 *) code); - break; - - case 1: - x = (i16) clib_little_to_host_unaligned_mem_u16 ((u16 *) code); - break; - - case 0: - x = (i8) code[0]; - break; - - default: - ASSERT (0); - } - - *result = x; - return code + (1 << log2_n_bytes); -} - -static u32 -x86_insn_log2_immediate_bytes (x86_insn_parse_t * p, x86_insn_t * insn) -{ - u32 i = ~0; - switch (x86_insn_immediate_type (insn)) - { - case 'b': i = 0; break; - case 'w': i = 1; break; - case 'd': i = 2; break; - case 'q': i = 3; break; - - case 'z': - i = p->log2_effective_operand_bytes; - if (i > 2) i = 2; - break; - - case 'v': - i = p->log2_effective_operand_bytes; - break; - - default: - i = ~0; - break; - } - - return i; -} - -static u8 * -x86_insn_parse_modrm_byte (x86_insn_parse_t * x, - x86_insn_modrm_byte_t modrm, - u32 parse_flags, - u8 * code, - u8 * code_end) -{ - u8 effective_address_bits; - - if (parse_flags & X86_INSN_PARSE_64_BIT) - effective_address_bits = (x->flags & X86_INSN_ADDRESS_SIZE) ? 32 : 64; - else if (parse_flags & X86_INSN_PARSE_32_BIT) - effective_address_bits = (x->flags & X86_INSN_ADDRESS_SIZE) ? 16 : 32; - else - effective_address_bits = (x->flags & X86_INSN_ADDRESS_SIZE) ? 
32 : 16; - - x->log2_effective_address_bytes = 1; - x->log2_effective_address_bytes += effective_address_bits > 16; - x->log2_effective_address_bytes += effective_address_bits > 32; - - x->regs[0] |= modrm.reg; - if (modrm.mode == 3) - x->regs[1] |= modrm.rm; - else - { - u32 log2_disp_bytes = ~0; - - x->flags |= X86_INSN_IS_ADDRESS; - - if (effective_address_bits != 16) - { - u8 has_sib_byte = 0; - - switch (modrm.mode) - { - case 0: - /* When base is bp displacement is present for mode 0. */ - if (modrm.rm == X86_INSN_GP_REG_BP) - { - log2_disp_bytes = x->log2_effective_address_bytes; - break; - } - else if (modrm.rm == X86_INSN_GP_REG_SP - && effective_address_bits != 16) - { - has_sib_byte = 1; - break; - } - /* fall through */ - case 1: - case 2: - x->regs[1] |= modrm.rm; - x->flags |= X86_INSN_HAS_BASE; - if (modrm.mode != 0) - { - log2_disp_bytes = (modrm.mode == 1 - ? 0 - : x->log2_effective_address_bytes); - if (log2_disp_bytes > 2) - log2_disp_bytes = 2; - } - break; - } - - if (has_sib_byte) - { - x86_insn_sib_byte_t sib; - - if (code >= code_end) - return 0; - sib.byte = *code++; - - x->log2_index_scale = 1 << sib.log2_scale; - x->regs[1] |= sib.base; - x->flags |= X86_INSN_HAS_BASE; - - if (sib.index != X86_INSN_GP_REG_SP) - { - x->regs[2] |= sib.index; - x->flags |= X86_INSN_HAS_INDEX; - } - } - } - else - { - /* effective_address_bits == 16 */ - switch (modrm.mode) - { - case 0: - if (modrm.rm == 6) - { - /* [disp16] */ - log2_disp_bytes = 1; - break; - } - /* fall through */ - case 1: - case 2: - switch (modrm.rm) - { - case 0: /* [bx + si/di] */ - case 1: - x->regs[1] = X86_INSN_GP_REG_BX; - x->regs[2] = X86_INSN_GP_REG_SI + (modrm.rm & 1); - x->flags |= X86_INSN_HAS_BASE | X86_INSN_HAS_INDEX; - break; - - case 2: /* [bp + si/di] */ - case 3: - x->regs[1] = X86_INSN_GP_REG_BP; - x->regs[2] = X86_INSN_GP_REG_SI + (modrm.rm & 1); - x->flags |= X86_INSN_HAS_BASE | X86_INSN_HAS_INDEX; - break; - - case 4: /* [si/di] */ - case 5: - x->regs[1] = 
X86_INSN_GP_REG_SI + (modrm.rm & 1); - x->flags |= X86_INSN_HAS_BASE; - break; - - case 6: /* [bp + disp] */ - x->regs[1] = X86_INSN_GP_REG_BP; - x->flags |= X86_INSN_HAS_BASE; - break; - - case 7: /* [bx + disp] */ - x->regs[1] = X86_INSN_GP_REG_BX; - x->flags |= X86_INSN_HAS_BASE; - break; - } - - if (modrm.mode != 0) - log2_disp_bytes = modrm.mode == 1 ? 0 : 1; - break; - } - } - - if (log2_disp_bytes != ~0) - { - i64 disp; - code = x86_insn_parse_number (log2_disp_bytes, code, code_end, - &disp); - if (code) - x->displacement = disp; - } - } - - return code; -} - -u8 * x86_insn_parse (x86_insn_parse_t * p, u8 * code_start) -{ - u8 i, * code, * code_end; - x86_insn_t * insn, * group_insn; - u8 default_operand_bits, effective_operand_bits; - u32 opcode, parse_flags; - - /* Preserve global parse flags. */ - parse_flags = p->flags & (X86_INSN_PARSE_32_BIT | X86_INSN_PARSE_64_BIT); - clib_memset (p, 0, sizeof (p[0])); - p->flags = parse_flags; - - /* 64 implies 32 bit parsing. */ - if (parse_flags & X86_INSN_PARSE_64_BIT) - parse_flags |= X86_INSN_PARSE_32_BIT; - - /* Instruction must be <= 15 bytes. */ - code = code_start; - code_end = code + 15; - - /* Parse legacy prefixes. */ - while (1) - { - if (code >= code_end) - goto insn_too_long; - i = code[0]; - code++; - switch (i) - { - default: goto prefix_done; - - /* Set flags based on prefix. */ -#define _(x,o) case o: p->flags |= X86_INSN_##x; break; - foreach_x86_legacy_prefix; -#undef _ - } - } - prefix_done: - - /* REX prefix. */ - if ((parse_flags & X86_INSN_PARSE_64_BIT) && i >= 0x40 && i <= 0x4f) - { - p->regs[0] |= ((i & (1 << 2)) != 0) << 3; /* r bit */ - p->regs[1] |= ((i & (1 << 0)) != 0) << 3; /* b bit */ - p->regs[2] |= ((i & (1 << 1)) != 0) << 3; /* x bit */ - p->flags |= ((i & (1 << 3)) /* w bit */ - ? X86_INSN_OPERAND_SIZE_64 : 0); - if (code >= code_end) - goto insn_too_long; - i = *code++; - } - - opcode = i; - if (opcode == 0x0f) - { - /* two byte opcode. 
*/; - if (code >= code_end) - goto insn_too_long; - i = *code++; - opcode = (opcode << 8) | i; - insn = x86_insns_two_byte + i; - } - else - { - static x86_insn_t arpl = { - .name = "arpl", - .operands[0].data = "Ew", - .operands[1].data = "Gw", - }; - - if (PREDICT_FALSE (i == 0x63 - && ! (parse_flags & X86_INSN_PARSE_64_BIT))) - insn = &arpl; - else - insn = x86_insns_one_byte + i; - } - - if ((i = X86_INSN_FLAG_GET_SSE_GROUP (insn->flags)) != 0) - { - x86_insn_group8_t * g8; - - if (p->flags & X86_INSN_OPERAND_SIZE) - g8 = x86_insn_sse_groups_operand_size; - else if (p->flags & X86_INSN_REPZ) - g8 = x86_insn_sse_groups_repz; - else if (p->flags & X86_INSN_REPNZ) - g8 = x86_insn_sse_groups_repnz; - else - g8 = 0; - - /* insn flags have 1 + group so != 0 test above can work. */ - ASSERT ((i - 1) < ARRAY_LEN (x86_insn_sse_groups_operand_size)); - if (g8) - insn = g8[i - 1].insns + (opcode & 7); - } - - /* Parse modrm and displacement if present. */ - if (x86_insn_has_modrm_byte (insn)) - { - x86_insn_modrm_byte_t modrm; - - if (code >= code_end) - goto insn_too_long; - modrm.byte = *code++; - - /* Handle special 0x0f01 and 0x0fae encodings. */ - if (PREDICT_FALSE (modrm.mode == 3 - && (opcode == 0x0f01 - || opcode == 0x0fae))) - { - static x86_insn_t x86_insns_0f01_special[] = { - _0 (swapgs), _0 (rdtscp), _0 (bad), _0 (bad), - _0 (bad), _0 (bad), _0 (bad), _0 (bad), - }; - static x86_insn_t x86_insns_0fae_special[] = { - _0 (vmrun), _0 (vmmcall), _0 (vmload), _0 (vmsave), - _0 (stgi), _0 (clgi), _0 (skinit), _0 (invlpga), - }; - - if (opcode == 0x0f01) - insn = x86_insns_0f01_special; - else - insn = x86_insns_0fae_special; - insn += modrm.rm; - opcode = (opcode << 8) | modrm.byte; - } - else - { - code = x86_insn_parse_modrm_byte (p, modrm, parse_flags, - code, code_end); - if (! 
code) - goto insn_too_long; - } - } - - group_insn = 0; - if ((i = X86_INSN_FLAG_GET_MODRM_REG_GROUP (insn->flags)) != 0) - { - u32 g = i - 1; - ASSERT (g < ARRAY_LEN (x86_insn_modrm_reg_groups)); - group_insn = x86_insn_modrm_reg_groups[g].insns + (p->regs[0] & 7); - } - - p->insn = insn[0]; - if (group_insn) - { - u32 k; - p->insn.name = group_insn->name; - p->insn.flags |= group_insn->flags; - for (k = 0; k < ARRAY_LEN (group_insn->operands); k++) - if (x86_insn_operand_is_valid (group_insn, k)) - p->insn.operands[k] = group_insn->operands[k]; - } - - default_operand_bits - = ((((parse_flags & X86_INSN_PARSE_32_BIT) != 0) - ^ ((p->flags & X86_INSN_OPERAND_SIZE) != 0)) - ? BITS (u32) : BITS (u16)); - - if ((parse_flags & X86_INSN_PARSE_64_BIT) - && (p->insn.flags & X86_INSN_FLAG_DEFAULT_64_BIT)) - default_operand_bits = BITS (u64); - - effective_operand_bits = default_operand_bits; - if (p->flags & X86_INSN_OPERAND_SIZE_64) - effective_operand_bits = BITS (u64); - - p->log2_effective_operand_bytes = 1; - p->log2_effective_operand_bytes += effective_operand_bits > 16; - p->log2_effective_operand_bytes += effective_operand_bits > 32; - - /* Parse immediate if present. */ - { - u32 l = x86_insn_log2_immediate_bytes (p, insn); - if (l <= 3) - { - code = x86_insn_parse_number (l, code, code_end, &p->immediate); - if (! code) - goto insn_too_long; - } - } - - return code; - - insn_too_long: - return 0; -} - -static u8 * format_x86_gp_reg_operand (u8 * s, va_list * va) -{ - u32 r = va_arg (*va, u32); - u32 log2_n_bytes = va_arg (*va, u32); - - const char names8[8] = "acdbsbsd"; - const char names16[8] = "xxxxppii"; - - ASSERT (r < 16); - - /* Add % register prefix. */ - vec_add1 (s, '%'); - - switch (log2_n_bytes) - { - case 0: - { - - if (r < 8) - s = format (s, "%c%c", names8[r & 3], (r >> 2) ? 'l' : 'h'); - else - s = format (s, "r%db", r); - } - break; - - case 2: - case 3: - s = format (s, "%c", log2_n_bytes == 2 ? 
'e' : 'r'); - /* fall through */ - case 1: - if (r < 8) - s = format (s, "%c%c", names8[r], names16[r]); - else - { - s = format (s, "%d", r); - if (log2_n_bytes != 3) - s = format (s, "%c", log2_n_bytes == 1 ? 'w' : 'd'); - } - break; - - default: - ASSERT (0); - } - - return s; -} - -static u8 * format_x86_reg_operand (u8 * s, va_list * va) -{ - u32 reg = va_arg (*va, u32); - u32 log2_n_bytes = va_arg (*va, u32); - u32 type = va_arg (*va, u32); - - switch (type) - { - default: - ASSERT (0); - break; - - case 'x': - ASSERT (reg < 16); - return format (s, "%%xmm%d", reg); - - case 'm': - ASSERT (reg < 8); - return format (s, "%%mm%d", reg); - - /* Explicit byte/word/double-word/quad-word */ - case 'b': log2_n_bytes = 0; break; - case 'w': log2_n_bytes = 1; break; - case 'd': log2_n_bytes = 2; break; - case 'q': log2_n_bytes = 3; break; - - /* Use effective operand size. */ - case 'v': break; - - /* word or double-word depending on effective operand size. */ - case 'z': - log2_n_bytes = clib_min (log2_n_bytes, 2); - break; - } - - s = format (s, "%U", format_x86_gp_reg_operand, reg, log2_n_bytes); - return s; -} - -static u8 * format_x86_mem_operand (u8 * s, va_list * va) -{ - x86_insn_parse_t * p = va_arg (*va, x86_insn_parse_t *); - - if (p->displacement != 0) - s = format (s, "0x%x", p->displacement); - - if (p->flags & X86_INSN_HAS_BASE) - { - s = format (s, "(%U", - format_x86_gp_reg_operand, p->regs[1], - p->log2_effective_address_bytes); - if (p->flags & X86_INSN_HAS_INDEX) - { - s = format (s, ",%U", - format_x86_gp_reg_operand, p->regs[2], - p->log2_effective_address_bytes); - if (p->log2_index_scale != 0) - s = format (s, ",%d", 1 << p->log2_index_scale); - } - s = format (s, ")"); - } - - /* [RIP+disp] PC relative addressing in 64 bit mode. 
*/ - else if (p->flags & X86_INSN_PARSE_64_BIT) - s = format (s, "(%%rip)"); - - return s; -} - -static u8 * format_x86_insn_operand (u8 * s, va_list * va) -{ - x86_insn_parse_t * p = va_arg (*va, x86_insn_parse_t *); - x86_insn_t * insn = &p->insn; - u32 o = va_arg (*va, u32); - u8 c, t; - - ASSERT (o < ARRAY_LEN (insn->operands)); - c = insn->operands[o].code; - t = insn->operands[o].type; - - /* Register encoded in instruction. */ - if (c < 8) - return format (s, "%U", - format_x86_gp_reg_operand, c, - p->log2_effective_operand_bytes); - - switch (c) - { - /* Memory or reg field from modrm byte. */ - case 'M': - ASSERT (p->flags & X86_INSN_IS_ADDRESS); - /* FALLTHROUGH */ - case 'E': - if (p->flags & X86_INSN_IS_ADDRESS) - s = format (s, "%U", format_x86_mem_operand, p); - else - s = format (s, "%U", - format_x86_reg_operand, p->regs[1], - p->log2_effective_operand_bytes, t); - break; - - /* reg field from modrm byte. */ - case 'R': - case 'G': - s = format (s, "%U", - format_x86_reg_operand, p->regs[0], - p->log2_effective_operand_bytes, t); - break; - - case 'I': - { - u32 l = x86_insn_log2_immediate_bytes (p, insn); - i64 mask = pow2_mask (8ULL << l); - s = format (s, "$0x%Lx", p->immediate & mask); - } - break; - - case 'J': - if (p->immediate < 0) - s = format (s, "- 0x%Lx", -p->immediate); - else - s = format (s, "+ 0x%Lx", p->immediate); - break; - - case 'O': - s = format (s, "0x%Lx", p->immediate); - break; - - case 'A': - /* AX/AL */ - s = format (s, "%U", - format_x86_gp_reg_operand, X86_INSN_GP_REG_AX, - t == 'L' ? 0 : p->log2_effective_operand_bytes); - break; - - case 'B': - /* BX/BL/BP */ - s = format (s, "%U", - format_x86_gp_reg_operand, - t == 'P' ? X86_INSN_GP_REG_BP : X86_INSN_GP_REG_BX, - t == 'L' ? 0 : p->log2_effective_operand_bytes); - break; - - case 'C': - /* CX/CL */ - s = format (s, "%U", - format_x86_gp_reg_operand, X86_INSN_GP_REG_CX, - t == 'L' ? 
0 : p->log2_effective_operand_bytes); - break; - - case 'D': - /* DX/DL/DI */ - s = format (s, "%U", - format_x86_gp_reg_operand, - t == 'I' ? X86_INSN_GP_REG_DI : X86_INSN_GP_REG_DX, - t == 'L' ? 0 : p->log2_effective_operand_bytes); - break; - - case 'S': - /* SI/SP */ - s = format (s, "%U", - format_x86_gp_reg_operand, - t == 'I' ? X86_INSN_GP_REG_SI : X86_INSN_GP_REG_SP, - p->log2_effective_operand_bytes); - break; - - case '1': - s = format (s, "1"); - break; - - default: - ASSERT (0); - } - - return s; -} - -u8 * format_x86_insn_parse (u8 * s, va_list * va) -{ - x86_insn_parse_t * p = va_arg (*va, x86_insn_parse_t *); - x86_insn_t * insn = &p->insn; - u32 o, i, is_src_dst; - - s = format (s, "%s", insn->name); - - if (! x86_insn_operand_is_valid (insn, 0)) - goto done; - - is_src_dst = x86_insn_operand_is_valid (insn, 1); - - /* If instruction has immediate add suffix to opcode to - indicate operand size. */ - if (is_src_dst) - { - u32 b; - - b = x86_insn_log2_immediate_bytes (p, insn); - if (b < p->log2_effective_operand_bytes - && (p->flags & X86_INSN_IS_ADDRESS)) - s = format (s, "%c", "bwlq"[b]); - } - - for (i = 0; i < ARRAY_LEN (insn->operands); i++) - { - o = is_src_dst + i; - if (! x86_insn_operand_is_valid (insn, o)) - break; - s = format (s, "%s%U", - i == 0 ? " " : ", ", - format_x86_insn_operand, p, o); - } - - if (is_src_dst) - s = format (s, ", %U", - format_x86_insn_operand, p, 0); - - done: - return s; -} diff --git a/src/vppinfra/asm_x86.h b/src/vppinfra/asm_x86.h deleted file mode 100644 index dacef61755c..00000000000 --- a/src/vppinfra/asm_x86.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef included_asm_x86_h -#define included_asm_x86_h - -#include <vppinfra/format.h> - -typedef union -{ - struct - { - u8 code; - u8 type; - }; - u8 data[2]; -} x86_insn_operand_t; - -typedef struct -{ - /* Instruction name. */ - char *name; - - /* X86 instructions may have up to 3 operands. */ - x86_insn_operand_t operands[3]; - - u16 flags; -#define X86_INSN_FLAG_DEFAULT_64_BIT (1 << 0) -#define X86_INSN_FLAG_SET_SSE_GROUP(n) ((n) << 5) -#define X86_INSN_FLAG_GET_SSE_GROUP(f) (((f) >> 5) & 0x1f) -#define X86_INSN_FLAG_SET_MODRM_REG_GROUP(n) (((n) & 0x3f) << 10) -#define X86_INSN_FLAG_GET_MODRM_REG_GROUP(f) (((f) >> 10) & 0x3f) -} x86_insn_t; - -always_inline uword -x86_insn_operand_is_valid (x86_insn_t * i, uword o) -{ - ASSERT (o < ARRAY_LEN (i->operands)); - return i->operands[o].code != '_'; -} - -#define foreach_x86_legacy_prefix \ - _ (OPERAND_SIZE, 0x66) \ - _ (ADDRESS_SIZE, 0x67) \ - _ (SEGMENT_CS, 0x2e) \ - _ (SEGMENT_DS, 0x3e) \ - _ (SEGMENT_ES, 0x26) \ - _ (SEGMENT_FS, 0x64) \ - _ (SEGMENT_GS, 0x65) \ - _ (SEGMENT_SS, 0x36) \ - _ (LOCK, 0xf0) \ - _ (REPZ, 0xf3) \ - _ (REPNZ, 0xf2) - -#define foreach_x86_insn_parse_flag \ - /* Parse in 32/64-bit mode. 
*/ \ - _ (PARSE_32_BIT, 0) \ - _ (PARSE_64_BIT, 0) \ - _ (IS_ADDRESS, 0) \ - /* regs[1/2] is a valid base/index register */ \ - _ (HAS_BASE, 0) \ - _ (HAS_INDEX, 0) \ - /* rex w bit */ \ - _ (OPERAND_SIZE_64, 0) - -typedef enum -{ -#define _(f,o) X86_INSN_FLAG_BIT_##f, - foreach_x86_insn_parse_flag foreach_x86_legacy_prefix -#undef _ -} x86_insn_parse_flag_bit_t; - -typedef enum -{ -#define _(f,o) X86_INSN_##f = 1 << X86_INSN_FLAG_BIT_##f, - foreach_x86_insn_parse_flag foreach_x86_legacy_prefix -#undef _ -} x86_insn_parse_flag_t; - -typedef struct -{ - /* Registers in instruction. - [0] is modrm reg field - [1] is base reg - [2] is index reg. */ - u8 regs[3]; - - /* Scale for index register. */ - u8 log2_index_scale:2; - u8 log2_effective_operand_bytes:3; - u8 log2_effective_address_bytes:3; - - i32 displacement; - - /* Parser flags: set of x86_insn_parse_flag_t enums. */ - u32 flags; - - i64 immediate; - - x86_insn_t insn; -} x86_insn_parse_t; - -u8 *x86_insn_parse (x86_insn_parse_t * p, u8 * code_start); -format_function_t format_x86_insn_parse; - -#endif /* included_asm_x86_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/backtrace.c b/src/vppinfra/backtrace.c deleted file mode 100644 index e713bae6876..00000000000 --- a/src/vppinfra/backtrace.c +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - Copyright (c) 2004 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include <vppinfra/clib.h> -#include <vppinfra/error.h> - -#ifdef __mips__ - -/* Let code below know we've defined _clib_backtrace */ -#define clib_backtrace_defined - -#include <vppinfra/asm_mips.h> - -__clib_export uword -clib_backtrace (uword * callers, uword max_callers, uword n_frames_to_skip) -{ - u32 *pc; - void *sp; - uword i, saved_pc; - - /* Figure current PC, saved PC and stack pointer. */ - asm volatile (".set push\n" - ".set noat\n" "move %[saved_pc], $31\n" "move %[sp], $29\n" - /* Fetches current PC. */ - "la $at, 1f\n" - "jalr %[pc], $at\n" - "nop\n" - "1:\n" - ".set pop\n":[pc] "=r" (pc), - [saved_pc] "=r" (saved_pc),[sp] "=r" (sp)); - - /* Also skip current frame. 
*/ - n_frames_to_skip += 1; - - for (i = 0; i < max_callers + n_frames_to_skip; i++) - { - mips_insn_opcode_t op; - mips_insn_special_funct_t funct; - i32 insn, rs, rt, rd, immediate, found_saved_pc; - u32 *start_pc; - - /* Parse instructions until we reach prologue for this - stack frame. We'll need to figure out where saved - PC is and where previous stack frame lives. */ - start_pc = pc; - found_saved_pc = 0; - while (1) - { - insn = *--pc; - op = mips_insn_get_op (insn); - funct = mips_insn_get_funct (insn); - rs = mips_insn_get_rs (insn); - rt = mips_insn_get_rt (insn); - rd = mips_insn_get_rd (insn); - immediate = mips_insn_get_immediate (insn); - - switch (op) - { - default: - break; - - case MIPS_OPCODE_sd: - case MIPS_OPCODE_sw: - /* Trace stores of return address. */ - if (rt == MIPS_REG_RA) - { - void *addr = sp + immediate; - - /* If RA is stored somewhere other than in the - stack frame, give up. */ - if (rs != MIPS_REG_SP) - goto backtrace_done; - - ASSERT (immediate % 4 == 0); - if (op == MIPS_OPCODE_sw) - saved_pc = ((u32 *) addr)[0]; - else - saved_pc = ((u64 *) addr)[0]; - found_saved_pc = 1; - } - break; - - case MIPS_OPCODE_addiu: - case MIPS_OPCODE_daddiu: - case MIPS_OPCODE_addi: - case MIPS_OPCODE_daddi: - if (rt == MIPS_REG_SP) - { - if (rs != MIPS_REG_SP) - goto backtrace_done; - - ASSERT (immediate % 4 == 0); - - /* Assume positive offset is part of the epilogue. - E.g. - jr ra - add sp,sp,100 - */ - if (immediate > 0) - continue; - - /* Negative offset means allocate stack space. - This could either be the prologue or could be due to - alloca. */ - sp -= immediate; - - /* This frame will not save RA. */ - if (i == 0) - goto found_prologue; - - /* Assume that addiu sp,sp,-N without store of ra means - that we have not found the prologue yet. 
*/ - if (found_saved_pc) - goto found_prologue; - } - break; - - case MIPS_OPCODE_slti: - case MIPS_OPCODE_sltiu: - case MIPS_OPCODE_andi: - case MIPS_OPCODE_ori: - case MIPS_OPCODE_xori: - case MIPS_OPCODE_lui: - case MIPS_OPCODE_ldl: - case MIPS_OPCODE_ldr: - case MIPS_OPCODE_lb: - case MIPS_OPCODE_lh: - case MIPS_OPCODE_lwl: - case MIPS_OPCODE_lw: - case MIPS_OPCODE_lbu: - case MIPS_OPCODE_lhu: - case MIPS_OPCODE_lwr: - case MIPS_OPCODE_lwu: - case MIPS_OPCODE_ld: - /* Give up when we find anyone setting the stack pointer. */ - if (rt == MIPS_REG_SP) - goto backtrace_done; - break; - - case MIPS_OPCODE_SPECIAL: - if (rd == MIPS_REG_SP) - switch (funct) - { - default: - /* Give up when we find anyone setting the stack pointer. */ - goto backtrace_done; - - case MIPS_SPECIAL_FUNCT_break: - case MIPS_SPECIAL_FUNCT_jr: - case MIPS_SPECIAL_FUNCT_sync: - case MIPS_SPECIAL_FUNCT_syscall: - case MIPS_SPECIAL_FUNCT_tge: - case MIPS_SPECIAL_FUNCT_tgeu: - case MIPS_SPECIAL_FUNCT_tlt: - case MIPS_SPECIAL_FUNCT_tltu: - case MIPS_SPECIAL_FUNCT_teq: - case MIPS_SPECIAL_FUNCT_tne: - /* These instructions can validly have rd == MIPS_REG_SP */ - break; - } - break; - } - } - - found_prologue: - /* Check sanity of saved pc. */ - if (saved_pc & 3) - goto backtrace_done; - if (saved_pc == 0) - goto backtrace_done; - - if (i >= n_frames_to_skip) - callers[i - n_frames_to_skip] = saved_pc; - pc = uword_to_pointer (saved_pc, u32 *); - } - -backtrace_done: - if (i < n_frames_to_skip) - return 0; - else - return i - n_frames_to_skip; -} -#endif /* __mips__ */ - -#ifndef clib_backtrace_defined -#define clib_backtrace_defined - -/* use glibc backtrace for stack trace */ -#include <execinfo.h> - -__clib_export uword -clib_backtrace (uword * callers, uword max_callers, uword n_frames_to_skip) -{ - int size; - void *array[20]; - /* Also skip current frame. 
*/ - n_frames_to_skip += 1; - - size = clib_min (ARRAY_LEN (array), max_callers + n_frames_to_skip); - - size = backtrace (array, size); - - uword i; - - for (i = 0; i < max_callers + n_frames_to_skip && i < size; i++) - { - if (i >= n_frames_to_skip) - callers[i - n_frames_to_skip] = pointer_to_uword (array[i]); - } - - if (i < n_frames_to_skip) - return 0; - else - return i - n_frames_to_skip; -} - - -#endif /* clib_backtrace_defined */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/clib.h b/src/vppinfra/clib.h index d14582492d6..75cebc65672 100644 --- a/src/vppinfra/clib.h +++ b/src/vppinfra/clib.h @@ -385,10 +385,6 @@ void qsort (void *base, uword n, uword size, int (*)(const void *, const void *)); #endif -/* Stack backtrace. */ -uword -clib_backtrace (uword * callers, uword max_callers, uword n_frames_to_skip); - #include <vppinfra/byte_order.h> #endif /* included_clib_h */ diff --git a/src/vppinfra/format_ansi.h b/src/vppinfra/format_ansi.h new file mode 100644 index 00000000000..c35406aacf7 --- /dev/null +++ b/src/vppinfra/format_ansi.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Cisco Systems, Inc. 
+ */ + +#ifndef __FORMAT_ANSI_H__ +#define __FORMAT_ANSI_H__ + +#define ANSI_RESET "\x1b[0m" +#define ANSI_BOLD "\x1b[1m" +#define ANSI_ITALIC "\x1b[3m" +#define ANSI_UNDERLINE "\x1b[4m" +#define ANSI_BLINK "\x1b[5m" +#define ANSI_FG_BLACK "\x1b[30m" +#define ANSI_FG_RED "\x1b[31m" +#define ANSI_FG_GREEN "\x1b[32m" +#define ANSI_FG_YELLOW "\x1b[33m" +#define ANSI_FG_BLUE "\x1b[34m" +#define ANSI_FG_MAGENTA "\x1b[35m" +#define ANSI_FG_CYAN "\x1b[36m" +#define ANSI_FG_WHITE "\x1b[37m" +#define ANSI_FG_DEFAULT "\x1b[39m" +#define ANSI_BG_BLACK "\x1b[40m" +#define ANSI_BG_RED "\x1b[41m" +#define ANSI_BG_GREEN "\x1b[42m" +#define ANSI_BG_YELLOW "\x1b[43m" +#define ANSI_BG_BLUE "\x1b[44m" +#define ANSI_BG_MAGENTA "\x1b[45m" +#define ANSI_BG_CYAN "\x1b[46m" +#define ANSI_BG_WHITE "\x1b[47m" +#define ANSI_BG_DEFAULT "\x1b[49m" +#define ANSI_FG_BR_BLACK "\x1b[90m" +#define ANSI_FG_BR_RED "\x1b[91m" +#define ANSI_FG_BR_GREEN "\x1b[92m" +#define ANSI_FG_BR_YELLOW "\x1b[93m" +#define ANSI_FG_BR_BLUE "\x1b[94m" +#define ANSI_FG_BR_MAGENTA "\x1b[95m" +#define ANSI_FG_BR_CYAN "\x1b[96m" +#define ANSI_FG_BR_WHITE "\x1b[97m" +#define ANSI_BG_BR_BLACK "\x1b[100m" +#define ANSI_BG_BR_RED "\x1b[101m" +#define ANSI_BG_BR_GREEN "\x1b[102m" +#define ANSI_BG_BR_YELLOW "\x1b[103m" +#define ANSI_BG_BR_BLUE "\x1b[104m" +#define ANSI_BG_BR_MAGENTA "\x1b[105m" +#define ANSI_BG_BR_CYAN "\x1b[106m" +#define ANSI_BG_BR_WHITE "\x1b[107m" + +#endif /* __FORMAT_ANSI_H__ */ diff --git a/src/vppinfra/heap.c b/src/vppinfra/heap.c index 7db814200f8..9920528732d 100644 --- a/src/vppinfra/heap.c +++ b/src/vppinfra/heap.c @@ -680,6 +680,7 @@ debug_elt (u8 * s, void *v, word i, word n) i = -n / 2; for (e = e0; 1; e = heap_next (e)) { + s = format (s, " "); if (heap_is_free (e)) s = format (s, "index %4d, free\n", e - h->elts); else if (h->format_elt) diff --git a/src/vppinfra/mem_dlmalloc.c b/src/vppinfra/mem_dlmalloc.c index a188164a7ba..e98687fff2a 100644 --- a/src/vppinfra/mem_dlmalloc.c +++ 
b/src/vppinfra/mem_dlmalloc.c @@ -19,6 +19,7 @@ #include <vppinfra/lock.h> #include <vppinfra/hash.h> #include <vppinfra/elf_clib.h> +#include <vppinfra/stack.h> typedef struct { @@ -65,15 +66,13 @@ mheap_get_trace_internal (const clib_mem_heap_t *heap, uword offset, { mheap_trace_main_t *tm = &mheap_trace_main; mheap_trace_t *t; - uword i, n_callers, trace_index, *p; - mheap_trace_t trace; + uword i, trace_index, *p; + mheap_trace_t trace = {}; + int index; if (heap != tm->current_traced_mheap || mheap_trace_thread_disable) return; - /* Spurious Coverity warnings be gone. */ - clib_memset (&trace, 0, sizeof (trace)); - clib_spinlock_lock (&tm->lock); /* heap could have changed while we were waiting on the lock */ @@ -83,9 +82,19 @@ mheap_get_trace_internal (const clib_mem_heap_t *heap, uword offset, /* Turn off tracing for this thread to avoid embarrassment... */ mheap_trace_thread_disable = 1; - /* Skip our frame and mspace_get_aligned's frame */ - n_callers = clib_backtrace (trace.callers, ARRAY_LEN (trace.callers), 2); - if (n_callers == 0) + index = -2; /* skip first 2 stack frames */ + foreach_clib_stack_frame (sf) + { + if (index >= 0) + { + if (index == ARRAY_LEN (trace.callers)) + break; + trace.callers[index] = sf->ip; + } + index++; + } + + if (index < 1) goto out; if (!tm->trace_by_callers) diff --git a/src/vppinfra/mhash.c b/src/vppinfra/mhash.c index f0f1aa470d7..babaaeec726 100644 --- a/src/vppinfra/mhash.c +++ b/src/vppinfra/mhash.c @@ -164,6 +164,8 @@ mhash_sanitize_hash_user (mhash_t * mh) h->user = pointer_to_uword (mh); } +static u8 *mhash_format_pair_default (u8 *s, va_list *args); + __clib_export void mhash_init (mhash_t * h, uword n_value_bytes, uword n_key_bytes) { @@ -208,12 +210,12 @@ mhash_init (mhash_t * h, uword n_value_bytes, uword n_key_bytes) vec_validate (h->key_tmps, os_get_nthreads () - 1); ASSERT (n_key_bytes < ARRAY_LEN (t)); - h->hash = hash_create2 ( /* elts */ 0, + h->hash = hash_create2 (/* elts */ 0, /* user */ 
pointer_to_uword (h), /* value_bytes */ n_value_bytes, t[n_key_bytes].key_sum, t[n_key_bytes].key_equal, /* format pair/arg */ - 0, 0); + mhash_format_pair_default, 0); } static uword @@ -331,8 +333,8 @@ mhash_set_mem (mhash_t * h, void *key, uword * new_value, uword * old_value) { if (key_alloc_from_free_list) { - h->key_vector_free_indices[l] = i; - vec_set_len (h->key_vector_free_indices, l + 1); + vec_set_len (h->key_vector_free_indices, l); + h->key_vector_free_indices[l - 1] = i; } else vec_dec_len (h->key_vector_or_heap, h->n_key_bytes); @@ -371,8 +373,8 @@ mhash_unset (mhash_t * h, void *key, uword * old_value) return 1; } -u8 * -format_mhash_key (u8 * s, va_list * va) +__clib_export u8 * +format_mhash_key (u8 *s, va_list *va) { mhash_t *h = va_arg (*va, mhash_t *); u32 ki = va_arg (*va, u32); @@ -387,7 +389,43 @@ format_mhash_key (u8 * s, va_list * va) else if (h->format_key) s = format (s, "%U", h->format_key, k); else - s = format (s, "%U", format_hex_bytes, k, h->n_key_bytes); + s = format (s, "0x%U", format_hex_bytes, k, h->n_key_bytes); + + return s; +} + +static u8 * +mhash_format_pair_default (u8 *s, va_list *args) +{ + void *CLIB_UNUSED (user_arg) = va_arg (*args, void *); + void *v = va_arg (*args, void *); + hash_pair_t *p = va_arg (*args, hash_pair_t *); + hash_t *h = hash_header (v); + mhash_t *mh = uword_to_pointer (h->user, mhash_t *); + + s = format (s, "%U", format_mhash_key, mh, (u32) p->key); + if (hash_value_bytes (h) > 0) + s = format (s, " -> 0x%8U", format_hex_bytes, &p->value[0], + hash_value_bytes (h)); + return s; +} + +__clib_export u8 * +format_mhash (u8 *s, va_list *va) +{ + mhash_t *h = va_arg (*va, mhash_t *); + int verbose = va_arg (*va, int); + + s = format (s, "mhash %p, %wd elts, \n", h, mhash_elts (h)); + if (mhash_key_vector_is_heap (h)) + s = format (s, " %U", format_heap, h->key_vector_or_heap, verbose); + else + s = format (s, " keys %wd elts, %wd size, %wd free, %wd bytes used\n", + vec_len (h->key_vector_or_heap) / 
h->n_key_bytes, + h->n_key_bytes, vec_len (h->key_vector_free_indices), + vec_bytes (h->key_vector_or_heap) + + vec_bytes (h->key_vector_free_indices)); + s = format (s, " %U", format_hash, h->hash, verbose); return s; } diff --git a/src/vppinfra/mhash.h b/src/vppinfra/mhash.h index 7eb1918384e..62aee365fa3 100644 --- a/src/vppinfra/mhash.h +++ b/src/vppinfra/mhash.h @@ -166,8 +166,13 @@ do { \ })); \ } while (0) +u8 *format_mhash (u8 *s, va_list *va); + format_function_t format_mhash_key; +/* Main test routine. */ +int test_mhash_main (unformat_input_t *input); + #endif /* included_clib_mhash_h */ /* diff --git a/src/vppinfra/stack.c b/src/vppinfra/stack.c new file mode 100644 index 00000000000..190e880c228 --- /dev/null +++ b/src/vppinfra/stack.c @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Cisco Systems, Inc. + */ + +#define _GNU_SOURCE +#include <dlfcn.h> + +#include <vppinfra/clib.h> +#include <vppinfra/stack.h> +#include <vppinfra/error.h> + +#if HAVE_LIBUNWIND == 1 + +#define UNW_LOCAL_ONLY +#include <libunwind.h> + +static __thread unw_cursor_t cursor; +static __thread unw_context_t context; + +#endif + +__clib_export clib_stack_frame_t * +clib_stack_frame_get (clib_stack_frame_t *sf) +{ +#if HAVE_LIBUNWIND == 1 + Dl_info info = {}; + + if (sf->index == 0) + { + if (unw_getcontext (&context) < 0) + { + clib_warning ("libunwind: cannot get local machine state\n"); + return 0; + } + if (unw_init_local (&cursor, &context) < 0) + { + clib_warning ( + "libunwind: cannot initialize cursor for local unwinding\n"); + return 0; + } + if (unw_step (&cursor) < 1) + return 0; + } + else if (unw_step (&cursor) < 1) + return 0; + + if (unw_get_reg (&cursor, UNW_REG_IP, &sf->ip)) + { + clib_warning ("libunwind: cannot read IP\n"); + return 0; + } + + if (unw_get_reg (&cursor, UNW_REG_SP, &sf->sp)) + { + clib_warning ("libunwind: cannot read SP\n"); + return 0; + } + + if (unw_get_proc_name (&cursor, sf->name, sizeof (sf->name), 
&sf->offset) < + 0) + sf->name[0] = sf->offset = 0; + + sf->is_signal_frame = unw_is_signal_frame (&cursor) ? 1 : 0; + + if (dladdr ((void *) sf->ip, &info)) + sf->file_name = info.dli_fname; + else + sf->file_name = 0; + + sf->index++; + return sf; +#else + return 0; +#endif +} diff --git a/src/vppinfra/stack.h b/src/vppinfra/stack.h new file mode 100644 index 00000000000..98a621d4176 --- /dev/null +++ b/src/vppinfra/stack.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Cisco Systems, Inc. + */ + +#ifndef __STACK_H__ +#define __STACK_H__ + +#include <vppinfra/clib.h> + +typedef struct +{ + uword ip, sp; + uword offset; + char name[64]; + const char *file_name; + u32 index; + u8 is_signal_frame; +} clib_stack_frame_t; + +clib_stack_frame_t *clib_stack_frame_get (clib_stack_frame_t *); + +#define foreach_clib_stack_frame(sf) \ + for (clib_stack_frame_t _sf = {}, *sf = clib_stack_frame_get (&_sf); sf; \ + sf = clib_stack_frame_get (sf)) + +#endif /* __STACK_H__ */ diff --git a/src/vppinfra/test_mhash.c b/src/vppinfra/test_mhash.c new file mode 100644 index 00000000000..70be2b9b382 --- /dev/null +++ b/src/vppinfra/test_mhash.c @@ -0,0 +1,403 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Yandex LLC. + */ + +#ifdef CLIB_LINUX_KERNEL +#include <linux/unistd.h> +#endif + +#ifdef CLIB_UNIX +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <vppinfra/time.h> +#endif + +#include <vppinfra/random.h> +#include <vppinfra/mem.h> +#include <vppinfra/hash.h> +#include <vppinfra/mhash.h> +#include <vppinfra/error.h> +#include <vppinfra/format.h> +#include <vppinfra/bitmap.h> + +static int verbose; +#define if_verbose(format, args...) \ + if (verbose) \ + { \ + clib_warning (format, ##args); \ + } + +typedef struct +{ + int n_iterations; + + int n_iterations_per_print; + + /* Number of pairs to insert into mhash. */ + int n_pairs; + + /* True to validate correctness of mhash functions. 
*/ + int n_iterations_per_validate; + + /* Verbosity level for mhash formats. */ + int verbose; + + /* Random number seed. */ + u32 seed; +} mhash_test_t; + +static clib_error_t * +mhash_next_test (mhash_t *h) +{ + hash_next_t hn = { 0 }; + hash_pair_t *p0, *p1; + clib_error_t *error = 0; + + hash_foreach_pair (p0, h->hash, { + p1 = hash_next (h->hash, &hn); + error = CLIB_ERROR_ASSERT (p0 == p1); + if (error) + break; + }); + + if (!error) + error = CLIB_ERROR_ASSERT (!hash_next (h->hash, &hn)); + + return error; +} + +static clib_error_t * +test_word_key (mhash_test_t *ht) +{ + mhash_t _h = { 0 }, *h = &_h; + word i, j; + + word *keys = 0, *vals = 0; + uword *is_inserted = 0; + + clib_error_t *error = 0; + + vec_resize (keys, ht->n_pairs); + vec_resize (vals, vec_len (keys)); + + mhash_init (h, sizeof (vals[0]), sizeof (keys[0])); + /* borrow 0 elt to make index keys non-zero */ + vec_validate (h->key_vector_or_heap, 0); + + { + uword *unique = 0; + u32 k; + + for (i = 0; i < vec_len (keys); i++) + { + do + { + k = random_u32 (&ht->seed) & 0xfffff; + } + while (clib_bitmap_get (unique, k)); + unique = clib_bitmap_ori (unique, k); + keys[i] = k; + vals[i] = i; + } + + clib_bitmap_free (unique); + } + + for (i = 0; i < ht->n_iterations; i++) + { + u32 vi = random_u32 (&ht->seed) % vec_len (keys); + + if (clib_bitmap_get (is_inserted, vi)) + { + mhash_unset (h, &keys[vi], 0); + mhash_unset (h, &keys[vi], 0); + } + else + { + mhash_set (h, &keys[vi], vals[vi], 0); + mhash_set (h, &keys[vi], vals[vi], 0); + } + + is_inserted = clib_bitmap_xori (is_inserted, vi); + + if (ht->n_iterations_per_print > 0 && + ((i + 1) % ht->n_iterations_per_print) == 0) + if_verbose ("iteration %d\n %U", i + 1, format_mhash, h, ht->verbose); + + if (ht->n_iterations_per_validate == 0 || + (i + 1) % ht->n_iterations_per_validate) + continue; + + { + uword ki, *k, *v; + + mhash_foreach (k, v, h, { + ki = v[0]; + ASSERT (keys[ki] == k[0]); + }); + } + + if ((error = hash_validate (h->hash))) 
+        goto done;
+
+      for (j = 0; j < vec_len (keys); j++)
+        {
+          uword *v;
+          v = mhash_get (h, &keys[j]);
+          if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+                                          (v != 0))))
+            goto done;
+          if (v)
+            {
+              if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+                goto done;
+            }
+        }
+    }
+
+  if ((error = mhash_next_test (h)))
+    goto done;
+
+  if_verbose ("%U", format_mhash, h, ht->verbose);
+
+  for (i = 0; i < vec_len (keys); i++)
+    {
+      if (!clib_bitmap_get (is_inserted, i))
+        continue;
+
+      mhash_unset (h, &keys[i], 0);
+      mhash_unset (h, &keys[i], 0);
+      is_inserted = clib_bitmap_xori (is_inserted, i);
+
+      if (ht->n_iterations_per_validate == 0 ||
+          (i + 1) % ht->n_iterations_per_validate)
+        continue;
+
+      if ((error = hash_validate (h->hash)))
+        goto done;
+
+      for (j = 0; j < vec_len (keys); j++)
+        {
+          uword *v;
+          v = mhash_get (h, &keys[j]);
+          if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+                                          (v != 0))))
+            goto done;
+          if (v)
+            {
+              if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+                goto done;
+            }
+        }
+    }
+
+done:
+  mhash_free (h);
+  vec_free (keys);
+  vec_free (vals);
+  clib_bitmap_free (is_inserted);
+
+  if (verbose)
+    fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0);
+
+  return error;
+}
+/* Pair formatter: value as hex bytes, then " <- ", then the mhash key. */
+static u8 *
+test2_format (u8 *s, va_list *args)
+{
+  void *CLIB_UNUSED (user_arg) = va_arg (*args, void *);
+  void *v = va_arg (*args, void *);
+  hash_pair_t *p = va_arg (*args, hash_pair_t *);
+  hash_t *h = hash_header (v);
+  mhash_t *mh = uword_to_pointer (h->user, mhash_t *);
+
+  return format (s, "0x%8U <- %U", format_hex_bytes, &p->value[0],
+                 hash_value_bytes (h), format_mhash_key, mh, (u32) p->key);
+}
+/* Set/unset random string keys in an mhash, validating periodically. */
+static clib_error_t *
+test_string_key (mhash_test_t *ht, uword is_c_string)
+{
+  mhash_t _h = { 0 }, *h = &_h;
+  word i, j;
+
+  u8 **keys = 0;
+  word *vals = 0;
+  uword *is_inserted = 0;
+
+  clib_error_t *error = 0;
+
+  vec_resize (keys, ht->n_pairs);
+  vec_resize (vals, vec_len (keys));
+  /* is_c_string picks the C-string key flavour, otherwise vec strings */
+  if (is_c_string)
+    mhash_init_c_string (h, sizeof (vals[0]));
+  else
+    mhash_init_vec_string (h, sizeof (vals[0]));
+  hash_set_pair_format (h->hash, test2_format, 0);
+  /* random key text plus the index in hex appended, and a random value */
+  for (i = 0; i < vec_len (keys); i++)
+    {
+      keys[i] = random_string (&ht->seed, 5 + (random_u32 (&ht->seed) & 0xf));
+      keys[i] = format (keys[i], "%x", i);
+      if (is_c_string)
+        vec_terminate_c_string (keys[i]);
+      vals[i] = random_u32 (&ht->seed);
+    }
+  /* toggle a random key each iteration; is_inserted mirrors membership */
+  for (i = 0; i < ht->n_iterations; i++)
+    {
+      u32 vi = random_u32 (&ht->seed) % vec_len (keys);
+
+      if (clib_bitmap_get (is_inserted, vi))
+        {
+          mhash_unset (h, keys[vi], 0);
+          mhash_unset (h, keys[vi], 0);
+        }
+      else
+        {
+          mhash_set (h, keys[vi], vals[vi], 0);
+          mhash_set (h, keys[vi], vals[vi], 0);
+        }
+      /* each op above is issued twice; presumably the repeat must be benign */
+      is_inserted = clib_bitmap_xori (is_inserted, vi);
+
+      if (ht->n_iterations_per_print > 0 &&
+          ((i + 1) % ht->n_iterations_per_print) == 0)
+        if_verbose ("iteration %d\n %U", i + 1, format_mhash, h, ht->verbose);
+      /* validate only every n_iterations_per_validate-th pass; 0 = never */
+      if (ht->n_iterations_per_validate == 0 ||
+          (i + 1) % ht->n_iterations_per_validate)
+        continue;
+
+      if ((error = hash_validate (h->hash)))
+        goto done;
+      /* every key must be present iff its bit is set, with its value */
+      for (j = 0; j < vec_len (keys); j++)
+        {
+          uword *v;
+          v = mhash_get (h, keys[j]);
+          if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+                                          (v != 0))))
+            goto done;
+          if (v)
+            {
+              if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+                goto done;
+            }
+        }
+    }
+
+  if ((error = mhash_next_test (h)))
+    goto done;
+
+  if_verbose ("%U", format_mhash, h, ht->verbose);
+  /* drain: unset every key still marked inserted, same periodic checks */
+  for (i = 0; i < vec_len (keys); i++)
+    {
+      if (!clib_bitmap_get (is_inserted, i))
+        continue;
+
+      mhash_unset (h, keys[i], 0);
+      mhash_unset (h, keys[i], 0);
+      is_inserted = clib_bitmap_xori (is_inserted, i);
+
+      if (ht->n_iterations_per_validate == 0 ||
+          (i + 1) % ht->n_iterations_per_validate)
+        continue;
+
+      if ((error = hash_validate (h->hash)))
+        goto done;
+
+      for (j = 0; j < vec_len (keys); j++)
+        {
+          uword *v;
+          v = mhash_get (h, keys[j]);
+          if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+                                          (v != 0))))
+            goto done;
+          if (v)
+            {
+              if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+                goto done;
+            }
+        }
+    }
+
+done:
+  mhash_free (h);
+  vec_free (vals);
+  clib_bitmap_free (is_inserted);
+  /* keys is a vector of vectors: free each string, then the outer vector */
+  for (i = 0; i < vec_len (keys); i++)
+    vec_free (keys[i]);
+  vec_free (keys);
+
+  if (verbose)
+    fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0);
+
+  return error;
+}
+/* Parse test options, then run the word-key and both string-key tests. */
+int
+test_mhash_main (unformat_input_t *input)
+{
+  mhash_test_t _ht = { 0 }, *ht = &_ht;
+  clib_error_t *error;
+
+  ht->n_iterations = 100;
+  ht->n_pairs = 10;
+  /* accepted options: iter, print, elts, seed, verbose, valid */
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (0 == unformat (input, "iter %d", &ht->n_iterations) &&
+          0 == unformat (input, "print %d", &ht->n_iterations_per_print) &&
+          0 == unformat (input, "elts %d", &ht->n_pairs) &&
+          0 == unformat (input, "seed %d", &ht->seed) &&
+          0 == unformat (input, "verbose %=", &ht->verbose, 1) &&
+          0 == unformat (input, "valid %d", &ht->n_iterations_per_validate))
+        {
+          clib_warning ("unknown input `%U'", format_unformat_error, input);
+          return 1;
+        }
+    }
+  /* a zero seed means "not given": fall back to random_default_seed () */
+  if (!ht->seed)
+    ht->seed = random_default_seed ();
+
+  if_verbose ("testing %d iterations, seed %d", ht->n_iterations, ht->seed);
+
+  error = test_word_key (ht);
+  if (error)
+    clib_error_report (error);
+
+  error = test_string_key (ht, 0);
+  if (error)
+    clib_error_report (error);
+
+  error = test_string_key (ht, 1);
+  if (error)
+    clib_error_report (error);
+  /* NOTE(review): failures are reported above but the result is still 0 */
+  return 0;
+}
+/* Standalone entry point; any command-line argument enables verbose. */
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+  unformat_input_t i;
+  int ret;
+
+  clib_mem_init (0, 3ULL << 30);
+
+  verbose = (argc > 1);
+  unformat_init_command_line (&i, argv);
+  ret = test_mhash_main (&i);
+  unformat_free (&i);
+
+  return ret;
+}
+#endif /* CLIB_UNIX */
diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c
index 5008f82c493..31c0a489e8d 100644
--- a/src/vppinfra/unix-misc.c
+++ b/src/vppinfra/unix-misc.c
@@ -35,6 +35,10 @@
   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + #include <vppinfra/error.h> #include <vppinfra/os.h> #include <vppinfra/bitmap.h> @@ -42,9 +46,15 @@ #include <vppinfra/format.h> #ifdef __linux__ #include <vppinfra/linux/sysfs.h> +#include <sched.h> #elif defined(__FreeBSD__) -#include <sys/sysctl.h> +#define _WANT_FREEBSD_BITSET +#include <sys/cdefs.h> #include <sys/param.h> +#include <sys/types.h> +#include <sys/cpuset.h> +#include <sys/domainset.h> +#include <sys/sysctl.h> #endif #include <sys/stat.h> @@ -278,10 +288,70 @@ os_get_online_cpu_core_bitmap () } __clib_export clib_bitmap_t * +os_get_cpu_affinity_bitmap (int pid) +{ +#if __linux + int index, ret; + cpu_set_t cpuset; + uword *affinity_cpus; + + clib_bitmap_alloc (affinity_cpus, sizeof (cpu_set_t)); + clib_bitmap_zero (affinity_cpus); + + __CPU_ZERO_S (sizeof (cpu_set_t), &cpuset); + + ret = sched_getaffinity (0, sizeof (cpu_set_t), &cpuset); + + if (ret < 0) + { + clib_bitmap_free (affinity_cpus); + return 0; + } + + for (index = 0; index < sizeof (cpu_set_t); index++) + if (__CPU_ISSET_S (index, sizeof (cpu_set_t), &cpuset)) + clib_bitmap_set (affinity_cpus, index, 1); + return affinity_cpus; +#elif defined(__FreeBSD__) + cpuset_t mask; + uword *r = NULL; + + if (cpuset_getaffinity (CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, -1, + sizeof (mask), &mask) != 0) + { + clib_bitmap_free (r); + return NULL; + } + + for (int bit = 0; bit < CPU_SETSIZE; bit++) + clib_bitmap_set (r, bit, CPU_ISSET (bit, &mask)); + + return r; +#else + return NULL; +#endif +} + +__clib_export clib_bitmap_t * os_get_online_cpu_node_bitmap () { #if __linux__ return clib_sysfs_read_bitmap ("/sys/devices/system/node/online"); +#elif defined(__FreeBSD__) + domainset_t domain; + uword *r = NULL; + int policy; + + if (cpuset_getdomain (CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, -1, + sizeof (domain), &domain, &policy) != 0) + { + clib_bitmap_free (r); + return NULL; + } + + for (int bit = 0; bit < CPU_SETSIZE; bit++) + clib_bitmap_set (r, bit, 
CPU_ISSET (bit, &domain)); + return r; #else return 0; #endif |