aboutsummaryrefslogtreecommitdiffstats
path: root/vnet/vnet/devices/dpdk/dpdk_priv.h
diff options
context:
space:
mode:
Diffstat (limited to 'vnet/vnet/devices/dpdk/dpdk_priv.h')
-rw-r--r--vnet/vnet/devices/dpdk/dpdk_priv.h437
1 files changed, 437 insertions, 0 deletions
diff --git a/vnet/vnet/devices/dpdk/dpdk_priv.h b/vnet/vnet/devices/dpdk/dpdk_priv.h
new file mode 100644
index 00000000000..e452e02d90d
--- /dev/null
+++ b/vnet/vnet/devices/dpdk/dpdk_priv.h
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * RX/TX descriptor counts, one pair per device class named in the
+ * macro suffix (DEFAULT, VIRTIO, 10GE, 40GE).
+ * NOTE(review): presumably the ring sizes requested at queue setup for
+ * each NIC type; the consumers are outside this header -- confirm there.
+ * The 40GE RX value is written as (4096-128); the reason is not
+ * recorded here.
+ */
+#define DPDK_NB_RX_DESC_DEFAULT 512
+#define DPDK_NB_TX_DESC_DEFAULT 512
+#define DPDK_NB_RX_DESC_VIRTIO 256
+#define DPDK_NB_TX_DESC_VIRTIO 256
+#define DPDK_NB_RX_DESC_10GE 2048
+#define DPDK_NB_TX_DESC_10GE 2048
+#define DPDK_NB_RX_DESC_40GE (4096-128)
+#define DPDK_NB_TX_DESC_40GE 2048
+/*
+ * X-macro list of per-port counters: each entry is
+ * _(counter_name, stats_field). Define _() before expanding the list
+ * and #undef it afterwards. Several stats_field names (rx_nombuf,
+ * imissed, ierrors, ibadcrc, ibadlen) are the same members that
+ * dpdk_update_counters() reads from xd->stats.
+ * NOTE(review): presumably every stats_field is a member of the DPDK
+ * port statistics structure -- confirm against the DPDK version in use.
+ */
+#define foreach_dpdk_counter \
+ _ (tx_frames_ok, opackets) \
+ _ (tx_bytes_ok, obytes) \
+ _ (tx_errors, oerrors) \
+ _ (tx_loopback_frames_ok, olbpackets) \
+ _ (tx_loopback_bytes_ok, olbbytes) \
+ _ (rx_frames_ok, ipackets) \
+ _ (rx_bytes_ok, ibytes) \
+ _ (rx_errors, ierrors) \
+ _ (rx_missed, imissed) \
+ _ (rx_bad_crc, ibadcrc) \
+ _ (rx_bad_length, ibadlen) \
+ _ (rx_multicast_frames_ok, imcasts) \
+ _ (rx_no_bufs, rx_nombuf) \
+ _ (rx_filter_match, fdirmatch) \
+ _ (rx_filter_miss, fdirmiss) \
+ _ (tx_pause_xon, tx_pause_xon) \
+ _ (rx_pause_xon, rx_pause_xon) \
+ _ (tx_pause_xoff, tx_pause_xoff) \
+ _ (rx_pause_xoff, rx_pause_xoff) \
+ _ (rx_loopback_frames_ok, ilbpackets) \
+ _ (rx_loopback_bytes_ok, ilbbytes)
+/*
+ * X-macro list of per-queue counters, _(counter_name, stats_field);
+ * the stats_field names carry a q_ prefix. Note that rx_errors maps to
+ * q_errors (there is no "i" in that field name).
+ */
+#define foreach_dpdk_q_counter \
+ _ (rx_frames_ok, q_ipackets) \
+ _ (tx_frames_ok, q_opackets) \
+ _ (rx_bytes_ok, q_ibytes) \
+ _ (tx_bytes_ok, q_obytes) \
+ _ (rx_errors, q_errors)
+/*
+ * X-macro list pairing each ETH_RSS_* hash-function flag with a short
+ * lower-case name string, _(flag, "name").
+ * NOTE(review): the names look intended for CLI/config parsing and for
+ * display; the users are outside this header -- confirm.
+ */
+#define foreach_dpdk_rss_hf \
+ _(ETH_RSS_IPV4, "ipv4") \
+ _(ETH_RSS_FRAG_IPV4, "ipv4-frag") \
+ _(ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
+ _(ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
+ _(ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
+ _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
+ _(ETH_RSS_IPV6, "ipv6") \
+ _(ETH_RSS_FRAG_IPV6, "ipv6-frag") \
+ _(ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
+ _(ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
+ _(ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
+ _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
+ _(ETH_RSS_L2_PAYLOAD, "l2-payload") \
+ _(ETH_RSS_IPV6_EX, "ipv6-ex") \
+ _(ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
+ _(ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex")
+
+/*
+ * X-macro list pairing each DEV_RX_OFFLOAD_* capability bit with a
+ * short name string, _(flag, "name"). Define _() before expanding the
+ * list and #undef it afterwards.
+ */
+#define foreach_dpdk_rx_offload_caps \
+ _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip") \
+ _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \
+ _(DEV_RX_OFFLOAD_UDP_CKSUM , "udp-cksum") \
+ _(DEV_RX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \
+ _(DEV_RX_OFFLOAD_TCP_LRO , "tcp-lro") \
+ _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip")
+/*
+ * X-macro list pairing each DEV_TX_OFFLOAD_* capability bit with a
+ * short name string, _(flag, "name"). Define _() before expanding the
+ * list and #undef it afterwards.
+ */
+#define foreach_dpdk_tx_offload_caps \
+ _(DEV_TX_OFFLOAD_VLAN_INSERT, "vlan-insert") \
+ _(DEV_TX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \
+ _(DEV_TX_OFFLOAD_UDP_CKSUM , "udp-cksum") \
+ _(DEV_TX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \
+ _(DEV_TX_OFFLOAD_SCTP_CKSUM , "sctp-cksum") \
+ _(DEV_TX_OFFLOAD_TCP_TSO , "tcp-tso") \
+ _(DEV_TX_OFFLOAD_UDP_TSO , "udp-tso") \
+ _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \
+ _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert")
+/*
+ * Per-mbuf RX flag tables, selected by DPDK version; every entry is
+ * _(flag, "description"). From DPDK 2.1.0 on, the IPv4/IPv6 header
+ * indications are RTE_PTYPE_* values listed in foreach_dpdk_pkt_type;
+ * for older releases they are PKT_RX_*_HDR* bits inside
+ * foreach_dpdk_pkt_rx_offload_flag and foreach_dpdk_pkt_type is empty.
+ */
+#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0)
+
+#define foreach_dpdk_pkt_rx_offload_flag \
+ _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \
+ _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \
+ _ (PKT_RX_FDIR, "RX packet with FDIR infos") \
+ _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
+ _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
+ _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
+ _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet")
+
+#define foreach_dpdk_pkt_type \
+ _ (RTE_PTYPE_L3_IPV4, "Packet with IPv4 header") \
+ _ (RTE_PTYPE_L3_IPV4_EXT, "Packet with extended IPv4 header") \
+ _ (RTE_PTYPE_L3_IPV6, "Packet with IPv6 header") \
+ _ (RTE_PTYPE_L3_IPV6_EXT, "Packet with extended IPv6 header")
+#else /* RTE_VERSION < 2.1.0 */
+#define foreach_dpdk_pkt_rx_offload_flag \
+ _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \
+ _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \
+ _ (PKT_RX_FDIR, "RX packet with FDIR infos") \
+ _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
+ _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
+ _ (PKT_RX_IPV4_HDR, "RX packet with IPv4 header") \
+ _ (PKT_RX_IPV4_HDR_EXT, "RX packet with extended IPv4 header") \
+ _ (PKT_RX_IPV6_HDR, "RX packet with IPv6 header") \
+ _ (PKT_RX_IPV6_HDR_EXT, "RX packet with extended IPv6 header") \
+ _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
+ _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet")
+
+#define foreach_dpdk_pkt_type /* Dummy */
+#endif /* RTE_VERSION */
+/*
+ * X-macro list of per-mbuf TX offload flags, _(flag, "description").
+ * Unlike the RX list, this one is the same for every DPDK version.
+ */
+#define foreach_dpdk_pkt_tx_offload_flag \
+ _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \
+ _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \
+ _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \
+ _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \
+ _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp")
+/*
+ * The RX flag list followed by the TX flag list; this is the list
+ * expanded by format_dpdk_pkt_offload_flags().
+ */
+#define foreach_dpdk_pkt_offload_flag \
+ foreach_dpdk_pkt_rx_offload_flag \
+ foreach_dpdk_pkt_tx_offload_flag
+
+/*
+ * format() helper: decode an mbuf packet_type word.
+ *
+ * Varargs: u32 * -- pointer to the packet-type bits.
+ *
+ * Appends nothing when the word is zero. Otherwise appends a
+ * "Packet Types" heading followed by one indented line (symbol, value,
+ * description) per foreach_dpdk_pkt_type entry whose bit is set.
+ * Returns the (possibly extended) vector s.
+ */
+static inline u8 * format_dpdk_pkt_types (u8 * s, va_list * va)
+{
+  u32 *ptype_bits = va_arg (*va, u32 *);
+  /* stays unreferenced when foreach_dpdk_pkt_type expands to nothing */
+  uword pad __attribute__((unused)) = format_get_indent (s) + 2;
+
+  if (*ptype_bits != 0)
+    {
+      s = format (s, "Packet Types");
+
+#define _(flag, descr)                                          \
+      if (*ptype_bits & flag)                                   \
+        s = format (s, "\n%U%s (0x%04x) %s",                    \
+                    format_white_space, pad, #flag, flag, descr);
+
+      foreach_dpdk_pkt_type
+
+#undef _
+    }
+
+  return s;
+}
+
+/*
+ * format() helper: decode an mbuf ol_flags word.
+ *
+ * Varargs: u64 * -- pointer to the offload flags; format_dpdk_rte_mbuf()
+ * passes &mb->ol_flags.
+ *
+ * Appends nothing when the word is zero. Otherwise appends a
+ * "Packet Offload Flags" heading followed by one indented line (symbol,
+ * value, description) per foreach_dpdk_pkt_offload_flag entry whose bit
+ * is set. Returns the (possibly extended) vector s.
+ *
+ * The flags are read as 64 bits because struct rte_mbuf declares
+ * ol_flags as a 64-bit field in DPDK 1.8 and later, and the PKT_TX_*
+ * bits live in its high-order part; reading through a u16 pointer could
+ * never report them and picked the wrong bytes on big-endian hosts.
+ * NOTE(review): the flag value is still printed with "%x" -- confirm
+ * how format() consumes a 64-bit constant for that conversion.
+ */
+static inline u8 * format_dpdk_pkt_offload_flags (u8 * s, va_list * va)
+{
+  u64 *ol_flags = va_arg (*va, u64 *);
+  uword indent = format_get_indent (s) + 2;
+
+  if (!*ol_flags)
+    return s;
+
+  s = format (s, "Packet Offload Flags");
+
+#define _(F, S)                                         \
+  if (*ol_flags & F)                                    \
+    {                                                   \
+      s = format (s, "\n%U%s (0x%04x) %s",              \
+                  format_white_space, indent, #F, F, S); \
+    }
+
+  foreach_dpdk_pkt_offload_flag
+
+#undef _
+
+  return s;
+}
+
+/*
+ * format() helper: dump the header fields of an rte_mbuf.
+ *
+ * Varargs: struct rte_mbuf * -- the mbuf to describe.
+ *
+ * Always appends port, nb_segs, pkt_len, buf_len, data_len, ol_flags
+ * and packet_type. When ol_flags or packet_type is non-zero, the
+ * decoded flag list from format_dpdk_pkt_offload_flags() or
+ * format_dpdk_pkt_types() is appended as well.
+ * Returns the extended vector s.
+ */
+static inline u8 * format_dpdk_rte_mbuf (u8 * s, va_list * va)
+{
+  struct rte_mbuf * m = va_arg (*va, struct rte_mbuf *);
+  uword pad = format_get_indent (s) + 2;
+
+  s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d"
+              "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x,"
+              "\n%Upacket_type 0x%x",
+              m->port, m->nb_segs, m->pkt_len,
+              format_white_space, pad,
+              m->buf_len, m->data_len, m->ol_flags,
+              format_white_space, pad,
+              m->packet_type);
+
+  /* decoded offload flags, only when at least one is set */
+  if (m->ol_flags != 0)
+    {
+      s = format (s, "\n%U%U", format_white_space, pad,
+                  format_dpdk_pkt_offload_flags, &m->ol_flags);
+    }
+
+  /* decoded packet types, only when at least one is set */
+  if (m->packet_type != 0)
+    {
+      s = format (s, "\n%U%U", format_white_space, pad,
+                  format_dpdk_pkt_types, &m->packet_type);
+    }
+
+  return s;
+}
+
+#ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS /* extended RX flag tables and formatters; ends at the matching #endif */
+#define foreach_dpdk_pkt_ext_rx_offload_flag \
+ _ (PKT_EXT_RX_PKT_ERROR, "RX Packet Error") \
+ _ (PKT_EXT_RX_BAD_FCS, "RX Bad FCS checksum") \
+ _ (PKT_EXT_RX_UDP, "RX packet with UDP L4 header") \
+ _ (PKT_EXT_RX_TCP, "RX packet with TCP L4 header") \
+ _ (PKT_EXT_RX_IPV4_FRAGMENT, "RX packet IPv4 Fragment")
+/*
+ * The standard RX flag list followed by the extended RX flags; this is
+ * the list expanded by format_dpdk_pkt_rx_offload_flags().
+ */
+#define foreach_dpdk_pkt_ext_offload_flag \
+ foreach_dpdk_pkt_rx_offload_flag \
+ foreach_dpdk_pkt_ext_rx_offload_flag
+
+/*
+ * format() helper: decode the RX side of an mbuf ol_flags word,
+ * including the extended RX flags.
+ *
+ * Varargs: u64 * -- pointer to the offload flags;
+ * format_dpdk_rx_rte_mbuf() passes &mb->ol_flags.
+ *
+ * Appends nothing when the word is zero. Otherwise appends a
+ * "Packet RX Offload Flags" heading followed by one indented line
+ * (symbol, value, description) per foreach_dpdk_pkt_ext_offload_flag
+ * entry whose bit is set. Returns the (possibly extended) vector s.
+ *
+ * The flags are read as 64 bits because struct rte_mbuf declares
+ * ol_flags as a 64-bit field in DPDK 1.8 and later; reading it through
+ * a u16 pointer only saw the low 16 bits on little-endian hosts and the
+ * wrong bytes on big-endian ones.
+ */
+static inline u8 * format_dpdk_pkt_rx_offload_flags (u8 * s, va_list * va)
+{
+  u64 *ol_flags = va_arg (*va, u64 *);
+  uword indent = format_get_indent (s) + 2;
+
+  if (!*ol_flags)
+    return s;
+
+  s = format (s, "Packet RX Offload Flags");
+
+#define _(F, S)                                         \
+  if (*ol_flags & F)                                    \
+    {                                                   \
+      s = format (s, "\n%U%s (0x%04x) %s",              \
+                  format_white_space, indent, #F, F, S); \
+    }
+
+  foreach_dpdk_pkt_ext_offload_flag
+
+#undef _
+
+  return s;
+}
+
+/*
+ * format() helper: dump the header fields of a received rte_mbuf.
+ *
+ * Varargs: struct rte_mbuf * -- the mbuf to describe.
+ *
+ * Same layout as format_dpdk_rte_mbuf(), but non-zero ol_flags are
+ * decoded with format_dpdk_pkt_rx_offload_flags(), which also knows the
+ * extended RX flags. Returns the extended vector s.
+ */
+static inline u8 * format_dpdk_rx_rte_mbuf (u8 * s, va_list * va)
+{
+  struct rte_mbuf * m = va_arg (*va, struct rte_mbuf *);
+  uword pad = format_get_indent (s) + 2;
+
+  /*
+   * Note: Assumes m is head of pkt chain -- port, nb_segs, & pkt_len
+   * are only valid for the 1st mbuf segment.
+   */
+  s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d"
+              "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x"
+              "\n%Upacket_type 0x%x",
+              m->port, m->nb_segs, m->pkt_len,
+              format_white_space, pad,
+              m->buf_len, m->data_len, m->ol_flags,
+              format_white_space, pad,
+              m->packet_type);
+
+  /* decoded RX offload flags, only when at least one is set */
+  if (m->ol_flags != 0)
+    {
+      s = format (s, "\n%U%U", format_white_space, pad,
+                  format_dpdk_pkt_rx_offload_flags, &m->ol_flags);
+    }
+
+  /* decoded packet types, only when at least one is set */
+  if (m->packet_type != 0)
+    {
+      s = format (s, "\n%U%U", format_white_space, pad,
+                  format_dpdk_pkt_types, &m->packet_type);
+    }
+
+  return s;
+}
+#endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */
+
+/*
+ * These args appear by themselves: per the macro name they are
+ * double-hyphen predicates, i.e. flags that take no value. Each entry
+ * is _(name); the names contain hyphens and so are not valid C
+ * identifiers as written.
+ */
+#define foreach_eal_double_hyphen_predicate_arg \
+_(no-shconf) \
+_(no-hpet) \
+_(no-pci) \
+_(no-huge) \
+_(vmware-tsc-map) \
+_(virtio-vhost)
+
+/*
+ * EAL arguments written with a single hyphen; each entry is
+ * _(long_name, letter). This list holds the ones the macro name marks
+ * as mandatory: coremask (c) and nchannels (n).
+ * The last entry must not end in a line continuation, otherwise the
+ * list silently depends on a blank line following it.
+ */
+#define foreach_eal_single_hyphen_mandatory_arg \
+_(coremask, c) \
+_(nchannels, n)
+
+/*
+ * The remaining single-hyphen EAL arguments (those not in the
+ * mandatory list), in the same _(long_name, letter) form.
+ */
+#define foreach_eal_single_hyphen_arg \
+_(blacklist, b) \
+_(mem-alloc-request, m) \
+_(force-ranks, r)
+
+/*
+ * These args are preceded by "--" and followed by a single string.
+ * Each entry is _(name); the names contain hyphens and so are not
+ * valid C identifiers as written.
+ */
+#define foreach_eal_double_hyphen_arg \
+_(huge-dir) \
+_(proc-type) \
+_(file-prefix) \
+_(socket-mem) \
+_(vdev)
+
+/*
+ * Poll one RX queue of a device and collect up to VLIB_FRAME_SIZE mbufs
+ * into xd->rx_vectors[queue_id].
+ *
+ * dm       - dpdk main; supplies vlib_main and vhost_coalesce_frames
+ * xd       - device to poll; xd->dev_type selects the receive path
+ * queue_id - RX queue index, also the index into xd->rx_vectors
+ *
+ * Returns the number of mbufs stored. A vhost-user device that is not
+ * running returns 0 without dequeuing anything. An unknown device type
+ * trips ASSERT(0) and returns 0.
+ */
+static inline u32
+dpdk_rx_burst ( dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id)
+{
+  u32 n_rx = 0;
+
+  if (PREDICT_TRUE(xd->dev_type == VNET_DPDK_DEV_ETH))
+    {
+      u32 n_room = VLIB_FRAME_SIZE;
+
+      /* keep pulling bursts until the frame is full or the ring runs dry */
+      while (n_room != 0)
+        {
+          u32 n_got = rte_eth_rx_burst (xd->device_index, queue_id,
+                                        xd->rx_vectors[queue_id] + n_rx,
+                                        n_room);
+          n_rx += n_got;
+          n_room -= n_got;
+
+          /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */
+          if (n_got < 32)
+            break;
+        }
+
+      return n_rx;
+    }
+
+  if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER)
+    {
+      vlib_main_t * vm = vlib_get_main();
+      vlib_buffer_main_t * bm = vm->buffer_main;
+      unsigned socket_id = rte_socket_id();
+      f64 now;
+
+      if (PREDICT_FALSE(!xd->vu_is_running))
+        return 0;
+
+      /* packets the guest transmitted arrive on its TX queue */
+      n_rx = rte_vhost_dequeue_burst(&xd->vu_vhost_dev, VIRTIO_TXQ,
+                                     bm->pktmbuf_pools[socket_id],
+                                     xd->rx_vectors[queue_id],
+                                     VLIB_FRAME_SIZE);
+
+      now = vlib_time_now (dm->vlib_main);
+
+      /* send pending interrupts if needed */
+      if (dpdk_vhost_user_want_interrupt(xd, VIRTIO_TXQ))
+        {
+          dpdk_vu_vring * txq = &(xd->vu_intf->vrings[VIRTIO_TXQ]);
+
+          txq->n_since_last_int += n_rx;
+
+          /* fire on an expired deadline or once enough frames are pending */
+          if ((txq->n_since_last_int && (txq->int_deadline < now))
+              || (txq->n_since_last_int > dm->vhost_coalesce_frames))
+            dpdk_vhost_user_send_interrupt(dm->vlib_main, xd, VIRTIO_TXQ);
+        }
+
+      if (dpdk_vhost_user_want_interrupt(xd, VIRTIO_RXQ))
+        {
+          dpdk_vu_vring * rxq = &(xd->vu_intf->vrings[VIRTIO_RXQ]);
+
+          if (rxq->n_since_last_int && (rxq->int_deadline < now))
+            dpdk_vhost_user_send_interrupt(dm->vlib_main, xd, VIRTIO_RXQ);
+        }
+
+      return n_rx;
+    }
+
+  if (xd->dev_type == VNET_DPDK_DEV_KNI)
+    {
+      n_rx = rte_kni_rx_burst(xd->kni, xd->rx_vectors[queue_id],
+                              VLIB_FRAME_SIZE);
+      rte_kni_handle_request(xd->kni);
+      return n_rx;
+    }
+
+  /* unknown device type */
+  ASSERT(0);
+  return n_rx;
+}
+
+
+/*
+ * Refresh the cached port statistics of a device and fold the changes
+ * into the per-interface simple counters.
+ *
+ * xd  - device to update; anything other than VNET_DPDK_DEV_ETH is
+ *       ignored
+ * now - timestamp stored in xd->time_last_stats_update; passing 0
+ *       keeps the previous timestamp
+ *
+ * Unless xd->admin_up is 0xff, the previous stats are saved in
+ * xd->last_stats, fresh ones are read into xd->stats, and the deltas
+ * of rx_nombuf, imissed and (ibadcrc + ibadlen + ierrors) are added to
+ * the RX_NO_BUF, RX_MISS and RX_ERROR interface counters.
+ * The extended statistics in xd->xstats are refreshed in every case.
+ */
+static inline void
+dpdk_update_counters (dpdk_device_t * xd, f64 now)
+{
+  vlib_simple_counter_main_t * cm;
+  vnet_main_t * vnm = vnet_get_main();
+  u32 my_cpu = os_get_cpu_number();
+  u64 rxerrors, last_rxerrors;
+  int len;
+
+  /* only update counters for PMD interfaces */
+  if (xd->dev_type != VNET_DPDK_DEV_ETH)
+    return;
+
+  /*
+   * DAW-FIXME: VMXNET3 device stop/start doesn't work,
+   * therefore fake the stop in the dpdk driver by
+   * silently dropping all of the incoming pkts instead of
+   * stopping the driver / hardware.
+   */
+  if (xd->admin_up != 0xff)
+    {
+      xd->time_last_stats_update = now ? now : xd->time_last_stats_update;
+      memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats));
+      rte_eth_stats_get (xd->device_index, &xd->stats);
+
+      /* maybe bump interface rx no buffer counter */
+      if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf))
+        {
+          cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+                                 VNET_INTERFACE_COUNTER_RX_NO_BUF);
+
+          vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+                                         xd->stats.rx_nombuf -
+                                         xd->last_stats.rx_nombuf);
+        }
+
+      /* missed pkt counter */
+      if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed))
+        {
+          cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+                                 VNET_INTERFACE_COUNTER_RX_MISS);
+
+          vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+                                         xd->stats.imissed -
+                                         xd->last_stats.imissed);
+        }
+
+      /* all RX error classes are reported through one interface counter */
+      rxerrors = xd->stats.ibadcrc
+        + xd->stats.ibadlen + xd->stats.ierrors;
+      last_rxerrors = xd->last_stats.ibadcrc
+        + xd->last_stats.ibadlen + xd->last_stats.ierrors;
+
+      if (PREDICT_FALSE (rxerrors != last_rxerrors))
+        {
+          cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+                                 VNET_INTERFACE_COUNTER_RX_ERROR);
+
+          vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+                                         rxerrors - last_rxerrors);
+        }
+    }
+
+  /* first call only asks how many extended stats the port exposes */
+  if ((len = rte_eth_xstats_get(xd->device_index, NULL, 0)) > 0)
+    {
+      vec_validate(xd->xstats, len - 1);
+      len = rte_eth_xstats_get(xd->device_index, xd->xstats, vec_len(xd->xstats));
+
+      /*
+       * The second call can return a negative error code, or a count
+       * larger than the table if the port grew its stats in between.
+       * Neither may become the vector length; in that case the vector
+       * is left at its current length.
+       */
+      ASSERT(len >= 0 && (uword) len <= vec_len(xd->xstats));
+      if (len >= 0 && (uword) len <= vec_len(xd->xstats))
+        _vec_len(xd->xstats) = len;
+    }
+}