aboutsummaryrefslogtreecommitdiffstats
path: root/lib/librte_eal/common/include
diff options
context:
space:
mode:
Diffstat (limited to 'lib/librte_eal/common/include')
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_vect.h2
-rw-r--r--lib/librte_eal/common/include/arch/x86/rte_atomic_32.h2
-rw-r--r--lib/librte_eal/common/include/rte_bitmap.h561
-rw-r--r--lib/librte_eal/common/include/rte_bus.h42
-rw-r--r--lib/librte_eal/common/include/rte_common.h23
-rw-r--r--lib/librte_eal/common/include/rte_debug.h2
-rw-r--r--lib/librte_eal/common/include/rte_dev.h31
-rw-r--r--lib/librte_eal/common/include/rte_eal.h52
-rw-r--r--lib/librte_eal/common/include/rte_eal_interrupts.h250
-rw-r--r--lib/librte_eal/common/include/rte_interrupts.h2
-rw-r--r--lib/librte_eal/common/include/rte_lcore.h14
-rw-r--r--lib/librte_eal/common/include/rte_log.h30
-rw-r--r--lib/librte_eal/common/include/rte_malloc.h17
-rw-r--r--lib/librte_eal/common/include/rte_memory.h103
-rw-r--r--lib/librte_eal/common/include/rte_memzone.h6
-rw-r--r--lib/librte_eal/common/include/rte_pci.h598
-rw-r--r--lib/librte_eal/common/include/rte_service.h197
-rw-r--r--lib/librte_eal/common/include/rte_service_component.h36
-rw-r--r--lib/librte_eal/common/include/rte_vdev.h131
-rw-r--r--lib/librte_eal/common/include/rte_version.h6
-rw-r--r--lib/librte_eal/common/include/rte_vfio.h153
21 files changed, 1302 insertions, 956 deletions
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index 782350d1..aa887a97 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -136,7 +136,7 @@ vgetq_lane_p64(poly64x2_t x, const int lane)
#endif
/*
- * If (0 <= index <= 15), then call the ASIMD ext intruction on the
+ * If (0 <= index <= 15), then call the ASIMD ext instruction on the
* 128 bit regs v0 and v1 with the appropriate index.
*
* Else returns a zero vector.
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
index 2e04c759..fb3abf18 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
@@ -81,7 +81,7 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
: "memory" ); /* no-clobber list */
#else
asm volatile (
- "mov %%ebx, %%edi\n"
+ "xchgl %%ebx, %%edi;\n"
MPLOCKED
"cmpxchg8b (%[dst]);"
"setz %[res];"
diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h
new file mode 100644
index 00000000..010d752c
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_bitmap.h
@@ -0,0 +1,561 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_BITMAP_H__
+#define __INCLUDE_RTE_BITMAP_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Bitmap
+ *
+ * The bitmap component provides a mechanism to manage large arrays of bits
+ * through bit get/set/clear and bit array scan operations.
+ *
+ * The bitmap scan operation is optimized for 64-bit CPUs using 64/128 byte cache
+ * lines. The bitmap is hierarchically organized using two arrays (array1 and
+ * array2), with each bit in array1 being associated with a full cache line
+ * (512/1024 bits) of bitmap bits, which are stored in array2: the bit in array1
+ * is set only when there is at least one bit set within its associated array2
+ * bits, otherwise the bit in array1 is cleared. The read and write operations
+ * for array1 and array2 are always done in slabs of 64 bits.
+ *
+ * This bitmap is not thread safe. For lock free operation on a specific bitmap
+ * instance, a single writer thread performing bit set/clear operations is
+ * allowed, only the writer thread can do bitmap scan operations, while there
+ * can be several reader threads performing bit get operations in parallel with
+ * the writer thread. When the use of locking primitives is acceptable, the
+ * serialization of the bit set/clear and bitmap scan operations needs to be
+ * enforced by the caller, while the bit get operation does not require locking
+ * the bitmap.
+ *
+ ***/
+
+#include <string.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_branch_prediction.h>
+#include <rte_prefetch.h>
+
+#ifndef RTE_BITMAP_OPTIMIZATIONS
+#define RTE_BITMAP_OPTIMIZATIONS 1
+#endif
+
+/* Slab */
+#define RTE_BITMAP_SLAB_BIT_SIZE 64
+#define RTE_BITMAP_SLAB_BIT_SIZE_LOG2 6
+#define RTE_BITMAP_SLAB_BIT_MASK (RTE_BITMAP_SLAB_BIT_SIZE - 1)
+
+/* Cache line (CL) */
+#define RTE_BITMAP_CL_BIT_SIZE (RTE_CACHE_LINE_SIZE * 8)
+#define RTE_BITMAP_CL_BIT_SIZE_LOG2 (RTE_CACHE_LINE_SIZE_LOG2 + 3)
+#define RTE_BITMAP_CL_BIT_MASK (RTE_BITMAP_CL_BIT_SIZE - 1)
+
+#define RTE_BITMAP_CL_SLAB_SIZE (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE)
+#define RTE_BITMAP_CL_SLAB_SIZE_LOG2 (RTE_BITMAP_CL_BIT_SIZE_LOG2 - RTE_BITMAP_SLAB_BIT_SIZE_LOG2)
+#define RTE_BITMAP_CL_SLAB_MASK (RTE_BITMAP_CL_SLAB_SIZE - 1)
+
+/** Bitmap data structure */
+struct rte_bitmap {
+ /* Context for array1 and array2 */
+ uint64_t *array1; /**< Bitmap array1 */
+ uint64_t *array2; /**< Bitmap array2 */
+ uint32_t array1_size; /**< Number of 64-bit slabs in array1 that are actually used */
+ uint32_t array2_size; /**< Number of 64-bit slabs in array2 */
+
+ /* Context for the "scan next" operation */
+ uint32_t index1; /**< Bitmap scan: Index of current array1 slab */
+ uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */
+ uint32_t index2; /**< Bitmap scan: Index of current array2 slab */
+ uint32_t go2; /**< Bitmap scan: Go/stop condition for current array2 cache line */
+
+ /* Storage space for array1 and array2 */
+ uint8_t memory[];
+};
+
+static inline void
+__rte_bitmap_index1_inc(struct rte_bitmap *bmp)
+{
+ bmp->index1 = (bmp->index1 + 1) & (bmp->array1_size - 1);
+}
+
+static inline uint64_t
+__rte_bitmap_mask1_get(struct rte_bitmap *bmp)
+{
+ return (~1lu) << bmp->offset1;
+}
+
+static inline void
+__rte_bitmap_index2_set(struct rte_bitmap *bmp)
+{
+ bmp->index2 = (((bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1) << RTE_BITMAP_CL_SLAB_SIZE_LOG2);
+}
+
+#if RTE_BITMAP_OPTIMIZATIONS
+
+static inline int
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+ if (likely(slab == 0)) {
+ return 0;
+ }
+
+ *pos = __builtin_ctzll(slab);
+ return 1;
+}
+
+#else
+
+static inline int
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+ uint64_t mask;
+ uint32_t i;
+
+ if (likely(slab == 0)) {
+ return 0;
+ }
+
+ for (i = 0, mask = 1; i < RTE_BITMAP_SLAB_BIT_SIZE; i ++, mask <<= 1) {
+ if (unlikely(slab & mask)) {
+ *pos = i;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+#endif
+
+static inline uint32_t
+__rte_bitmap_get_memory_footprint(uint32_t n_bits,
+ uint32_t *array1_byte_offset, uint32_t *array1_slabs,
+ uint32_t *array2_byte_offset, uint32_t *array2_slabs)
+{
+ uint32_t n_slabs_context, n_slabs_array1, n_cache_lines_context_and_array1;
+ uint32_t n_cache_lines_array2;
+ uint32_t n_bytes_total;
+
+ n_cache_lines_array2 = (n_bits + RTE_BITMAP_CL_BIT_SIZE - 1) / RTE_BITMAP_CL_BIT_SIZE;
+ n_slabs_array1 = (n_cache_lines_array2 + RTE_BITMAP_SLAB_BIT_SIZE - 1) / RTE_BITMAP_SLAB_BIT_SIZE;
+ n_slabs_array1 = rte_align32pow2(n_slabs_array1);
+ n_slabs_context = (sizeof(struct rte_bitmap) + (RTE_BITMAP_SLAB_BIT_SIZE / 8) - 1) / (RTE_BITMAP_SLAB_BIT_SIZE / 8);
+ n_cache_lines_context_and_array1 = (n_slabs_context + n_slabs_array1 + RTE_BITMAP_CL_SLAB_SIZE - 1) / RTE_BITMAP_CL_SLAB_SIZE;
+ n_bytes_total = (n_cache_lines_context_and_array1 + n_cache_lines_array2) * RTE_CACHE_LINE_SIZE;
+
+ if (array1_byte_offset) {
+ *array1_byte_offset = n_slabs_context * (RTE_BITMAP_SLAB_BIT_SIZE / 8);
+ }
+ if (array1_slabs) {
+ *array1_slabs = n_slabs_array1;
+ }
+ if (array2_byte_offset) {
+ *array2_byte_offset = n_cache_lines_context_and_array1 * RTE_CACHE_LINE_SIZE;
+ }
+ if (array2_slabs) {
+ *array2_slabs = n_cache_lines_array2 * RTE_BITMAP_CL_SLAB_SIZE;
+ }
+
+ return n_bytes_total;
+}
+
+static inline void
+__rte_bitmap_scan_init(struct rte_bitmap *bmp)
+{
+ bmp->index1 = bmp->array1_size - 1;
+ bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1;
+ __rte_bitmap_index2_set(bmp);
+ bmp->index2 += RTE_BITMAP_CL_SLAB_SIZE;
+
+ bmp->go2 = 0;
+}
+
+/**
+ * Bitmap memory footprint calculation
+ *
+ * @param n_bits
+ * Number of bits in the bitmap
+ * @return
+ * Bitmap memory footprint measured in bytes on success, 0 on error
+ */
+static inline uint32_t
+rte_bitmap_get_memory_footprint(uint32_t n_bits) {
+ /* Check input arguments */
+ if (n_bits == 0) {
+ return 0;
+ }
+
+ return __rte_bitmap_get_memory_footprint(n_bits, NULL, NULL, NULL, NULL);
+}
+
+/**
+ * Bitmap initialization
+ *
+ * @param mem_size
+ * Minimum expected size of bitmap.
+ * @param mem
+ * Base address of array1 and array2.
+ * @param n_bits
+ * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @return
+ * Handle to bitmap instance.
+ */
+static inline struct rte_bitmap *
+rte_bitmap_init(uint32_t n_bits, uint8_t *mem, uint32_t mem_size)
+{
+ struct rte_bitmap *bmp;
+ uint32_t array1_byte_offset, array1_slabs, array2_byte_offset, array2_slabs;
+ uint32_t size;
+
+ /* Check input arguments */
+ if (n_bits == 0) {
+ return NULL;
+ }
+
+ if ((mem == NULL) || (((uintptr_t) mem) & RTE_CACHE_LINE_MASK)) {
+ return NULL;
+ }
+
+ size = __rte_bitmap_get_memory_footprint(n_bits,
+ &array1_byte_offset, &array1_slabs,
+ &array2_byte_offset, &array2_slabs);
+ if (size < mem_size) {
+ return NULL;
+ }
+
+ /* Setup bitmap */
+ memset(mem, 0, size);
+ bmp = (struct rte_bitmap *) mem;
+
+ bmp->array1 = (uint64_t *) &mem[array1_byte_offset];
+ bmp->array1_size = array1_slabs;
+ bmp->array2 = (uint64_t *) &mem[array2_byte_offset];
+ bmp->array2_size = array2_slabs;
+
+ __rte_bitmap_scan_init(bmp);
+
+ return bmp;
+}
+
+/**
+ * Bitmap free
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @return
+ * 0 upon success, error code otherwise
+ */
+static inline int
+rte_bitmap_free(struct rte_bitmap *bmp)
+{
+ /* Check input arguments */
+ if (bmp == NULL) {
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * Bitmap reset
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ */
+static inline void
+rte_bitmap_reset(struct rte_bitmap *bmp)
+{
+ memset(bmp->array1, 0, bmp->array1_size * sizeof(uint64_t));
+ memset(bmp->array2, 0, bmp->array2_size * sizeof(uint64_t));
+ __rte_bitmap_scan_init(bmp);
+}
+
+/**
+ * Bitmap location prefetch into CPU L1 cache
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ * @return
+ * 0 upon success, error code otherwise
+ */
+static inline void
+rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab2;
+ uint32_t index2;
+
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ slab2 = bmp->array2 + index2;
+ rte_prefetch0((void *) slab2);
+}
+
+/**
+ * Bitmap bit get
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ * @return
+ * 0 when bit is cleared, non-zero when bit is set
+ */
+static inline uint64_t
+rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab2;
+ uint32_t index2, offset2;
+
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+ return (*slab2) & (1lu << offset2);
+}
+
+/**
+ * Bitmap bit set
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ */
+static inline void
+rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab1, *slab2;
+ uint32_t index1, index2, offset1, offset2;
+
+ /* Set bit in array2 slab and set bit in array1 slab */
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+ index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+ offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+ slab1 = bmp->array1 + index1;
+
+ *slab2 |= 1lu << offset2;
+ *slab1 |= 1lu << offset1;
+}
+
+/**
+ * Bitmap slab set
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position identifying the array2 slab
+ * @param slab
+ * Value to be assigned to the 64-bit slab in array2
+ */
+static inline void
+rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab)
+{
+ uint64_t *slab1, *slab2;
+ uint32_t index1, index2, offset1;
+
+ /* Set bits in array2 slab and set bit in array1 slab */
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+ offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+ slab1 = bmp->array1 + index1;
+
+ *slab2 |= slab;
+ *slab1 |= 1lu << offset1;
+}
+
+static inline uint64_t
+__rte_bitmap_line_not_empty(uint64_t *slab2)
+{
+ uint64_t v1, v2, v3, v4;
+
+ v1 = slab2[0] | slab2[1];
+ v2 = slab2[2] | slab2[3];
+ v3 = slab2[4] | slab2[5];
+ v4 = slab2[6] | slab2[7];
+ v1 |= v2;
+ v3 |= v4;
+
+ return v1 | v3;
+}
+
+/**
+ * Bitmap bit clear
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * Bit position
+ */
+static inline void
+rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
+{
+ uint64_t *slab1, *slab2;
+ uint32_t index1, index2, offset1, offset2;
+
+ /* Clear bit in array2 slab */
+ index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+ slab2 = bmp->array2 + index2;
+
+ /* Return if array2 slab is not all-zeros */
+ *slab2 &= ~(1lu << offset2);
+ if (*slab2){
+ return;
+ }
+
+ /* Check the entire cache line of array2 for all-zeros */
+ index2 &= ~ RTE_BITMAP_CL_SLAB_MASK;
+ slab2 = bmp->array2 + index2;
+ if (__rte_bitmap_line_not_empty(slab2)) {
+ return;
+ }
+
+ /* The array2 cache line is all-zeros, so clear bit in array1 slab */
+ index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+ offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+ slab1 = bmp->array1 + index1;
+ *slab1 &= ~(1lu << offset1);
+
+ return;
+}
+
+static inline int
+__rte_bitmap_scan_search(struct rte_bitmap *bmp)
+{
+ uint64_t value1;
+ uint32_t i;
+
+ /* Check current array1 slab */
+ value1 = bmp->array1[bmp->index1];
+ value1 &= __rte_bitmap_mask1_get(bmp);
+
+ if (rte_bsf64(value1, &bmp->offset1)) {
+ return 1;
+ }
+
+ __rte_bitmap_index1_inc(bmp);
+ bmp->offset1 = 0;
+
+ /* Look for another array1 slab */
+ for (i = 0; i < bmp->array1_size; i ++, __rte_bitmap_index1_inc(bmp)) {
+ value1 = bmp->array1[bmp->index1];
+
+ if (rte_bsf64(value1, &bmp->offset1)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static inline void
+__rte_bitmap_scan_read_init(struct rte_bitmap *bmp)
+{
+ __rte_bitmap_index2_set(bmp);
+ bmp->go2 = 1;
+ rte_prefetch1((void *)(bmp->array2 + bmp->index2 + 8));
+}
+
+static inline int
+__rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+ uint64_t *slab2;
+
+ slab2 = bmp->array2 + bmp->index2;
+ for ( ; bmp->go2 ; bmp->index2 ++, slab2 ++, bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK) {
+ if (*slab2) {
+ *pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+ *slab = *slab2;
+
+ bmp->index2 ++;
+ slab2 ++;
+ bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Bitmap scan (with automatic wrap-around)
+ *
+ * @param bmp
+ * Handle to bitmap instance
+ * @param pos
+ * When function call returns 1, pos contains the position of the next set
+ * bit, otherwise not modified
+ * @param slab
+ * When function call returns 1, slab contains the value of the entire 64-bit
+ * slab where the bit indicated by pos is located. Slabs are always 64-bit
+ * aligned, so the position of the first bit of the slab (this bit is not
+ * necessarily set) is pos / 64. Once a slab has been returned by the bitmap
+ * scan operation, the internal pointers of the bitmap are updated to point
+ * after this slab, so the same slab will not be returned again if it
+ * contains more than one bit which is set. When function call returns 0,
+ * slab is not modified.
+ * @return
+ * 0 if there is no bit set in the bitmap, 1 otherwise
+ */
+static inline int
+rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+ /* Return data from current array2 line if available */
+ if (__rte_bitmap_scan_read(bmp, pos, slab)) {
+ return 1;
+ }
+
+ /* Look for non-empty array2 line */
+ if (__rte_bitmap_scan_search(bmp)) {
+ __rte_bitmap_scan_read_init(bmp);
+ __rte_bitmap_scan_read(bmp, pos, slab);
+ return 1;
+ }
+
+ /* Empty bitmap */
+ return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_BITMAP_H__ */
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index c79368d3..6fb08341 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -55,6 +55,21 @@ extern "C" {
/** Double linked list of buses */
TAILQ_HEAD(rte_bus_list, rte_bus);
+
+/**
+ * IOVA mapping mode.
+ *
+ * IOVA mapping mode is iommu programming mode of a device.
+ * That device (for example: IOMMU backed DMA device) based
+ * on rte_iova_mode will generate physical or virtual address.
+ *
+ */
+enum rte_iova_mode {
+ RTE_IOVA_DC = 0, /* Don't care mode */
+ RTE_IOVA_PA = (1 << 0), /* DMA using physical address */
+ RTE_IOVA_VA = (1 << 1) /* DMA using virtual address */
+};
+
/**
* Bus specific scan for devices attached on the bus.
* For each bus object, the scan would be responsible for finding devices and
@@ -168,6 +183,20 @@ struct rte_bus_conf {
enum rte_bus_scan_mode scan_mode; /**< Scan policy. */
};
+
+/**
+ * Get common iommu class of the all the devices on the bus. The bus may
+ * check that those devices are attached to iommu driver.
+ * If no devices are attached to the bus. The bus may return with don't care
+ * (_DC) value.
+ * Otherwise, The bus will return appropriate _pa or _va iova mode.
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+typedef enum rte_iova_mode (*rte_bus_get_iommu_class_t)(void);
+
+
/**
* A structure describing a generic bus.
*/
@@ -181,6 +210,7 @@ struct rte_bus {
rte_bus_unplug_t unplug; /**< Remove single device from driver */
rte_bus_parse_t parse; /**< Parse a device name */
struct rte_bus_conf conf; /**< Bus configuration */
+ rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
};
/**
@@ -280,12 +310,22 @@ struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev);
*/
struct rte_bus *rte_bus_find_by_name(const char *busname);
+
+/**
+ * Get the common iommu class of devices bound on to buses available in the
+ * system. The default mode is PA.
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+enum rte_iova_mode rte_bus_get_iommu_class(void);
+
/**
* Helper for Bus registration.
* The constructor has higher priority than PMD constructors.
*/
#define RTE_REGISTER_BUS(nm, bus) \
-RTE_INIT_PRIO(businitfn_ ##nm, 101); \
+RTE_INIT_PRIO(businitfn_ ##nm, 110); \
static void businitfn_ ##nm(void) \
{\
(bus).name = RTE_STR(nm);\
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index 1afc66e3..de853e16 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -109,6 +109,29 @@ typedef uint16_t unaligned_uint16_t;
#define RTE_SET_USED(x) (void)(x)
/**
+ * Run function before main() with low priority.
+ *
+ * The constructor will be run after prioritized constructors.
+ *
+ * @param func
+ * Constructor function.
+ */
+#define RTE_INIT(func) \
+static void __attribute__((constructor, used)) func(void)
+
+/**
+ * Run function before main() with high priority.
+ *
+ * @param func
+ * Constructor function.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the first to run.
+ */
+#define RTE_INIT_PRIO(func, prio) \
+static void __attribute__((constructor(prio), used)) func(void)
+
+/**
* Force a function to be inlined
*/
#define __rte_always_inline inline __attribute__((always_inline))
diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h
index cab6fb4c..79b67b3e 100644
--- a/lib/librte_eal/common/include/rte_debug.h
+++ b/lib/librte_eal/common/include/rte_debug.h
@@ -79,7 +79,7 @@ void rte_dump_registers(void);
#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy")
#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__)
-#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG
+#ifdef RTE_ENABLE_ASSERT
#define RTE_ASSERT(exp) RTE_VERIFY(exp)
#else
#define RTE_ASSERT(exp) do {} while (0)
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 5386d3a2..9342e0cb 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -49,7 +49,6 @@ extern "C" {
#include <stdio.h>
#include <sys/queue.h>
-#include <rte_config.h>
#include <rte_log.h>
__attribute__((format(printf, 2, 0)))
@@ -152,7 +151,11 @@ struct rte_driver {
const char *alias; /**< Driver alias. */
};
-#define RTE_DEV_NAME_MAX_LEN (32)
+/*
+ * Internal identifier length
+ * Sufficiently large to allow for UUID or PCI address
+ */
+#define RTE_DEV_NAME_MAX_LEN 64
/**
* A structure describing a generic device.
@@ -166,28 +169,6 @@ struct rte_device {
};
/**
- * Initialize a driver specified by name.
- *
- * @param name
- * The pointer to a driver name to be initialized.
- * @param args
- * The pointer to arguments used by driver initialization.
- * @return
- * 0 on success, negative on error
- */
-int rte_vdev_init(const char *name, const char *args);
-
-/**
- * Uninitalize a driver specified by name.
- *
- * @param name
- * The pointer to a driver name to be initialized.
- * @return
- * 0 on success, negative on error
- */
-int rte_vdev_uninit(const char *name);
-
-/**
* Attach a device to a registered driver.
*
* @param name
@@ -312,4 +293,4 @@ __attribute__((used)) = str
}
#endif
-#endif /* _RTE_VDEV_H_ */
+#endif /* _RTE_DEV_H_ */
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index 0e7363d7..09b66819 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -44,7 +44,9 @@
#include <sched.h>
#include <rte_per_lcore.h>
-#include <rte_config.h>
+#include <rte_bus.h>
+
+#include <rte_pci_dev_feature_defs.h>
#ifdef __cplusplus
extern "C" {
@@ -87,6 +89,9 @@ struct rte_config {
/** Primary or secondary configuration */
enum rte_proc_type_t process_type;
+ /** PA or VA mapping mode */
+ enum rte_iova_mode iova_mode;
+
/**
* Pointer to memory configuration, which may be shared across multiple
* DPDK instances
@@ -264,6 +269,32 @@ rte_set_application_usage_hook(rte_usage_hook_t usage_func);
int rte_eal_has_hugepages(void);
/**
+ * Whether EAL is using PCI bus.
+ * Disabled by --no-pci option.
+ *
+ * @return
+ * Nonzero if the PCI bus is enabled.
+ */
+int rte_eal_has_pci(void);
+
+/**
+ * Whether the EAL was asked to create UIO device.
+ *
+ * @return
+ * Nonzero if true.
+ */
+int rte_eal_create_uio_dev(void);
+
+/**
+ * The user-configured vfio interrupt mode.
+ *
+ * @return
+ * Interrupt mode configured with the command line,
+ * RTE_INTR_MODE_NONE by default.
+ */
+enum rte_intr_mode rte_eal_vfio_intr_mode(void);
+
+/**
* A wrap API for syscall gettid.
*
* @return
@@ -287,11 +318,22 @@ static inline int rte_gettid(void)
return RTE_PER_LCORE(_thread_id);
}
-#define RTE_INIT(func) \
-static void __attribute__((constructor, used)) func(void)
+/**
+ * Get the iova mode
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+enum rte_iova_mode rte_eal_iova_mode(void);
-#define RTE_INIT_PRIO(func, prio) \
-static void __attribute__((constructor(prio), used)) func(void)
+/**
+ * Get default pool ops name for mbuf
+ *
+ * @return
+ * returns default pool ops name.
+ */
+const char *
+rte_eal_mbuf_default_mempool_ops(void);
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_eal_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h
new file mode 100644
index 00000000..031f78cc
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_eal_interrupts.h
@@ -0,0 +1,250 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_INTERRUPTS_H_
+#error "don't include this file directly, please include generic <rte_interrupts.h>"
+#endif
+
+/**
+ * @file rte_eal_interrupts.h
+ * @internal
+ *
+ * Contains function prototypes exposed by the EAL for interrupt handling by
+ * drivers and other DPDK internal consumers.
+ */
+
+#ifndef _RTE_EAL_INTERRUPTS_H_
+#define _RTE_EAL_INTERRUPTS_H_
+
+#define RTE_MAX_RXTX_INTR_VEC_ID 32
+#define RTE_INTR_VEC_ZERO_OFFSET 0
+#define RTE_INTR_VEC_RXTX_OFFSET 1
+
+/**
+ * The interrupt source type, e.g. UIO, VFIO, ALARM etc.
+ */
+enum rte_intr_handle_type {
+ RTE_INTR_HANDLE_UNKNOWN = 0, /**< generic unknown handle */
+ RTE_INTR_HANDLE_UIO, /**< uio device handle */
+ RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */
+ RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */
+ RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */
+ RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */
+ RTE_INTR_HANDLE_ALARM, /**< alarm handle */
+ RTE_INTR_HANDLE_EXT, /**< external handler */
+ RTE_INTR_HANDLE_VDEV, /**< virtual device */
+ RTE_INTR_HANDLE_MAX /**< count of elements */
+};
+
+#define RTE_INTR_EVENT_ADD 1UL
+#define RTE_INTR_EVENT_DEL 2UL
+
+typedef void (*rte_intr_event_cb_t)(int fd, void *arg);
+
+struct rte_epoll_data {
+ uint32_t event; /**< event type */
+ void *data; /**< User data */
+ rte_intr_event_cb_t cb_fun; /**< IN: callback fun */
+ void *cb_arg; /**< IN: callback arg */
+};
+
+enum {
+ RTE_EPOLL_INVALID = 0,
+ RTE_EPOLL_VALID,
+ RTE_EPOLL_EXEC,
+};
+
+/** interrupt epoll event obj, taken by epoll_event.ptr */
+struct rte_epoll_event {
+ volatile uint32_t status; /**< OUT: event status */
+ int fd; /**< OUT: event fd */
+ int epfd; /**< OUT: epoll instance the ev associated with */
+ struct rte_epoll_data epdata;
+};
+
+/** Handle for interrupts. */
+struct rte_intr_handle {
+ RTE_STD_C11
+ union {
+ int vfio_dev_fd; /**< VFIO device file descriptor */
+ int uio_cfg_fd; /**< UIO cfg file desc for uio_pci_generic */
+ };
+ int fd; /**< interrupt event file descriptor */
+ enum rte_intr_handle_type type; /**< handle type */
+ uint32_t max_intr; /**< max interrupt requested */
+ uint32_t nb_efd; /**< number of available efd(event fd) */
+ uint8_t efd_counter_size; /**< size of efd counter, used for vdev */
+ int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */
+ struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID];
+ /**< intr vector epoll event */
+ int *intr_vec; /**< intr vector number array */
+};
+
+#define RTE_EPOLL_PER_THREAD -1 /**< to hint using per thread epfd */
+
+/**
+ * It waits for events on the epoll instance.
+ *
+ * @param epfd
+ * Epoll instance fd on which the caller wait for events.
+ * @param events
+ * Memory area contains the events that will be available for the caller.
+ * @param maxevents
+ * Up to maxevents are returned, must greater than zero.
+ * @param timeout
+ * Specifying a timeout of -1 causes a block indefinitely.
+ * Specifying a timeout equal to zero cause to return immediately.
+ * @return
+ * - On success, returns the number of available event.
+ * - On failure, a negative value.
+ */
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+ int maxevents, int timeout);
+
+/**
+ * It performs control operations on epoll instance referred by the epfd.
+ * It requests that the operation op be performed for the target fd.
+ *
+ * @param epfd
+ * Epoll instance fd on which the caller perform control operations.
+ * @param op
+ * The operation be performed for the target fd.
+ * @param fd
+ * The target fd on which the control ops perform.
+ * @param event
+ * Describes the object linked to the fd.
+ * Note: The caller must take care the object deletion after CTL_DEL.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_epoll_ctl(int epfd, int op, int fd,
+ struct rte_epoll_event *event);
+
+/**
+ * The function returns the per thread epoll instance.
+ *
+ * @return
+ * epfd the epoll instance referred to.
+ */
+int
+rte_intr_tls_epfd(void);
+
+/**
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ * @param epfd
+ * Epoll instance fd which the intr vector associated to.
+ * @param op
+ * The operation be performed for the vector.
+ * Operation type of {ADD, DEL}.
+ * @param vec
+ * RX intr vector number added to the epoll instance wait list.
+ * @param data
+ * User raw data.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
+ int epfd, int op, unsigned int vec, void *data);
+
+/**
+ * It deletes registered eventfds.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+void
+rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle);
+
+/**
+ * It enables the packet I/O interrupt event if it's necessary.
+ * It creates event fd for each interrupt vector when MSIX is used,
+ * otherwise it multiplexes a single event fd.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ * @param nb_efd
+ * Number of interrupt vector trying to enable.
+ * The value 0 is not allowed.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd);
+
+/**
+ * It disables the packet I/O interrupt event.
+ * It deletes registered eventfds and closes the open fds.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+void
+rte_intr_efd_disable(struct rte_intr_handle *intr_handle);
+
+/**
+ * The packet I/O interrupt on datapath is enabled or not.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int
+rte_intr_dp_is_en(struct rte_intr_handle *intr_handle);
+
+/**
+ * The interrupt handle instance allows other causes or not.
+ * Other causes stand for any none packet I/O interrupts.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int
+rte_intr_allow_others(struct rte_intr_handle *intr_handle);
+
+/**
+ * The multiple interrupt vector capability of interrupt handle instance.
+ * It returns zero if no multiple interrupt vector support.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+int
+rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
+
+#endif /* _RTE_EAL_INTERRUPTS_H_ */
diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h
index 5d06ed79..43177c7a 100644
--- a/lib/librte_eal/common/include/rte_interrupts.h
+++ b/lib/librte_eal/common/include/rte_interrupts.h
@@ -53,7 +53,7 @@ struct rte_intr_handle;
/** Function to be registered for the specific interrupt */
typedef void (*rte_intr_callback_fn)(void *cb_arg);
-#include <exec-env/rte_interrupts.h>
+#include "rte_eal_interrupts.h"
/**
* It registers the callback for the specific interrupt. Multiple
diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
index 50e0d0fe..c89e6bab 100644
--- a/lib/librte_eal/common/include/rte_lcore.h
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -262,6 +262,20 @@ void rte_thread_get_affinity(rte_cpuset_t *cpusetp);
*/
int rte_thread_setname(pthread_t id, const char *name);
+/**
+ * Test if the core supplied has a specific role
+ *
+ * @param lcore_id
+ * The identifier of the lcore, which MUST be between 0 and
+ * RTE_MAX_LCORE-1.
+ * @param role
+ * The role to be checked against.
+ * @return
+ * On success, return 0; otherwise return a negative value.
+ */
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
index ec8dba79..16564d41 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -87,6 +87,7 @@ extern struct rte_logs rte_logs;
#define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */
#define RTE_LOGTYPE_EFD 18 /**< Log related to EFD. */
#define RTE_LOGTYPE_EVENTDEV 19 /**< Log related to eventdev. */
+#define RTE_LOGTYPE_GSO 20 /**< Log related to GSO. */
/* these log types can be used in an application */
#define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */
@@ -138,12 +139,6 @@ int rte_openlog_stream(FILE *f);
void rte_log_set_global_level(uint32_t level);
/**
- * Deprecated, replaced by rte_log_set_global_level().
- */
-__rte_deprecated
-void rte_set_log_level(uint32_t level);
-
-/**
* Get the global log level.
*
* @return
@@ -152,29 +147,6 @@ void rte_set_log_level(uint32_t level);
uint32_t rte_log_get_global_level(void);
/**
- * Deprecated, replaced by rte_log_get_global_level().
- */
-__rte_deprecated
-uint32_t rte_get_log_level(void);
-
-/**
- * Enable or disable the log type.
- *
- * @param type
- * Log type, for example, RTE_LOGTYPE_EAL.
- * @param enable
- * True for enable; false for disable.
- */
-__rte_deprecated
-void rte_set_log_type(uint32_t type, int enable);
-
-/**
- * Get the global log type.
- */
-__rte_deprecated
-uint32_t rte_get_log_type(void);
-
-/**
* Get the log level for a given type.
*
* @param logtype
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index 3d37f79b..5d4c11a7 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -323,17 +323,24 @@ int
rte_malloc_set_limit(const char *type, size_t max);
/**
- * Return the physical address of a virtual address obtained through
+ * Return the IO address of a virtual address obtained through
* rte_malloc
*
* @param addr
* Address obtained from a previous rte_malloc call
* @return
- * RTE_BAD_PHYS_ADDR on error
- * otherwise return physical address of the buffer
+ * RTE_BAD_IOVA on error
+ * otherwise return an address suitable for IO
*/
-phys_addr_t
-rte_malloc_virt2phy(const void *addr);
+rte_iova_t
+rte_malloc_virt2iova(const void *addr);
+
+__rte_deprecated
+static inline phys_addr_t
+rte_malloc_virt2phy(const void *addr)
+{
+ return rte_malloc_virt2iova(addr);
+}
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 4aa5d1f7..14aacea5 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -44,12 +44,6 @@
#include <stddef.h>
#include <stdio.h>
-#include <rte_config.h>
-
-#ifdef RTE_EXEC_ENV_LINUXAPP
-#include <exec-env/rte_dom0_common.h>
-#endif
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -98,14 +92,27 @@ enum rte_page_sizes {
*/
#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
-typedef uint64_t phys_addr_t; /**< Physical address definition. */
+typedef uint64_t phys_addr_t; /**< Physical address. */
#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1)
+/**
+ * IO virtual address type.
+ * When the physical addressing mode (IOVA as PA) is in use,
+ * the translation from an IO virtual address (IOVA) to a physical address
+ * is a direct mapping, i.e. the same value.
+ * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation.
+ */
+typedef uint64_t rte_iova_t;
+#define RTE_BAD_IOVA ((rte_iova_t)-1)
/**
* Physical memory segment descriptor.
*/
struct rte_memseg {
- phys_addr_t phys_addr; /**< Start physical address. */
+ RTE_STD_C11
+ union {
+ phys_addr_t phys_addr; /**< deprecated - Start physical address. */
+ rte_iova_t iova; /**< Start IO address. */
+ };
RTE_STD_C11
union {
void *addr; /**< Start virtual address. */
@@ -116,10 +123,6 @@ struct rte_memseg {
int32_t socket_id; /**< NUMA socket ID. */
uint32_t nchannel; /**< Number of channels. */
uint32_t nrank; /**< Number of ranks. */
-#ifdef RTE_LIBRTE_XEN_DOM0
- /**< store segment MFNs */
- uint64_t mfn[DOM0_NUM_MEMBLOCK];
-#endif
} __rte_packed;
/**
@@ -140,11 +143,21 @@ int rte_mem_lock_page(const void *virt);
* @param virt
* The virtual address.
* @return
- * The physical address or RTE_BAD_PHYS_ADDR on error.
+ * The physical address or RTE_BAD_IOVA on error.
*/
phys_addr_t rte_mem_virt2phy(const void *virt);
/**
+ * Get IO virtual address of any mapped virtual address in the current process.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * The IO address or RTE_BAD_IOVA on error.
+ */
+rte_iova_t rte_mem_virt2iova(const void *virt);
+
+/**
* Get the layout of the available physical memory.
*
* It can be useful for an application to have the full physical
@@ -195,68 +208,16 @@ unsigned rte_memory_get_nchannel(void);
*/
unsigned rte_memory_get_nrank(void);
-#ifdef RTE_LIBRTE_XEN_DOM0
-
-/**< Internal use only - should DOM0 memory mapping be used */
-int rte_xen_dom0_supported(void);
-
-/**< Internal use only - phys to virt mapping for xen */
-phys_addr_t rte_xen_mem_phy2mch(int32_t, const phys_addr_t);
-
/**
- * Return the physical address of elt, which is an element of the pool mp.
- *
- * @param memseg_id
- * Identifier of the memory segment owning the physical address. If
- * set to -1, find it automatically.
- * @param phy_addr
- * physical address of elt.
- *
- * @return
- * The physical address or RTE_BAD_PHYS_ADDR on error.
- */
-static inline phys_addr_t
-rte_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr)
-{
- if (rte_xen_dom0_supported())
- return rte_xen_mem_phy2mch(memseg_id, phy_addr);
- else
- return phy_addr;
-}
-
-/**
- * Memory init for supporting application running on Xen domain0.
- *
- * @param void
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
*
* @return
- * 0: successfully
- * negative: error
+ * 1 if the system is able to obtain physical addresses.
+ * 0 if using DMA addresses through an IOMMU.
*/
-int rte_xen_dom0_memory_init(void);
-
-/**
- * Attach to memory setments of primary process on Xen domain0.
- *
- * @param void
- *
- * @return
- * 0: successfully
- * negative: error
- */
-int rte_xen_dom0_memory_attach(void);
-#else
-static inline int rte_xen_dom0_supported(void)
-{
- return 0;
-}
-
-static inline phys_addr_t
-rte_mem_phy2mch(int32_t memseg_id __rte_unused, const phys_addr_t phy_addr)
-{
- return phy_addr;
-}
-#endif
+int rte_eal_using_phys_addrs(void);
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index 1d0827f4..6f0ba182 100644
--- a/lib/librte_eal/common/include/rte_memzone.h
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -78,7 +78,11 @@ struct rte_memzone {
#define RTE_MEMZONE_NAMESIZE 32 /**< Maximum length of memory zone name.*/
char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */
- phys_addr_t phys_addr; /**< Start physical address. */
+ RTE_STD_C11
+ union {
+ phys_addr_t phys_addr; /**< deprecated - Start physical address. */
+ rte_iova_t iova; /**< Start IO address. */
+ };
RTE_STD_C11
union {
void *addr; /**< Start virtual address. */
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
deleted file mode 100644
index 8b123391..00000000
--- a/lib/librte_eal/common/include/rte_pci.h
+++ /dev/null
@@ -1,598 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * Copyright 2013-2014 6WIND S.A.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RTE_PCI_H_
-#define _RTE_PCI_H_
-
-/**
- * @file
- *
- * RTE PCI Interface
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <errno.h>
-#include <sys/queue.h>
-#include <stdint.h>
-#include <inttypes.h>
-
-#include <rte_debug.h>
-#include <rte_interrupts.h>
-#include <rte_dev.h>
-#include <rte_bus.h>
-
-/** Pathname of PCI devices directory. */
-const char *pci_get_sysfs_path(void);
-
-/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
-#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
-#define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X")
-
-/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */
-#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
-
-/** Nb. of values in PCI device identifier format string. */
-#define PCI_FMT_NVAL 4
-
-/** Nb. of values in PCI resource format. */
-#define PCI_RESOURCE_FMT_NVAL 3
-
-/** Maximum number of PCI resources. */
-#define PCI_MAX_RESOURCE 6
-
-/* Forward declarations */
-struct rte_pci_device;
-struct rte_pci_driver;
-
-/** List of PCI devices */
-TAILQ_HEAD(rte_pci_device_list, rte_pci_device);
-/** List of PCI drivers */
-TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver);
-
-/* PCI Bus iterators */
-#define FOREACH_DEVICE_ON_PCIBUS(p) \
- TAILQ_FOREACH(p, &(rte_pci_bus.device_list), next)
-
-#define FOREACH_DRIVER_ON_PCIBUS(p) \
- TAILQ_FOREACH(p, &(rte_pci_bus.driver_list), next)
-
-/**
- * A structure describing an ID for a PCI driver. Each driver provides a
- * table of these IDs for each device that it supports.
- */
-struct rte_pci_id {
- uint32_t class_id; /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */
- uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */
- uint16_t device_id; /**< Device ID or PCI_ANY_ID. */
- uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
- uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */
-};
-
-/**
- * A structure describing the location of a PCI device.
- */
-struct rte_pci_addr {
- uint32_t domain; /**< Device domain */
- uint8_t bus; /**< Device bus */
- uint8_t devid; /**< Device ID */
- uint8_t function; /**< Device function. */
-};
-
-struct rte_devargs;
-
-/**
- * A structure describing a PCI device.
- */
-struct rte_pci_device {
- TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */
- struct rte_device device; /**< Inherit core device */
- struct rte_pci_addr addr; /**< PCI location. */
- struct rte_pci_id id; /**< PCI ID. */
- struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
- /**< PCI Memory Resource */
- struct rte_intr_handle intr_handle; /**< Interrupt handle */
- struct rte_pci_driver *driver; /**< Associated driver */
- uint16_t max_vfs; /**< sriov enable if not zero */
- enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
- char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */
-};
-
-/**
- * @internal
- * Helper macro for drivers that need to convert to struct rte_pci_device.
- */
-#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device)
-
-/** Any PCI device identifier (vendor, device, ...) */
-#define PCI_ANY_ID (0xffff)
-#define RTE_CLASS_ANY_ID (0xffffff)
-
-#ifdef __cplusplus
-/** C++ macro used to help building up tables of device IDs */
-#define RTE_PCI_DEVICE(vend, dev) \
- RTE_CLASS_ANY_ID, \
- (vend), \
- (dev), \
- PCI_ANY_ID, \
- PCI_ANY_ID
-#else
-/** Macro used to help building up tables of device IDs */
-#define RTE_PCI_DEVICE(vend, dev) \
- .class_id = RTE_CLASS_ANY_ID, \
- .vendor_id = (vend), \
- .device_id = (dev), \
- .subsystem_vendor_id = PCI_ANY_ID, \
- .subsystem_device_id = PCI_ANY_ID
-#endif
-
-/**
- * Initialisation function for the driver called during PCI probing.
- */
-typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *);
-
-/**
- * Uninitialisation function for the driver called during hotplugging.
- */
-typedef int (pci_remove_t)(struct rte_pci_device *);
-
-/**
- * A structure describing a PCI driver.
- */
-struct rte_pci_driver {
- TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */
- struct rte_driver driver; /**< Inherit core driver. */
- struct rte_pci_bus *bus; /**< PCI bus reference. */
- pci_probe_t *probe; /**< Device Probe function. */
- pci_remove_t *remove; /**< Device Remove function. */
- const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
- uint32_t drv_flags; /**< Flags contolling handling of device. */
-};
-
-/**
- * Structure describing the PCI bus
- */
-struct rte_pci_bus {
- struct rte_bus bus; /**< Inherit the generic class */
- struct rte_pci_device_list device_list; /**< List of PCI devices */
- struct rte_pci_driver_list driver_list; /**< List of PCI drivers */
-};
-
-/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
-#define RTE_PCI_DRV_NEED_MAPPING 0x0001
-/** Device driver supports link state interrupt */
-#define RTE_PCI_DRV_INTR_LSC 0x0008
-/** Device driver supports device removal interrupt */
-#define RTE_PCI_DRV_INTR_RMV 0x0010
-/** Device driver needs to keep mapped resources if unsupported dev detected */
-#define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020
-
-/**
- * A structure describing a PCI mapping.
- */
-struct pci_map {
- void *addr;
- char *path;
- uint64_t offset;
- uint64_t size;
- uint64_t phaddr;
-};
-
-/**
- * A structure describing a mapped PCI resource.
- * For multi-process we need to reproduce all PCI mappings in secondary
- * processes, so save them in a tailq.
- */
-struct mapped_pci_resource {
- TAILQ_ENTRY(mapped_pci_resource) next;
-
- struct rte_pci_addr pci_addr;
- char path[PATH_MAX];
- int nb_maps;
- struct pci_map maps[PCI_MAX_RESOURCE];
-};
-
-/** mapped pci device list */
-TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
-
-/**< Internal use only - Macro used by pci addr parsing functions **/
-#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \
-do { \
- unsigned long val; \
- char *end; \
- errno = 0; \
- val = strtoul((in), &end, 16); \
- if (errno != 0 || end[0] != (dlm) || val > (lim)) \
- return -EINVAL; \
- (fd) = (typeof (fd))val; \
- (in) = end + 1; \
-} while(0)
-
-/**
- * Utility function to produce a PCI Bus-Device-Function value
- * given a string representation. Assumes that the BDF is provided without
- * a domain prefix (i.e. domain returned is always 0)
- *
- * @param input
- * The input string to be parsed. Should have the format XX:XX.X
- * @param dev_addr
- * The PCI Bus-Device-Function address to be returned. Domain will always be
- * returned as 0
- * @return
- * 0 on success, negative on error.
- */
-static inline int
-eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr)
-{
- dev_addr->domain = 0;
- GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
- GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
- GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
- return 0;
-}
-
-/**
- * Utility function to produce a PCI Bus-Device-Function value
- * given a string representation. Assumes that the BDF is provided including
- * a domain prefix.
- *
- * @param input
- * The input string to be parsed. Should have the format XXXX:XX:XX.X
- * @param dev_addr
- * The PCI Bus-Device-Function address to be returned
- * @return
- * 0 on success, negative on error.
- */
-static inline int
-eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
-{
- GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':');
- GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
- GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
- GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
- return 0;
-}
-#undef GET_PCIADDR_FIELD
-
-/**
- * Utility function to write a pci device name, this device name can later be
- * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_*
- * BDF helpers.
- *
- * @param addr
- * The PCI Bus-Device-Function address
- * @param output
- * The output buffer string
- * @param size
- * The output buffer size
- */
-static inline void
-rte_pci_device_name(const struct rte_pci_addr *addr,
- char *output, size_t size)
-{
- RTE_VERIFY(size >= PCI_PRI_STR_SIZE);
- RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT,
- addr->domain, addr->bus,
- addr->devid, addr->function) >= 0);
-}
-
-/* Compare two PCI device addresses. */
-/**
- * Utility function to compare two PCI device addresses.
- *
- * @param addr
- * The PCI Bus-Device-Function address to compare
- * @param addr2
- * The PCI Bus-Device-Function address to compare
- * @return
- * 0 on equal PCI address.
- * Positive on addr is greater than addr2.
- * Negative on addr is less than addr2, or error.
- */
-static inline int
-rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
- const struct rte_pci_addr *addr2)
-{
- uint64_t dev_addr, dev_addr2;
-
- if ((addr == NULL) || (addr2 == NULL))
- return -1;
-
- dev_addr = ((uint64_t)addr->domain << 24) |
- (addr->bus << 16) | (addr->devid << 8) | addr->function;
- dev_addr2 = ((uint64_t)addr2->domain << 24) |
- (addr2->bus << 16) | (addr2->devid << 8) | addr2->function;
-
- if (dev_addr > dev_addr2)
- return 1;
- else if (dev_addr < dev_addr2)
- return -1;
- else
- return 0;
-}
-
-/**
- * Scan the content of the PCI bus, and the devices in the devices
- * list
- *
- * @return
- * 0 on success, negative on error
- */
-int rte_pci_scan(void);
-
-/**
- * Probe the PCI bus
- *
- * @return
- * - 0 on success.
- * - !0 on error.
- */
-int
-rte_pci_probe(void);
-
-/**
- * Map the PCI device resources in user space virtual memory address
- *
- * Note that driver should not call this function when flag
- * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for
- * you when it's on.
- *
- * @param dev
- * A pointer to a rte_pci_device structure describing the device
- * to use
- *
- * @return
- * 0 on success, negative on error and positive if no driver
- * is found for the device.
- */
-int rte_pci_map_device(struct rte_pci_device *dev);
-
-/**
- * Unmap this device
- *
- * @param dev
- * A pointer to a rte_pci_device structure describing the device
- * to use
- */
-void rte_pci_unmap_device(struct rte_pci_device *dev);
-
-/**
- * @internal
- * Map a particular resource from a file.
- *
- * @param requested_addr
- * The starting address for the new mapping range.
- * @param fd
- * The file descriptor.
- * @param offset
- * The offset for the mapping range.
- * @param size
- * The size for the mapping range.
- * @param additional_flags
- * The additional flags for the mapping range.
- * @return
- * - On success, the function returns a pointer to the mapped area.
- * - On error, the value MAP_FAILED is returned.
- */
-void *pci_map_resource(void *requested_addr, int fd, off_t offset,
- size_t size, int additional_flags);
-
-/**
- * @internal
- * Unmap a particular resource.
- *
- * @param requested_addr
- * The address for the unmapping range.
- * @param size
- * The size for the unmapping range.
- */
-void pci_unmap_resource(void *requested_addr, size_t size);
-
-/**
- * Probe the single PCI device.
- *
- * Scan the content of the PCI bus, and find the pci device specified by pci
- * address, then call the probe() function for registered driver that has a
- * matching entry in its id_table for discovered device.
- *
- * @param addr
- * The PCI Bus-Device-Function address to probe.
- * @return
- * - 0 on success.
- * - Negative on error.
- */
-int rte_pci_probe_one(const struct rte_pci_addr *addr);
-
-/**
- * Close the single PCI device.
- *
- * Scan the content of the PCI bus, and find the pci device specified by pci
- * address, then call the remove() function for registered driver that has a
- * matching entry in its id_table for discovered device.
- *
- * @param addr
- * The PCI Bus-Device-Function address to close.
- * @return
- * - 0 on success.
- * - Negative on error.
- */
-int rte_pci_detach(const struct rte_pci_addr *addr);
-
-/**
- * Dump the content of the PCI bus.
- *
- * @param f
- * A pointer to a file for output
- */
-void rte_pci_dump(FILE *f);
-
-/**
- * Register a PCI driver.
- *
- * @param driver
- * A pointer to a rte_pci_driver structure describing the driver
- * to be registered.
- */
-void rte_pci_register(struct rte_pci_driver *driver);
-
-/** Helper for PCI device registration from driver (eth, crypto) instance */
-#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \
-RTE_INIT(pciinitfn_ ##nm); \
-static void pciinitfn_ ##nm(void) \
-{\
- (pci_drv).driver.name = RTE_STR(nm);\
- rte_pci_register(&pci_drv); \
-} \
-RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
-
-/**
- * Unregister a PCI driver.
- *
- * @param driver
- * A pointer to a rte_pci_driver structure describing the driver
- * to be unregistered.
- */
-void rte_pci_unregister(struct rte_pci_driver *driver);
-
-/**
- * Read PCI config space.
- *
- * @param device
- * A pointer to a rte_pci_device structure describing the device
- * to use
- * @param buf
- * A data buffer where the bytes should be read into
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into PCI config space
- */
-int rte_pci_read_config(const struct rte_pci_device *device,
- void *buf, size_t len, off_t offset);
-
-/**
- * Write PCI config space.
- *
- * @param device
- * A pointer to a rte_pci_device structure describing the device
- * to use
- * @param buf
- * A data buffer containing the bytes should be written
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into PCI config space
- */
-int rte_pci_write_config(const struct rte_pci_device *device,
- const void *buf, size_t len, off_t offset);
-
-/**
- * A structure used to access io resources for a pci device.
- * rte_pci_ioport is arch, os, driver specific, and should not be used outside
- * of pci ioport api.
- */
-struct rte_pci_ioport {
- struct rte_pci_device *dev;
- uint64_t base;
- uint64_t len; /* only filled for memory mapped ports */
-};
-
-/**
- * Initialize a rte_pci_ioport object for a pci device io resource.
- *
- * This object is then used to gain access to those io resources (see below).
- *
- * @param dev
- * A pointer to a rte_pci_device structure describing the device
- * to use.
- * @param bar
- * Index of the io pci resource we want to access.
- * @param p
- * The rte_pci_ioport object to be initialized.
- * @return
- * 0 on success, negative on error.
- */
-int rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p);
-
-/**
- * Release any resources used in a rte_pci_ioport object.
- *
- * @param p
- * The rte_pci_ioport object to be uninitialized.
- * @return
- * 0 on success, negative on error.
- */
-int rte_pci_ioport_unmap(struct rte_pci_ioport *p);
-
-/**
- * Read from a io pci resource.
- *
- * @param p
- * The rte_pci_ioport object from which we want to read.
- * @param data
- * A data buffer where the bytes should be read into
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into the pci io resource.
- */
-void rte_pci_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset);
-
-/**
- * Write to a io pci resource.
- *
- * @param p
- * The rte_pci_ioport object to which we want to write.
- * @param data
- * A data buffer where the bytes should be read into
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into the pci io resource.
- */
-void rte_pci_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _RTE_PCI_H_ */
diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h
index 7c6f7383..92724406 100644
--- a/lib/librte_eal/common/include/rte_service.h
+++ b/lib/librte_eal/common/include/rte_service.h
@@ -61,9 +61,6 @@ extern "C" {
#include <rte_lcore.h>
-/* forward declaration only. Definition in rte_service_private.h */
-struct rte_service_spec;
-
#define RTE_SERVICE_NAME_MAX 32
/* Capabilities of a service.
@@ -89,40 +86,32 @@ struct rte_service_spec;
*/
uint32_t rte_service_get_count(void);
-
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Return the specification of a service by integer id.
+ * Return the id of a service by name.
*
- * This function provides the specification of a service. This can be used by
- * the application to understand what the service represents. The service
- * must not be modified by the application directly, only passed to the various
- * rte_service_* functions.
- *
- * @param id The integer id of the service to retrieve
- * @retval non-zero A valid pointer to the service_spec
- * @retval NULL Invalid *id* provided.
- */
-struct rte_service_spec *rte_service_get_by_id(uint32_t id);
-
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
+ * This function provides the id of the service using the service name as
+ * lookup key. The service id is to be passed to other functions in the
+ * rte_service_* API.
*
- * Return the specification of a service by name.
- *
- * This function provides the specification of a service using the service name
- * as lookup key. This can be used by the application to understand what the
- * service represents. The service must not be modified by the application
- * directly, only passed to the various rte_service_* functions.
+ * Example usage:
+ * @code
+ * uint32_t service_id;
+ * int32_t ret = rte_service_get_by_name("service_X", &service_id);
+ * if (ret) {
+ * // handle error
+ * }
+ * @endcode
*
* @param name The name of the service to retrieve
- * @retval non-zero A valid pointer to the service_spec
- * @retval NULL Invalid *name* provided.
+ * @param[out] service_id A pointer to a uint32_t, to be filled in with the id.
+ * @retval 0 Success. The service id is provided in *service_id*.
+ * @retval -EINVAL Null *service_id* pointer provided
+ * @retval -ENODEV No such service registered
*/
-struct rte_service_spec *rte_service_get_by_name(const char *name);
+int32_t rte_service_get_by_name(const char *name, uint32_t *service_id);
/**
* @warning
@@ -133,7 +122,7 @@ struct rte_service_spec *rte_service_get_by_name(const char *name);
* @return A pointer to the name of the service. The returned pointer remains
* in ownership of the service, and the application must not free it.
*/
-const char *rte_service_get_name(const struct rte_service_spec *service);
+const char *rte_service_get_name(uint32_t id);
/**
* @warning
@@ -146,17 +135,16 @@ const char *rte_service_get_name(const struct rte_service_spec *service);
* @retval 1 Capability supported by this service instance
* @retval 0 Capability not supported by this service instance
*/
-int32_t rte_service_probe_capability(const struct rte_service_spec *service,
- uint32_t capability);
+int32_t rte_service_probe_capability(uint32_t id, uint32_t capability);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Enable a core to run a service.
+ * Map or unmap a lcore to a service.
*
- * Each core can be added or removed from running specific services. This
- * functions adds *lcore* to the set of cores that will run *service*.
+ * Each core can be added or removed from running a specific service. This
+ * function enables or disables *lcore* to run *service_id*.
*
* If multiple cores are enabled on a service, an atomic is used to ensure that
* only one cores runs the service at a time. The exception to this is when
@@ -164,82 +152,120 @@ int32_t rte_service_probe_capability(const struct rte_service_spec *service,
* called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set,
* the service function can be run on multiple threads at the same time.
*
- * @retval 0 lcore added successfully
+ * @param service_id the service to apply the lcore to
+ * @param lcore The lcore that will be mapped to service
+ * @param enable Zero to unmap or disable the core, non-zero to enable
+ *
+ * @retval 0 lcore map updated successfully
* @retval -EINVAL An invalid service or lcore was provided.
*/
-int32_t rte_service_enable_on_lcore(struct rte_service_spec *service,
- uint32_t lcore);
+int32_t rte_service_map_lcore_set(uint32_t service_id, uint32_t lcore,
+ uint32_t enable);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Disable a core to run a service.
+ * Retrieve the mapping of an lcore to a service.
*
- * Each core can be added or removed from running specific services. This
- * functions removes *lcore* to the set of cores that will run *service*.
+ * @param service_id the service to apply the lcore to
+ * @param lcore The lcore that will be mapped to service
*
- * @retval 0 Lcore removed successfully
+ * @retval 1 lcore is mapped to service
+ * @retval 0 lcore is not mapped to service
* @retval -EINVAL An invalid service or lcore was provided.
*/
-int32_t rte_service_disable_on_lcore(struct rte_service_spec *service,
- uint32_t lcore);
+int32_t rte_service_map_lcore_get(uint32_t service_id, uint32_t lcore);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Return if an lcore is enabled for the service.
+ * Set the runstate of the service.
*
- * This function allows the application to query if *lcore* is currently set to
- * run *service*.
+ * Each service is either running or stopped. Setting a non-zero runstate
+ * enables the service to run, while setting runstate zero disables it.
*
- * @retval 1 Lcore enabled on this lcore
- * @retval 0 Lcore disabled on this lcore
- * @retval -EINVAL An invalid service or lcore was provided.
+ * @param id The id of the service
+ * @param runstate The run state to apply to the service
+ *
+ * @retval 0 The service was successfully started
+ * @retval -EINVAL Invalid service id
*/
-int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service,
- uint32_t lcore);
-
+int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Enable *service* to run.
- *
- * This function switches on a service during runtime.
- * @retval 0 The service was successfully started
+ * Get the runstate for the service with *id*. See *rte_service_runstate_set*
+ * for details of runstates. A service can call this function to ensure that
+ * the application has indicated that it will receive CPU cycles. Either a
+ * service-core is mapped (default case), or the application has explicitly
+ * disabled the check that a service-cores is mapped to the service and takes
+ * responsibility to run the service manually using the available function
+ * *rte_service_run_iter_on_app_lcore* to do so.
+ *
+ * @retval 1 Service is running
+ * @retval 0 Service is stopped
+ * @retval -EINVAL Invalid service id
*/
-int32_t rte_service_start(struct rte_service_spec *service);
+int32_t rte_service_runstate_get(uint32_t id);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Disable *service*.
+ * Enable or disable the check for a service-core being mapped to the service.
+ * An application can disable the check when takes the responsibility to run a
+ * service itself using *rte_service_run_iter_on_app_lcore*.
+ *
+ * @param id The id of the service to set the check on
+ * @param enable When zero, the check is disabled. Non-zero enables the check.
*
- * Switch off a service, so it is not run until it is *rte_service_start* is
- * called on it.
- * @retval 0 Service successfully switched off
+ * @retval 0 Success
+ * @retval -EINVAL Invalid service ID
*/
-int32_t rte_service_stop(struct rte_service_spec *service);
+int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enable);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Returns if *service* is currently running.
- *
- * This function returns true if the service has been started using
- * *rte_service_start*, AND a service core is mapped to the service. This
- * function can be used to ensure that the service will be run.
- *
- * @retval 1 Service is currently running, and has a service lcore mapped
- * @retval 0 Service is currently stopped, or no service lcore is mapped
- * @retval -EINVAL Invalid service pointer provided
+ * This function runs a service callback from a non-service lcore.
+ *
+ * This function is designed to enable gradual porting to service cores, and
+ * to enable unit tests to verify a service behaves as expected.
+ *
+ * When called, this function ensures that the service identified by *id* is
+ * safe to run on this lcore. Multi-thread safe services are invoked even if
+ * other cores are simultaneously running them as they are multi-thread safe.
+ *
+ * Multi-thread unsafe services are handled depending on the variable
+ * *serialize_multithread_unsafe*:
+ * - When set, the function will check if a service is already being invoked
+ * on another lcore, refusing to run it and returning -EBUSY.
+ * - When zero, the application takes responsibility to ensure that the service
+ * indicated by *id* is not going to be invoked by another lcore. This setting
+ * avoids atomic operations, so is likely to be more performant.
+ *
+ * @param id The ID of the service to run
+ * @param serialize_multithread_unsafe This parameter indicates to the service
+ * cores library if it is required to use atomics to serialize access
+ * to mult-thread unsafe services. As there is an overhead in using
+ * atomics, applications can choose to enable or disable this feature
+ *
+ * Note that any thread calling this function MUST be a DPDK EAL thread, as
+ * the *rte_lcore_id* function is used to access internal data structures.
+ *
+ * @retval 0 Service was run on the calling thread successfully
+ * @retval -EBUSY Another lcore is executing the service, and it is not a
+ * multi-thread safe service, so the service was not run on this lcore
+ * @retval -ENOEXEC Service is not in a run-able state
+ * @retval -EINVAL Invalid service id
*/
-int32_t rte_service_is_running(const struct rte_service_spec *service);
+int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
+ uint32_t serialize_multithread_unsafe);
/**
* @warning
@@ -341,13 +367,12 @@ int32_t rte_service_lcore_reset_all(void);
* Enable or disable statistics collection for *service*.
*
* This function enables per core, per-service cycle count collection.
- * @param service The service to enable statistics gathering on.
+ * @param id The service to enable statistics gathering on.
* @param enable Zero to disable statistics, non-zero to enable.
* @retval 0 Success
* @retval -EINVAL Invalid service pointer passed
*/
-int32_t rte_service_set_stats_enable(struct rte_service_spec *service,
- int32_t enable);
+int32_t rte_service_set_stats_enable(uint32_t id, int32_t enable);
/**
* @warning
@@ -374,10 +399,26 @@ int32_t rte_service_lcore_list(uint32_t array[], uint32_t n);
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Dumps any information available about the service. If service is NULL,
- * dumps info for all services.
+ * Get the numer of services running on the supplied lcore.
+ *
+ * @param lcore Id of the service core.
+ * @retval >=0 Number of services registered to this core.
+ * @retval -EINVAL Invalid lcore provided
+ * @retval -ENOTSUP The provided lcore is not a service core.
+ */
+int32_t rte_service_lcore_count_services(uint32_t lcore);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Dumps any information available about the service. When id is UINT32_MAX,
+ * this function dumps info for all services.
+ *
+ * @retval 0 Statistics have been successfully dumped
+ * @retval -EINVAL Invalid service id provided
*/
-int32_t rte_service_dump(FILE *f, struct rte_service_spec *service);
+int32_t rte_service_dump(FILE *f, uint32_t id);
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_service_component.h b/lib/librte_eal/common/include/rte_service_component.h
index 7a946a1e..ac965cb4 100644
--- a/lib/librte_eal/common/include/rte_service_component.h
+++ b/lib/librte_eal/common/include/rte_service_component.h
@@ -85,21 +85,30 @@ struct rte_service_spec {
*
* For example the eventdev SW PMD requires CPU cycles to perform its
* scheduling. This can be achieved by registering it as a service, and the
- * application can then assign CPU resources to it using
- * *rte_service_set_coremask*.
+ * application can then assign CPU resources to that service.
+ *
+ * Note that when a service component registers itself, it is not permitted to
+ * add or remove service-core threads, or modify lcore-to-service mappings. The
+ * only API that may be called by the service-component is
+ * *rte_service_component_runstate_set*, which indicates that the service
+ * component is ready to be executed.
*
* @param spec The specification of the service to register
+ * @param[out] service_id A pointer to a uint32_t, which will be filled in
+ * during registration of the service. It is set to the integers
+ * service number given to the service. This parameter may be NULL.
* @retval 0 Successfully registered the service.
* -EINVAL Attempted to register an invalid service (eg, no callback
* set)
*/
-int32_t rte_service_register(const struct rte_service_spec *spec);
+int32_t rte_service_component_register(const struct rte_service_spec *spec,
+ uint32_t *service_id);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Unregister a service.
+ * Unregister a service component.
*
* The service being removed must be stopped before calling this function.
*
@@ -107,7 +116,7 @@ int32_t rte_service_register(const struct rte_service_spec *spec);
* @retval -EBUSY The service is currently running, stop the service before
* calling unregister. No action has been taken.
*/
-int32_t rte_service_unregister(struct rte_service_spec *service);
+int32_t rte_service_component_unregister(uint32_t id);
/**
* @warning
@@ -131,6 +140,23 @@ int32_t rte_service_start_with_defaults(void);
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
+ * Set the backend runstate of a component.
+ *
+ * This function allows services to be registered at startup, but not yet
+ * enabled to run by default. When the service has been configured (via the
+ * usual method; eg rte_eventdev_configure, the service can mark itself as
+ * ready to run. The differentiation between backend runstate and
+ * service_runstate is that the backend runstate is set by the service
+ * component while the service runstate is reserved for application usage.
+ *
+ * @retval 0 Success
+ */
+int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
* Initialize the service library.
*
* In order to use the service library, it must be initialized. EAL initializes
diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h
deleted file mode 100644
index 29f5a523..00000000
--- a/lib/librte_eal/common/include/rte_vdev.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2016 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RTE_VDEV_H
-#define RTE_VDEV_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/queue.h>
-#include <rte_dev.h>
-#include <rte_devargs.h>
-
-struct rte_vdev_device {
- TAILQ_ENTRY(rte_vdev_device) next; /**< Next attached vdev */
- struct rte_device device; /**< Inherit core device */
-};
-
-/**
- * @internal
- * Helper macro for drivers that need to convert to struct rte_vdev_device.
- */
-#define RTE_DEV_TO_VDEV(ptr) \
- container_of(ptr, struct rte_vdev_device, device)
-
-static inline const char *
-rte_vdev_device_name(const struct rte_vdev_device *dev)
-{
- if (dev && dev->device.name)
- return dev->device.name;
- return NULL;
-}
-
-static inline const char *
-rte_vdev_device_args(const struct rte_vdev_device *dev)
-{
- if (dev && dev->device.devargs)
- return dev->device.devargs->args;
- return "";
-}
-
-/** Double linked list of virtual device drivers. */
-TAILQ_HEAD(vdev_driver_list, rte_vdev_driver);
-
-/**
- * Probe function called for each virtual device driver once.
- */
-typedef int (rte_vdev_probe_t)(struct rte_vdev_device *dev);
-
-/**
- * Remove function called for each virtual device driver once.
- */
-typedef int (rte_vdev_remove_t)(struct rte_vdev_device *dev);
-
-/**
- * A virtual device driver abstraction.
- */
-struct rte_vdev_driver {
- TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */
- struct rte_driver driver; /**< Inherited general driver. */
- rte_vdev_probe_t *probe; /**< Virtual device probe function. */
- rte_vdev_remove_t *remove; /**< Virtual device remove function. */
-};
-
-/**
- * Register a virtual device driver.
- *
- * @param driver
- * A pointer to a rte_vdev_driver structure describing the driver
- * to be registered.
- */
-void rte_vdev_register(struct rte_vdev_driver *driver);
-
-/**
- * Unregister a virtual device driver.
- *
- * @param driver
- * A pointer to a rte_vdev_driver structure describing the driver
- * to be unregistered.
- */
-void rte_vdev_unregister(struct rte_vdev_driver *driver);
-
-#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\
-RTE_INIT(vdrvinitfn_ ##vdrv);\
-static const char *vdrvinit_ ## nm ## _alias;\
-static void vdrvinitfn_ ##vdrv(void)\
-{\
- (vdrv).driver.name = RTE_STR(nm);\
- (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\
- rte_vdev_register(&vdrv);\
-} \
-RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
-
-#define RTE_PMD_REGISTER_ALIAS(nm, alias)\
-static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index a69a7075..d08cf48a 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -61,7 +61,7 @@ extern "C" {
/**
* Minor version/month number i.e. the mm in yy.mm.z
*/
-#define RTE_VER_MONTH 8
+#define RTE_VER_MONTH 11
/**
* Patch level number i.e. the z in yy.mm.z
@@ -71,14 +71,14 @@ extern "C" {
/**
* Extra string to be appended to version number
*/
-#define RTE_VER_SUFFIX ""
+#define RTE_VER_SUFFIX "-rc"
/**
* Patch release number
* 0-15 = release candidates
* 16 = release
*/
-#define RTE_VER_RELEASE 16
+#define RTE_VER_RELEASE 3
/**
* Macro to compute a version number usable for comparisons
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
new file mode 100644
index 00000000..a69c4ff6
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -0,0 +1,153 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 6WIND S.A. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_VFIO_H_
+#define _RTE_VFIO_H_
+
+/*
+ * determine if VFIO is present on the system
+ */
+#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO)
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
+#define VFIO_PRESENT
+#endif /* kernel version >= 3.6.0 */
+#endif /* RTE_EAL_VFIO */
+
+#ifdef VFIO_PRESENT
+
+#include <linux/vfio.h>
+
+#define VFIO_DIR "/dev/vfio"
+#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
+#define VFIO_GROUP_FMT "/dev/vfio/%u"
+#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
+#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
+#define VFIO_GET_REGION_IDX(x) (x >> 40)
+#define VFIO_NOIOMMU_MODE \
+ "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
+
+/**
+ * Setup vfio_cfg for the device identified by its address.
+ * It discovers the configured I/O MMU groups or sets a new one for the device.
+ * If a new groups is assigned, the DMA mapping is performed.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param sysfs_base
+ * sysfs path prefix.
+ *
+ * @param dev_addr
+ * device location.
+ *
+ * @param vfio_dev_fd
+ * VFIO fd.
+ *
+ * @param device_info
+ * Device information.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ * >1 if the device cannot be managed this way.
+ */
+int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+ int *vfio_dev_fd, struct vfio_device_info *device_info);
+
+/**
+ * Release a device mapped to a VFIO-managed I/O MMU group.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param sysfs_base
+ * sysfs path prefix.
+ *
+ * @param dev_addr
+ * device location.
+ *
+ * @param fd
+ * VFIO fd.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ */
+int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd);
+
+/**
+ * Enable a VFIO-related kmod.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param modname
+ * kernel module name.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ */
+int rte_vfio_enable(const char *modname);
+
+/**
+ * Check whether a VFIO-related kmod is enabled.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param modname
+ * kernel module name.
+ *
+ * @return
+ * !0 if true.
+ * 0 otherwise.
+ */
+int rte_vfio_is_enabled(const char *modname);
+
+/**
+ * Whether VFIO NOIOMMU mode is enabled.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @return
+ * !0 if true.
+ * 0 otherwise.
+ */
+int rte_vfio_noiommu_is_enabled(void);
+
+#endif /* VFIO_PRESENT */
+
+#endif /* _RTE_VFIO_H_ */