diff options
Diffstat (limited to 'lib/librte_eal/common/include')
21 files changed, 1302 insertions, 956 deletions
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h index 782350d1..aa887a97 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h @@ -136,7 +136,7 @@ vgetq_lane_p64(poly64x2_t x, const int lane) #endif /* - * If (0 <= index <= 15), then call the ASIMD ext intruction on the + * If (0 <= index <= 15), then call the ASIMD ext instruction on the * 128 bit regs v0 and v1 with the appropriate index. * * Else returns a zero vector. diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h index 2e04c759..fb3abf18 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h @@ -81,7 +81,7 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) : "memory" ); /* no-clobber list */ #else asm volatile ( - "mov %%ebx, %%edi\n" + "xchgl %%ebx, %%edi;\n" MPLOCKED "cmpxchg8b (%[dst]);" "setz %[res];" diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h new file mode 100644 index 00000000..010d752c --- /dev/null +++ b/lib/librte_eal/common/include/rte_bitmap.h @@ -0,0 +1,561 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_BITMAP_H__ +#define __INCLUDE_RTE_BITMAP_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Bitmap + * + * The bitmap component provides a mechanism to manage large arrays of bits + * through bit get/set/clear and bit array scan operations. + * + * The bitmap scan operation is optimized for 64-bit CPUs using 64/128 byte cache + * lines. The bitmap is hierarchically organized using two arrays (array1 and + * array2), with each bit in array1 being associated with a full cache line + * (512/1024 bits) of bitmap bits, which are stored in array2: the bit in array1 + * is set only when there is at least one bit set within its associated array2 + * bits, otherwise the bit in array1 is cleared. The read and write operations + * for array1 and array2 are always done in slabs of 64 bits. + * + * This bitmap is not thread safe. 
For lock free operation on a specific bitmap + * instance, a single writer thread performing bit set/clear operations is + * allowed, only the writer thread can do bitmap scan operations, while there + * can be several reader threads performing bit get operations in parallel with + * the writer thread. When the use of locking primitives is acceptable, the + * serialization of the bit set/clear and bitmap scan operations needs to be + * enforced by the caller, while the bit get operation does not require locking + * the bitmap. + * + ***/ + +#include <string.h> +#include <rte_common.h> +#include <rte_debug.h> +#include <rte_memory.h> +#include <rte_branch_prediction.h> +#include <rte_prefetch.h> + +#ifndef RTE_BITMAP_OPTIMIZATIONS +#define RTE_BITMAP_OPTIMIZATIONS 1 +#endif + +/* Slab */ +#define RTE_BITMAP_SLAB_BIT_SIZE 64 +#define RTE_BITMAP_SLAB_BIT_SIZE_LOG2 6 +#define RTE_BITMAP_SLAB_BIT_MASK (RTE_BITMAP_SLAB_BIT_SIZE - 1) + +/* Cache line (CL) */ +#define RTE_BITMAP_CL_BIT_SIZE (RTE_CACHE_LINE_SIZE * 8) +#define RTE_BITMAP_CL_BIT_SIZE_LOG2 (RTE_CACHE_LINE_SIZE_LOG2 + 3) +#define RTE_BITMAP_CL_BIT_MASK (RTE_BITMAP_CL_BIT_SIZE - 1) + +#define RTE_BITMAP_CL_SLAB_SIZE (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE) +#define RTE_BITMAP_CL_SLAB_SIZE_LOG2 (RTE_BITMAP_CL_BIT_SIZE_LOG2 - RTE_BITMAP_SLAB_BIT_SIZE_LOG2) +#define RTE_BITMAP_CL_SLAB_MASK (RTE_BITMAP_CL_SLAB_SIZE - 1) + +/** Bitmap data structure */ +struct rte_bitmap { + /* Context for array1 and array2 */ + uint64_t *array1; /**< Bitmap array1 */ + uint64_t *array2; /**< Bitmap array2 */ + uint32_t array1_size; /**< Number of 64-bit slabs in array1 that are actually used */ + uint32_t array2_size; /**< Number of 64-bit slabs in array2 */ + + /* Context for the "scan next" operation */ + uint32_t index1; /**< Bitmap scan: Index of current array1 slab */ + uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */ + uint32_t index2; /**< Bitmap scan: Index of current array2 
slab */
+	uint32_t go2; /**< Bitmap scan: Go/stop condition for current array2 cache line */
+
+	/* Storage space for array1 and array2 */
+	uint8_t memory[];
+};
+
+/*
+ * Advance the scan cursor to the next array1 slab, wrapping to slab 0 after
+ * the last one. The AND-mask wrap relies on array1_size being a power of two
+ * (__rte_bitmap_get_memory_footprint() rounds it up with rte_align32pow2()).
+ */
+static inline void
+__rte_bitmap_index1_inc(struct rte_bitmap *bmp)
+{
+	bmp->index1 = (bmp->index1 + 1) & (bmp->array1_size - 1);
+}
+
+/*
+ * Build the mask selecting the bits strictly above offset1 in the current
+ * array1 slab. The constant is unsigned long long so that the mask is a full
+ * 64 bits wide even on targets where unsigned long is only 32 bits.
+ */
+static inline uint64_t
+__rte_bitmap_mask1_get(struct rte_bitmap *bmp)
+{
+	return (~1llu) << bmp->offset1;
+}
+
+/*
+ * Point index2 at the first array2 slab of the cache line that is tracked by
+ * the array1 bit (index1, offset1).
+ */
+static inline void
+__rte_bitmap_index2_set(struct rte_bitmap *bmp)
+{
+	bmp->index2 = (((bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1) << RTE_BITMAP_CL_SLAB_SIZE_LOG2);
+}
+
+#if RTE_BITMAP_OPTIMIZATIONS
+
+/*
+ * Find the least significant set bit of a slab.
+ * Returns 0 (and leaves *pos untouched) when slab is 0, otherwise stores the
+ * bit index in *pos and returns 1.
+ */
+static inline int
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+	if (likely(slab == 0)) {
+		return 0;
+	}
+
+	*pos = __builtin_ctzll(slab);
+	return 1;
+}
+
+#else
+
+/*
+ * Portable variant of rte_bsf64(): same contract, implemented with a linear
+ * walk over the 64 bits of the slab.
+ */
+static inline int
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+	uint64_t mask;
+	uint32_t i;
+
+	if (likely(slab == 0)) {
+		return 0;
+	}
+
+	for (i = 0, mask = 1; i < RTE_BITMAP_SLAB_BIT_SIZE; i ++, mask <<= 1) {
+		if (unlikely(slab & mask)) {
+			*pos = i;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+#endif
+
+/*
+ * Compute the memory layout of a bitmap holding n_bits bits.
+ * The layout is: bitmap context followed by array1 (together rounded up to a
+ * whole number of cache lines), then array2 (one cache line per array1 bit).
+ * Each of the four output pointers is optional and skipped when NULL.
+ * Returns the total footprint in bytes.
+ */
+static inline uint32_t
+__rte_bitmap_get_memory_footprint(uint32_t n_bits,
+	uint32_t *array1_byte_offset, uint32_t *array1_slabs,
+	uint32_t *array2_byte_offset, uint32_t *array2_slabs)
+{
+	uint32_t n_slabs_context, n_slabs_array1, n_cache_lines_context_and_array1;
+	uint32_t n_cache_lines_array2;
+	uint32_t n_bytes_total;
+
+	n_cache_lines_array2 = (n_bits + RTE_BITMAP_CL_BIT_SIZE - 1) / RTE_BITMAP_CL_BIT_SIZE;
+	n_slabs_array1 = (n_cache_lines_array2 + RTE_BITMAP_SLAB_BIT_SIZE - 1) / RTE_BITMAP_SLAB_BIT_SIZE;
+	n_slabs_array1 = rte_align32pow2(n_slabs_array1);
+	n_slabs_context = (sizeof(struct rte_bitmap) + (RTE_BITMAP_SLAB_BIT_SIZE / 8) - 1) / (RTE_BITMAP_SLAB_BIT_SIZE / 8);
+	n_cache_lines_context_and_array1 = (n_slabs_context + n_slabs_array1 + RTE_BITMAP_CL_SLAB_SIZE - 1) / RTE_BITMAP_CL_SLAB_SIZE;
+	n_bytes_total = (n_cache_lines_context_and_array1 +
n_cache_lines_array2) * RTE_CACHE_LINE_SIZE;
+
+	if (array1_byte_offset) {
+		*array1_byte_offset = n_slabs_context * (RTE_BITMAP_SLAB_BIT_SIZE / 8);
+	}
+	if (array1_slabs) {
+		*array1_slabs = n_slabs_array1;
+	}
+	if (array2_byte_offset) {
+		*array2_byte_offset = n_cache_lines_context_and_array1 * RTE_CACHE_LINE_SIZE;
+	}
+	if (array2_slabs) {
+		*array2_slabs = n_cache_lines_array2 * RTE_BITMAP_CL_SLAB_SIZE;
+	}
+
+	return n_bytes_total;
+}
+
+static inline void
+__rte_bitmap_scan_init(struct rte_bitmap *bmp)
+{
+	bmp->index1 = bmp->array1_size - 1; /* park the cursor on the last array1 slab ... */
+	bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1; /* ... at its last bit, so the first search wraps to slab 0 */
+	__rte_bitmap_index2_set(bmp);
+	bmp->index2 += RTE_BITMAP_CL_SLAB_SIZE;
+
+	bmp->go2 = 0; /* no array2 line pending: the first scan starts with an array1 search */
+}
+
+/**
+ * Bitmap memory footprint calculation
+ *
+ * @param n_bits
+ *   Number of bits in the bitmap
+ * @return
+ *   Bitmap memory footprint measured in bytes on success, 0 on error
+ *   (n_bits equal to zero)
+ */
+static inline uint32_t
+rte_bitmap_get_memory_footprint(uint32_t n_bits) {
+	/* Check input arguments */
+	if (n_bits == 0) {
+		return 0;
+	}
+
+	return __rte_bitmap_get_memory_footprint(n_bits, NULL, NULL, NULL, NULL);
+}
+
+/**
+ * Bitmap initialization
+ *
+ * @param n_bits
+ *   Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @param mem
+ *   Base address of the bitmap memory area (bitmap context followed by array1
+ *   and array2). Must be non-NULL and cache line aligned.
+ * @param mem_size
+ *   Size, in bytes, of the memory area pointed to by mem. It is validated
+ *   against the footprint required for n_bits, see
+ *   rte_bitmap_get_memory_footprint().
+ * @return
+ *   Handle to bitmap instance, NULL when an input argument is rejected.
+ */
+static inline struct rte_bitmap *
+rte_bitmap_init(uint32_t n_bits, uint8_t *mem, uint32_t mem_size)
+{
+	struct rte_bitmap *bmp;
+	uint32_t array1_byte_offset, array1_slabs, array2_byte_offset, array2_slabs;
+	uint32_t size;
+
+	/* Check input arguments */
+	if (n_bits == 0) {
+		return NULL;
+	}
+
+	if ((mem == NULL) || (((uintptr_t) mem) & RTE_CACHE_LINE_MASK)) {
+		return NULL;
+	}
+
+	size = __rte_bitmap_get_memory_footprint(n_bits,
+		&array1_byte_offset, &array1_slabs,
+		&array2_byte_offset, &array2_slabs);
+	/*
+	 * The caller's buffer must be able to hold the whole footprint: the
+	 * memset() below writes "size" bytes starting at mem.
+	 */
+	if (mem_size < size) {
+		return NULL;
+	}
+
+	/* Setup bitmap */
+	memset(mem, 0, size);
+	bmp = (struct rte_bitmap *) mem;
+
+	bmp->array1 = (uint64_t *) &mem[array1_byte_offset];
+	bmp->array1_size = array1_slabs;
+	bmp->array2 = (uint64_t *) &mem[array2_byte_offset];
+	bmp->array2_size = array2_slabs;
+
+	__rte_bitmap_scan_init(bmp);
+
+	return bmp;
+}
+
+/**
+ * Bitmap free
+ *
+ * The bitmap lives in caller-provided memory, so this function only
+ * validates the handle; it does not release anything.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @return
+ *   0 upon success, -1 when bmp is NULL
+ */
+static inline int
+rte_bitmap_free(struct rte_bitmap *bmp)
+{
+	/* Check input arguments */
+	if (bmp == NULL) {
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * Bitmap reset
+ *
+ * Clears every bit of array1 and array2 and rewinds the scan state.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ */
+static inline void
+rte_bitmap_reset(struct rte_bitmap *bmp)
+{
+	memset(bmp->array1, 0, bmp->array1_size * sizeof(uint64_t));
+	memset(bmp->array2, 0, bmp->array2_size * sizeof(uint64_t));
+	__rte_bitmap_scan_init(bmp);
+}
+
+/**
+ * Bitmap location prefetch into CPU L1 cache
+ *
+ * Issues a prefetch for the array2 slab that contains the given bit.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ */
+static inline void
+rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint64_t *slab2;
+	uint32_t index2;
+
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	slab2 = bmp->array2 + index2;
+	rte_prefetch0((void *) slab2);
+}
+
+/**
+ * Bitmap bit get
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit
position
+ * @return
+ *   0 when bit is cleared, non-zero when bit is set
+ */
+static inline uint64_t
+rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint64_t *slab2;
+	uint32_t index2, offset2;
+
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+	/* 1llu: offset2 can be up to 63, so the shifted constant must be 64-bit */
+	return (*slab2) & (1llu << offset2);
+}
+
+/**
+ * Bitmap bit set
+ *
+ * Sets the bit in array2 and marks the enclosing array2 cache line as
+ * non-empty in array1.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ */
+static inline void
+rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint64_t *slab1, *slab2;
+	uint32_t index1, index2, offset1, offset2;
+
+	/* Set bit in array2 slab and set bit in array1 slab */
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+	slab1 = bmp->array1 + index1;
+
+	*slab2 |= 1llu << offset2;
+	*slab1 |= 1llu << offset1;
+}
+
+/**
+ * Bitmap slab set
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position identifying the array2 slab
+ * @param slab
+ *   Value to be OR-ed into the 64-bit slab in array2 (bits already set in
+ *   the slab are preserved)
+ */
+static inline void
+rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab)
+{
+	uint64_t *slab1, *slab2;
+	uint32_t index1, index2, offset1;
+
+	/* Set bits in array2 slab and set bit in array1 slab */
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+	slab1 = bmp->array1 + index1;
+
+	*slab2 |= slab;
+	*slab1 |= 1llu << offset1;
+}
+
+/*
+ * Returns non-zero when any of the 8 slabs starting at slab2 has a bit set.
+ * NOTE(review): only 8 slabs (64 bytes) are inspected; with a 128-byte cache
+ * line RTE_BITMAP_CL_SLAB_SIZE is 16 -- confirm whether the upper half of the
+ * line needs to be checked as well.
+ */
+static inline uint64_t
+__rte_bitmap_line_not_empty(uint64_t *slab2)
+{
+	uint64_t v1, v2, v3, v4;
+
+	v1 = slab2[0] | slab2[1];
+	v2 = slab2[2] | slab2[3];
+	v3 =
slab2[4] | slab2[5];
+	v4 = slab2[6] | slab2[7];
+	v1 |= v2;
+	v3 |= v4;
+
+	return v1 | v3;
+}
+
+/**
+ * Bitmap bit clear
+ *
+ * Clears the bit in array2; when this leaves the enclosing array2 cache line
+ * empty, the matching array1 bit is cleared as well.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ */
+static inline void
+rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint64_t *slab1, *slab2;
+	uint32_t index1, index2, offset1, offset2;
+
+	/* Clear bit in array2 slab */
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+	/* 1llu: offsets go up to 63, so the shifted constant must be 64-bit */
+	*slab2 &= ~(1llu << offset2);
+
+	/* Return if array2 slab is not all-zeros */
+	if (*slab2) {
+		return;
+	}
+
+	/* Check the entire cache line of array2 for all-zeros */
+	index2 &= ~ RTE_BITMAP_CL_SLAB_MASK;
+	slab2 = bmp->array2 + index2;
+	if (__rte_bitmap_line_not_empty(slab2)) {
+		return;
+	}
+
+	/* The array2 cache line is all-zeros, so clear bit in array1 slab */
+	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+	slab1 = bmp->array1 + index1;
+	*slab1 &= ~(1llu << offset1);
+
+	return;
+}
+
+/*
+ * Move the array1 cursor (index1, offset1) to the next set array1 bit.
+ * First the bits above offset1 in the current slab are examined, then
+ * array1_size slabs are visited starting with the following one; because of
+ * the wrap-around the starting slab is revisited in full on the last
+ * iteration.
+ * Returns 1 when a set bit was found, 0 when array1 is empty.
+ */
+static inline int
+__rte_bitmap_scan_search(struct rte_bitmap *bmp)
+{
+	uint64_t value1;
+	uint32_t i;
+
+	/* Check current array1 slab */
+	value1 = bmp->array1[bmp->index1];
+	value1 &= __rte_bitmap_mask1_get(bmp);
+
+	if (rte_bsf64(value1, &bmp->offset1)) {
+		return 1;
+	}
+
+	__rte_bitmap_index1_inc(bmp);
+	bmp->offset1 = 0;
+
+	/* Look for another array1 slab */
+	for (i = 0; i < bmp->array1_size; i ++, __rte_bitmap_index1_inc(bmp)) {
+		value1 = bmp->array1[bmp->index1];
+
+		if (rte_bsf64(value1, &bmp->offset1)) {
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Start reading the array2 cache line selected by (index1, offset1) and
+ * prefetch the slabs located 8 slabs further on.
+ * NOTE(review): the distance of 8 slabs equals one 64-byte cache line;
+ * confirm the intent for 128-byte cache lines.
+ */
+static inline void
+__rte_bitmap_scan_read_init(struct rte_bitmap *bmp)
+{
+	__rte_bitmap_index2_set(bmp);
+	bmp->go2 = 1;
+	rte_prefetch1((void *)(bmp->array2 + bmp->index2 + 8));
+}
+
+/*
+ * Return the next non-zero slab of the current array2 cache line, if any.
+ * On success stores the slab's first bit position in *pos and its value in
+ * *slab, advances past it and returns 1; returns 0 when the line is
+ * exhausted.
+ */
+static inline int
+__rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+	uint64_t
*slab2;
+
+	slab2 = bmp->array2 + bmp->index2;
+	for ( ; bmp->go2 ; bmp->index2 ++, slab2 ++, bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK) { /* go2 becomes 0 once index2 reaches the end of the cache line */
+		if (*slab2) {
+			*pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2; /* position of bit 0 of this slab */
+			*slab = *slab2;
+
+			bmp->index2 ++; /* step past the returned slab so it is not reported twice */
+			slab2 ++;
+			bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Bitmap scan (with automatic wrap-around)
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   When function call returns 1, pos contains the position of the first bit
+ *   of the returned slab (a multiple of 64; this bit is not necessarily set),
+ *   otherwise not modified
+ * @param slab
+ *   When function call returns 1, slab contains the value of the entire 64-bit
+ *   slab that starts at bit position pos, i.e. the array2 slab with index
+ *   pos / 64. The bits that are set have to be located by inspecting this
+ *   value. Once a slab has been returned by the bitmap
+ *   scan operation, the internal pointers of the bitmap are updated to point
+ *   after this slab, so the same slab will not be returned again if it
+ *   contains more than one bit which is set. When function call returns 0,
+ *   slab is not modified.
+ * @return
+ *   0 if there is no bit set in the bitmap, 1 otherwise
+ */
+static inline int
+rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+	/* Return data from current array2 line if available */
+	if (__rte_bitmap_scan_read(bmp, pos, slab)) {
+		return 1;
+	}
+
+	/* Look for non-empty array2 line */
+	if (__rte_bitmap_scan_search(bmp)) {
+		__rte_bitmap_scan_read_init(bmp);
+		__rte_bitmap_scan_read(bmp, pos, slab); /* NOTE(review): result is not checked; presumably an array1 bit is only ever set for a non-empty array2 line -- confirm */
+		return 1;
+	}
+
+	/* Empty bitmap */
+	return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_BITMAP_H__ */
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h index c79368d3..6fb08341 100644 --- a/lib/librte_eal/common/include/rte_bus.h +++ b/lib/librte_eal/common/include/rte_bus.h @@ -55,6 +55,21 @@ extern "C" { /** Double linked list of buses */ TAILQ_HEAD(rte_bus_list, rte_bus); + +/** + * IOVA mapping mode. + * + * IOVA mapping mode is the IOMMU programming mode of a device. + * That device (for example: IOMMU backed DMA device) based + * on rte_iova_mode will generate physical or virtual address. + * + */ +enum rte_iova_mode { + RTE_IOVA_DC = 0, /* Don't care mode */ + RTE_IOVA_PA = (1 << 0), /* DMA using physical address */ + RTE_IOVA_VA = (1 << 1) /* DMA using virtual address */ +}; + /** * Bus specific scan for devices attached on the bus. * For each bus object, the scan would be responsible for finding devices and @@ -168,6 +183,20 @@ struct rte_bus_conf { enum rte_bus_scan_mode scan_mode; /**< Scan policy. */ }; + +/** + * Get the common iommu class of all the devices on the bus. The bus may + * check that those devices are attached to an iommu driver. + * If no devices are attached to the bus, the bus may return the don't care + * (_DC) value. + * Otherwise, the bus will return the appropriate _PA or _VA iova mode. + * + * @return + * enum rte_iova_mode value.
+ */ +typedef enum rte_iova_mode (*rte_bus_get_iommu_class_t)(void); + + /** * A structure describing a generic bus. */ @@ -181,6 +210,7 @@ struct rte_bus { rte_bus_unplug_t unplug; /**< Remove single device from driver */ rte_bus_parse_t parse; /**< Parse a device name */ struct rte_bus_conf conf; /**< Bus configuration */ + rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */ }; /** @@ -280,12 +310,22 @@ struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev); */ struct rte_bus *rte_bus_find_by_name(const char *busname); + +/** + * Get the common iommu class of devices bound on to buses available in the + * system. The default mode is PA. + * + * @return + * enum rte_iova_mode value. + */ +enum rte_iova_mode rte_bus_get_iommu_class(void); + /** * Helper for Bus registration. * The constructor has higher priority than PMD constructors. */ #define RTE_REGISTER_BUS(nm, bus) \ -RTE_INIT_PRIO(businitfn_ ##nm, 101); \ +RTE_INIT_PRIO(businitfn_ ##nm, 110); \ static void businitfn_ ##nm(void) \ {\ (bus).name = RTE_STR(nm);\ diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index 1afc66e3..de853e16 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -109,6 +109,29 @@ typedef uint16_t unaligned_uint16_t; #define RTE_SET_USED(x) (void)(x) /** + * Run function before main() with low priority. + * + * The constructor will be run after prioritized constructors. + * + * @param func + * Constructor function. + */ +#define RTE_INIT(func) \ +static void __attribute__((constructor, used)) func(void) + +/** + * Run function before main() with high priority. + * + * @param func + * Constructor function. + * @param prio + * Priority number must be above 100. + * Lowest number is the first to run. 
+ */ +#define RTE_INIT_PRIO(func, prio) \ +static void __attribute__((constructor(prio), used)) func(void) + +/** * Force a function to be inlined */ #define __rte_always_inline inline __attribute__((always_inline)) diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h index cab6fb4c..79b67b3e 100644 --- a/lib/librte_eal/common/include/rte_debug.h +++ b/lib/librte_eal/common/include/rte_debug.h @@ -79,7 +79,7 @@ void rte_dump_registers(void); #define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy") #define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__) -#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG +#ifdef RTE_ENABLE_ASSERT #define RTE_ASSERT(exp) RTE_VERIFY(exp) #else #define RTE_ASSERT(exp) do {} while (0) diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index 5386d3a2..9342e0cb 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -49,7 +49,6 @@ extern "C" { #include <stdio.h> #include <sys/queue.h> -#include <rte_config.h> #include <rte_log.h> __attribute__((format(printf, 2, 0))) @@ -152,7 +151,11 @@ struct rte_driver { const char *alias; /**< Driver alias. */ }; -#define RTE_DEV_NAME_MAX_LEN (32) +/* + * Internal identifier length + * Sufficiently large to allow for UUID or PCI address + */ +#define RTE_DEV_NAME_MAX_LEN 64 /** * A structure describing a generic device. @@ -166,28 +169,6 @@ struct rte_device { }; /** - * Initialize a driver specified by name. - * - * @param name - * The pointer to a driver name to be initialized. - * @param args - * The pointer to arguments used by driver initialization. - * @return - * 0 on success, negative on error - */ -int rte_vdev_init(const char *name, const char *args); - -/** - * Uninitalize a driver specified by name. - * - * @param name - * The pointer to a driver name to be initialized. 
- * @return - * 0 on success, negative on error - */ -int rte_vdev_uninit(const char *name); - -/** * Attach a device to a registered driver. * * @param name @@ -312,4 +293,4 @@ __attribute__((used)) = str } #endif -#endif /* _RTE_VDEV_H_ */ +#endif /* _RTE_DEV_H_ */ diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index 0e7363d7..09b66819 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -44,7 +44,9 @@ #include <sched.h> #include <rte_per_lcore.h> -#include <rte_config.h> +#include <rte_bus.h> + +#include <rte_pci_dev_feature_defs.h> #ifdef __cplusplus extern "C" { @@ -87,6 +89,9 @@ struct rte_config { /** Primary or secondary configuration */ enum rte_proc_type_t process_type; + /** PA or VA mapping mode */ + enum rte_iova_mode iova_mode; + /** * Pointer to memory configuration, which may be shared across multiple * DPDK instances @@ -264,6 +269,32 @@ rte_set_application_usage_hook(rte_usage_hook_t usage_func); int rte_eal_has_hugepages(void); /** + * Whether EAL is using PCI bus. + * Disabled by --no-pci option. + * + * @return + * Nonzero if the PCI bus is enabled. + */ +int rte_eal_has_pci(void); + +/** + * Whether the EAL was asked to create UIO device. + * + * @return + * Nonzero if true. + */ +int rte_eal_create_uio_dev(void); + +/** + * The user-configured vfio interrupt mode. + * + * @return + * Interrupt mode configured with the command line, + * RTE_INTR_MODE_NONE by default. + */ +enum rte_intr_mode rte_eal_vfio_intr_mode(void); + +/** * A wrap API for syscall gettid. * * @return @@ -287,11 +318,22 @@ static inline int rte_gettid(void) return RTE_PER_LCORE(_thread_id); } -#define RTE_INIT(func) \ -static void __attribute__((constructor, used)) func(void) +/** + * Get the iova mode + * + * @return + * enum rte_iova_mode value. 
+ */ +enum rte_iova_mode rte_eal_iova_mode(void); -#define RTE_INIT_PRIO(func, prio) \ -static void __attribute__((constructor(prio), used)) func(void) +/** + * Get default pool ops name for mbuf + * + * @return + * returns default pool ops name. + */ +const char * +rte_eal_mbuf_default_mempool_ops(void); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_eal_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h new file mode 100644 index 00000000..031f78cc --- /dev/null +++ b/lib/librte_eal/common/include/rte_eal_interrupts.h @@ -0,0 +1,250 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_INTERRUPTS_H_ +#error "don't include this file directly, please include generic <rte_interrupts.h>" +#endif + +/** + * @file rte_eal_interrupts.h + * @internal + * + * Contains function prototypes exposed by the EAL for interrupt handling by + * drivers and other DPDK internal consumers. + */ + +#ifndef _RTE_EAL_INTERRUPTS_H_ +#define _RTE_EAL_INTERRUPTS_H_ + +#define RTE_MAX_RXTX_INTR_VEC_ID 32 +#define RTE_INTR_VEC_ZERO_OFFSET 0 +#define RTE_INTR_VEC_RXTX_OFFSET 1 + +/** + * The interrupt source type, e.g. UIO, VFIO, ALARM etc. 
+ */ +enum rte_intr_handle_type { + RTE_INTR_HANDLE_UNKNOWN = 0, /**< generic unknown handle */ + RTE_INTR_HANDLE_UIO, /**< uio device handle */ + RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */ + RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ + RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */ + RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */ + RTE_INTR_HANDLE_ALARM, /**< alarm handle */ + RTE_INTR_HANDLE_EXT, /**< external handler */ + RTE_INTR_HANDLE_VDEV, /**< virtual device */ + RTE_INTR_HANDLE_MAX /**< count of elements */ +}; + +#define RTE_INTR_EVENT_ADD 1UL +#define RTE_INTR_EVENT_DEL 2UL + +typedef void (*rte_intr_event_cb_t)(int fd, void *arg); + +struct rte_epoll_data { + uint32_t event; /**< event type */ + void *data; /**< User data */ + rte_intr_event_cb_t cb_fun; /**< IN: callback fun */ + void *cb_arg; /**< IN: callback arg */ +}; + +enum { + RTE_EPOLL_INVALID = 0, + RTE_EPOLL_VALID, + RTE_EPOLL_EXEC, +}; + +/** interrupt epoll event obj, taken by epoll_event.ptr */ +struct rte_epoll_event { + volatile uint32_t status; /**< OUT: event status */ + int fd; /**< OUT: event fd */ + int epfd; /**< OUT: epoll instance the ev associated with */ + struct rte_epoll_data epdata; +}; + +/** Handle for interrupts. 
*/
+struct rte_intr_handle {
+	RTE_STD_C11
+	union {
+		int vfio_dev_fd;  /**< VFIO device file descriptor */
+		int uio_cfg_fd;  /**< UIO cfg file desc for uio_pci_generic */
+	};
+	int fd;	 /**< interrupt event file descriptor */
+	enum rte_intr_handle_type type;  /**< handle type */
+	uint32_t max_intr;             /**< max interrupt requested */
+	uint32_t nb_efd;               /**< number of available efd(event fd) */
+	uint8_t efd_counter_size;      /**< size of efd counter, used for vdev */
+	int efds[RTE_MAX_RXTX_INTR_VEC_ID];  /**< intr vectors/efds mapping */
+	struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID];
+				       /**< intr vector epoll event */
+	int *intr_vec;                 /**< intr vector number array */
+};
+
+/* Parenthesized so the negative constant expands safely inside expressions. */
+#define RTE_EPOLL_PER_THREAD        (-1)  /**< to hint using per thread epfd */
+
+/**
+ * It waits for events on the epoll instance.
+ *
+ * @param epfd
+ *   Epoll instance fd on which the caller waits for events.
+ * @param events
+ *   Memory area that receives the events made available to the caller.
+ * @param maxevents
+ *   Up to maxevents are returned, must be greater than zero.
+ * @param timeout
+ *   Specifying a timeout of -1 causes the call to block indefinitely.
+ *   Specifying a timeout equal to zero causes the call to return immediately.
+ * @return
+ *   - On success, returns the number of available events.
+ *   - On failure, a negative value.
+ */
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+	       int maxevents, int timeout);
+
+/**
+ * It performs control operations on the epoll instance referred to by epfd.
+ * It requests that the operation op be performed for the target fd.
+ *
+ * @param epfd
+ *   Epoll instance fd on which the caller performs control operations.
+ * @param op
+ *   The operation to be performed for the target fd.
+ * @param fd
+ *   The target fd on which the control operation is performed.
+ * @param event
+ *   Describes the object linked to the fd.
+ *   Note: The caller must take care of the object deletion after CTL_DEL.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */ +int +rte_epoll_ctl(int epfd, int op, int fd, + struct rte_epoll_event *event); + +/** + * The function returns the per thread epoll instance. + * + * @return + * epfd the epoll instance referred to. + */ +int +rte_intr_tls_epfd(void); + +/** + * @param intr_handle + * Pointer to the interrupt handle. + * @param epfd + * Epoll instance fd which the intr vector associated to. + * @param op + * The operation be performed for the vector. + * Operation type of {ADD, DEL}. + * @param vec + * RX intr vector number added to the epoll instance wait list. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, + int epfd, int op, unsigned int vec, void *data); + +/** + * It deletes registered eventfds. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +void +rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle); + +/** + * It enables the packet I/O interrupt event if it's necessary. + * It creates event fd for each interrupt vector when MSIX is used, + * otherwise it multiplexes a single event fd. + * + * @param intr_handle + * Pointer to the interrupt handle. + * @param nb_efd + * Number of interrupt vector trying to enable. + * The value 0 is not allowed. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd); + +/** + * It disables the packet I/O interrupt event. + * It deletes registered eventfds and closes the open fds. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle); + +/** + * The packet I/O interrupt on datapath is enabled or not. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_dp_is_en(struct rte_intr_handle *intr_handle); + +/** + * The interrupt handle instance allows other causes or not. 
+ * Other causes stand for any non-packet I/O interrupts. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_allow_others(struct rte_intr_handle *intr_handle); + +/** + * The multiple interrupt vector capability of interrupt handle instance. + * It returns zero if multiple interrupt vectors are not supported. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); + +#endif /* _RTE_EAL_INTERRUPTS_H_ */ diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h index 5d06ed79..43177c7a 100644 --- a/lib/librte_eal/common/include/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -53,7 +53,7 @@ struct rte_intr_handle; /** Function to be registered for the specific interrupt */ typedef void (*rte_intr_callback_fn)(void *cb_arg); -#include <exec-env/rte_interrupts.h> +#include "rte_eal_interrupts.h" /** * It registers the callback for the specific interrupt. Multiple diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h index 50e0d0fe..c89e6bab 100644 --- a/lib/librte_eal/common/include/rte_lcore.h +++ b/lib/librte_eal/common/include/rte_lcore.h @@ -262,6 +262,20 @@ void rte_thread_get_affinity(rte_cpuset_t *cpusetp); */ int rte_thread_setname(pthread_t id, const char *name); +/** + * Test if the core supplied has a specific role + * + * @param lcore_id + * The identifier of the lcore, which MUST be between 0 and + * RTE_MAX_LCORE-1. + * @param role + * The role to be checked against. + * @return + * On success, return 0; otherwise return a negative value. 
+ */ +int +rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index ec8dba79..16564d41 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -87,6 +87,7 @@ extern struct rte_logs rte_logs; #define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */ #define RTE_LOGTYPE_EFD 18 /**< Log related to EFD. */ #define RTE_LOGTYPE_EVENTDEV 19 /**< Log related to eventdev. */ +#define RTE_LOGTYPE_GSO 20 /**< Log related to GSO. */ /* these log types can be used in an application */ #define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */ @@ -138,12 +139,6 @@ int rte_openlog_stream(FILE *f); void rte_log_set_global_level(uint32_t level); /** - * Deprecated, replaced by rte_log_set_global_level(). - */ -__rte_deprecated -void rte_set_log_level(uint32_t level); - -/** * Get the global log level. * * @return @@ -152,29 +147,6 @@ void rte_set_log_level(uint32_t level); uint32_t rte_log_get_global_level(void); /** - * Deprecated, replaced by rte_log_get_global_level(). - */ -__rte_deprecated -uint32_t rte_get_log_level(void); - -/** - * Enable or disable the log type. - * - * @param type - * Log type, for example, RTE_LOGTYPE_EAL. - * @param enable - * True for enable; false for disable. - */ -__rte_deprecated -void rte_set_log_type(uint32_t type, int enable); - -/** - * Get the global log type. - */ -__rte_deprecated -uint32_t rte_get_log_type(void); - -/** * Get the log level for a given type. 
* * @param logtype diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h index 3d37f79b..5d4c11a7 100644 --- a/lib/librte_eal/common/include/rte_malloc.h +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -323,17 +323,24 @@ int rte_malloc_set_limit(const char *type, size_t max); /** - * Return the physical address of a virtual address obtained through + * Return the IO address of a virtual address obtained through * rte_malloc * * @param addr * Address obtained from a previous rte_malloc call * @return - * RTE_BAD_PHYS_ADDR on error - * otherwise return physical address of the buffer + * RTE_BAD_IOVA on error + * otherwise return an address suitable for IO */ -phys_addr_t -rte_malloc_virt2phy(const void *addr); +rte_iova_t +rte_malloc_virt2iova(const void *addr); + +__rte_deprecated +static inline phys_addr_t +rte_malloc_virt2phy(const void *addr) +{ + return rte_malloc_virt2iova(addr); +} #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index 4aa5d1f7..14aacea5 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -44,12 +44,6 @@ #include <stddef.h> #include <stdio.h> -#include <rte_config.h> - -#ifdef RTE_EXEC_ENV_LINUXAPP -#include <exec-env/rte_dom0_common.h> -#endif - #ifdef __cplusplus extern "C" { #endif @@ -98,14 +92,27 @@ enum rte_page_sizes { */ #define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE) -typedef uint64_t phys_addr_t; /**< Physical address definition. */ +typedef uint64_t phys_addr_t; /**< Physical address. */ #define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1) +/** + * IO virtual address type. + * When the physical addressing mode (IOVA as PA) is in use, + * the translation from an IO virtual address (IOVA) to a physical address + * is a direct mapping, i.e. the same value. + * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation. 
+ */ +typedef uint64_t rte_iova_t; +#define RTE_BAD_IOVA ((rte_iova_t)-1) /** * Physical memory segment descriptor. */ struct rte_memseg { - phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 + union { + phys_addr_t phys_addr; /**< deprecated - Start physical address. */ + rte_iova_t iova; /**< Start IO address. */ + }; RTE_STD_C11 union { void *addr; /**< Start virtual address. */ @@ -116,10 +123,6 @@ struct rte_memseg { int32_t socket_id; /**< NUMA socket ID. */ uint32_t nchannel; /**< Number of channels. */ uint32_t nrank; /**< Number of ranks. */ -#ifdef RTE_LIBRTE_XEN_DOM0 - /**< store segment MFNs */ - uint64_t mfn[DOM0_NUM_MEMBLOCK]; -#endif } __rte_packed; /** @@ -140,11 +143,21 @@ int rte_mem_lock_page(const void *virt); * @param virt * The virtual address. * @return - * The physical address or RTE_BAD_PHYS_ADDR on error. + * The physical address or RTE_BAD_IOVA on error. */ phys_addr_t rte_mem_virt2phy(const void *virt); /** + * Get IO virtual address of any mapped virtual address in the current process. + * + * @param virt + * The virtual address. + * @return + * The IO address or RTE_BAD_IOVA on error. + */ +rte_iova_t rte_mem_virt2iova(const void *virt); + +/** * Get the layout of the available physical memory. * * It can be useful for an application to have the full physical @@ -195,68 +208,16 @@ unsigned rte_memory_get_nchannel(void); */ unsigned rte_memory_get_nrank(void); -#ifdef RTE_LIBRTE_XEN_DOM0 - -/**< Internal use only - should DOM0 memory mapping be used */ -int rte_xen_dom0_supported(void); - -/**< Internal use only - phys to virt mapping for xen */ -phys_addr_t rte_xen_mem_phy2mch(int32_t, const phys_addr_t); - /** - * Return the physical address of elt, which is an element of the pool mp. - * - * @param memseg_id - * Identifier of the memory segment owning the physical address. If - * set to -1, find it automatically. - * @param phy_addr - * physical address of elt. 
- * - * @return - * The physical address or RTE_BAD_PHYS_ADDR on error. - */ -static inline phys_addr_t -rte_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr) -{ - if (rte_xen_dom0_supported()) - return rte_xen_mem_phy2mch(memseg_id, phy_addr); - else - return phy_addr; -} - -/** - * Memory init for supporting application running on Xen domain0. - * - * @param void + * Drivers based on uio will not load unless physical + * addresses are obtainable. It is only possible to get + * physical addresses when running as a privileged user. * * @return - * 0: successfully - * negative: error + * 1 if the system is able to obtain physical addresses. + * 0 if using DMA addresses through an IOMMU. */ -int rte_xen_dom0_memory_init(void); - -/** - * Attach to memory setments of primary process on Xen domain0. - * - * @param void - * - * @return - * 0: successfully - * negative: error - */ -int rte_xen_dom0_memory_attach(void); -#else -static inline int rte_xen_dom0_supported(void) -{ - return 0; -} - -static inline phys_addr_t -rte_mem_phy2mch(int32_t memseg_id __rte_unused, const phys_addr_t phy_addr) -{ - return phy_addr; -} -#endif +int rte_eal_using_phys_addrs(void); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h index 1d0827f4..6f0ba182 100644 --- a/lib/librte_eal/common/include/rte_memzone.h +++ b/lib/librte_eal/common/include/rte_memzone.h @@ -78,7 +78,11 @@ struct rte_memzone { #define RTE_MEMZONE_NAMESIZE 32 /**< Maximum length of memory zone name.*/ char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */ - phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 + union { + phys_addr_t phys_addr; /**< deprecated - Start physical address. */ + rte_iova_t iova; /**< Start IO address. */ + }; RTE_STD_C11 union { void *addr; /**< Start virtual address. 
*/ diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h deleted file mode 100644 index 8b123391..00000000 --- a/lib/librte_eal/common/include/rte_pci.h +++ /dev/null @@ -1,598 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * Copyright 2013-2014 6WIND S.A. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _RTE_PCI_H_ -#define _RTE_PCI_H_ - -/** - * @file - * - * RTE PCI Interface - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include <stdio.h> -#include <stdlib.h> -#include <limits.h> -#include <errno.h> -#include <sys/queue.h> -#include <stdint.h> -#include <inttypes.h> - -#include <rte_debug.h> -#include <rte_interrupts.h> -#include <rte_dev.h> -#include <rte_bus.h> - -/** Pathname of PCI devices directory. */ -const char *pci_get_sysfs_path(void); - -/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ -#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 -#define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X") - -/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */ -#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 - -/** Nb. of values in PCI device identifier format string. */ -#define PCI_FMT_NVAL 4 - -/** Nb. of values in PCI resource format. */ -#define PCI_RESOURCE_FMT_NVAL 3 - -/** Maximum number of PCI resources. */ -#define PCI_MAX_RESOURCE 6 - -/* Forward declarations */ -struct rte_pci_device; -struct rte_pci_driver; - -/** List of PCI devices */ -TAILQ_HEAD(rte_pci_device_list, rte_pci_device); -/** List of PCI drivers */ -TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver); - -/* PCI Bus iterators */ -#define FOREACH_DEVICE_ON_PCIBUS(p) \ - TAILQ_FOREACH(p, &(rte_pci_bus.device_list), next) - -#define FOREACH_DRIVER_ON_PCIBUS(p) \ - TAILQ_FOREACH(p, &(rte_pci_bus.driver_list), next) - -/** - * A structure describing an ID for a PCI driver. Each driver provides a - * table of these IDs for each device that it supports. - */ -struct rte_pci_id { - uint32_t class_id; /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */ - uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */ - uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ - uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. 
*/ - uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */ -}; - -/** - * A structure describing the location of a PCI device. - */ -struct rte_pci_addr { - uint32_t domain; /**< Device domain */ - uint8_t bus; /**< Device bus */ - uint8_t devid; /**< Device ID */ - uint8_t function; /**< Device function. */ -}; - -struct rte_devargs; - -/** - * A structure describing a PCI device. - */ -struct rte_pci_device { - TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ - struct rte_device device; /**< Inherit core device */ - struct rte_pci_addr addr; /**< PCI location. */ - struct rte_pci_id id; /**< PCI ID. */ - struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; - /**< PCI Memory Resource */ - struct rte_intr_handle intr_handle; /**< Interrupt handle */ - struct rte_pci_driver *driver; /**< Associated driver */ - uint16_t max_vfs; /**< sriov enable if not zero */ - enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ - char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */ -}; - -/** - * @internal - * Helper macro for drivers that need to convert to struct rte_pci_device. - */ -#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device) - -/** Any PCI device identifier (vendor, device, ...) */ -#define PCI_ANY_ID (0xffff) -#define RTE_CLASS_ANY_ID (0xffffff) - -#ifdef __cplusplus -/** C++ macro used to help building up tables of device IDs */ -#define RTE_PCI_DEVICE(vend, dev) \ - RTE_CLASS_ANY_ID, \ - (vend), \ - (dev), \ - PCI_ANY_ID, \ - PCI_ANY_ID -#else -/** Macro used to help building up tables of device IDs */ -#define RTE_PCI_DEVICE(vend, dev) \ - .class_id = RTE_CLASS_ANY_ID, \ - .vendor_id = (vend), \ - .device_id = (dev), \ - .subsystem_vendor_id = PCI_ANY_ID, \ - .subsystem_device_id = PCI_ANY_ID -#endif - -/** - * Initialisation function for the driver called during PCI probing. 
- */ -typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *); - -/** - * Uninitialisation function for the driver called during hotplugging. - */ -typedef int (pci_remove_t)(struct rte_pci_device *); - -/** - * A structure describing a PCI driver. - */ -struct rte_pci_driver { - TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ - struct rte_driver driver; /**< Inherit core driver. */ - struct rte_pci_bus *bus; /**< PCI bus reference. */ - pci_probe_t *probe; /**< Device Probe function. */ - pci_remove_t *remove; /**< Device Remove function. */ - const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ - uint32_t drv_flags; /**< Flags contolling handling of device. */ -}; - -/** - * Structure describing the PCI bus - */ -struct rte_pci_bus { - struct rte_bus bus; /**< Inherit the generic class */ - struct rte_pci_device_list device_list; /**< List of PCI devices */ - struct rte_pci_driver_list driver_list; /**< List of PCI drivers */ -}; - -/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ -#define RTE_PCI_DRV_NEED_MAPPING 0x0001 -/** Device driver supports link state interrupt */ -#define RTE_PCI_DRV_INTR_LSC 0x0008 -/** Device driver supports device removal interrupt */ -#define RTE_PCI_DRV_INTR_RMV 0x0010 -/** Device driver needs to keep mapped resources if unsupported dev detected */ -#define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020 - -/** - * A structure describing a PCI mapping. - */ -struct pci_map { - void *addr; - char *path; - uint64_t offset; - uint64_t size; - uint64_t phaddr; -}; - -/** - * A structure describing a mapped PCI resource. - * For multi-process we need to reproduce all PCI mappings in secondary - * processes, so save them in a tailq. 
- */ -struct mapped_pci_resource { - TAILQ_ENTRY(mapped_pci_resource) next; - - struct rte_pci_addr pci_addr; - char path[PATH_MAX]; - int nb_maps; - struct pci_map maps[PCI_MAX_RESOURCE]; -}; - -/** mapped pci device list */ -TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); - -/**< Internal use only - Macro used by pci addr parsing functions **/ -#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \ -do { \ - unsigned long val; \ - char *end; \ - errno = 0; \ - val = strtoul((in), &end, 16); \ - if (errno != 0 || end[0] != (dlm) || val > (lim)) \ - return -EINVAL; \ - (fd) = (typeof (fd))val; \ - (in) = end + 1; \ -} while(0) - -/** - * Utility function to produce a PCI Bus-Device-Function value - * given a string representation. Assumes that the BDF is provided without - * a domain prefix (i.e. domain returned is always 0) - * - * @param input - * The input string to be parsed. Should have the format XX:XX.X - * @param dev_addr - * The PCI Bus-Device-Function address to be returned. Domain will always be - * returned as 0 - * @return - * 0 on success, negative on error. - */ -static inline int -eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr) -{ - dev_addr->domain = 0; - GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); - GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); - GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); - return 0; -} - -/** - * Utility function to produce a PCI Bus-Device-Function value - * given a string representation. Assumes that the BDF is provided including - * a domain prefix. - * - * @param input - * The input string to be parsed. Should have the format XXXX:XX:XX.X - * @param dev_addr - * The PCI Bus-Device-Function address to be returned - * @return - * 0 on success, negative on error. 
- */ -static inline int -eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) -{ - GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':'); - GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); - GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); - GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); - return 0; -} -#undef GET_PCIADDR_FIELD - -/** - * Utility function to write a pci device name, this device name can later be - * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_* - * BDF helpers. - * - * @param addr - * The PCI Bus-Device-Function address - * @param output - * The output buffer string - * @param size - * The output buffer size - */ -static inline void -rte_pci_device_name(const struct rte_pci_addr *addr, - char *output, size_t size) -{ - RTE_VERIFY(size >= PCI_PRI_STR_SIZE); - RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT, - addr->domain, addr->bus, - addr->devid, addr->function) >= 0); -} - -/* Compare two PCI device addresses. */ -/** - * Utility function to compare two PCI device addresses. - * - * @param addr - * The PCI Bus-Device-Function address to compare - * @param addr2 - * The PCI Bus-Device-Function address to compare - * @return - * 0 on equal PCI address. - * Positive on addr is greater than addr2. - * Negative on addr is less than addr2, or error. 
- */ -static inline int -rte_eal_compare_pci_addr(const struct rte_pci_addr *addr, - const struct rte_pci_addr *addr2) -{ - uint64_t dev_addr, dev_addr2; - - if ((addr == NULL) || (addr2 == NULL)) - return -1; - - dev_addr = ((uint64_t)addr->domain << 24) | - (addr->bus << 16) | (addr->devid << 8) | addr->function; - dev_addr2 = ((uint64_t)addr2->domain << 24) | - (addr2->bus << 16) | (addr2->devid << 8) | addr2->function; - - if (dev_addr > dev_addr2) - return 1; - else if (dev_addr < dev_addr2) - return -1; - else - return 0; -} - -/** - * Scan the content of the PCI bus, and the devices in the devices - * list - * - * @return - * 0 on success, negative on error - */ -int rte_pci_scan(void); - -/** - * Probe the PCI bus - * - * @return - * - 0 on success. - * - !0 on error. - */ -int -rte_pci_probe(void); - -/** - * Map the PCI device resources in user space virtual memory address - * - * Note that driver should not call this function when flag - * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for - * you when it's on. - * - * @param dev - * A pointer to a rte_pci_device structure describing the device - * to use - * - * @return - * 0 on success, negative on error and positive if no driver - * is found for the device. - */ -int rte_pci_map_device(struct rte_pci_device *dev); - -/** - * Unmap this device - * - * @param dev - * A pointer to a rte_pci_device structure describing the device - * to use - */ -void rte_pci_unmap_device(struct rte_pci_device *dev); - -/** - * @internal - * Map a particular resource from a file. - * - * @param requested_addr - * The starting address for the new mapping range. - * @param fd - * The file descriptor. - * @param offset - * The offset for the mapping range. - * @param size - * The size for the mapping range. - * @param additional_flags - * The additional flags for the mapping range. - * @return - * - On success, the function returns a pointer to the mapped area. - * - On error, the value MAP_FAILED is returned. 
- */ -void *pci_map_resource(void *requested_addr, int fd, off_t offset, - size_t size, int additional_flags); - -/** - * @internal - * Unmap a particular resource. - * - * @param requested_addr - * The address for the unmapping range. - * @param size - * The size for the unmapping range. - */ -void pci_unmap_resource(void *requested_addr, size_t size); - -/** - * Probe the single PCI device. - * - * Scan the content of the PCI bus, and find the pci device specified by pci - * address, then call the probe() function for registered driver that has a - * matching entry in its id_table for discovered device. - * - * @param addr - * The PCI Bus-Device-Function address to probe. - * @return - * - 0 on success. - * - Negative on error. - */ -int rte_pci_probe_one(const struct rte_pci_addr *addr); - -/** - * Close the single PCI device. - * - * Scan the content of the PCI bus, and find the pci device specified by pci - * address, then call the remove() function for registered driver that has a - * matching entry in its id_table for discovered device. - * - * @param addr - * The PCI Bus-Device-Function address to close. - * @return - * - 0 on success. - * - Negative on error. - */ -int rte_pci_detach(const struct rte_pci_addr *addr); - -/** - * Dump the content of the PCI bus. - * - * @param f - * A pointer to a file for output - */ -void rte_pci_dump(FILE *f); - -/** - * Register a PCI driver. - * - * @param driver - * A pointer to a rte_pci_driver structure describing the driver - * to be registered. - */ -void rte_pci_register(struct rte_pci_driver *driver); - -/** Helper for PCI device registration from driver (eth, crypto) instance */ -#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ -RTE_INIT(pciinitfn_ ##nm); \ -static void pciinitfn_ ##nm(void) \ -{\ - (pci_drv).driver.name = RTE_STR(nm);\ - rte_pci_register(&pci_drv); \ -} \ -RTE_PMD_EXPORT_NAME(nm, __COUNTER__) - -/** - * Unregister a PCI driver. 
- * - * @param driver - * A pointer to a rte_pci_driver structure describing the driver - * to be unregistered. - */ -void rte_pci_unregister(struct rte_pci_driver *driver); - -/** - * Read PCI config space. - * - * @param device - * A pointer to a rte_pci_device structure describing the device - * to use - * @param buf - * A data buffer where the bytes should be read into - * @param len - * The length of the data buffer. - * @param offset - * The offset into PCI config space - */ -int rte_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset); - -/** - * Write PCI config space. - * - * @param device - * A pointer to a rte_pci_device structure describing the device - * to use - * @param buf - * A data buffer containing the bytes should be written - * @param len - * The length of the data buffer. - * @param offset - * The offset into PCI config space - */ -int rte_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset); - -/** - * A structure used to access io resources for a pci device. - * rte_pci_ioport is arch, os, driver specific, and should not be used outside - * of pci ioport api. - */ -struct rte_pci_ioport { - struct rte_pci_device *dev; - uint64_t base; - uint64_t len; /* only filled for memory mapped ports */ -}; - -/** - * Initialize a rte_pci_ioport object for a pci device io resource. - * - * This object is then used to gain access to those io resources (see below). - * - * @param dev - * A pointer to a rte_pci_device structure describing the device - * to use. - * @param bar - * Index of the io pci resource we want to access. - * @param p - * The rte_pci_ioport object to be initialized. - * @return - * 0 on success, negative on error. - */ -int rte_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p); - -/** - * Release any resources used in a rte_pci_ioport object. - * - * @param p - * The rte_pci_ioport object to be uninitialized. 
- * @return - * 0 on success, negative on error. - */ -int rte_pci_ioport_unmap(struct rte_pci_ioport *p); - -/** - * Read from a io pci resource. - * - * @param p - * The rte_pci_ioport object from which we want to read. - * @param data - * A data buffer where the bytes should be read into - * @param len - * The length of the data buffer. - * @param offset - * The offset into the pci io resource. - */ -void rte_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset); - -/** - * Write to a io pci resource. - * - * @param p - * The rte_pci_ioport object to which we want to write. - * @param data - * A data buffer where the bytes should be read into - * @param len - * The length of the data buffer. - * @param offset - * The offset into the pci io resource. - */ -void rte_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset); - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_PCI_H_ */ diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h index 7c6f7383..92724406 100644 --- a/lib/librte_eal/common/include/rte_service.h +++ b/lib/librte_eal/common/include/rte_service.h @@ -61,9 +61,6 @@ extern "C" { #include <rte_lcore.h> -/* forward declaration only. Definition in rte_service_private.h */ -struct rte_service_spec; - #define RTE_SERVICE_NAME_MAX 32 /* Capabilities of a service. @@ -89,40 +86,32 @@ struct rte_service_spec; */ uint32_t rte_service_get_count(void); - /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Return the specification of a service by integer id. + * Return the id of a service by name. * - * This function provides the specification of a service. This can be used by - * the application to understand what the service represents. The service - * must not be modified by the application directly, only passed to the various - * rte_service_* functions. 
- * - * @param id The integer id of the service to retrieve - * @retval non-zero A valid pointer to the service_spec - * @retval NULL Invalid *id* provided. - */ -struct rte_service_spec *rte_service_get_by_id(uint32_t id); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice + * This function provides the id of the service using the service name as + * lookup key. The service id is to be passed to other functions in the + * rte_service_* API. * - * Return the specification of a service by name. - * - * This function provides the specification of a service using the service name - * as lookup key. This can be used by the application to understand what the - * service represents. The service must not be modified by the application - * directly, only passed to the various rte_service_* functions. + * Example usage: + * @code + * uint32_t service_id; + * int32_t ret = rte_service_get_by_name("service_X", &service_id); + * if (ret) { + * // handle error + * } + * @endcode * * @param name The name of the service to retrieve - * @retval non-zero A valid pointer to the service_spec - * @retval NULL Invalid *name* provided. + * @param[out] service_id A pointer to a uint32_t, to be filled in with the id. + * @retval 0 Success. The service id is provided in *service_id*. + * @retval -EINVAL Null *service_id* pointer provided + * @retval -ENODEV No such service registered */ -struct rte_service_spec *rte_service_get_by_name(const char *name); +int32_t rte_service_get_by_name(const char *name, uint32_t *service_id); /** * @warning @@ -133,7 +122,7 @@ struct rte_service_spec *rte_service_get_by_name(const char *name); * @return A pointer to the name of the service. The returned pointer remains * in ownership of the service, and the application must not free it. 
*/ -const char *rte_service_get_name(const struct rte_service_spec *service); +const char *rte_service_get_name(uint32_t id); /** * @warning @@ -146,17 +135,16 @@ const char *rte_service_get_name(const struct rte_service_spec *service); * @retval 1 Capability supported by this service instance * @retval 0 Capability not supported by this service instance */ -int32_t rte_service_probe_capability(const struct rte_service_spec *service, - uint32_t capability); +int32_t rte_service_probe_capability(uint32_t id, uint32_t capability); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Enable a core to run a service. + * Map or unmap a lcore to a service. * - * Each core can be added or removed from running specific services. This - * functions adds *lcore* to the set of cores that will run *service*. + * Each core can be added or removed from running a specific service. This + * function enables or disables *lcore* to run *service_id*. * * If multiple cores are enabled on a service, an atomic is used to ensure that * only one cores runs the service at a time. The exception to this is when @@ -164,82 +152,120 @@ int32_t rte_service_probe_capability(const struct rte_service_spec *service, * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set, * the service function can be run on multiple threads at the same time. * - * @retval 0 lcore added successfully + * @param service_id the service to apply the lcore to + * @param lcore The lcore that will be mapped to service + * @param enable Zero to unmap or disable the core, non-zero to enable + * + * @retval 0 lcore map updated successfully * @retval -EINVAL An invalid service or lcore was provided. 
*/ -int32_t rte_service_enable_on_lcore(struct rte_service_spec *service, - uint32_t lcore); +int32_t rte_service_map_lcore_set(uint32_t service_id, uint32_t lcore, + uint32_t enable); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Disable a core to run a service. + * Retrieve the mapping of an lcore to a service. * - * Each core can be added or removed from running specific services. This - * functions removes *lcore* to the set of cores that will run *service*. + * @param service_id the service to apply the lcore to + * @param lcore The lcore that will be mapped to service * - * @retval 0 Lcore removed successfully + * @retval 1 lcore is mapped to service + * @retval 0 lcore is not mapped to service * @retval -EINVAL An invalid service or lcore was provided. */ -int32_t rte_service_disable_on_lcore(struct rte_service_spec *service, - uint32_t lcore); +int32_t rte_service_map_lcore_get(uint32_t service_id, uint32_t lcore); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Return if an lcore is enabled for the service. + * Set the runstate of the service. * - * This function allows the application to query if *lcore* is currently set to - * run *service*. + * Each service is either running or stopped. Setting a non-zero runstate + * enables the service to run, while setting runstate zero disables it. * - * @retval 1 Lcore enabled on this lcore - * @retval 0 Lcore disabled on this lcore - * @retval -EINVAL An invalid service or lcore was provided. + * @param id The id of the service + * @param runstate The run state to apply to the service + * + * @retval 0 The service was successfully started + * @retval -EINVAL Invalid service id */ -int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service, - uint32_t lcore); - +int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Enable *service* to run. 
- * - * This function switches on a service during runtime. - * @retval 0 The service was successfully started + * Get the runstate for the service with *id*. See *rte_service_runstate_set* + * for details of runstates. A service can call this function to ensure that + * the application has indicated that it will receive CPU cycles. Either a + * service-core is mapped (default case), or the application has explicitly + * disabled the check that a service-core is mapped to the service and takes + * responsibility to run the service manually using the available function + * *rte_service_run_iter_on_app_lcore* to do so. + * + * @retval 1 Service is running + * @retval 0 Service is stopped + * @retval -EINVAL Invalid service id */ -int32_t rte_service_start(struct rte_service_spec *service); +int32_t rte_service_runstate_get(uint32_t id); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Disable *service*. + * Enable or disable the check for a service-core being mapped to the service. + * An application can disable the check when it takes the responsibility to run a + * service itself using *rte_service_run_iter_on_app_lcore*. + * + * @param id The id of the service to set the check on + * @param enable When zero, the check is disabled. Non-zero enables the check. * - * Switch off a service, so it is not run until it is *rte_service_start* is - * called on it. - * @retval 0 Service successfully switched off + * @retval 0 Success + * @retval -EINVAL Invalid service ID */ -int32_t rte_service_stop(struct rte_service_spec *service); +int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enable); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Returns if *service* is currently running. - * - * This function returns true if the service has been started using - * *rte_service_start*, AND a service core is mapped to the service. This - * function can be used to ensure that the service will be run. 
- * - * @retval 1 Service is currently running, and has a service lcore mapped - * @retval 0 Service is currently stopped, or no service lcore is mapped - * @retval -EINVAL Invalid service pointer provided + * This function runs a service callback from a non-service lcore. + * + * This function is designed to enable gradual porting to service cores, and + * to enable unit tests to verify a service behaves as expected. + * + * When called, this function ensures that the service identified by *id* is + * safe to run on this lcore. Multi-thread safe services are invoked even if + * other cores are simultaneously running them as they are multi-thread safe. + * + * Multi-thread unsafe services are handled depending on the variable + * *serialize_multithread_unsafe*: + * - When set, the function will check if a service is already being invoked + * on another lcore, refusing to run it and returning -EBUSY. + * - When zero, the application takes responsibility to ensure that the service + * indicated by *id* is not going to be invoked by another lcore. This setting + * avoids atomic operations, so is likely to be more performant. + * + * @param id The ID of the service to run + * @param serialize_multithread_unsafe This parameter indicates to the service + * cores library if it is required to use atomics to serialize access + * to multi-thread unsafe services. As there is an overhead in using + * atomics, applications can choose to enable or disable this feature + * + * Note that any thread calling this function MUST be a DPDK EAL thread, as + * the *rte_lcore_id* function is used to access internal data structures. 
+ * + * @retval 0 Service was run on the calling thread successfully + * @retval -EBUSY Another lcore is executing the service, and it is not a + * multi-thread safe service, so the service was not run on this lcore + * @retval -ENOEXEC Service is not in a run-able state + * @retval -EINVAL Invalid service id */ -int32_t rte_service_is_running(const struct rte_service_spec *service); +int32_t rte_service_run_iter_on_app_lcore(uint32_t id, + uint32_t serialize_multithread_unsafe); /** * @warning @@ -341,13 +367,12 @@ int32_t rte_service_lcore_reset_all(void); * Enable or disable statistics collection for *service*. * * This function enables per core, per-service cycle count collection. - * @param service The service to enable statistics gathering on. + * @param id The service to enable statistics gathering on. * @param enable Zero to disable statistics, non-zero to enable. * @retval 0 Success * @retval -EINVAL Invalid service pointer passed */ -int32_t rte_service_set_stats_enable(struct rte_service_spec *service, - int32_t enable); +int32_t rte_service_set_stats_enable(uint32_t id, int32_t enable); /** * @warning @@ -374,10 +399,26 @@ int32_t rte_service_lcore_list(uint32_t array[], uint32_t n); * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Dumps any information available about the service. If service is NULL, - * dumps info for all services. + * Get the number of services running on the supplied lcore. + * + * @param lcore Id of the service core. + * @retval >=0 Number of services registered to this core. + * @retval -EINVAL Invalid lcore provided + * @retval -ENOTSUP The provided lcore is not a service core. + */ +int32_t rte_service_lcore_count_services(uint32_t lcore); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Dumps any information available about the service. When id is UINT32_MAX, + * this function dumps info for all services. 
+ * + * @retval 0 Statistics have been successfully dumped + * @retval -EINVAL Invalid service id provided */ -int32_t rte_service_dump(FILE *f, struct rte_service_spec *service); +int32_t rte_service_dump(FILE *f, uint32_t id); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_service_component.h b/lib/librte_eal/common/include/rte_service_component.h index 7a946a1e..ac965cb4 100644 --- a/lib/librte_eal/common/include/rte_service_component.h +++ b/lib/librte_eal/common/include/rte_service_component.h @@ -85,21 +85,30 @@ struct rte_service_spec { * * For example the eventdev SW PMD requires CPU cycles to perform its * scheduling. This can be achieved by registering it as a service, and the - * application can then assign CPU resources to it using - * *rte_service_set_coremask*. + * application can then assign CPU resources to that service. + * + * Note that when a service component registers itself, it is not permitted to + * add or remove service-core threads, or modify lcore-to-service mappings. The + * only API that may be called by the service-component is + * *rte_service_component_runstate_set*, which indicates that the service + * component is ready to be executed. * * @param spec The specification of the service to register + * @param[out] service_id A pointer to a uint32_t, which will be filled in + * during registration of the service. It is set to the integer + * service number given to the service. This parameter may be NULL. * @retval 0 Successfully registered the service. * -EINVAL Attempted to register an invalid service (eg, no callback * set) */ -int32_t rte_service_register(const struct rte_service_spec *spec); +int32_t rte_service_component_register(const struct rte_service_spec *spec, + uint32_t *service_id); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Unregister a service. + * Unregister a service component. * * The service being removed must be stopped before calling this function. 
* @@ -107,7 +116,7 @@ int32_t rte_service_register(const struct rte_service_spec *spec); * @retval -EBUSY The service is currently running, stop the service before * calling unregister. No action has been taken. */ -int32_t rte_service_unregister(struct rte_service_spec *service); +int32_t rte_service_component_unregister(uint32_t id); /** * @warning @@ -131,6 +140,23 @@ int32_t rte_service_start_with_defaults(void); * @warning * @b EXPERIMENTAL: this API may change without prior notice * + * Set the backend runstate of a component. + * + * This function allows services to be registered at startup, but not yet + * enabled to run by default. When the service has been configured (via the + * usual method; eg rte_eventdev_configure), the service can mark itself as + * ready to run. The differentiation between backend runstate and + * service_runstate is that the backend runstate is set by the service + * component while the service runstate is reserved for application usage. + * + * @retval 0 Success + */ +int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * * Initialize the service library. * * In order to use the service library, it must be initialized. EAL initializes diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h deleted file mode 100644 index 29f5a523..00000000 --- a/lib/librte_eal/common/include/rte_vdev.h +++ /dev/null @@ -1,131 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2016 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef RTE_VDEV_H -#define RTE_VDEV_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/queue.h> -#include <rte_dev.h> -#include <rte_devargs.h> - -struct rte_vdev_device { - TAILQ_ENTRY(rte_vdev_device) next; /**< Next attached vdev */ - struct rte_device device; /**< Inherit core device */ -}; - -/** - * @internal - * Helper macro for drivers that need to convert to struct rte_vdev_device. 
- */ -#define RTE_DEV_TO_VDEV(ptr) \ - container_of(ptr, struct rte_vdev_device, device) - -static inline const char * -rte_vdev_device_name(const struct rte_vdev_device *dev) -{ - if (dev && dev->device.name) - return dev->device.name; - return NULL; -} - -static inline const char * -rte_vdev_device_args(const struct rte_vdev_device *dev) -{ - if (dev && dev->device.devargs) - return dev->device.devargs->args; - return ""; -} - -/** Double linked list of virtual device drivers. */ -TAILQ_HEAD(vdev_driver_list, rte_vdev_driver); - -/** - * Probe function called for each virtual device driver once. - */ -typedef int (rte_vdev_probe_t)(struct rte_vdev_device *dev); - -/** - * Remove function called for each virtual device driver once. - */ -typedef int (rte_vdev_remove_t)(struct rte_vdev_device *dev); - -/** - * A virtual device driver abstraction. - */ -struct rte_vdev_driver { - TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */ - struct rte_driver driver; /**< Inherited general driver. */ - rte_vdev_probe_t *probe; /**< Virtual device probe function. */ - rte_vdev_remove_t *remove; /**< Virtual device remove function. */ -}; - -/** - * Register a virtual device driver. - * - * @param driver - * A pointer to a rte_vdev_driver structure describing the driver - * to be registered. - */ -void rte_vdev_register(struct rte_vdev_driver *driver); - -/** - * Unregister a virtual device driver. - * - * @param driver - * A pointer to a rte_vdev_driver structure describing the driver - * to be unregistered. 
- */ -void rte_vdev_unregister(struct rte_vdev_driver *driver); - -#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\ -RTE_INIT(vdrvinitfn_ ##vdrv);\ -static const char *vdrvinit_ ## nm ## _alias;\ -static void vdrvinitfn_ ##vdrv(void)\ -{\ - (vdrv).driver.name = RTE_STR(nm);\ - (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\ - rte_vdev_register(&vdrv);\ -} \ -RTE_PMD_EXPORT_NAME(nm, __COUNTER__) - -#define RTE_PMD_REGISTER_ALIAS(nm, alias)\ -static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index a69a7075..d08cf48a 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -61,7 +61,7 @@ extern "C" { /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 8 +#define RTE_VER_MONTH 11 /** * Patch level number i.e. the z in yy.mm.z @@ -71,14 +71,14 @@ extern "C" { /** * Extra string to be appended to version number */ -#define RTE_VER_SUFFIX "" +#define RTE_VER_SUFFIX "-rc" /** * Patch release number * 0-15 = release candidates * 16 = release */ -#define RTE_VER_RELEASE 16 +#define RTE_VER_RELEASE 3 /** * Macro to compute a version number usable for comparisons diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h new file mode 100644 index 00000000..a69c4ff6 --- /dev/null +++ b/lib/librte_eal/common/include/rte_vfio.h @@ -0,0 +1,153 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 6WIND S.A. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_VFIO_H_ +#define _RTE_VFIO_H_ + +/* + * determine if VFIO is present on the system + */ +#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO) +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) +#define VFIO_PRESENT +#endif /* kernel version >= 3.6.0 */ +#endif /* RTE_EAL_VFIO */ + +#ifdef VFIO_PRESENT + +#include <linux/vfio.h> + +#define VFIO_DIR "/dev/vfio" +#define VFIO_CONTAINER_PATH "/dev/vfio/vfio" +#define VFIO_GROUP_FMT "/dev/vfio/%u" +#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" +#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) +#define VFIO_GET_REGION_IDX(x) (x >> 40) +#define VFIO_NOIOMMU_MODE \ + "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode" + +/** + * Setup vfio_cfg for the device identified by its address. + * It discovers the configured I/O MMU groups or sets a new one for the device. + * If a new group is assigned, the DMA mapping is performed. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param sysfs_base + * sysfs path prefix. + * + * @param dev_addr + * device location. + * + * @param vfio_dev_fd + * VFIO fd. + * + * @param device_info + * Device information. + * + * @return + * 0 on success. + * <0 on failure. + * >1 if the device cannot be managed this way. + */ +int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, + int *vfio_dev_fd, struct vfio_device_info *device_info); + +/** + * Release a device mapped to a VFIO-managed I/O MMU group. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param sysfs_base + * sysfs path prefix. + * + * @param dev_addr + * device location. + * + * @param fd + * VFIO fd. + * + * @return + * 0 on success. + * <0 on failure. + */ +int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); + +/** + * Enable a VFIO-related kmod. 
+ * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param modname + * kernel module name. + * + * @return + * 0 on success. + * <0 on failure. + */ +int rte_vfio_enable(const char *modname); + +/** + * Check whether a VFIO-related kmod is enabled. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param modname + * kernel module name. + * + * @return + * !0 if true. + * 0 otherwise. + */ +int rte_vfio_is_enabled(const char *modname); + +/** + * Whether VFIO NOIOMMU mode is enabled. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @return + * !0 if true. + * 0 otherwise. + */ +int rte_vfio_noiommu_is_enabled(void); + +#endif /* VFIO_PRESENT */ + +#endif /* _RTE_VFIO_H_ */ |