diff options
author | Hanoh Haim <hhaim@cisco.com> | 2015-06-24 14:03:29 +0300 |
---|---|---|
committer | Hanoh Haim <hhaim@cisco.com> | 2015-06-24 14:03:29 +0300 |
commit | 8b52a31ed2c299b759f330c4f976b9c70f5765f4 (patch) | |
tree | 9d6da5438b5b56b1d2d57e6c13494b4e65d000e7 /src/dpdk_lib18/librte_lpm |
first version
Diffstat (limited to 'src/dpdk_lib18/librte_lpm')
-rwxr-xr-x | src/dpdk_lib18/librte_lpm/Makefile | 49 | ||||
-rwxr-xr-x | src/dpdk_lib18/librte_lpm/rte_lpm.c | 1017 | ||||
-rwxr-xr-x | src/dpdk_lib18/librte_lpm/rte_lpm.h | 472 | ||||
-rwxr-xr-x | src/dpdk_lib18/librte_lpm/rte_lpm6.c | 892 | ||||
-rwxr-xr-x | src/dpdk_lib18/librte_lpm/rte_lpm6.h | 228 |
5 files changed, 2658 insertions, 0 deletions
diff --git a/src/dpdk_lib18/librte_lpm/Makefile b/src/dpdk_lib18/librte_lpm/Makefile new file mode 100755 index 00000000..fa94163f --- /dev/null +++ b/src/dpdk_lib18/librte_lpm/Makefile @@ -0,0 +1,49 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_lpm.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_LPM) := rte_lpm.c rte_lpm6.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include := rte_lpm.h rte_lpm6.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_LPM) += lib/librte_eal lib/librte_malloc + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/dpdk_lib18/librte_lpm/rte_lpm.c b/src/dpdk_lib18/librte_lpm/rte_lpm.c new file mode 100755 index 00000000..983e04b1 --- /dev/null +++ b/src/dpdk_lib18/librte_lpm/rte_lpm.c @@ -0,0 +1,1017 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> +#include <stdint.h> +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <errno.h> +#include <sys/queue.h> + +#include <rte_log.h> +#include <rte_branch_prediction.h> +#include <rte_common.h> +#include <rte_memory.h> /* for definition of RTE_CACHE_LINE_SIZE */ +#include <rte_malloc.h> +#include <rte_memzone.h> +#include <rte_tailq.h> +#include <rte_eal.h> +#include <rte_eal_memconfig.h> +#include <rte_per_lcore.h> +#include <rte_string_fns.h> +#include <rte_errno.h> +#include <rte_rwlock.h> +#include <rte_spinlock.h> + +#include "rte_lpm.h" + +TAILQ_HEAD(rte_lpm_list, rte_tailq_entry); + +#define MAX_DEPTH_TBL24 24 + +enum valid_flag { + INVALID = 0, + VALID +}; + +/* Macro to enable/disable run-time checks. */ +#if defined(RTE_LIBRTE_LPM_DEBUG) +#include <rte_debug.h> +#define VERIFY_DEPTH(depth) do { \ + if ((depth == 0) || (depth > RTE_LPM_MAX_DEPTH)) \ + rte_panic("LPM: Invalid depth (%u) at line %d", \ + (unsigned)(depth), __LINE__); \ +} while (0) +#else +#define VERIFY_DEPTH(depth) +#endif + +/* + * Converts a given depth value to its corresponding mask value. + * + * depth (IN) : range = 1 - 32 + * mask (OUT) : 32bit mask + */ +static uint32_t __attribute__((pure)) +depth_to_mask(uint8_t depth) +{ + VERIFY_DEPTH(depth); + + /* To calculate a mask start with a 1 on the left hand side and right + * shift while populating the left hand side with 1's + */ + return (int)0x80000000 >> (depth - 1); +} + +/* + * Converts given depth value to its corresponding range value. + */ +static inline uint32_t __attribute__((pure)) +depth_to_range(uint8_t depth) +{ + VERIFY_DEPTH(depth); + + /* + * Calculate tbl24 range. (Note: 2^depth = 1 << depth) + */ + if (depth <= MAX_DEPTH_TBL24) + return 1 << (MAX_DEPTH_TBL24 - depth); + + /* Else if depth is greater than 24 */ + return (1 << (RTE_LPM_MAX_DEPTH - depth)); +} + +/* + * Find an existing lpm table and return a pointer to it. + */ +struct rte_lpm * +rte_lpm_find_existing(const char *name) +{ + struct rte_lpm *l = NULL; + struct rte_tailq_entry *te; + struct rte_lpm_list *lpm_list; + + /* check that we have an initialised tail queue */ + if ((lpm_list = RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_LPM, + rte_lpm_list)) == NULL) { + rte_errno = E_RTE_NO_TAILQ; + return NULL; + } + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, lpm_list, next) { + l = (struct rte_lpm *) te->data; + if (strncmp(name, l->name, RTE_LPM_NAMESIZE) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return l; +} + +/* + * Allocates memory for LPM object + */ +struct rte_lpm * +rte_lpm_create(const char *name, int socket_id, int max_rules, + __rte_unused int flags) +{ + char mem_name[RTE_LPM_NAMESIZE]; + struct rte_lpm *lpm = NULL; + struct rte_tailq_entry *te; + uint32_t mem_size; + struct rte_lpm_list *lpm_list; + + /* check that we have an initialised tail queue */ + if ((lpm_list = RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_LPM, + rte_lpm_list)) == NULL) { + rte_errno = E_RTE_NO_TAILQ; + return NULL; + } + + RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl24_entry) != 2); + RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl8_entry) != 2); + + /* Check user arguments. */ + if ((name == NULL) || (socket_id < -1) || (max_rules == 0)){ + rte_errno = EINVAL; + return NULL; + } + + snprintf(mem_name, sizeof(mem_name), "LPM_%s", name); + + /* Determine the amount of memory to allocate. */ + mem_size = sizeof(*lpm) + (sizeof(lpm->rules_tbl[0]) * max_rules); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* guarantee there's no existing */ + TAILQ_FOREACH(te, lpm_list, next) { + lpm = (struct rte_lpm *) te->data; + if (strncmp(name, lpm->name, RTE_LPM_NAMESIZE) == 0) + break; + } + if (te != NULL) + goto exit; + + /* allocate tailq entry */ + te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, LPM, "Failed to allocate tailq entry\n"); + goto exit; + } + + /* Allocate memory to store the LPM data structures. */ + lpm = (struct rte_lpm *)rte_zmalloc_socket(mem_name, mem_size, + RTE_CACHE_LINE_SIZE, socket_id); + if (lpm == NULL) { + RTE_LOG(ERR, LPM, "LPM memory allocation failed\n"); + rte_free(te); + goto exit; + } + + /* Save user arguments. */ + lpm->max_rules = max_rules; + snprintf(lpm->name, sizeof(lpm->name), "%s", name); + + te->data = (void *) lpm; + + TAILQ_INSERT_TAIL(lpm_list, te, next); + +exit: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return lpm; +} + +/* + * Deallocates memory for given LPM table. + */ +void +rte_lpm_free(struct rte_lpm *lpm) +{ + struct rte_lpm_list *lpm_list; + struct rte_tailq_entry *te; + + /* Check user arguments. */ + if (lpm == NULL) + return; + + /* check that we have an initialised tail queue */ + if ((lpm_list = + RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_LPM, rte_lpm_list)) == NULL) { + rte_errno = E_RTE_NO_TAILQ; + return; + } + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find our tailq entry */ + TAILQ_FOREACH(te, lpm_list, next) { + if (te->data == (void *) lpm) + break; + } + if (te == NULL) { + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return; + } + + TAILQ_REMOVE(lpm_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + rte_free(lpm); + rte_free(te); +} + +/* + * Adds a rule to the rule table. + * + * NOTE: The rule table is split into 32 groups. Each group contains rules that + * apply to a specific prefix depth (i.e. group 1 contains rules that apply to + * prefixes with a depth of 1 etc.). In the following code (depth - 1) is used + * to refer to depth 1 because even though the depth range is 1 - 32, depths + * are stored in the rule table from 0 - 31. + * NOTE: Valid range for depth parameter is 1 .. 32 inclusive. + */ +static inline int32_t +rule_add(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth, + uint8_t next_hop) +{ + uint32_t rule_gindex, rule_index, last_rule; + int i; + + VERIFY_DEPTH(depth); + + /* Scan through rule group to see if rule already exists. */ + if (lpm->rule_info[depth - 1].used_rules > 0) { + + /* rule_gindex stands for rule group index. */ + rule_gindex = lpm->rule_info[depth - 1].first_rule; + /* Initialise rule_index to point to start of rule group. */ + rule_index = rule_gindex; + /* Last rule = Last used rule in this rule group. */ + last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules; + + for (; rule_index < last_rule; rule_index++) { + + /* If rule already exists update its next_hop and return. */ + if (lpm->rules_tbl[rule_index].ip == ip_masked) { + lpm->rules_tbl[rule_index].next_hop = next_hop; + + return rule_index; + } + } + } else { + /* Calculate the position in which the rule will be stored. */ + rule_index = 0; + + for (i = depth - 1; i > 0; i--) { + if (lpm->rule_info[i - 1].used_rules > 0) { + rule_index = lpm->rule_info[i - 1].first_rule + lpm->rule_info[i - 1].used_rules; + break; + } + } + if (rule_index == lpm->max_rules) + return -ENOSPC; + + lpm->rule_info[depth - 1].first_rule = rule_index; + } + + /* Make room for the new rule in the array. */ + for (i = RTE_LPM_MAX_DEPTH; i > depth; i--) { + if (lpm->rule_info[i - 1].first_rule + lpm->rule_info[i - 1].used_rules == lpm->max_rules) + return -ENOSPC; + + if (lpm->rule_info[i - 1].used_rules > 0) { + lpm->rules_tbl[lpm->rule_info[i - 1].first_rule + lpm->rule_info[i - 1].used_rules] + = lpm->rules_tbl[lpm->rule_info[i - 1].first_rule]; + lpm->rule_info[i - 1].first_rule++; + } + } + + /* Add the new rule. */ + lpm->rules_tbl[rule_index].ip = ip_masked; + lpm->rules_tbl[rule_index].next_hop = next_hop; + + /* Increment the used rules counter for this rule group. */ + lpm->rule_info[depth - 1].used_rules++; + + return rule_index; +} + +/* + * Delete a rule from the rule table. + * NOTE: Valid range for depth parameter is 1 .. 32 inclusive. + */ +static inline void +rule_delete(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth) +{ + int i; + + VERIFY_DEPTH(depth); + + lpm->rules_tbl[rule_index] = lpm->rules_tbl[lpm->rule_info[depth - 1].first_rule + + lpm->rule_info[depth - 1].used_rules - 1]; + + for (i = depth; i < RTE_LPM_MAX_DEPTH; i++) { + if (lpm->rule_info[i].used_rules > 0) { + lpm->rules_tbl[lpm->rule_info[i].first_rule - 1] = + lpm->rules_tbl[lpm->rule_info[i].first_rule + lpm->rule_info[i].used_rules - 1]; + lpm->rule_info[i].first_rule--; + } + } + + lpm->rule_info[depth - 1].used_rules--; +} + +/* + * Finds a rule in rule table. + * NOTE: Valid range for depth parameter is 1 .. 32 inclusive. + */ +static inline int32_t +rule_find(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth) +{ + uint32_t rule_gindex, last_rule, rule_index; + + VERIFY_DEPTH(depth); + + rule_gindex = lpm->rule_info[depth - 1].first_rule; + last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules; + + /* Scan used rules at given depth to find rule. */ + for (rule_index = rule_gindex; rule_index < last_rule; rule_index++) { + /* If rule is found return the rule index. */ + if (lpm->rules_tbl[rule_index].ip == ip_masked) + return (rule_index); + } + + /* If rule is not found return -E_RTE_NO_TAILQ. */ + return -E_RTE_NO_TAILQ; +} + +/* + * Find, clean and allocate a tbl8. + */ +static inline int32_t +tbl8_alloc(struct rte_lpm_tbl8_entry *tbl8) +{ + uint32_t tbl8_gindex; /* tbl8 group index. */ + struct rte_lpm_tbl8_entry *tbl8_entry; + + /* Scan through tbl8 to find a free (i.e. INVALID) tbl8 group. */ + for (tbl8_gindex = 0; tbl8_gindex < RTE_LPM_TBL8_NUM_GROUPS; + tbl8_gindex++) { + tbl8_entry = &tbl8[tbl8_gindex * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES]; + /* If a free tbl8 group is found clean it and set as VALID. */ + if (!tbl8_entry->valid_group) { + memset(&tbl8_entry[0], 0, + RTE_LPM_TBL8_GROUP_NUM_ENTRIES * + sizeof(tbl8_entry[0])); + + tbl8_entry->valid_group = VALID; + + /* Return group index for allocated tbl8 group. */ + return tbl8_gindex; + } + } + + /* If there are no tbl8 groups free then return error. */ + return -ENOSPC; +} + +static inline void +tbl8_free(struct rte_lpm_tbl8_entry *tbl8, uint32_t tbl8_group_start) +{ + /* Set tbl8 group invalid*/ + tbl8[tbl8_group_start].valid_group = INVALID; +} + +static inline int32_t +add_depth_small(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, + uint8_t next_hop) +{ + uint32_t tbl24_index, tbl24_range, tbl8_index, tbl8_group_end, i, j; + + /* Calculate the index into Table24. */ + tbl24_index = ip >> 8; + tbl24_range = depth_to_range(depth); + + for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) { + /* + * For invalid OR valid and non-extended tbl 24 entries set + * entry. + */ + if (!lpm->tbl24[i].valid || (lpm->tbl24[i].ext_entry == 0 && + lpm->tbl24[i].depth <= depth)) { + + struct rte_lpm_tbl24_entry new_tbl24_entry = { + { .next_hop = next_hop, }, + .valid = VALID, + .ext_entry = 0, + .depth = depth, + }; + + /* Setting tbl24 entry in one go to avoid race + * conditions */ + lpm->tbl24[i] = new_tbl24_entry; + + continue; + } + + /* If tbl24 entry is valid and extended calculate the index + * into tbl8. */ + tbl8_index = lpm->tbl24[i].tbl8_gindex * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_group_end = tbl8_index + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + for (j = tbl8_index; j < tbl8_group_end; j++) { + if (!lpm->tbl8[j].valid || + lpm->tbl8[j].depth <= depth) { + struct rte_lpm_tbl8_entry new_tbl8_entry = { + .valid = VALID, + .valid_group = VALID, + .depth = depth, + .next_hop = next_hop, + }; + + /* + * Setting tbl8 entry in one go to avoid race + * conditions + */ + lpm->tbl8[j] = new_tbl8_entry; + + continue; + } + } + } + + return 0; +} + +static inline int32_t +add_depth_big(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth, + uint8_t next_hop) +{ + uint32_t tbl24_index; + int32_t tbl8_group_index, tbl8_group_start, tbl8_group_end, tbl8_index, + tbl8_range, i; + + tbl24_index = (ip_masked >> 8); + tbl8_range = depth_to_range(depth); + + if (!lpm->tbl24[tbl24_index].valid) { + /* Search for a free tbl8 group. */ + tbl8_group_index = tbl8_alloc(lpm->tbl8); + + /* Check tbl8 allocation was successful. */ + if (tbl8_group_index < 0) { + return tbl8_group_index; + } + + /* Find index into tbl8 and range. */ + tbl8_index = (tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES) + + (ip_masked & 0xFF); + + /* Set tbl8 entry. */ + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + lpm->tbl8[i].depth = depth; + lpm->tbl8[i].next_hop = next_hop; + lpm->tbl8[i].valid = VALID; + } + + /* + * Update tbl24 entry to point to new tbl8 entry. Note: The + * ext_flag and tbl8_index need to be updated simultaneously, + * so assign whole structure in one go + */ + + struct rte_lpm_tbl24_entry new_tbl24_entry = { + { .tbl8_gindex = (uint8_t)tbl8_group_index, }, + .valid = VALID, + .ext_entry = 1, + .depth = 0, + }; + + lpm->tbl24[tbl24_index] = new_tbl24_entry; + + }/* If valid entry but not extended calculate the index into Table8. */ + else if (lpm->tbl24[tbl24_index].ext_entry == 0) { + /* Search for free tbl8 group. */ + tbl8_group_index = tbl8_alloc(lpm->tbl8); + + if (tbl8_group_index < 0) { + return tbl8_group_index; + } + + tbl8_group_start = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_group_end = tbl8_group_start + + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + /* Populate new tbl8 with tbl24 value. */ + for (i = tbl8_group_start; i < tbl8_group_end; i++) { + lpm->tbl8[i].valid = VALID; + lpm->tbl8[i].depth = lpm->tbl24[tbl24_index].depth; + lpm->tbl8[i].next_hop = + lpm->tbl24[tbl24_index].next_hop; + } + + tbl8_index = tbl8_group_start + (ip_masked & 0xFF); + + /* Insert new rule into the tbl8 entry. */ + for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) { + if (!lpm->tbl8[i].valid || + lpm->tbl8[i].depth <= depth) { + lpm->tbl8[i].valid = VALID; + lpm->tbl8[i].depth = depth; + lpm->tbl8[i].next_hop = next_hop; + + continue; + } + } + + /* + * Update tbl24 entry to point to new tbl8 entry. Note: The + * ext_flag and tbl8_index need to be updated simultaneously, + * so assign whole structure in one go. + */ + + struct rte_lpm_tbl24_entry new_tbl24_entry = { + { .tbl8_gindex = (uint8_t)tbl8_group_index, }, + .valid = VALID, + .ext_entry = 1, + .depth = 0, + }; + + lpm->tbl24[tbl24_index] = new_tbl24_entry; + + } + else { /* + * If it is valid, extended entry calculate the index into tbl8. + */ + tbl8_group_index = lpm->tbl24[tbl24_index].tbl8_gindex; + tbl8_group_start = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_index = tbl8_group_start + (ip_masked & 0xFF); + + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + + if (!lpm->tbl8[i].valid || + lpm->tbl8[i].depth <= depth) { + struct rte_lpm_tbl8_entry new_tbl8_entry = { + .valid = VALID, + .depth = depth, + .next_hop = next_hop, + .valid_group = lpm->tbl8[i].valid_group, + }; + + /* + * Setting tbl8 entry in one go to avoid race + * condition + */ + lpm->tbl8[i] = new_tbl8_entry; + + continue; + } + } + } + + return 0; +} + +/* + * Add a route + */ +int +rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, + uint8_t next_hop) +{ + int32_t rule_index, status = 0; + uint32_t ip_masked; + + /* Check user arguments. */ + if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) + return -EINVAL; + + ip_masked = ip & depth_to_mask(depth); + + /* Add the rule to the rule table. */ + rule_index = rule_add(lpm, ip_masked, depth, next_hop); + + /* If the is no space available for new rule return error. */ + if (rule_index < 0) { + return rule_index; + } + + if (depth <= MAX_DEPTH_TBL24) { + status = add_depth_small(lpm, ip_masked, depth, next_hop); + } + else { /* If depth > RTE_LPM_MAX_DEPTH_TBL24 */ + status = add_depth_big(lpm, ip_masked, depth, next_hop); + + /* + * If add fails due to exhaustion of tbl8 extensions delete + * rule that was added to rule table. + */ + if (status < 0) { + rule_delete(lpm, rule_index, depth); + + return status; + } + } + + return 0; +} + +/* + * Look for a rule in the high-level rules table + */ +int +rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, +uint8_t *next_hop) +{ + uint32_t ip_masked; + int32_t rule_index; + + /* Check user arguments. */ + if ((lpm == NULL) || + (next_hop == NULL) || + (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) + return -EINVAL; + + /* Look for the rule using rule_find. */ + ip_masked = ip & depth_to_mask(depth); + rule_index = rule_find(lpm, ip_masked, depth); + + if (rule_index >= 0) { + *next_hop = lpm->rules_tbl[rule_index].next_hop; + return 1; + } + + /* If rule is not found return 0. */ + return 0; +} + +static inline int32_t +find_previous_rule(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, uint8_t *sub_rule_depth) +{ + int32_t rule_index; + uint32_t ip_masked; + uint8_t prev_depth; + + for (prev_depth = (uint8_t)(depth - 1); prev_depth > 0; prev_depth--) { + ip_masked = ip & depth_to_mask(prev_depth); + + rule_index = rule_find(lpm, ip_masked, prev_depth); + + if (rule_index >= 0) { + *sub_rule_depth = prev_depth; + return rule_index; + } + } + + return -1; +} + +static inline int32_t +delete_depth_small(struct rte_lpm *lpm, uint32_t ip_masked, + uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth) +{ + uint32_t tbl24_range, tbl24_index, tbl8_group_index, tbl8_index, i, j; + + /* Calculate the range and index into Table24. */ + tbl24_range = depth_to_range(depth); + tbl24_index = (ip_masked >> 8); + + /* + * Firstly check the sub_rule_index. A -1 indicates no replacement rule + * and a positive number indicates a sub_rule_index. + */ + if (sub_rule_index < 0) { + /* + * If no replacement rule exists then invalidate entries + * associated with this rule. + */ + for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) { + + if (lpm->tbl24[i].ext_entry == 0 && + lpm->tbl24[i].depth <= depth ) { + lpm->tbl24[i].valid = INVALID; + } + else { + /* + * If TBL24 entry is extended, then there has + * to be a rule with depth >= 25 in the + * associated TBL8 group. + */ + + tbl8_group_index = lpm->tbl24[i].tbl8_gindex; + tbl8_index = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + for (j = tbl8_index; j < (tbl8_index + + RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) { + + if (lpm->tbl8[j].depth <= depth) + lpm->tbl8[j].valid = INVALID; + } + } + } + } + else { + /* + * If a replacement rule exists then modify entries + * associated with this rule. + */ + + struct rte_lpm_tbl24_entry new_tbl24_entry = { + {.next_hop = lpm->rules_tbl[sub_rule_index].next_hop,}, + .valid = VALID, + .ext_entry = 0, + .depth = sub_rule_depth, + }; + + struct rte_lpm_tbl8_entry new_tbl8_entry = { + .valid = VALID, + .depth = sub_rule_depth, + .next_hop = lpm->rules_tbl + [sub_rule_index].next_hop, + }; + + for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) { + + if (lpm->tbl24[i].ext_entry == 0 && + lpm->tbl24[i].depth <= depth ) { + lpm->tbl24[i] = new_tbl24_entry; + } + else { + /* + * If TBL24 entry is extended, then there has + * to be a rule with depth >= 25 in the + * associated TBL8 group. + */ + + tbl8_group_index = lpm->tbl24[i].tbl8_gindex; + tbl8_index = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + for (j = tbl8_index; j < (tbl8_index + + RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) { + + if (lpm->tbl8[j].depth <= depth) + lpm->tbl8[j] = new_tbl8_entry; + } + } + } + } + + return 0; +} + +/* + * Checks if table 8 group can be recycled. + * + * Return of -EEXIST means tbl8 is in use and thus can not be recycled. + * Return of -EINVAL means tbl8 is empty and thus can be recycled + * Return of value > -1 means tbl8 is in use but has all the same values and + * thus can be recycled + */ +static inline int32_t +tbl8_recycle_check(struct rte_lpm_tbl8_entry *tbl8, uint32_t tbl8_group_start) +{ + uint32_t tbl8_group_end, i; + tbl8_group_end = tbl8_group_start + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + /* + * Check the first entry of the given tbl8. If it is invalid we know + * this tbl8 does not contain any rule with a depth < RTE_LPM_MAX_DEPTH + * (As they would affect all entries in a tbl8) and thus this table + * can not be recycled. + */ + if (tbl8[tbl8_group_start].valid) { + /* + * If first entry is valid check if the depth is less than 24 + * and if so check the rest of the entries to verify that they + * are all of this depth. + */ + if (tbl8[tbl8_group_start].depth < MAX_DEPTH_TBL24) { + for (i = (tbl8_group_start + 1); i < tbl8_group_end; + i++) { + + if (tbl8[i].depth != + tbl8[tbl8_group_start].depth) { + + return -EEXIST; + } + } + /* If all entries are the same return the tb8 index */ + return tbl8_group_start; + } + + return -EEXIST; + } + /* + * If the first entry is invalid check if the rest of the entries in + * the tbl8 are invalid. + */ + for (i = (tbl8_group_start + 1); i < tbl8_group_end; i++) { + if (tbl8[i].valid) + return -EEXIST; + } + /* If no valid entries are found then return -EINVAL. */ + return -EINVAL; +} + +static inline int32_t +delete_depth_big(struct rte_lpm *lpm, uint32_t ip_masked, + uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth) +{ + uint32_t tbl24_index, tbl8_group_index, tbl8_group_start, tbl8_index, + tbl8_range, i; + int32_t tbl8_recycle_index; + + /* + * Calculate the index into tbl24 and range. Note: All depths larger + * than MAX_DEPTH_TBL24 are associated with only one tbl24 entry. + */ + tbl24_index = ip_masked >> 8; + + /* Calculate the index into tbl8 and range. */ + tbl8_group_index = lpm->tbl24[tbl24_index].tbl8_gindex; + tbl8_group_start = tbl8_group_index * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_index = tbl8_group_start + (ip_masked & 0xFF); + tbl8_range = depth_to_range(depth); + + if (sub_rule_index < 0) { + /* + * Loop through the range of entries on tbl8 for which the + * rule_to_delete must be removed or modified. + */ + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + if (lpm->tbl8[i].depth <= depth) + lpm->tbl8[i].valid = INVALID; + } + } + else { + /* Set new tbl8 entry. */ + struct rte_lpm_tbl8_entry new_tbl8_entry = { + .valid = VALID, + .depth = sub_rule_depth, + .valid_group = lpm->tbl8[tbl8_group_start].valid_group, + .next_hop = lpm->rules_tbl[sub_rule_index].next_hop, + }; + + /* + * Loop through the range of entries on tbl8 for which the + * rule_to_delete must be modified. + */ + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + if (lpm->tbl8[i].depth <= depth) + lpm->tbl8[i] = new_tbl8_entry; + } + } + + /* + * Check if there are any valid entries in this tbl8 group. If all + * tbl8 entries are invalid we can free the tbl8 and invalidate the + * associated tbl24 entry. + */ + + tbl8_recycle_index = tbl8_recycle_check(lpm->tbl8, tbl8_group_start); + + if (tbl8_recycle_index == -EINVAL){ + /* Set tbl24 before freeing tbl8 to avoid race condition. */ + lpm->tbl24[tbl24_index].valid = 0; + tbl8_free(lpm->tbl8, tbl8_group_start); + } + else if (tbl8_recycle_index > -1) { + /* Update tbl24 entry. */ + struct rte_lpm_tbl24_entry new_tbl24_entry = { + { .next_hop = lpm->tbl8[tbl8_recycle_index].next_hop, }, + .valid = VALID, + .ext_entry = 0, + .depth = lpm->tbl8[tbl8_recycle_index].depth, + }; + + /* Set tbl24 before freeing tbl8 to avoid race condition. */ + lpm->tbl24[tbl24_index] = new_tbl24_entry; + tbl8_free(lpm->tbl8, tbl8_group_start); + } + + return 0; +} + +/* + * Deletes a rule + */ +int +rte_lpm_delete(struct rte_lpm *lpm, uint32_t ip, uint8_t depth) +{ + int32_t rule_to_delete_index, sub_rule_index; + uint32_t ip_masked; + uint8_t sub_rule_depth; + /* + * Check input arguments. Note: IP must be a positive integer of 32 + * bits in length therefore it need not be checked. + */ + if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) { + return -EINVAL; + } + + ip_masked = ip & depth_to_mask(depth); + + /* + * Find the index of the input rule, that needs to be deleted, in the + * rule table. + */ + rule_to_delete_index = rule_find(lpm, ip_masked, depth); + + /* + * Check if rule_to_delete_index was found. If no rule was found the + * function rule_find returns -E_RTE_NO_TAILQ. + */ + if (rule_to_delete_index < 0) + return -E_RTE_NO_TAILQ; + + /* Delete the rule from the rule table. */ + rule_delete(lpm, rule_to_delete_index, depth); + + /* + * Find rule to replace the rule_to_delete. If there is no rule to + * replace the rule_to_delete we return -1 and invalidate the table + * entries associated with this rule. + */ + sub_rule_depth = 0; + sub_rule_index = find_previous_rule(lpm, ip, depth, &sub_rule_depth); + + /* + * If the input depth value is less than 25 use function + * delete_depth_small otherwise use delete_depth_big. + */ + if (depth <= MAX_DEPTH_TBL24) { + return delete_depth_small(lpm, ip_masked, depth, + sub_rule_index, sub_rule_depth); + } + else { /* If depth > MAX_DEPTH_TBL24 */ + return delete_depth_big(lpm, ip_masked, depth, sub_rule_index, sub_rule_depth); + } +} + +/* + * Delete all rules from the LPM table. + */ +void +rte_lpm_delete_all(struct rte_lpm *lpm) +{ + /* Zero rule information. */ + memset(lpm->rule_info, 0, sizeof(lpm->rule_info)); + + /* Zero tbl24. */ + memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); + + /* Zero tbl8. */ + memset(lpm->tbl8, 0, sizeof(lpm->tbl8)); + + /* Delete all rules form the rules table. */ + memset(lpm->rules_tbl, 0, sizeof(lpm->rules_tbl[0]) * lpm->max_rules); +} + diff --git a/src/dpdk_lib18/librte_lpm/rte_lpm.h b/src/dpdk_lib18/librte_lpm/rte_lpm.h new file mode 100755 index 00000000..62d7736e --- /dev/null +++ b/src/dpdk_lib18/librte_lpm/rte_lpm.h @@ -0,0 +1,472 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LPM_H_ +#define _RTE_LPM_H_ + +/** + * @file + * RTE Longest Prefix Match (LPM) + */ + +#include <errno.h> +#include <sys/queue.h> +#include <stdint.h> +#include <stdlib.h> +#include <rte_branch_prediction.h> +#include <rte_memory.h> +#include <rte_common.h> +#include <rte_common_vect.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** Max number of characters in LPM name. */ +#define RTE_LPM_NAMESIZE 32 + +/** @deprecated Possible location to allocate memory. This was for last + * parameter of rte_lpm_create(), but is now redundant. The LPM table is always + * allocated in memory using librte_malloc which uses a memzone. */ +#define RTE_LPM_HEAP 0 + +/** @deprecated Possible location to allocate memory. This was for last + * parameter of rte_lpm_create(), but is now redundant. The LPM table is always + * allocated in memory using librte_malloc which uses a memzone. */ +#define RTE_LPM_MEMZONE 1 + +/** Maximum depth value possible for IPv4 LPM. */ +#define RTE_LPM_MAX_DEPTH 32 + +/** @internal Total number of tbl24 entries. */ +#define RTE_LPM_TBL24_NUM_ENTRIES (1 << 24) + +/** @internal Number of entries in a tbl8 group. */ +#define RTE_LPM_TBL8_GROUP_NUM_ENTRIES 256 + +/** @internal Total number of tbl8 groups in the tbl8. */ +#define RTE_LPM_TBL8_NUM_GROUPS 256 + +/** @internal Total number of tbl8 entries. */ +#define RTE_LPM_TBL8_NUM_ENTRIES (RTE_LPM_TBL8_NUM_GROUPS * \ + RTE_LPM_TBL8_GROUP_NUM_ENTRIES) + +/** @internal Macro to enable/disable run-time checks. */ +#if defined(RTE_LIBRTE_LPM_DEBUG) +#define RTE_LPM_RETURN_IF_TRUE(cond, retval) do { \ + if (cond) return (retval); \ +} while (0) +#else +#define RTE_LPM_RETURN_IF_TRUE(cond, retval) +#endif + +/** @internal bitmask with valid and ext_entry/valid_group fields set */ +#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x0300 + +/** Bitmask used to indicate successful lookup */ +#define RTE_LPM_LOOKUP_SUCCESS 0x0100 + +/** @internal Tbl24 entry structure. */ +struct rte_lpm_tbl24_entry { + /* Stores Next hop or group index (i.e. gindex)into tbl8. */ + union { + uint8_t next_hop; + uint8_t tbl8_gindex; + }; + /* Using single uint8_t to store 3 values. */ + uint8_t valid :1; /**< Validation flag. */ + uint8_t ext_entry :1; /**< External entry. */ + uint8_t depth :6; /**< Rule depth. */ +}; + +/** @internal Tbl8 entry structure. */ +struct rte_lpm_tbl8_entry { + uint8_t next_hop; /**< next hop. */ + /* Using single uint8_t to store 3 values. */ + uint8_t valid :1; /**< Validation flag. */ + uint8_t valid_group :1; /**< Group validation flag. */ + uint8_t depth :6; /**< Rule depth. */ +}; + +/** @internal Rule structure. */ +struct rte_lpm_rule { + uint32_t ip; /**< Rule IP address. */ + uint8_t next_hop; /**< Rule next hop. */ +}; + +/** @internal Contains metadata about the rules table. */ +struct rte_lpm_rule_info { + uint32_t used_rules; /**< Used rules so far. */ + uint32_t first_rule; /**< Indexes the first rule of a given depth. */ +}; + +/** @internal LPM structure. */ +struct rte_lpm { + /* LPM metadata. */ + char name[RTE_LPM_NAMESIZE]; /**< Name of the lpm. */ + int mem_location; /**< @deprecated @see RTE_LPM_HEAP and RTE_LPM_MEMZONE. */ + uint32_t max_rules; /**< Max. balanced rules per lpm. */ + struct rte_lpm_rule_info rule_info[RTE_LPM_MAX_DEPTH]; /**< Rule info table. */ + + /* LPM Tables. */ + struct rte_lpm_tbl24_entry tbl24[RTE_LPM_TBL24_NUM_ENTRIES] \ + __rte_cache_aligned; /**< LPM tbl24 table. */ + struct rte_lpm_tbl8_entry tbl8[RTE_LPM_TBL8_NUM_ENTRIES] \ + __rte_cache_aligned; /**< LPM tbl8 table. */ + struct rte_lpm_rule rules_tbl[0] \ + __rte_cache_aligned; /**< LPM rules. */ +}; + +/** + * Create an LPM object. + * + * @param name + * LPM object name + * @param socket_id + * NUMA socket ID for LPM table memory allocation + * @param max_rules + * Maximum number of LPM rules that can be added + * @param flags + * This parameter is currently unused + * @return + * Handle to LPM object on success, NULL otherwise with rte_errno set + * to an appropriate values. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - E_RTE_NO_TAILQ - no tailq list could be got for the lpm object list + * - EINVAL - invalid parameter passed to function + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_lpm * +rte_lpm_create(const char *name, int socket_id, int max_rules, int flags); + +/** + * Find an existing LPM object and return a pointer to it. + * + * @param name + * Name of the lpm object as passed to rte_lpm_create() + * @return + * Pointer to lpm object or NULL if object not found with rte_errno + * set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + */ +struct rte_lpm * +rte_lpm_find_existing(const char *name); + +/** + * Free an LPM object. + * + * @param lpm + * LPM object handle + * @return + * None + */ +void +rte_lpm_free(struct rte_lpm *lpm); + +/** + * Add a rule to the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be added to the LPM table + * @param depth + * Depth of the rule to be added to the LPM table + * @param next_hop + * Next hop of the rule to be added to the LPM table + * @return + * 0 on success, negative value otherwise + */ +int +rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, uint8_t next_hop); + +/** + * Check if a rule is present in the LPM table, + * and provide its next hop if it is. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be searched + * @param depth + * Depth of the rule to searched + * @param next_hop + * Next hop of the rule (valid only if it is found) + * @return + * 1 if the rule exists, 0 if it does not, a negative value on failure + */ +int +rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, +uint8_t *next_hop); + +/** + * Delete a rule from the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be deleted from the LPM table + * @param depth + * Depth of the rule to be deleted from the LPM table + * @return + * 0 on success, negative value otherwise + */ +int +rte_lpm_delete(struct rte_lpm *lpm, uint32_t ip, uint8_t depth); + +/** + * Delete all rules from the LPM table. + * + * @param lpm + * LPM object handle + */ +void +rte_lpm_delete_all(struct rte_lpm *lpm); + +/** + * Lookup an IP into the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP to be looked up in the LPM table + * @param next_hop + * Next hop of the most specific rule found for IP (valid on lookup hit only) + * @return + * -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit + */ +static inline int +rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint8_t *next_hop) +{ + unsigned tbl24_index = (ip >> 8); + uint16_t tbl_entry; + + /* DEBUG: Check user input arguments. */ + RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (next_hop == NULL)), -EINVAL); + + /* Copy tbl24 entry */ + tbl_entry = *(const uint16_t *)&lpm->tbl24[tbl24_index]; + + /* Copy tbl8 entry (only if needed) */ + if (unlikely((tbl_entry & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + + unsigned tbl8_index = (uint8_t)ip + + ((uint8_t)tbl_entry * RTE_LPM_TBL8_GROUP_NUM_ENTRIES); + + tbl_entry = *(const uint16_t *)&lpm->tbl8[tbl8_index]; + } + + *next_hop = (uint8_t)tbl_entry; + return (tbl_entry & RTE_LPM_LOOKUP_SUCCESS) ? 0 : -ENOENT; +} + +/** + * Lookup multiple IP addresses in an LPM table. This may be implemented as a + * macro, so the address of the function should not be used. + * + * @param lpm + * LPM object handle + * @param ips + * Array of IPs to be looked up in the LPM table + * @param next_hops + * Next hop of the most specific rule found for IP (valid on lookup hit only). + * This is an array of two byte values. The most significant byte in each + * value says whether the lookup was successful (bitmask + * RTE_LPM_LOOKUP_SUCCESS is set). The least significant byte is the + * actual next hop. + * @param n + * Number of elements in ips (and next_hops) array to lookup. This should be a + * compile time constant, and divisible by 8 for best performance. + * @return + * -EINVAL for incorrect arguments, otherwise 0 + */ +#define rte_lpm_lookup_bulk(lpm, ips, next_hops, n) \ + rte_lpm_lookup_bulk_func(lpm, ips, next_hops, n) + +static inline int +rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips, + uint16_t * next_hops, const unsigned n) +{ + unsigned i; + unsigned tbl24_indexes[n]; + + /* DEBUG: Check user input arguments. */ + RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (ips == NULL) || + (next_hops == NULL)), -EINVAL); + + for (i = 0; i < n; i++) { + tbl24_indexes[i] = ips[i] >> 8; + } + + for (i = 0; i < n; i++) { + /* Simply copy tbl24 entry to output */ + next_hops[i] = *(const uint16_t *)&lpm->tbl24[tbl24_indexes[i]]; + + /* Overwrite output with tbl8 entry if needed */ + if (unlikely((next_hops[i] & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + + unsigned tbl8_index = (uint8_t)ips[i] + + ((uint8_t)next_hops[i] * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES); + + next_hops[i] = *(const uint16_t *)&lpm->tbl8[tbl8_index]; + } + } + return 0; +} + +/* Mask four results. */ +#define RTE_LPM_MASKX4_RES UINT64_C(0x00ff00ff00ff00ff) + +/** + * Lookup four IP addresses in an LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * Four IPs to be looked up in the LPM table + * @param hop + * Next hop of the most specific rule found for IP (valid on lookup hit only). + * This is an 4 elements array of two byte values. + * If the lookup was succesfull for the given IP, then least significant byte + * of the corresponding element is the actual next hop and the most + * significant byte is zero. + * If the lookup for the given IP failed, then corresponding element would + * contain default value, see description of then next parameter. + * @param defv + * Default value to populate into corresponding element of hop[] array, + * if lookup would fail. + */ +static inline void +rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4], + uint16_t defv) +{ + __m128i i24; + rte_xmm_t i8; + uint16_t tbl[4]; + uint64_t idx, pt; + + const __m128i mask8 = + _mm_set_epi32(UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX); + + /* + * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 4 LPM entries + * as one 64-bit value (0x0300030003000300). + */ + const uint64_t mask_xv = + ((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK | + (uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 16 | + (uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32 | + (uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 48); + + /* + * RTE_LPM_LOOKUP_SUCCESS for 4 LPM entries + * as one 64-bit value (0x0100010001000100). + */ + const uint64_t mask_v = + ((uint64_t)RTE_LPM_LOOKUP_SUCCESS | + (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 16 | + (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32 | + (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 48); + + /* get 4 indexes for tbl24[]. */ + i24 = _mm_srli_epi32(ip, CHAR_BIT); + + /* extract values from tbl24[] */ + idx = _mm_cvtsi128_si64(i24); + i24 = _mm_srli_si128(i24, sizeof(uint64_t)); + + tbl[0] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx]; + tbl[1] = *(const uint16_t *)&lpm->tbl24[idx >> 32]; + + idx = _mm_cvtsi128_si64(i24); + + tbl[2] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx]; + tbl[3] = *(const uint16_t *)&lpm->tbl24[idx >> 32]; + + /* get 4 indexes for tbl8[]. */ + i8.m = _mm_and_si128(ip, mask8); + + pt = (uint64_t)tbl[0] | + (uint64_t)tbl[1] << 16 | + (uint64_t)tbl[2] << 32 | + (uint64_t)tbl[3] << 48; + + /* search successfully finished for all 4 IP addresses. */ + if (likely((pt & mask_xv) == mask_v)) { + uintptr_t ph = (uintptr_t)hop; + *(uint64_t *)ph = pt & RTE_LPM_MASKX4_RES; + return; + } + + if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[0] = i8.u32[0] + + (uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl[0] = *(const uint16_t *)&lpm->tbl8[i8.u32[0]]; + } + if (unlikely((pt >> 16 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[1] = i8.u32[1] + + (uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl[1] = *(const uint16_t *)&lpm->tbl8[i8.u32[1]]; + } + if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[2] = i8.u32[2] + + (uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl[2] = *(const uint16_t *)&lpm->tbl8[i8.u32[2]]; + } + if (unlikely((pt >> 48 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[3] = i8.u32[3] + + (uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl[3] = *(const uint16_t *)&lpm->tbl8[i8.u32[3]]; + } + + hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[0] : defv; + hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[1] : defv; + hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[2] : defv; + hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[3] : defv; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LPM_H_ */ diff --git a/src/dpdk_lib18/librte_lpm/rte_lpm6.c b/src/dpdk_lib18/librte_lpm/rte_lpm6.c new file mode 100755 index 00000000..42e6d800 --- /dev/null +++ b/src/dpdk_lib18/librte_lpm/rte_lpm6.c @@ -0,0 +1,892 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <string.h> +#include <stdint.h> +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <errno.h> +#include <sys/queue.h> + +#include <rte_log.h> +#include <rte_branch_prediction.h> +#include <rte_common.h> +#include <rte_memory.h> +#include <rte_malloc.h> +#include <rte_memzone.h> +#include <rte_memcpy.h> +#include <rte_tailq.h> +#include <rte_eal.h> +#include <rte_eal_memconfig.h> +#include <rte_per_lcore.h> +#include <rte_string_fns.h> +#include <rte_errno.h> +#include <rte_rwlock.h> +#include <rte_spinlock.h> + +#include "rte_lpm6.h" + +#define RTE_LPM6_TBL24_NUM_ENTRIES (1 << 24) +#define RTE_LPM6_TBL8_GROUP_NUM_ENTRIES 256 +#define RTE_LPM6_TBL8_MAX_NUM_GROUPS (1 << 21) + +#define RTE_LPM6_VALID_EXT_ENTRY_BITMASK 0xA0000000 +#define RTE_LPM6_LOOKUP_SUCCESS 0x20000000 +#define RTE_LPM6_TBL8_BITMASK 0x001FFFFF + +#define ADD_FIRST_BYTE 3 +#define LOOKUP_FIRST_BYTE 4 +#define BYTE_SIZE 8 +#define BYTES2_SIZE 16 + +#define lpm6_tbl8_gindex next_hop + +/** Flags for setting an entry as valid/invalid. */ +enum valid_flag { + INVALID = 0, + VALID +}; + +TAILQ_HEAD(rte_lpm6_list, rte_tailq_entry); + +/** Tbl entry structure. It is the same for both tbl24 and tbl8 */ +struct rte_lpm6_tbl_entry { + uint32_t next_hop: 21; /**< Next hop / next table to be checked. */ + uint32_t depth :8; /**< Rule depth. */ + + /* Flags. */ + uint32_t valid :1; /**< Validation flag. */ + uint32_t valid_group :1; /**< Group validation flag. */ + uint32_t ext_entry :1; /**< External entry. */ +}; + +/** Rules tbl entry structure. */ +struct rte_lpm6_rule { + uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */ + uint8_t next_hop; /**< Rule next hop. */ + uint8_t depth; /**< Rule depth. */ +}; + +/** LPM6 structure. */ +struct rte_lpm6 { + /* LPM metadata. */ + char name[RTE_LPM6_NAMESIZE]; /**< Name of the lpm. */ + uint32_t max_rules; /**< Max number of rules. */ + uint32_t used_rules; /**< Used rules so far. */ + uint32_t number_tbl8s; /**< Number of tbl8s to allocate. */ + uint32_t next_tbl8; /**< Next tbl8 to be used. */ + + /* LPM Tables. */ + struct rte_lpm6_rule *rules_tbl; /**< LPM rules. */ + struct rte_lpm6_tbl_entry tbl24[RTE_LPM6_TBL24_NUM_ENTRIES] + __rte_cache_aligned; /**< LPM tbl24 table. */ + struct rte_lpm6_tbl_entry tbl8[0] + __rte_cache_aligned; /**< LPM tbl8 table. */ +}; + +/* + * Takes an array of uint8_t (IPv6 address) and masks it using the depth. + * It leaves untouched one bit per unit in the depth variable + * and set the rest to 0. + */ +static inline void +mask_ip(uint8_t *ip, uint8_t depth) +{ + int16_t part_depth, mask; + int i; + + part_depth = depth; + + for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) { + if (part_depth < BYTE_SIZE && part_depth >= 0) { + mask = (uint16_t)(~(UINT8_MAX >> part_depth)); + ip[i] = (uint8_t)(ip[i] & mask); + } else if (part_depth < 0) { + ip[i] = 0; + } + part_depth -= BYTE_SIZE; + } +} + +/* + * Allocates memory for LPM object + */ +struct rte_lpm6 * +rte_lpm6_create(const char *name, int socket_id, + const struct rte_lpm6_config *config) +{ + char mem_name[RTE_LPM6_NAMESIZE]; + struct rte_lpm6 *lpm = NULL; + struct rte_tailq_entry *te; + uint64_t mem_size, rules_size; + struct rte_lpm6_list *lpm_list; + + /* Check that we have an initialised tail queue */ + if ((lpm_list = + RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_LPM6, rte_lpm6_list)) == NULL) { + rte_errno = E_RTE_NO_TAILQ; + return NULL; + } + + RTE_BUILD_BUG_ON(sizeof(struct rte_lpm6_tbl_entry) != sizeof(uint32_t)); + + /* Check user arguments. */ + if ((name == NULL) || (socket_id < -1) || (config == NULL) || + (config->max_rules == 0) || + config->number_tbl8s > RTE_LPM6_TBL8_MAX_NUM_GROUPS) { + rte_errno = EINVAL; + return NULL; + } + + snprintf(mem_name, sizeof(mem_name), "LPM_%s", name); + + /* Determine the amount of memory to allocate. */ + mem_size = sizeof(*lpm) + (sizeof(lpm->tbl8[0]) * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s); + rules_size = sizeof(struct rte_lpm6_rule) * config->max_rules; + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* Guarantee there's no existing */ + TAILQ_FOREACH(te, lpm_list, next) { + lpm = (struct rte_lpm6 *) te->data; + if (strncmp(name, lpm->name, RTE_LPM6_NAMESIZE) == 0) + break; + } + if (te != NULL) + goto exit; + + /* allocate tailq entry */ + te = rte_zmalloc("LPM6_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, LPM, "Failed to allocate tailq entry!\n"); + goto exit; + } + + /* Allocate memory to store the LPM data structures. */ + lpm = (struct rte_lpm6 *)rte_zmalloc_socket(mem_name, (size_t)mem_size, + RTE_CACHE_LINE_SIZE, socket_id); + + if (lpm == NULL) { + RTE_LOG(ERR, LPM, "LPM memory allocation failed\n"); + rte_free(te); + goto exit; + } + + lpm->rules_tbl = (struct rte_lpm6_rule *)rte_zmalloc_socket(NULL, + (size_t)rules_size, RTE_CACHE_LINE_SIZE, socket_id); + + if (lpm->rules_tbl == NULL) { + RTE_LOG(ERR, LPM, "LPM memory allocation failed\n"); + rte_free(lpm); + rte_free(te); + goto exit; + } + + /* Save user arguments. */ + lpm->max_rules = config->max_rules; + lpm->number_tbl8s = config->number_tbl8s; + snprintf(lpm->name, sizeof(lpm->name), "%s", name); + + te->data = (void *) lpm; + + TAILQ_INSERT_TAIL(lpm_list, te, next); + +exit: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return lpm; +} + +/* + * Find an existing lpm table and return a pointer to it. + */ +struct rte_lpm6 * +rte_lpm6_find_existing(const char *name) +{ + struct rte_lpm6 *l = NULL; + struct rte_tailq_entry *te; + struct rte_lpm6_list *lpm_list; + + /* Check that we have an initialised tail queue */ + if ((lpm_list = RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_LPM6, + rte_lpm6_list)) == NULL) { + rte_errno = E_RTE_NO_TAILQ; + return NULL; + } + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, lpm_list, next) { + l = (struct rte_lpm6 *) te->data; + if (strncmp(name, l->name, RTE_LPM6_NAMESIZE) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return l; +} + +/* + * Deallocates memory for given LPM table. + */ +void +rte_lpm6_free(struct rte_lpm6 *lpm) +{ + struct rte_lpm6_list *lpm_list; + struct rte_tailq_entry *te; + + /* Check user arguments. */ + if (lpm == NULL) + return; + + /* check that we have an initialised tail queue */ + if ((lpm_list = + RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_LPM, rte_lpm6_list)) == NULL) { + rte_errno = E_RTE_NO_TAILQ; + return; + } + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find our tailq entry */ + TAILQ_FOREACH(te, lpm_list, next) { + if (te->data == (void *) lpm) + break; + } + if (te == NULL) { + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return; + } + + TAILQ_REMOVE(lpm_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + rte_free(lpm); + rte_free(te); +} + +/* + * Checks if a rule already exists in the rules table and updates + * the nexthop if so. Otherwise it adds a new rule if enough space is available. + */ +static inline int32_t +rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t next_hop, uint8_t depth) +{ + uint32_t rule_index; + + /* Scan through rule list to see if rule already exists. */ + for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) { + + /* If rule already exists update its next_hop and return. */ + if ((memcmp (lpm->rules_tbl[rule_index].ip, ip, + RTE_LPM6_IPV6_ADDR_SIZE) == 0) && + lpm->rules_tbl[rule_index].depth == depth) { + lpm->rules_tbl[rule_index].next_hop = next_hop; + + return rule_index; + } + } + + /* + * If rule does not exist check if there is space to add a new rule to + * this rule group. If there is no space return error. + */ + if (lpm->used_rules == lpm->max_rules) { + return -ENOSPC; + } + + /* If there is space for the new rule add it. */ + rte_memcpy(lpm->rules_tbl[rule_index].ip, ip, RTE_LPM6_IPV6_ADDR_SIZE); + lpm->rules_tbl[rule_index].next_hop = next_hop; + lpm->rules_tbl[rule_index].depth = depth; + + /* Increment the used rules counter for this rule group. */ + lpm->used_rules++; + + return rule_index; +} + +/* + * Function that expands a rule across the data structure when a less-generic + * one has been added before. It assures that every possible combination of bits + * in the IP address returns a match. + */ +static void +expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth, + uint8_t next_hop) +{ + uint32_t tbl8_group_end, tbl8_gindex_next, j; + + tbl8_group_end = tbl8_gindex + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + + struct rte_lpm6_tbl_entry new_tbl8_entry = { + .valid = VALID, + .valid_group = VALID, + .depth = depth, + .next_hop = next_hop, + .ext_entry = 0, + }; + + for (j = tbl8_gindex; j < tbl8_group_end; j++) { + if (!lpm->tbl8[j].valid || (lpm->tbl8[j].ext_entry == 0 + && lpm->tbl8[j].depth <= depth)) { + + lpm->tbl8[j] = new_tbl8_entry; + + } else if (lpm->tbl8[j].ext_entry == 1) { + + tbl8_gindex_next = lpm->tbl8[j].lpm6_tbl8_gindex + * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + expand_rule(lpm, tbl8_gindex_next, depth, next_hop); + } + } +} + +/* + * Partially adds a new route to the data structure (tbl24+tbl8s). + * It returns 0 on success, a negative number on failure, or 1 if + * the process needs to be continued by calling the function again. + */ +static inline int +add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, + struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip, uint8_t bytes, + uint8_t first_byte, uint8_t depth, uint8_t next_hop) +{ + uint32_t tbl_index, tbl_range, tbl8_group_start, tbl8_group_end, i; + int32_t tbl8_gindex; + int8_t bitshift; + uint8_t bits_covered; + + /* + * Calculate index to the table based on the number and position + * of the bytes being inspected in this step. + */ + tbl_index = 0; + for (i = first_byte; i < (uint32_t)(first_byte + bytes); i++) { + bitshift = (int8_t)((bytes - i)*BYTE_SIZE); + + if (bitshift < 0) bitshift = 0; + tbl_index = tbl_index | ip[i-1] << bitshift; + } + + /* Number of bits covered in this step */ + bits_covered = (uint8_t)((bytes+first_byte-1)*BYTE_SIZE); + + /* + * If depth if smaller than this number (ie this is the last step) + * expand the rule across the relevant positions in the table. + */ + if (depth <= bits_covered) { + tbl_range = 1 << (bits_covered - depth); + + for (i = tbl_index; i < (tbl_index + tbl_range); i++) { + if (!tbl[i].valid || (tbl[i].ext_entry == 0 && + tbl[i].depth <= depth)) { + + struct rte_lpm6_tbl_entry new_tbl_entry = { + .next_hop = next_hop, + .depth = depth, + .valid = VALID, + .valid_group = VALID, + .ext_entry = 0, + }; + + tbl[i] = new_tbl_entry; + + } else if (tbl[i].ext_entry == 1) { + + /* + * If tbl entry is valid and extended calculate the index + * into next tbl8 and expand the rule across the data structure. + */ + tbl8_gindex = tbl[i].lpm6_tbl8_gindex * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + expand_rule(lpm, tbl8_gindex, depth, next_hop); + } + } + + return 0; + } + /* + * If this is not the last step just fill one position + * and calculate the index to the next table. + */ + else { + /* If it's invalid a new tbl8 is needed */ + if (!tbl[tbl_index].valid) { + if (lpm->next_tbl8 < lpm->number_tbl8s) + tbl8_gindex = (lpm->next_tbl8)++; + else + return -ENOSPC; + + struct rte_lpm6_tbl_entry new_tbl_entry = { + .lpm6_tbl8_gindex = tbl8_gindex, + .depth = 0, + .valid = VALID, + .valid_group = VALID, + .ext_entry = 1, + }; + + tbl[tbl_index] = new_tbl_entry; + } + /* + * If it's valid but not extended the rule that was stored * + * here needs to be moved to the next table. + */ + else if (tbl[tbl_index].ext_entry == 0) { + /* Search for free tbl8 group. */ + if (lpm->next_tbl8 < lpm->number_tbl8s) + tbl8_gindex = (lpm->next_tbl8)++; + else + return -ENOSPC; + + tbl8_group_start = tbl8_gindex * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + tbl8_group_end = tbl8_group_start + + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + + /* Populate new tbl8 with tbl value. */ + for (i = tbl8_group_start; i < tbl8_group_end; i++) { + lpm->tbl8[i].valid = VALID; + lpm->tbl8[i].depth = tbl[tbl_index].depth; + lpm->tbl8[i].next_hop = tbl[tbl_index].next_hop; + lpm->tbl8[i].ext_entry = 0; + } + + /* + * Update tbl entry to point to new tbl8 entry. Note: The + * ext_flag and tbl8_index need to be updated simultaneously, + * so assign whole structure in one go. + */ + struct rte_lpm6_tbl_entry new_tbl_entry = { + .lpm6_tbl8_gindex = tbl8_gindex, + .depth = 0, + .valid = VALID, + .valid_group = VALID, + .ext_entry = 1, + }; + + tbl[tbl_index] = new_tbl_entry; + } + + *tbl_next = &(lpm->tbl8[tbl[tbl_index].lpm6_tbl8_gindex * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]); + } + + return 1; +} + +/* + * Add a route + */ +int +rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint8_t next_hop) +{ + struct rte_lpm6_tbl_entry *tbl; + struct rte_lpm6_tbl_entry *tbl_next; + int32_t rule_index; + int status; + uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE]; + int i; + + /* Check user arguments. */ + if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH)) + return -EINVAL; + + /* Copy the IP and mask it to avoid modifying user's input data. */ + memcpy(masked_ip, ip, RTE_LPM6_IPV6_ADDR_SIZE); + mask_ip(masked_ip, depth); + + /* Add the rule to the rule table. */ + rule_index = rule_add(lpm, masked_ip, next_hop, depth); + + /* If there is no space available for new rule return error. */ + if (rule_index < 0) { + return rule_index; + } + + /* Inspect the first three bytes through tbl24 on the first step. */ + tbl = lpm->tbl24; + status = add_step (lpm, tbl, &tbl_next, masked_ip, ADD_FIRST_BYTE, 1, + depth, next_hop); + if (status < 0) { + rte_lpm6_delete(lpm, masked_ip, depth); + + return status; + } + + /* + * Inspect one by one the rest of the bytes until + * the process is completed. + */ + for (i = ADD_FIRST_BYTE; i < RTE_LPM6_IPV6_ADDR_SIZE && status == 1; i++) { + tbl = tbl_next; + status = add_step (lpm, tbl, &tbl_next, masked_ip, 1, (uint8_t)(i+1), + depth, next_hop); + if (status < 0) { + rte_lpm6_delete(lpm, masked_ip, depth); + + return status; + } + } + + return status; +} + +/* + * Takes a pointer to a table entry and inspect one level. + * The function returns 0 on lookup success, ENOENT if no match was found + * or 1 if the process needs to be continued by calling the function again. + */ +static inline int +lookup_step(const struct rte_lpm6 *lpm, const struct rte_lpm6_tbl_entry *tbl, + const struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip, + uint8_t first_byte, uint8_t *next_hop) +{ + uint32_t tbl8_index, tbl_entry; + + /* Take the integer value from the pointer. */ + tbl_entry = *(const uint32_t *)tbl; + + /* If it is valid and extended we calculate the new pointer to return. */ + if ((tbl_entry & RTE_LPM6_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM6_VALID_EXT_ENTRY_BITMASK) { + + tbl8_index = ip[first_byte-1] + + ((tbl_entry & RTE_LPM6_TBL8_BITMASK) * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES); + + *tbl_next = &lpm->tbl8[tbl8_index]; + + return 1; + } else { + /* If not extended then we can have a match. */ + *next_hop = (uint8_t)tbl_entry; + return (tbl_entry & RTE_LPM6_LOOKUP_SUCCESS) ? 0 : -ENOENT; + } +} + +/* + * Looks up an IP + */ +int +rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop) +{ + const struct rte_lpm6_tbl_entry *tbl; + const struct rte_lpm6_tbl_entry *tbl_next; + int status; + uint8_t first_byte; + uint32_t tbl24_index; + + /* DEBUG: Check user input arguments. */ + if ((lpm == NULL) || (ip == NULL) || (next_hop == NULL)) { + return -EINVAL; + } + + first_byte = LOOKUP_FIRST_BYTE; + tbl24_index = (ip[0] << BYTES2_SIZE) | (ip[1] << BYTE_SIZE) | ip[2]; + + /* Calculate pointer to the first entry to be inspected */ + tbl = &lpm->tbl24[tbl24_index]; + + do { + /* Continue inspecting following levels until success or failure */ + status = lookup_step(lpm, tbl, &tbl_next, ip, first_byte++, next_hop); + tbl = tbl_next; + } while (status == 1); + + return status; +} + +/* + * Looks up a group of IP addresses + */ +int +rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], + int16_t * next_hops, unsigned n) +{ + unsigned i; + const struct rte_lpm6_tbl_entry *tbl; + const struct rte_lpm6_tbl_entry *tbl_next; + uint32_t tbl24_index; + uint8_t first_byte, next_hop; + int status; + + /* DEBUG: Check user input arguments. */ + if ((lpm == NULL) || (ips == NULL) || (next_hops == NULL)) { + return -EINVAL; + } + + for (i = 0; i < n; i++) { + first_byte = LOOKUP_FIRST_BYTE; + tbl24_index = (ips[i][0] << BYTES2_SIZE) | + (ips[i][1] << BYTE_SIZE) | ips[i][2]; + + /* Calculate pointer to the first entry to be inspected */ + tbl = &lpm->tbl24[tbl24_index]; + + do { + /* Continue inspecting following levels until success or failure */ + status = lookup_step(lpm, tbl, &tbl_next, ips[i], first_byte++, + &next_hop); + tbl = tbl_next; + } while (status == 1); + + if (status < 0) + next_hops[i] = -1; + else + next_hops[i] = next_hop; + } + + return 0; +} + +/* + * Finds a rule in rule table. + * NOTE: Valid range for depth parameter is 1 .. 128 inclusive. + */ +static inline int32_t +rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth) +{ + uint32_t rule_index; + + /* Scan used rules at given depth to find rule. */ + for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) { + /* If rule is found return the rule index. */ + if ((memcmp (lpm->rules_tbl[rule_index].ip, ip, + RTE_LPM6_IPV6_ADDR_SIZE) == 0) && + lpm->rules_tbl[rule_index].depth == depth) { + + return rule_index; + } + } + + /* If rule is not found return -ENOENT. */ + return -ENOENT; +} + +/* + * Look for a rule in the high-level rules table + */ +int +rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, +uint8_t *next_hop) +{ + uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE]; + int32_t rule_index; + + /* Check user arguments. */ + if ((lpm == NULL) || next_hop == NULL || ip == NULL || + (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH)) + return -EINVAL; + + /* Copy the IP and mask it to avoid modifying user's input data. */ + memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE); + mask_ip(ip_masked, depth); + + /* Look for the rule using rule_find. */ + rule_index = rule_find(lpm, ip_masked, depth); + + if (rule_index >= 0) { + *next_hop = lpm->rules_tbl[rule_index].next_hop; + return 1; + } + + /* If rule is not found return 0. */ + return 0; +} + +/* + * Delete a rule from the rule table. + * NOTE: Valid range for depth parameter is 1 .. 128 inclusive. + */ +static inline void +rule_delete(struct rte_lpm6 *lpm, int32_t rule_index) +{ + /* + * Overwrite redundant rule with last rule in group and decrement rule + * counter. + */ + lpm->rules_tbl[rule_index] = lpm->rules_tbl[lpm->used_rules-1]; + lpm->used_rules--; +} + +/* + * Deletes a rule + */ +int +rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth) +{ + int32_t rule_to_delete_index; + uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE]; + unsigned i; + + /* + * Check input arguments. + */ + if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH)) { + return -EINVAL; + } + + /* Copy the IP and mask it to avoid modifying user's input data. */ + memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE); + mask_ip(ip_masked, depth); + + /* + * Find the index of the input rule, that needs to be deleted, in the + * rule table. + */ + rule_to_delete_index = rule_find(lpm, ip_masked, depth); + + /* + * Check if rule_to_delete_index was found. If no rule was found the + * function rule_find returns -ENOENT. + */ + if (rule_to_delete_index < 0) + return rule_to_delete_index; + + /* Delete the rule from the rule table. */ + rule_delete(lpm, rule_to_delete_index); + + /* + * Set all the table entries to 0 (ie delete every rule + * from the data structure. + */ + lpm->next_tbl8 = 0; + memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); + memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) + * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s); + + /* + * Add every rule again (except for the one that was removed from + * the rules table). + */ + for (i = 0; i < lpm->used_rules; i++) { + rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth, + lpm->rules_tbl[i].next_hop); + } + + return 0; +} + +/* + * Deletes a group of rules + */ +int +rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, unsigned n) +{ + int32_t rule_to_delete_index; + uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE]; + unsigned i; + + /* + * Check input arguments. + */ + if ((lpm == NULL) || (ips == NULL) || (depths == NULL)) { + return -EINVAL; + } + + for (i = 0; i < n; i++) { + /* Copy the IP and mask it to avoid modifying user's input data. */ + memcpy(ip_masked, ips[i], RTE_LPM6_IPV6_ADDR_SIZE); + mask_ip(ip_masked, depths[i]); + + /* + * Find the index of the input rule, that needs to be deleted, in the + * rule table. + */ + rule_to_delete_index = rule_find(lpm, ip_masked, depths[i]); + + /* + * Check if rule_to_delete_index was found. If no rule was found the + * function rule_find returns -ENOENT. + */ + if (rule_to_delete_index < 0) + continue; + + /* Delete the rule from the rule table. */ + rule_delete(lpm, rule_to_delete_index); + } + + /* + * Set all the table entries to 0 (ie delete every rule + * from the data structure. + */ + lpm->next_tbl8 = 0; + memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); + memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) + * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s); + + /* + * Add every rule again (except for the ones that were removed from + * the rules table). + */ + for (i = 0; i < lpm->used_rules; i++) { + rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth, + lpm->rules_tbl[i].next_hop); + } + + return 0; +} + +/* + * Delete all rules from the LPM table. + */ +void +rte_lpm6_delete_all(struct rte_lpm6 *lpm) +{ + /* Zero used rules counter. */ + lpm->used_rules = 0; + + /* Zero next tbl8 index. */ + lpm->next_tbl8 = 0; + + /* Zero tbl24. */ + memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); + + /* Zero tbl8. */ + memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s); + + /* Delete all rules form the rules table. */ + memset(lpm->rules_tbl, 0, sizeof(struct rte_lpm6_rule) * lpm->max_rules); +} diff --git a/src/dpdk_lib18/librte_lpm/rte_lpm6.h b/src/dpdk_lib18/librte_lpm/rte_lpm6.h new file mode 100755 index 00000000..4db810f9 --- /dev/null +++ b/src/dpdk_lib18/librte_lpm/rte_lpm6.h @@ -0,0 +1,228 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _RTE_LPM6_H_ +#define _RTE_LPM6_H_ + +/** + * @file + * RTE Longest Prefix Match for IPv6 (LPM6) + */ + +#ifdef __cplusplus +extern "C" { +#endif + + +#define RTE_LPM6_MAX_DEPTH 128 +#define RTE_LPM6_IPV6_ADDR_SIZE 16 +/** Max number of characters in LPM name. */ +#define RTE_LPM6_NAMESIZE 32 + +/** LPM structure. */ +struct rte_lpm6; + +/** LPM configuration structure. */ +struct rte_lpm6_config { + uint32_t max_rules; /**< Max number of rules. */ + uint32_t number_tbl8s; /**< Number of tbl8s to allocate. */ + int flags; /**< This field is currently unused. */ +}; + +/** + * Create an LPM object. + * + * @param name + * LPM object name + * @param socket_id + * NUMA socket ID for LPM table memory allocation + * @param config + * Structure containing the configuration + * @return + * Handle to LPM object on success, NULL otherwise with rte_errno set + * to an appropriate values. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - E_RTE_NO_TAILQ - no tailq list could be got for the lpm object list + * - EINVAL - invalid parameter passed to function + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_lpm6 * +rte_lpm6_create(const char *name, int socket_id, + const struct rte_lpm6_config *config); + +/** + * Find an existing LPM object and return a pointer to it. + * + * @param name + * Name of the lpm object as passed to rte_lpm6_create() + * @return + * Pointer to lpm object or NULL if object not found with rte_errno + * set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + */ +struct rte_lpm6 * +rte_lpm6_find_existing(const char *name); + +/** + * Free an LPM object. + * + * @param lpm + * LPM object handle + * @return + * None + */ +void +rte_lpm6_free(struct rte_lpm6 *lpm); + +/** + * Add a rule to the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be added to the LPM table + * @param depth + * Depth of the rule to be added to the LPM table + * @param next_hop + * Next hop of the rule to be added to the LPM table + * @return + * 0 on success, negative value otherwise + */ +int +rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint8_t next_hop); + +/** + * Check if a rule is present in the LPM table, + * and provide its next hop if it is. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be searched + * @param depth + * Depth of the rule to searched + * @param next_hop + * Next hop of the rule (valid only if it is found) + * @return + * 1 if the rule exists, 0 if it does not, a negative value on failure + */ +int +rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, +uint8_t *next_hop); + +/** + * Delete a rule from the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be deleted from the LPM table + * @param depth + * Depth of the rule to be deleted from the LPM table + * @return + * 0 on success, negative value otherwise + */ +int +rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth); + +/** + * Delete a rule from the LPM table. + * + * @param lpm + * LPM object handle + * @param ips + * Array of IPs to be deleted from the LPM table + * @param depths + * Array of depths of the rules to be deleted from the LPM table + * @param n + * Number of rules to be deleted from the LPM table + * @return + * 0 on success, negative value otherwise. + */ +int +rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, unsigned n); + +/** + * Delete all rules from the LPM table. + * + * @param lpm + * LPM object handle + */ +void +rte_lpm6_delete_all(struct rte_lpm6 *lpm); + +/** + * Lookup an IP into the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP to be looked up in the LPM table + * @param next_hop + * Next hop of the most specific rule found for IP (valid on lookup hit only) + * @return + * -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit + */ +int +rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop); + +/** + * Lookup multiple IP addresses in an LPM table. + * + * @param lpm + * LPM object handle + * @param ips + * Array of IPs to be looked up in the LPM table + * @param next_hops + * Next hop of the most specific rule found for IP (valid on lookup hit only). + * This is an array of two byte values. The next hop will be stored on + * each position on success; otherwise the position will be set to -1. + * @param n + * Number of elements in ips (and next_hops) array to lookup. + * @return + * -EINVAL for incorrect arguments, otherwise 0 + */ +int +rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], + int16_t * next_hops, unsigned n); + +#ifdef __cplusplus +} +#endif + +#endif |