From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 19 Dec 2016 23:05:39 +0100 Subject: Reorganize source tree to use single autotools instance Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion --- src/vnet/policer/node_funcs.c | 938 +++++++++++++++++++++++++ src/vnet/policer/police.h | 214 ++++++ src/vnet/policer/policer.c | 528 +++++++++++++++ src/vnet/policer/policer.h | 107 +++ src/vnet/policer/xlate.c | 1505 +++++++++++++++++++++++++++++++++++++++++ src/vnet/policer/xlate.h | 186 +++++ 6 files changed, 3478 insertions(+) create mode 100644 src/vnet/policer/node_funcs.c create mode 100644 src/vnet/policer/police.h create mode 100644 src/vnet/policer/policer.c create mode 100644 src/vnet/policer/policer.h create mode 100644 src/vnet/policer/xlate.c create mode 100644 src/vnet/policer/xlate.h (limited to 'src/vnet/policer') diff --git a/src/vnet/policer/node_funcs.c b/src/vnet/policer/node_funcs.c new file mode 100644 index 00000000000..1f4997ff669 --- /dev/null +++ b/src/vnet/policer/node_funcs.c @@ -0,0 +1,938 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include + +#define IP4_NON_DSCP_BITS 0x03 +#define IP4_DSCP_SHIFT 2 +#define IP6_NON_DSCP_BITS 0xf03fffff +#define IP6_DSCP_SHIFT 22 + +/* Dispatch functions meant to be instantiated elsewhere */ + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + u32 policer_index; +} vnet_policer_trace_t; + +/* packet trace format function */ +static u8 * +format_policer_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + vnet_policer_trace_t *t = va_arg (*args, vnet_policer_trace_t *); + + s = format (s, "VNET_POLICER: sw_if_index %d policer_index %d next %d", + t->sw_if_index, t->policer_index, t->next_index); + return s; +} + +#define foreach_vnet_policer_error \ +_(TRANSMIT, "Packets Transmitted") \ +_(DROP, "Packets Dropped") + +typedef enum +{ +#define _(sym,str) VNET_POLICER_ERROR_##sym, + foreach_vnet_policer_error +#undef _ + VNET_POLICER_N_ERROR, +} vnet_policer_error_t; + +static char *vnet_policer_error_strings[] = { +#define _(sym,string) string, + foreach_vnet_policer_error +#undef _ +}; + +static_always_inline void +vnet_policer_mark (vlib_buffer_t * b, u8 dscp) +{ + ethernet_header_t *eh; + ip4_header_t *ip4h; + ip6_header_t *ip6h; + u16 type; + + eh = (ethernet_header_t *) b->data; + type = clib_net_to_host_u16 (eh->type); + + if (PREDICT_TRUE (type == ETHERNET_TYPE_IP4)) + { + ip4h = (ip4_header_t *) & (b->data[sizeof (ethernet_header_t)]);; + ip4h->tos &= IP4_NON_DSCP_BITS; + ip4h->tos |= dscp << IP4_DSCP_SHIFT; + ip4h->checksum = ip4_header_checksum (ip4h); + } + else + { + if (PREDICT_TRUE (type == ETHERNET_TYPE_IP6)) + { + ip6h = (ip6_header_t *) & (b->data[sizeof (ethernet_header_t)]); + ip6h->ip_version_traffic_class_and_flow_label &= + clib_host_to_net_u32 (IP6_NON_DSCP_BITS); + ip6h->ip_version_traffic_class_and_flow_label |= + clib_host_to_net_u32 (dscp << 
IP6_DSCP_SHIFT); + } + } +} + +static_always_inline + u8 vnet_policer_police (vlib_main_t * vm, + vlib_buffer_t * b, + u32 policer_index, + u64 time_in_policer_periods, + policer_result_e packet_color) +{ + u8 act; + u32 len; + u32 col; + policer_read_response_type_st *pol; + vnet_policer_main_t *pm = &vnet_policer_main; + + len = vlib_buffer_length_in_chain (vm, b); + pol = &pm->policers[policer_index]; + col = vnet_police_packet (pol, len, packet_color, time_in_policer_periods); + act = pol->action[col]; + if (PREDICT_TRUE (act == SSE2_QOS_ACTION_MARK_AND_TRANSMIT)) + vnet_policer_mark (b, pol->mark_dscp[col]); + + return act; +} + +/* + * Police every buffer in the frame. 'which' selects where the policer index + * comes from: the per-rx-interface table, the buffer opaque, or the opaque + * with fallback to the interface table when the opaque is ~0. Buffers whose + * action is SSE2_QOS_ACTION_DROP go to VNET_POLICER_NEXT_DROP, all others to + * VNET_POLICER_NEXT_TRANSMIT and are counted as transmitted. + * Returns frame->n_vectors. + */ +static inline uword +vnet_policer_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, vnet_policer_index_t which) +{ + u32 n_left_from, *from, *to_next; + vnet_policer_next_t next_index; + vnet_policer_main_t *pm = &vnet_policer_main; + u64 time_in_policer_periods; + u32 transmitted = 0; + + time_in_policer_periods = + clib_cpu_time_now () >> POLICER_TICKS_PER_PERIOD_SHIFT; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + u32 pi0 = 0, pi1 = 0; + u8 act0, act1; + + /* Prefetch next iteration.
*/ + { + vlib_buffer_t *b2, *b3; + + b2 = vlib_get_buffer (vm, from[2]); + b3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (b2, LOAD); + vlib_prefetch_buffer_header (b3, LOAD); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + next0 = VNET_POLICER_NEXT_TRANSMIT; + + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + next1 = VNET_POLICER_NEXT_TRANSMIT; + + + if (which == VNET_POLICER_INDEX_BY_SW_IF_INDEX) + { + pi0 = pm->policer_index_by_sw_if_index[sw_if_index0]; + pi1 = pm->policer_index_by_sw_if_index[sw_if_index1]; + } + + if (which == VNET_POLICER_INDEX_BY_OPAQUE) + { + pi0 = vnet_buffer (b0)->policer.index; + pi1 = vnet_buffer (b1)->policer.index; + } + + if (which == VNET_POLICER_INDEX_BY_EITHER) + { + pi0 = vnet_buffer (b0)->policer.index; + pi0 = (pi0 != ~0) ? pi0 : + pm->policer_index_by_sw_if_index[sw_if_index0]; + pi1 = vnet_buffer (b1)->policer.index; + pi1 = (pi1 != ~0) ?
pi1 : + pm->policer_index_by_sw_if_index[sw_if_index1]; + } + + act0 = vnet_policer_police (vm, b0, pi0, time_in_policer_periods, + POLICE_CONFORM /* no chaining */ ); + + act1 = vnet_policer_police (vm, b1, pi1, time_in_policer_periods, + POLICE_CONFORM /* no chaining */ ); + + if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP)) /* drop action */ + { + next0 = VNET_POLICER_NEXT_DROP; + b0->error = node->errors[VNET_POLICER_ERROR_DROP]; + } + else /* transmit or mark-and-transmit action */ + { + transmitted++; + } + + if (PREDICT_FALSE (act1 == SSE2_QOS_ACTION_DROP)) /* drop action */ + { + next1 = VNET_POLICER_NEXT_DROP; + b1->error = node->errors[VNET_POLICER_ERROR_DROP]; + } + else /* transmit or mark-and-transmit action */ + { + transmitted++; + } + + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + /* the trace formatter prints policer_index, so record it here too */ + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + vnet_policer_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->policer_index = pi0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + vnet_policer_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->policer_index = pi1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + u32 pi0 = 0; + u8 act0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + next0 = VNET_POLICER_NEXT_TRANSMIT; + + if (which == VNET_POLICER_INDEX_BY_SW_IF_INDEX) + pi0 = pm->policer_index_by_sw_if_index[sw_if_index0]; + + if (which == VNET_POLICER_INDEX_BY_OPAQUE) + pi0 = vnet_buffer (b0)->policer.index; + + if (which ==
VNET_POLICER_INDEX_BY_EITHER) + { + pi0 = vnet_buffer (b0)->policer.index; + pi0 = (pi0 != ~0) ? pi0 : + pm->policer_index_by_sw_if_index[sw_if_index0]; + } + + act0 = vnet_policer_police (vm, b0, pi0, time_in_policer_periods, + POLICE_CONFORM /* no chaining */ ); + + if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP)) /* drop action */ + { + next0 = VNET_POLICER_NEXT_DROP; + b0->error = node->errors[VNET_POLICER_ERROR_DROP]; + } + else /* transmit or mark-and-transmit action */ + { + transmitted++; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + vnet_policer_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->policer_index = pi0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + VNET_POLICER_ERROR_TRANSMIT, transmitted); + return frame->n_vectors; +} + +uword +vnet_policer_by_sw_if_index (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return vnet_policer_inline (vm, node, frame, + VNET_POLICER_INDEX_BY_SW_IF_INDEX); +} + +uword +vnet_policer_by_opaque (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return vnet_policer_inline (vm, node, frame, VNET_POLICER_INDEX_BY_OPAQUE); +} + +uword +vnet_policer_by_either (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return vnet_policer_inline (vm, node, frame, VNET_POLICER_INDEX_BY_EITHER); +} + +void +vnet_policer_node_funcs_reference (void) +{ +} + + +#define TEST_CODE 1 + +#ifdef TEST_CODE + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (policer_by_sw_if_index_node, static) = { + .function = vnet_policer_by_sw_if_index, + .name = "policer-by-sw-if-index", +
.vector_size = sizeof (u32), + .format_trace = format_policer_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(vnet_policer_error_strings), + .error_strings = vnet_policer_error_strings, + + .n_next_nodes = VNET_POLICER_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [VNET_POLICER_NEXT_TRANSMIT] = "ethernet-input", + [VNET_POLICER_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (policer_by_sw_if_index_node, + vnet_policer_by_sw_if_index); +/* *INDENT-ON* */ + + +int +test_policer_add_del (u32 rx_sw_if_index, u8 * config_name, int is_add) +{ + vnet_policer_main_t *pm = &vnet_policer_main; + policer_read_response_type_st *template; + policer_read_response_type_st *policer; + vnet_hw_interface_t *rxhi; + uword *p; + + rxhi = vnet_get_sup_hw_interface (pm->vnet_main, rx_sw_if_index); + + /* Make sure caller didn't pass a vlan subif, etc. */ + if (rxhi->sw_if_index != rx_sw_if_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (is_add) + { + + p = hash_get_mem (pm->policer_config_by_name, config_name); + + if (p == 0) + return -2; + + template = pool_elt_at_index (pm->policer_templates, p[0]); + + vnet_hw_interface_rx_redirect_to_node + (pm->vnet_main, rxhi->hw_if_index, policer_by_sw_if_index_node.index); + + pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES); + + policer[0] = template[0]; + + vec_validate (pm->policer_index_by_sw_if_index, rx_sw_if_index); + pm->policer_index_by_sw_if_index[rx_sw_if_index] + = policer - pm->policers; + } + else + { + u32 pi; + vnet_hw_interface_rx_redirect_to_node (pm->vnet_main, + rxhi->hw_if_index, + ~0 /* disable */ ); + + pi = pm->policer_index_by_sw_if_index[rx_sw_if_index]; + pm->policer_index_by_sw_if_index[rx_sw_if_index] = ~0; + pool_put_index (pm->policers, pi); + } + + return 0; +} + +static clib_error_t * +test_policer_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_policer_main_t *pm = 
&vnet_policer_main; + unformat_input_t _line_input, *line_input = &_line_input; + u32 rx_sw_if_index; + int rv; + u8 *config_name = 0; + int rx_set = 0; + int is_add = 1; + int is_show = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "intfc %U", unformat_vnet_sw_interface, + pm->vnet_main, &rx_sw_if_index)) + rx_set = 1; + else if (unformat (line_input, "show")) + is_show = 1; + else if (unformat (line_input, "policer %s", &config_name)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else + break; + } + + if (rx_set == 0) + return clib_error_return (0, "interface not set"); + + if (is_show) + { + u32 pi = pm->policer_index_by_sw_if_index[rx_sw_if_index]; + policer_read_response_type_st *policer; + policer = pool_elt_at_index (pm->policers, pi); + + vlib_cli_output (vm, "%U", format_policer_instance, policer); + return 0; + } + + if (is_add && config_name == 0) + { + return clib_error_return (0, "policer config name required"); + } + + rv = test_policer_add_del (rx_sw_if_index, config_name, is_add); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return + (0, "WARNING: vnet_vnet_policer_add_del returned %d", rv); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (test_patch_command, static) = { + .path = "test policer", + .short_help = + "intfc policer [del]", + .function = test_policer_command_fn, +}; +/* *INDENT-ON* */ + +#endif /* TEST_CODE */ + + +typedef struct +{ + u32 sw_if_index; + u32 next_index; + u32 table_index; + u32 offset; + u32 policer_index; +} policer_classify_trace_t; + +static u8 * +format_policer_classify_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + policer_classify_trace_t *t = va_arg (*args, 
policer_classify_trace_t *); + + s = format (s, "POLICER_CLASSIFY: sw_if_index %d next %d table %d offset %d" + " policer_index %d", + t->sw_if_index, t->next_index, t->table_index, t->offset, + t->policer_index); + return s; +} + +#define foreach_policer_classify_error \ +_(MISS, "Policer classify misses") \ +_(HIT, "Policer classify hits") \ +_(CHAIN_HIT, "Polcier classify hits after chain walk") \ +_(DROP, "Policer classify action drop") + +typedef enum +{ +#define _(sym,str) POLICER_CLASSIFY_ERROR_##sym, + foreach_policer_classify_error +#undef _ + POLICER_CLASSIFY_N_ERROR, +} policer_classify_error_t; + +static char *policer_classify_error_strings[] = { +#define _(sym,string) string, + foreach_policer_classify_error +#undef _ +}; + +static inline uword +policer_classify_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + policer_classify_table_id_t tid) +{ + u32 n_left_from, *from, *to_next; + policer_classify_next_index_t next_index; + policer_classify_main_t *pcm = &policer_classify_main; + vnet_classify_main_t *vcm = pcm->vnet_classify_main; + f64 now = vlib_time_now (vm); + u32 hits = 0; + u32 misses = 0; + u32 chain_hits = 0; + u32 drop = 0; + u32 n_next_nodes; + u64 time_in_policer_periods; + + time_in_policer_periods = + clib_cpu_time_now () >> POLICER_TICKS_PER_PERIOD_SHIFT; + + n_next_nodes = node->n_next_nodes; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + /* First pass: compute hashes */ + while (n_left_from > 2) + { + vlib_buffer_t *b0, *b1; + u32 bi0, bi1; + u8 *h0, *h1; + u32 sw_if_index0, sw_if_index1; + u32 table_index0, table_index1; + vnet_classify_table_t *t0, *t1; + + /* Prefetch next iteration */ + { + vlib_buffer_t *p1, *p2; + + p1 = vlib_get_buffer (vm, from[1]); + p2 = vlib_get_buffer (vm, from[2]); + + vlib_prefetch_buffer_header (p1, STORE); + CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, 
CLIB_CACHE_LINE_BYTES, STORE); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + + bi1 = from[1]; + b1 = vlib_get_buffer (vm, bi1); + h1 = b1->data; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + table_index0 = + pcm->classify_table_index_by_sw_if_index[tid][sw_if_index0]; + + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + table_index1 = + pcm->classify_table_index_by_sw_if_index[tid][sw_if_index1]; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + + t1 = pool_elt_at_index (vcm->tables, table_index1); + + vnet_buffer (b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash); + + vnet_buffer (b1)->l2_classify.hash = + vnet_classify_hash_packet (t1, (u8 *) h1); + + vnet_classify_prefetch_bucket (t1, vnet_buffer (b1)->l2_classify.hash); + + vnet_buffer (b0)->l2_classify.table_index = table_index0; + + vnet_buffer (b1)->l2_classify.table_index = table_index1; + + from += 2; + n_left_from -= 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t *b0; + u32 bi0; + u8 *h0; + u32 sw_if_index0; + u32 table_index0; + vnet_classify_table_t *t0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + table_index0 = + pcm->classify_table_index_by_sw_if_index[tid][sw_if_index0]; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + vnet_buffer (b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_buffer (b0)->l2_classify.table_index = table_index0; + vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash); + + from++; + n_left_from--; + } + + next_index = node->cached_next_index; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Not enough load/store slots to dual 
loop... */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = POLICER_CLASSIFY_NEXT_INDEX_DROP; + u32 table_index0; + vnet_classify_table_t *t0; + vnet_classify_entry_t *e0; + u64 hash0; + u8 *h0; + u8 act0; + + /* Stride 3 seems to work best */ + if (PREDICT_TRUE (n_left_from > 3)) + { + vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]); + vnet_classify_table_t *tp1; + u32 table_index1; + u64 phash1; + + table_index1 = vnet_buffer (p1)->l2_classify.table_index; + + if (PREDICT_TRUE (table_index1 != ~0)) + { + tp1 = pool_elt_at_index (vcm->tables, table_index1); + phash1 = vnet_buffer (p1)->l2_classify.hash; + vnet_classify_prefetch_entry (tp1, phash1); + } + } + + /* Speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + table_index0 = vnet_buffer (b0)->l2_classify.table_index; + e0 = 0; + t0 = 0; + + if (tid == POLICER_CLASSIFY_TABLE_L2) + { + /* Feature bitmap update */ + vnet_buffer (b0)->l2.feature_bitmap &= + ~L2INPUT_FEAT_POLICER_CLAS; + /* Determine the next node */ + next0 = + feat_bitmap_get_next_node_index (pcm->feat_next_node_index, + vnet_buffer (b0)-> + l2.feature_bitmap); + } + else + vnet_get_config_data (pcm->vnet_config_main[tid], + &b0->current_config_index, &next0, + /* # bytes of config data */ 0); + + vnet_buffer (b0)->l2_classify.opaque_index = ~0; + + if (PREDICT_TRUE (table_index0 != ~0)) + { + hash0 = vnet_buffer (b0)->l2_classify.hash; + t0 = pool_elt_at_index (vcm->tables, table_index0); + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now); + + if (e0) + { + act0 = vnet_policer_police (vm, + b0, + e0->next_index, + time_in_policer_periods, + e0->opaque_index); + if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP)) + { + next0 = POLICER_CLASSIFY_NEXT_INDEX_DROP; + b0->error = node->errors[POLICER_CLASSIFY_ERROR_DROP]; + drop++; 
+ } + hits++; + } + else + { + while (1) + { + if (PREDICT_TRUE (t0->next_table_index != ~0)) + { + t0 = pool_elt_at_index (vcm->tables, + t0->next_table_index); + } + else + { + next0 = (t0->miss_next_index < n_next_nodes) ? + t0->miss_next_index : next0; + misses++; + break; + } + + hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); + e0 = + vnet_classify_find_entry (t0, (u8 *) h0, hash0, now); + if (e0) + { + act0 = vnet_policer_police (vm, + b0, + e0->next_index, + time_in_policer_periods, + e0->opaque_index); + if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP)) + { + next0 = POLICER_CLASSIFY_NEXT_INDEX_DROP; + b0->error = + node->errors[POLICER_CLASSIFY_ERROR_DROP]; + drop++; + } + hits++; + chain_hits++; + break; + } + } + } + } + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + policer_classify_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + t->next_index = next0; + t->table_index = t0 ? t0 - vcm->tables : ~0; + t->offset = (e0 && t0) ? vnet_classify_get_offset (t0, e0) : ~0; + t->policer_index = e0 ? 
e0->next_index : ~0; + } + + /* Verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + POLICER_CLASSIFY_ERROR_MISS, misses); + vlib_node_increment_counter (vm, node->node_index, + POLICER_CLASSIFY_ERROR_HIT, hits); + vlib_node_increment_counter (vm, node->node_index, + POLICER_CLASSIFY_ERROR_CHAIN_HIT, chain_hits); + vlib_node_increment_counter (vm, node->node_index, + POLICER_CLASSIFY_ERROR_DROP, drop); + + return frame->n_vectors; +} + +static uword +ip4_policer_classify (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return policer_classify_inline (vm, node, frame, + POLICER_CLASSIFY_TABLE_IP4); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_policer_classify_node) = { + .function = ip4_policer_classify, + .name = "ip4-policer-classify", + .vector_size = sizeof (u32), + .format_trace = format_policer_classify_trace, + .n_errors = ARRAY_LEN(policer_classify_error_strings), + .error_strings = policer_classify_error_strings, + .n_next_nodes = POLICER_CLASSIFY_NEXT_INDEX_N_NEXT, + .next_nodes = { + [POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_policer_classify_node, ip4_policer_classify); +/* *INDENT-ON* */ + +static uword +ip6_policer_classify (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return policer_classify_inline (vm, node, frame, + POLICER_CLASSIFY_TABLE_IP6); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_policer_classify_node) = { + .function = ip6_policer_classify, + .name = "ip6-policer-classify", + .vector_size = sizeof (u32), + .format_trace = format_policer_classify_trace, + .n_errors = ARRAY_LEN(policer_classify_error_strings), + .error_strings = policer_classify_error_strings, + .n_next_nodes = 
POLICER_CLASSIFY_NEXT_INDEX_N_NEXT, + .next_nodes = { + [POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_policer_classify_node, ip6_policer_classify); +/* *INDENT-ON* */ + +static uword +l2_policer_classify (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return policer_classify_inline (vm, node, frame, POLICER_CLASSIFY_TABLE_L2); +} + +VLIB_REGISTER_NODE (l2_policer_classify_node) = +{ + .function = l2_policer_classify,.name = "l2-policer-classify",.vector_size = + sizeof (u32),.format_trace = format_policer_classify_trace,.n_errors = + ARRAY_LEN (policer_classify_error_strings),.error_strings = + policer_classify_error_strings,.n_next_nodes = + POLICER_CLASSIFY_NEXT_INDEX_N_NEXT,.next_nodes = + { + [POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",} +,}; + +VLIB_NODE_FUNCTION_MULTIARCH (l2_policer_classify_node, l2_policer_classify); + + +static clib_error_t * +policer_classify_init (vlib_main_t * vm) +{ + policer_classify_main_t *pcm = &policer_classify_main; + + pcm->vlib_main = vm; + pcm->vnet_main = vnet_get_main (); + pcm->vnet_classify_main = &vnet_classify_main; + + /* Initialize L2 feature next-node indexes */ + feat_bitmap_init_next_nodes (vm, + l2_policer_classify_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names (), + pcm->feat_next_node_index); + + return 0; +} + +VLIB_INIT_FUNCTION (policer_classify_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/policer/police.h b/src/vnet/policer/police.h new file mode 100644 index 00000000000..34bcf9ca5a8 --- /dev/null +++ b/src/vnet/policer/police.h @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __POLICE_H__ +#define __POLICE_H__ + +typedef enum +{ + POLICE_CONFORM = 0, + POLICE_EXCEED = 1, + POLICE_VIOLATE = 2, +} policer_result_e; + +// This is the hardware representation of the policer. +// To be multithread-safe, the policer is accessed through a spin-lock +// on the lock field. (For a policer update operation, 24B needs to be +// modified and this would be a challenge to do with atomic instructions.) +// The structure is padded so that no other data is put into the same +// 64B cache-line. This reduces cache-thrashing between threads. +// +// A note on scale: +// The HW TSC tick is roughly one CPU clock cycle. +// This is shifted to create a larger period, with a goal to be around 50usec. +// The period time will vary based on CPU clock speed. +// CPU speeds of 1Ghz to 8Ghz are targeted. +// The shift amount is a constant 17 bits, resulting in a period between +// 16usec (8Ghz CPU) and 131usec (1Ghz CPU). +// The token_per_period computation takes into account the clock speed. +// +// The 32-bit bucket/limit supports about 850ms of burst on a 40GE port, +// or 340ms on a 100GE port. If a larger burst is configured, then the +// programmed value is simply capped at 2^32-1. If we needed to support +// more than that, the bucket and limit fields could be expanded. +// +// tokens_per_period should be > 1000 to support 0.1% granularity. +// To support lower rates (which would not meet this requirement), the packet +// length, bucket, and limit values can be scaled.
The scale is a power of 2 +// so the multiplication can be implemented as a shift. The control plane +// computes the shift amount to be the largest possible that still supports the +// burst size. This makes the rate accuracy as high as possible. +// +// The 64-bit last_update_time supports a 4Ghz CPU without rollover for 100 years +// +// The lock field should be used for a spin-lock on the struct. + +#define POLICER_TICKS_PER_PERIOD_SHIFT 17 +#define POLICER_TICKS_PER_PERIOD (1 << POLICER_TICKS_PER_PERIOD_SHIFT) + +typedef struct +{ + + u32 lock; // for exclusive access to the struct + + u32 single_rate; // 1 = single rate policer, 0 = two rate policer + u32 color_aware; // for hierarchical policing + u32 scale; // power-of-2 shift amount for lower rates + u8 action[3]; + u8 mark_dscp[3]; + u8 pad[2]; + + // Fields are marked as 2R if they are only used for a 2-rate policer, + // and MOD if they are modified as part of the update operation. + // 1 token = 1 byte. + + u32 cir_tokens_per_period; // # of tokens for each period + u32 pir_tokens_per_period; // 2R + + u32 current_limit; + u32 current_bucket; // MOD + u32 extended_limit; + u32 extended_bucket; // MOD + + u64 last_update_time; // MOD + u64 pad64; + +} policer_read_response_type_st; + +// Police one packet of packet_length bytes at 'time' (in policer periods). +// Both buckets are refilled for the periods elapsed since last_update_time +// and capped at their limits; the packet is then coloured and the buckets +// are debited accordingly. packet_color is only consulted when color_aware +// is set. Note that this function does not itself take the lock field. +static inline policer_result_e +vnet_police_packet (policer_read_response_type_st * policer, + u32 packet_length, + policer_result_e packet_color, u64 time) +{ + u64 n_periods; + u64 current_tokens, extended_tokens; + policer_result_e result; + + // Scale packet length to support a wide range of speeds + packet_length = packet_length << policer->scale; + + // Compute the number of policer periods that have passed since the last + // operation. + n_periods = time - policer->last_update_time; + policer->last_update_time = time; + + // Since there is no background last-update-time adjustment, n_periods + // could grow large if the policer is idle for a long time.
This could + // cause a 64-bit overflow when computing tokens_per_period * num_periods. + // It will overflow if log2(n_periods) + log2(tokens_per_period) > 64. + // + // To mitigate this, the policer configuration algorithm ensures that + // tokens_per_period is less than 2^22, i.e. this is a 22 bit value not + // a 32-bit value. Thus overflow will only occur if log2(n_periods) > 64-22 or + // 42. 2^42 min-sized periods is 16us * 2^42, or 2 years. So this can + // rarely occur. If overflow does happen, the only effect will be that + // fewer tokens than the max burst will be added to the bucket for this + // packet. This constraint on tokens_per_period lets the ucode omit + // code to dynamically check for or prevent the overflow. + + if (policer->single_rate) + { + + // Compute number of tokens for this time period + current_tokens = + policer->current_bucket + n_periods * policer->cir_tokens_per_period; + if (current_tokens > policer->current_limit) + { + current_tokens = policer->current_limit; + } + + extended_tokens = + policer->extended_bucket + n_periods * policer->cir_tokens_per_period; + if (extended_tokens > policer->extended_limit) + { + extended_tokens = policer->extended_limit; + } + + // Determine color + + if ((!policer->color_aware || (packet_color == POLICE_CONFORM)) + && (current_tokens >= packet_length)) + { + policer->current_bucket = current_tokens - packet_length; + policer->extended_bucket = extended_tokens - packet_length; + result = POLICE_CONFORM; + } + else if ((!policer->color_aware || (packet_color != POLICE_VIOLATE)) + && (extended_tokens >= packet_length)) + { + policer->current_bucket = current_tokens; + policer->extended_bucket = extended_tokens - packet_length; + result = POLICE_EXCEED; + } + else + { + policer->current_bucket = current_tokens; + policer->extended_bucket = extended_tokens; + result = POLICE_VIOLATE; + } + + } + else + { + // Two-rate policer + + // Compute number of tokens for this time period + current_tokens = +
policer->current_bucket + n_periods * policer->cir_tokens_per_period; + extended_tokens = + policer->extended_bucket + n_periods * policer->pir_tokens_per_period; + if (current_tokens > policer->current_limit) + { + current_tokens = policer->current_limit; + } + if (extended_tokens > policer->extended_limit) + { + extended_tokens = policer->extended_limit; + } + + // Determine color + + if ((policer->color_aware && (packet_color == POLICE_VIOLATE)) + || (extended_tokens < packet_length)) + { + policer->current_bucket = current_tokens; + policer->extended_bucket = extended_tokens; + result = POLICE_VIOLATE; + } + else if ((policer->color_aware && (packet_color == POLICE_EXCEED)) + || (current_tokens < packet_length)) + { + policer->current_bucket = current_tokens; + policer->extended_bucket = extended_tokens - packet_length; + result = POLICE_EXCEED; + } + else + { + policer->current_bucket = current_tokens - packet_length; + policer->extended_bucket = extended_tokens - packet_length; + result = POLICE_CONFORM; + } + } + return result; +} + +#endif // __POLICE_H__ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/policer/policer.c b/src/vnet/policer/policer.c new file mode 100644 index 00000000000..290a6af57e2 --- /dev/null +++ b/src/vnet/policer/policer.c @@ -0,0 +1,528 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include + +clib_error_t * +policer_add_del (vlib_main_t * vm, + u8 * name, + sse2_qos_pol_cfg_params_st * cfg, + u32 * policer_index, u8 is_add) +{ + vnet_policer_main_t *pm = &vnet_policer_main; + policer_read_response_type_st test_policer; + policer_read_response_type_st *policer; + uword *p; + u32 pi; + int rv; + + p = hash_get_mem (pm->policer_config_by_name, name); + + if (is_add == 0) + { + if (p == 0) + { + vec_free (name); + return clib_error_return (0, "No such policer configuration"); + } + hash_unset_mem (pm->policer_config_by_name, name); + hash_unset_mem (pm->policer_index_by_name, name); + vec_free (name); + return 0; + } + + if (p != 0) + { + vec_free (name); + return clib_error_return (0, "Policer already exists"); + } + + /* Vet the configuration before adding it to the table */ + rv = sse2_pol_logical_2_physical (cfg, &test_policer); + + if (rv == 0) + { + policer_read_response_type_st *pp; + sse2_qos_pol_cfg_params_st *cp; + + pool_get (pm->configs, cp); + pool_get (pm->policer_templates, pp); + + ASSERT (cp - pm->configs == pp - pm->policer_templates); + + clib_memcpy (cp, cfg, sizeof (*cp)); + clib_memcpy (pp, &test_policer, sizeof (*pp)); + + hash_set_mem (pm->policer_config_by_name, name, cp - pm->configs); + pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES); + policer[0] = pp[0]; + pi = policer - pm->policers; + hash_set_mem (pm->policer_index_by_name, name, pi); + *policer_index = pi; + } + else + { + vec_free (name); + return clib_error_return (0, "Config failed sanity check"); + } + + return 0; +} + +u8 * +format_policer_instance (u8 * s, va_list * va) +{ + policer_read_response_type_st *i + = va_arg (*va, policer_read_response_type_st *); + + s = format (s, "policer at %llx: %s rate, %s color-aware\n", + i, i->single_rate ? "single" : "dual", + i->color_aware ? 
"is" : "not"); + s = format (s, "cir %u tok/period, pir %u tok/period, scale %u\n", + i->cir_tokens_per_period, i->pir_tokens_per_period, i->scale); + s = format (s, "cur lim %u, cur bkt %u, ext lim %u, ext bkt %u\n", + i->current_limit, + i->current_bucket, i->extended_limit, i->extended_bucket); + s = format (s, "last update %llu\n", i->last_update_time); + return s; +} + +static u8 * +format_policer_round_type (u8 * s, va_list * va) +{ + sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (c->rnd_type == SSE2_QOS_ROUND_TO_CLOSEST) + s = format (s, "closest"); + else if (c->rnd_type == SSE2_QOS_ROUND_TO_UP) + s = format (s, "up"); + else if (c->rnd_type == SSE2_QOS_ROUND_TO_DOWN) + s = format (s, "down"); + else + s = format (s, "ILLEGAL"); + return s; +} + + +static u8 * +format_policer_rate_type (u8 * s, va_list * va) +{ + sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (c->rate_type == SSE2_QOS_RATE_KBPS) + s = format (s, "kbps"); + else if (c->rate_type == SSE2_QOS_RATE_PPS) + s = format (s, "pps"); + else + s = format (s, "ILLEGAL"); + return s; +} + +static u8 * +format_policer_type (u8 * s, va_list * va) +{ + sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (c->rfc == SSE2_QOS_POLICER_TYPE_1R2C) + s = format (s, "1r2c"); + + else if (c->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697) + s = format (s, "1r3c"); + + else if (c->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698) + s = format (s, "2r3c-2698"); + + else if (c->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115) + s = format (s, "2r3c-4115"); + + else if (c->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1) + s = format (s, "2r3c-mef5cf1"); + else + s = format (s, "ILLEGAL"); + return s; +} + +static u8 * +format_dscp (u8 * s, va_list * va) +{ + u32 i = va_arg (*va, u32); + char *t = 0; + + switch (i) + { +#define _(v,f,str) case VNET_DSCP_##f: t = str; break; + foreach_vnet_dscp +#undef _ + default: + return format (s, 
"ILLEGAL"); + } + s = format (s, "%s", t); + return s; +} + +static u8 * +format_policer_action_type (u8 * s, va_list * va) +{ + sse2_qos_pol_action_params_st *a + = va_arg (*va, sse2_qos_pol_action_params_st *); + + if (a->action_type == SSE2_QOS_ACTION_DROP) + s = format (s, "drop"); + else if (a->action_type == SSE2_QOS_ACTION_TRANSMIT) + s = format (s, "transmit"); + else if (a->action_type == SSE2_QOS_ACTION_MARK_AND_TRANSMIT) + s = format (s, "mark-and-transmit %U", format_dscp, a->dscp); + else + s = format (s, "ILLEGAL"); + return s; +} + +u8 * +format_policer_config (u8 * s, va_list * va) +{ + sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + s = format (s, "type %U cir %u eir %u cb %u eb %u\n", + format_policer_type, c, + c->rb.kbps.cir_kbps, + c->rb.kbps.eir_kbps, c->rb.kbps.cb_bytes, c->rb.kbps.eb_bytes); + s = format (s, "rate type %U, round type %U\n", + format_policer_rate_type, c, format_policer_round_type, c); + s = format (s, "conform action %U, exceed action %U, violate action %U\n", + format_policer_action_type, &c->conform_action, + format_policer_action_type, &c->exceed_action, + format_policer_action_type, &c->violate_action); + return s; +} + +static uword +unformat_policer_type (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (!unformat (input, "type")) + return 0; + + if (unformat (input, "1r2c")) + c->rfc = SSE2_QOS_POLICER_TYPE_1R2C; + else if (unformat (input, "1r3c")) + c->rfc = SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697; + else if (unformat (input, "2r3c-2698")) + c->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698; + else if (unformat (input, "2r3c-4115")) + c->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115; + else if (unformat (input, "2r3c-mef5cf1")) + c->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1; + else + return 0; + return 1; +} + +static uword +unformat_policer_round_type (unformat_input_t * input, va_list * va) +{ + 
sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (!unformat (input, "round")) + return 0; + + if (unformat (input, "closest")) + c->rnd_type = SSE2_QOS_ROUND_TO_CLOSEST; + else if (unformat (input, "up")) + c->rnd_type = SSE2_QOS_ROUND_TO_UP; + else if (unformat (input, "down")) + c->rnd_type = SSE2_QOS_ROUND_TO_DOWN; + else + return 0; + return 1; +} + +static uword +unformat_policer_rate_type (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (!unformat (input, "rate")) + return 0; + + if (unformat (input, "kbps")) + c->rate_type = SSE2_QOS_RATE_KBPS; + else if (unformat (input, "pps")) + c->rate_type = SSE2_QOS_RATE_PPS; + else + return 0; + return 1; +} + +static uword +unformat_policer_cir (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (unformat (input, "cir %u", &c->rb.kbps.cir_kbps)) + return 1; + return 0; +} + +static uword +unformat_policer_eir (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (unformat (input, "eir %u", &c->rb.kbps.eir_kbps)) + return 1; + return 0; +} + +static uword +unformat_policer_cb (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (unformat (input, "cb %u", &c->rb.kbps.cb_bytes)) + return 1; + return 0; +} + +static uword +unformat_policer_eb (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (unformat (input, "eb %u", &c->rb.kbps.eb_bytes)) + return 1; + return 0; +} + +static uword +unformat_dscp (unformat_input_t * input, va_list * va) +{ + u8 *r = va_arg (*va, u8 *); + + if (0); +#define _(v,f,str) else if (unformat (input, str)) *r = VNET_DSCP_##f; + foreach_vnet_dscp +#undef _ + else + return 0; + 
return 1; +} + +static uword +unformat_policer_action_type (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_action_params_st *a + = va_arg (*va, sse2_qos_pol_action_params_st *); + + if (unformat (input, "drop")) + a->action_type = SSE2_QOS_ACTION_DROP; + else if (unformat (input, "transmit")) + a->action_type = SSE2_QOS_ACTION_TRANSMIT; + else if (unformat (input, "mark-and-transmit %U", unformat_dscp, &a->dscp)) + a->action_type = SSE2_QOS_ACTION_MARK_AND_TRANSMIT; + else + return 0; + return 1; +} + +static uword +unformat_policer_action (unformat_input_t * input, va_list * va) +{ + sse2_qos_pol_cfg_params_st *c = va_arg (*va, sse2_qos_pol_cfg_params_st *); + + if (unformat (input, "conform-action %U", unformat_policer_action_type, + &c->conform_action)) + return 1; + else if (unformat (input, "exceed-action %U", unformat_policer_action_type, + &c->exceed_action)) + return 1; + else if (unformat (input, "violate-action %U", unformat_policer_action_type, + &c->violate_action)) + return 1; + return 0; +} + +static uword +unformat_policer_classify_next_index (unformat_input_t * input, va_list * va) +{ + u32 *r = va_arg (*va, u32 *); + vnet_policer_main_t *pm = &vnet_policer_main; + uword *p; + u8 *match_name = 0; + + if (unformat (input, "%s", &match_name)) + ; + else + return 0; + + p = hash_get_mem (pm->policer_index_by_name, match_name); + + if (p == 0) + return 0; + + *r = p[0]; + + return 1; +} + +static uword +unformat_policer_classify_precolor (unformat_input_t * input, va_list * va) +{ + u32 *r = va_arg (*va, u32 *); + + if (unformat (input, "conform-color")) + *r = POLICE_CONFORM; + else if (unformat (input, "exceed-color")) + *r = POLICE_EXCEED; + else + return 0; + + return 1; +} + +#define foreach_config_param \ +_(eb) \ +_(cb) \ +_(eir) \ +_(cir) \ +_(rate_type) \ +_(round_type) \ +_(type) \ +_(action) + +static clib_error_t * +configure_policer_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + 
sse2_qos_pol_cfg_params_st c; + unformat_input_t _line_input, *line_input = &_line_input; + u8 is_add = 1; + u8 *name = 0; + u32 pi; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + memset (&c, 0, sizeof (c)); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "name %s", &name)) + ; + else if (unformat (line_input, "color-aware")) + c.color_aware = 1; + +#define _(a) else if (unformat (line_input, "%U", unformat_policer_##a, &c)) ; + foreach_config_param +#undef _ + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + return policer_add_del (vm, name, &c, &pi, is_add); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (configure_policer_command, static) = { + .path = "configure policer", + .short_help = "configure policer name ", + .function = configure_policer_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_policer_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_policer_main_t *pm = &vnet_policer_main; + hash_pair_t *p; + u32 pool_index; + u8 *match_name = 0; + u8 *name; + sse2_qos_pol_cfg_params_st *config; + policer_read_response_type_st *templ; + + (void) unformat (input, "name %s", &match_name); + + /* *INDENT-OFF* */ + hash_foreach_pair (p, pm->policer_config_by_name, + ({ + name = (u8 *) p->key; + if (match_name == 0 || !strcmp((char *) name, (char *) match_name)) + { + pool_index = p->value[0]; + config = pool_elt_at_index (pm->configs, pool_index); + templ = pool_elt_at_index (pm->policer_templates, pool_index); + vlib_cli_output (vm, "Name \"%s\" %U ", + name, format_policer_config, config); + vlib_cli_output (vm, "Template %U", + format_policer_instance, templ); + vlib_cli_output (vm, "-----------"); + } + })); + /* *INDENT-ON* */ + return 0; +} + + 
+/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_policer_command, static) = { + .path = "show policer", + .short_help = "show policer [name]", + .function = show_policer_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +policer_init (vlib_main_t * vm) +{ + vnet_policer_main_t *pm = &vnet_policer_main; + void vnet_policer_node_funcs_reference (void); + + vnet_policer_node_funcs_reference (); + + pm->vlib_main = vm; + pm->vnet_main = vnet_get_main (); + + pm->policer_config_by_name = hash_create_string (0, sizeof (uword)); + pm->policer_index_by_name = hash_create_string (0, sizeof (uword)); + + vnet_classify_register_unformat_policer_next_index_fn + (unformat_policer_classify_next_index); + vnet_classify_register_unformat_opaque_index_fn + (unformat_policer_classify_precolor); + + return 0; +} + +VLIB_INIT_FUNCTION (policer_init); + + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/policer/policer.h b/src/vnet/policer/policer.h new file mode 100644 index 00000000000..8e2d7c79b7c --- /dev/null +++ b/src/vnet/policer/policer.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_policer_h__ +#define __included_policer_h__ + +#include +#include + +#include +#include + +typedef struct +{ + /* policer pool, aligned */ + policer_read_response_type_st *policers; + + /* config + template h/w policer instance parallel pools */ + sse2_qos_pol_cfg_params_st *configs; + policer_read_response_type_st *policer_templates; + + /* Config by name hash */ + uword *policer_config_by_name; + + /* Policer by name hash */ + uword *policer_index_by_name; + + /* Policer by sw_if_index vector */ + u32 *policer_index_by_sw_if_index; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} vnet_policer_main_t; + +vnet_policer_main_t vnet_policer_main; + +typedef enum +{ + VNET_POLICER_INDEX_BY_SW_IF_INDEX, + VNET_POLICER_INDEX_BY_OPAQUE, + VNET_POLICER_INDEX_BY_EITHER, +} vnet_policer_index_t; + +typedef enum +{ + VNET_POLICER_NEXT_TRANSMIT, + VNET_POLICER_NEXT_DROP, + VNET_POLICER_N_NEXT, +} vnet_policer_next_t; + +#define foreach_vnet_dscp \ + _(0 , CS0, "CS0") \ + _(8 , CS1, "CS1") \ + _(10, AF11, "AF11") \ + _(12, AF12, "AF12") \ + _(14, AF13, "AF13") \ + _(16, CS2, "CS2") \ + _(18, AF21, "AF21") \ + _(20, AF22, "AF22") \ + _(22, AF23, "AF23") \ + _(24, CS3, "CS3") \ + _(26, AF31, "AF31") \ + _(28, AF32, "AF32") \ + _(30, AF33, "AF33") \ + _(32, CS4, "CS4") \ + _(34, AF41, "AF41") \ + _(36, AF42, "AF42") \ + _(38, AF43, "AF43") \ + _(40, CS5, "CS5") \ + _(46, EF, "EF") \ + _(48, CS6, "CS6") \ + _(50, CS7, "CS7") + +typedef enum +{ +#define _(v,f,str) VNET_DSCP_##f = v, + foreach_vnet_dscp +#undef _ +} vnet_dscp_t; + +u8 *format_policer_instance (u8 * s, va_list * va); +clib_error_t *policer_add_del (vlib_main_t * vm, + u8 * name, + sse2_qos_pol_cfg_params_st * cfg, + u32 * policer_index, u8 is_add); + +#endif /* __included_policer_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/policer/xlate.c 
b/src/vnet/policer/xlate.c new file mode 100644 index 00000000000..74a6eb23d0a --- /dev/null +++ b/src/vnet/policer/xlate.c @@ -0,0 +1,1505 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#define INTERNAL_SS 1 + +/* debugs */ +#define SSE2_QOS_DEBUG_ERROR(msg, args...) \ + fformat(stderr, msg "\n", ##args); + +#define SSE2_QOS_DEBUG_INFO(msg, args...) \ + fformat(stderr, msg "\n", ##args); + + +#define SSE2_QOS_TR_ERR(TpParms...) +// { +// } + +#define SSE2_QOS_TR_INFO(TpParms...) 

/*
 * Two-argument minimum / maximum, defined only when the including
 * environment has not provided them. Each argument appears twice in the
 * expansion, so do not pass expressions with side effects.
 */
#ifndef MIN
#define MIN(x,y)            (((x)<(y))?(x):(y))
#endif

#ifndef MAX
#define MAX(x,y)            (((x)>(y))?(x):(y))
#endif

/*
 * Field layout constants for the IPE policer "full write request".
 * Each field has an _OFFSET, a _MASK and a _SHIFT.
 * NOTE(review): the _MASK values (1, 2, 4, 5, 7, 8, 11, 31, 32) look like
 * field widths in bits rather than bit masks, and _OFFSET like a byte
 * offset - confirm against sse_ipe_desc_fmt.h.
 */
#define IPE_POLICER_FULL_WRITE_REQUEST_M40AH_OFFSET 0
#define IPE_POLICER_FULL_WRITE_REQUEST_M40AH_MASK 8
#define IPE_POLICER_FULL_WRITE_REQUEST_M40AH_SHIFT 24

#define IPE_POLICER_FULL_WRITE_REQUEST_TYPE_OFFSET 2
#define IPE_POLICER_FULL_WRITE_REQUEST_TYPE_MASK 2
#define IPE_POLICER_FULL_WRITE_REQUEST_TYPE_SHIFT 10

#define IPE_POLICER_FULL_WRITE_REQUEST_CMD_OFFSET 3
#define IPE_POLICER_FULL_WRITE_REQUEST_CMD_MASK 2
#define IPE_POLICER_FULL_WRITE_REQUEST_CMD_SHIFT 0

#define IPE_POLICER_FULL_WRITE_REQUEST_M40AL_OFFSET 4
#define IPE_POLICER_FULL_WRITE_REQUEST_M40AL_MASK 32
#define IPE_POLICER_FULL_WRITE_REQUEST_M40AL_SHIFT 0

#define IPE_POLICER_FULL_WRITE_REQUEST_RFC_OFFSET 8
#define IPE_POLICER_FULL_WRITE_REQUEST_RFC_MASK 2
#define IPE_POLICER_FULL_WRITE_REQUEST_RFC_SHIFT 30

#define IPE_POLICER_FULL_WRITE_REQUEST_AN_OFFSET 8
#define IPE_POLICER_FULL_WRITE_REQUEST_AN_MASK 1
#define IPE_POLICER_FULL_WRITE_REQUEST_AN_SHIFT 29

#define IPE_POLICER_FULL_WRITE_REQUEST_REXP_OFFSET 8
#define IPE_POLICER_FULL_WRITE_REQUEST_REXP_MASK 4
#define IPE_POLICER_FULL_WRITE_REQUEST_REXP_SHIFT 22

#define IPE_POLICER_FULL_WRITE_REQUEST_ARM_OFFSET 9
#define IPE_POLICER_FULL_WRITE_REQUEST_ARM_MASK 11
#define IPE_POLICER_FULL_WRITE_REQUEST_ARM_SHIFT 11

#define IPE_POLICER_FULL_WRITE_REQUEST_PRM_OFFSET 10
#define IPE_POLICER_FULL_WRITE_REQUEST_PRM_MASK 11
#define IPE_POLICER_FULL_WRITE_REQUEST_PRM_SHIFT 0

#define IPE_POLICER_FULL_WRITE_REQUEST_CBLE_OFFSET 12
#define IPE_POLICER_FULL_WRITE_REQUEST_CBLE_MASK 5
#define IPE_POLICER_FULL_WRITE_REQUEST_CBLE_SHIFT 27

#define IPE_POLICER_FULL_WRITE_REQUEST_CBLM_OFFSET 12
#define IPE_POLICER_FULL_WRITE_REQUEST_CBLM_MASK 7
#define IPE_POLICER_FULL_WRITE_REQUEST_CBLM_SHIFT 20

#define IPE_POLICER_FULL_WRITE_REQUEST_EBLE_OFFSET 13
#define IPE_POLICER_FULL_WRITE_REQUEST_EBLE_MASK 5
#define IPE_POLICER_FULL_WRITE_REQUEST_EBLE_SHIFT 15

#define IPE_POLICER_FULL_WRITE_REQUEST_EBLM_OFFSET 14
#define IPE_POLICER_FULL_WRITE_REQUEST_EBLM_MASK 7
#define IPE_POLICER_FULL_WRITE_REQUEST_EBLM_SHIFT 8

#define IPE_POLICER_FULL_WRITE_REQUEST_CB_OFFSET 16
#define IPE_POLICER_FULL_WRITE_REQUEST_CB_MASK 31
#define IPE_POLICER_FULL_WRITE_REQUEST_CB_SHIFT 0

#define IPE_POLICER_FULL_WRITE_REQUEST_EB_OFFSET 20
#define IPE_POLICER_FULL_WRITE_REQUEST_EB_MASK 31
#define IPE_POLICER_FULL_WRITE_REQUEST_EB_SHIFT 0

/* Values stored in the hardware parameters' rfc field, one per algorithm. */
#define IPE_RFC_RFC2697           0x00000000
#define IPE_RFC_RFC2698           0x00000001
#define IPE_RFC_RFC4115           0x00000002
#define IPE_RFC_MEF5CF1           0x00000003

/* End of constants copied from sse_ipe_desc_fmt.h */

/* Misc Policer specific definitions */
#define SSE2_QOS_POLICER_FIXED_PKT_SIZE    256

// TODO check what can be provided by hw macro based on ASIC
#define SSE2_QOS_POL_TICKS_PER_SEC     1000LL	/* 1 tick = 1 ms */

/*
 * Default burst, in ms (byte format)
 */
#define SSE2_QOS_POL_DEF_BURST_BYTE    100

/*
 * Minimum burst needs to be such that the largest packet size is accommodated
 */
// Do we need to get it from some lib?
+#define SSE2_QOS_POL_MIN_BURST_BYTE 9*1024 + + +/* + * Flag to indicate if AN is employed or not + * 1 - TRUE, 0 - FALSE + */ +#define SSE2_QOS_POL_ALLOW_NEGATIVE 1 + +// Various Macros to take care of policer calculations + +#define SSE2_QOS_POL_COMM_BKT_MAX \ + (1<> 1)) / denominator); + break; + + case SSE2_QOS_ROUND_TO_UP: + *rounded_value = (numerator / denominator); + if ((*rounded_value * denominator) < numerator) + { + *rounded_value += 1; + } + break; + + case SSE2_QOS_ROUND_TO_DOWN: + *rounded_value = (numerator / denominator); + break; + + case SSE2_QOS_ROUND_INVALID: + default: + SSE2_QOS_DEBUG_ERROR ("Illegal round type"); + SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_60, round_type); + rc = EINVAL; + break; + } + return (rc); +} + + +static int +sse2_pol_validate_cfg_params (sse2_qos_pol_cfg_params_st * cfg) +{ + u64 numer, denom, rnd_value; + u32 cir_hw, eir_hw; + int rc = 0; + + if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698) && + (cfg->rb.kbps.eir_kbps < cfg->rb.kbps.cir_kbps)) + { + SSE2_QOS_DEBUG_ERROR ("CIR (%u kbps) is greater than PIR (%u kbps)", + cfg->rb.kbps.cir_kbps, cfg->rb.kbps.eir_kbps); + SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_39, cfg->rb.kbps.cir_kbps, + cfg->rb.kbps.eir_kbps); + return (EINVAL); + } + + /* + * convert rates to bytes-per-tick + */ + numer = (u64) (cfg->rb.kbps.cir_kbps); + denom = (u64) (8 * SSE2_QOS_POL_TICKS_PER_SEC) / 1000; + rc = sse2_qos_pol_round (numer, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + if (rc != 0) + { + SSE2_QOS_DEBUG_ERROR ("Unable to convert CIR to bytes/tick format"); + // Error traced + return (rc); + } + cir_hw = (u32) rnd_value; + + numer = (u64) (cfg->rb.kbps.eir_kbps); + rc = sse2_qos_pol_round (numer, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + if (rc != 0) + { + SSE2_QOS_DEBUG_ERROR ("Unable to convert EIR to bytes/tick format"); + // Error traced + return (rc); + } + eir_hw = (u32) rnd_value; + + if (cir_hw > SSE2_QOS_POL_AVG_RATE_MAX) + { + 
SSE2_QOS_DEBUG_ERROR ("hw cir (%u bytes/tick) is greater than the " + "max supported value (%u)", cir_hw, + SSE2_QOS_POL_AVG_RATE_MAX); + SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_84, cir_hw, SSE2_QOS_POL_AVG_RATE_MAX); + return (EINVAL); + } + + if (eir_hw > SSE2_QOS_POL_PEAK_RATE_MAX) + { + SSE2_QOS_DEBUG_ERROR ("hw eir (%u bytes/tick) is greater than the " + "max supported value (%u). Capping it to the max. " + "supported value", eir_hw, + SSE2_QOS_POL_PEAK_RATE_MAX); + SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_85, eir_hw, + SSE2_QOS_POL_PEAK_RATE_MAX); + return (EINVAL); + } + /* + * CIR = 0, with bc != 0 is not allowed + */ + if ((cfg->rb.kbps.cir_kbps == 0) && cfg->rb.kbps.cb_bytes) + { + SSE2_QOS_DEBUG_ERROR ("CIR = 0 with bc != 0"); + SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_55); + return (EINVAL); + } + + if ((cfg->rb.kbps.eir_kbps == 0) && + (cfg->rfc > SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697)) + { + SSE2_QOS_DEBUG_ERROR ("EIR = 0 for a 2R3C policer (rfc: %u)", cfg->rfc); + SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_23, cfg->rb.kbps.eir_kbps, cfg->rfc); + return (EINVAL); + } + + if (cfg->rb.kbps.eir_kbps && + (cfg->rfc < SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698)) + { + SSE2_QOS_DEBUG_ERROR ("EIR: %u kbps for a 1-rate policer (rfc: %u)", + cfg->rb.kbps.eir_kbps, cfg->rfc); + SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_23, cfg->rb.kbps.eir_kbps, cfg->rfc); + return (EINVAL); + } + + if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) && cfg->rb.kbps.eb_bytes) + { + SSE2_QOS_DEBUG_ERROR ("For a 1R1B policer, EB burst cannot be > 0"); + SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_56); + return (EINVAL); + } + + return (0); +} + +static void +sse2_qos_convert_value_to_exp_mant_fmt (u64 value, + u16 max_exp_value, + u16 max_mant_value, + sse2_qos_round_type_en type, + u8 * exp, u32 * mant) +{ + u64 rnd_value; + u64 temp_mant; + u8 temp_exp; + + /* + * Select the lowest possible exp, and the largest possible mant + */ + temp_exp = 0; + temp_mant = value; + while (temp_exp <= max_exp_value) + { + if (temp_mant <= max_mant_value) + { + 
break; + } + + temp_exp++; + rnd_value = 0; + (void) sse2_qos_pol_round ((u64) value, (u64) (1 << temp_exp), + &rnd_value, type); + temp_mant = rnd_value; + } + + if (temp_exp > max_exp_value) + { + /* + * CAP mant to its max value, and decrement exp + */ + temp_exp--; + temp_mant = max_mant_value; + } + + *exp = temp_exp; + *mant = (u32) temp_mant; + + SSE2_QOS_DEBUG_INFO ("value: 0x%llx, mant: %u, exp: %u", value, *mant, + *exp); + return; +} + +static int +sse2_pol_convert_cfg_rates_to_hw (sse2_qos_pol_cfg_params_st * cfg, + sse2_qos_pol_hw_params_st * hw) +{ + int rc = 0; + u32 cir_hw, eir_hw, hi_mant, hi_rate, cir_rnded, eir_rnded, eir_kbps; + u64 numer, denom, rnd_value; + u8 exp; + + /* + * convert rates to bytes-per-tick (tick is 1ms) + * For rate conversion, the denominator is gonna be the same + */ + denom = (u64) ((SSE2_QOS_POL_TICKS_PER_SEC * 8) / 1000); + numer = (u64) (cfg->rb.kbps.cir_kbps); + rc = sse2_qos_pol_round (numer, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + if (rc != 0) + { + SSE2_QOS_DEBUG_ERROR + ("Rounding error, rate: %d kbps, rounding_type: %d", + cfg->rb.kbps.cir_kbps, cfg->rnd_type); + // Error is traced + return (rc); + } + cir_hw = (u32) rnd_value; + + if (cfg->rb.kbps.cir_kbps && (cir_hw == 0)) + { + /* + * After rounding, cir_hw = 0. 
Bump it up + */ + cir_hw = 1; + } + + if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) + { + eir_kbps = 0; + } + else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697) + { + eir_kbps = cfg->rb.kbps.cir_kbps; + } + else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115) + { + eir_kbps = cfg->rb.kbps.eir_kbps - cfg->rb.kbps.cir_kbps; + } + else + { + eir_kbps = cfg->rb.kbps.eir_kbps; + } + + numer = (u64) eir_kbps; + rc = sse2_qos_pol_round (numer, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + if (rc != 0) + { + SSE2_QOS_DEBUG_ERROR + ("Rounding error, rate: %d kbps, rounding_type: %d", eir_kbps, + cfg->rnd_type); + // Error is traced + return (rc); + } + eir_hw = (u32) rnd_value; + + if (eir_kbps && (eir_hw == 0)) + { + /* + * After rounding, eir_hw = 0. Bump it up + */ + eir_hw = 1; + } + + SSE2_QOS_DEBUG_INFO ("cir_hw: %u bytes/tick, eir_hw: %u bytes/tick", cir_hw, + eir_hw); + + if (cir_hw > eir_hw) + { + hi_rate = cir_hw; + } + else + { + hi_rate = eir_hw; + } + + if ((cir_hw == 0) && (eir_hw == 0)) + { + /* + * Both the rates are 0. Use exp = 15, and set the RFC to 4115. 
Also + * set AN = 0 + */ + exp = (u8) SSE2_QOS_POL_RATE_EXP_MAX; + hi_mant = 0; + hw->rfc = IPE_RFC_RFC4115; + hw->allow_negative = 0; + } + else + { + sse2_qos_convert_value_to_exp_mant_fmt (hi_rate, + (u16) SSE2_QOS_POL_RATE_EXP_MAX, + (u16) + SSE2_QOS_POL_AVG_RATE_MANT_MAX, + (sse2_qos_round_type_en) + cfg->rnd_type, &exp, &hi_mant); + } + + denom = (1ULL << exp); + if (hi_rate == eir_hw) + { + hw->peak_rate_man = (u16) hi_mant; + rc = sse2_qos_pol_round ((u64) cir_hw, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + hw->avg_rate_man = (u16) rnd_value; + } + else + { + hw->avg_rate_man = (u16) hi_mant; + rc = sse2_qos_pol_round ((u64) eir_hw, denom, &rnd_value, + (sse2_qos_round_type_en) cfg->rnd_type); + hw->peak_rate_man = (u16) rnd_value; + } + if (rc != 0) + { + SSE2_QOS_DEBUG_ERROR ("Rounding error"); + // Error is traced + return (rc); + } + hw->rate_exp = exp; + + if ((hw->avg_rate_man == 0) && (cfg->rb.kbps.cir_kbps)) + { + /* + * cir was reduced to 0 during rounding. Bump it up + */ + hw->avg_rate_man = 1; + SSE2_QOS_DEBUG_INFO ("CIR = 0 during rounding. Bump it up to %u " + "bytes/tick", (hw->avg_rate_man << hw->rate_exp)); + } + + if ((hw->peak_rate_man == 0) && eir_kbps) + { + /* + * eir was reduced to 0 during rounding. Bump it up + */ + hw->peak_rate_man = 1; + SSE2_QOS_DEBUG_INFO ("EIR = 0 during rounding. 
Bump it up to %u " + "bytes/tick", (hw->peak_rate_man << hw->rate_exp)); + } + + cir_rnded = (hw->avg_rate_man << hw->rate_exp); + eir_rnded = (hw->peak_rate_man << hw->rate_exp); + + SSE2_QOS_DEBUG_INFO ("Configured(rounded) values, cir: %u " + "kbps (mant: %u, exp: %u, rate: %u bytes/tick)", + cfg->rb.kbps.cir_kbps, hw->avg_rate_man, + hw->rate_exp, cir_rnded); + + SSE2_QOS_DEBUG_INFO ("Configured(rounded) values, eir: %u " + "kbps (mant: %u, exp: %u, rate: %u bytes/tick)", + cfg->rb.kbps.eir_kbps, hw->peak_rate_man, + hw->rate_exp, eir_rnded); + + return (rc); +} + +/***** + * NAME + * sse2_pol_get_bkt_max + * + * PARAMETERS + * rate_hw - either the averate rate or peak rate + * bkt_max - bit width in the current bucket or extended bucket + * + * RETURNS + * u64 - maximum token bytes for the current or extended bucket + * + * DESCRIPTION + * The current bucket or extended bucket fields are in units of either + * 1,2,4,8 bytes based on the average or peak rate respective to current + * or extended bucket. + * + * To get the actual maximum number of bytes that can be stored in the + * field, the value must be multiplied by the units of either 1,2,4,8 + * bytes based on the rate. 
+ *****/ +u64 +sse2_pol_get_bkt_max (u64 rate_hw, u64 bkt_max) +{ + if (rate_hw <= RATE64) + { + return (bkt_max - 1); + } + else if (rate_hw <= RATE128) + { + return ((bkt_max * RATE_64TO128_UNIT) - RATE_64TO128_UNIT); + } + else if (rate_hw <= RATE256) + { + return ((bkt_max * RATE_128TO256_UNIT) - RATE_128TO256_UNIT); + } + /* rate must be over 256 */ + return ((bkt_max * RATE_OVER256_UNIT) - RATE_OVER256_UNIT); +} + +/***** + * NAME + * sse2_pol_get_bkt_value + * + * PARAMETERS + * rate_hw - either the averate rate or peak rate + * byte_value - bytes for this token bucket + * + * RETURNS + * u64 - unit value for the current or extended bucket field + * + * DESCRIPTION + * The current bucket or extended bucket fields are in units of either + * 1,2,4,8 bytes based on the average or peak rate respective to current + * or extended bucket. + * + * To get the units that can be stored in the field, the byte value must + * be divided by the units of either 1,2,4,8 bytes based on the rate. + *****/ +u64 +sse2_pol_get_bkt_value (u64 rate_hw, u64 byte_value) +{ + if (rate_hw <= RATE64) + { + return (byte_value); + } + else if (rate_hw <= RATE128) + { + return (byte_value / RATE_64TO128_UNIT); + } + else if (rate_hw <= RATE256) + { + return (byte_value / RATE_128TO256_UNIT); + } + /* rate must be over 256 */ + return (byte_value / RATE_OVER256_UNIT); +} + +static void +sse2_pol_rnd_burst_byte_fmt (u64 cfg_burst, + u16 max_exp_value, + u16 max_mant_value, + u32 max_bkt_value, + u32 rate_hw, + u8 * exp, u32 * mant, u32 * bkt_value) +{ + u64 bkt_max = max_bkt_value; + u64 bkt_limit_max; + u64 rnd_burst; + u64 temp_bkt_value; + + bkt_limit_max = ((u64) max_mant_value << (u64) max_exp_value); + bkt_max = sse2_pol_get_bkt_max (rate_hw, bkt_max); + bkt_max = MIN (bkt_max, bkt_limit_max); + if (!cfg_burst) + { + /* + * If configured burst = 0, compute the burst to be 100ms at a given + * rate. Note that for rate_hw = 0, exp = mant = 0. 
+ */ + cfg_burst = (u64) rate_hw *(u64) SSE2_QOS_POL_DEF_BURST_BYTE; + } + + if (cfg_burst > bkt_max) + { + SSE2_QOS_DEBUG_ERROR ("burst 0x%llx bytes is greater than the max. " + "supported value 0x%llx bytes. Capping it to the " + "max", cfg_burst, bkt_max); + SSE2_QOS_TR_INFO (SSE2_QOS_TP_INFO_38, + (uint) cfg_burst, (uint) bkt_max); + cfg_burst = bkt_max; + } + + if (cfg_burst < SSE2_QOS_POL_MIN_BURST_BYTE) + { + /* + * Bump up the burst value ONLY if the cfg_burst is non-zero AND + * less than the min. supported value + */ + SSE2_QOS_DEBUG_INFO ("burst 0x%llx bytes is less than the min " + "supported value %u bytes. Rounding it up to " + "the min", cfg_burst, SSE2_QOS_POL_MIN_BURST_BYTE); + SSE2_QOS_TR_INFO (SSE2_QOS_TP_INFO_39, (uint) cfg_burst, + SSE2_QOS_POL_MIN_BURST_BYTE); + cfg_burst = SSE2_QOS_POL_MIN_BURST_BYTE; + } + + sse2_qos_convert_value_to_exp_mant_fmt (cfg_burst, + max_exp_value, + max_mant_value, + SSE2_QOS_ROUND_TO_DOWN, exp, mant); + + /* Bucket value is based on rate. 
*/ + rnd_burst = ((u64) (*mant) << (u64) (*exp)); + temp_bkt_value = sse2_pol_get_bkt_value (rate_hw, rnd_burst); + *bkt_value = (u32) temp_bkt_value; +} + /* + * Fill in the burst fields of 'hw' from the kbps-format bursts in 'cfg': + * the committed and extended bucket limits (exponent and mantissa) and the + * committed and extended bucket values. + * Reads hw->avg_rate_man, hw->peak_rate_man and hw->rate_exp, so the rate + * fields of 'hw' must already be set when this runs. + * Return: always 0. + */ +static int +sse2_pol_convert_cfg_burst_to_hw (sse2_qos_pol_cfg_params_st * cfg, + sse2_qos_pol_hw_params_st * hw) +{ + u8 temp_exp; + u32 temp_mant, rate_hw; + u64 eb_bytes; + u32 bkt_value; + + /* + * compute Committed Burst + */ + SSE2_QOS_DEBUG_INFO ("Compute commit burst ..."); + rate_hw = (hw->avg_rate_man) << (hw->rate_exp); /* committed rate: mantissa << exponent */ + sse2_pol_rnd_burst_byte_fmt (cfg->rb.kbps.cb_bytes, + (u16) SSE2_QOS_POL_COMM_BKT_LIMIT_EXP_MAX, + (u16) SSE2_QOS_POL_COMM_BKT_LIMIT_MANT_MAX, + (u32) SSE2_QOS_POL_COMM_BKT_MAX, + rate_hw, &temp_exp, &temp_mant, &bkt_value); + SSE2_QOS_DEBUG_INFO ("Committed burst, burst_limit: 0x%llx mant : %u, " + "exp: %u, rnded: 0x%llx cb:%u bytes", + cfg->rb.kbps.cb_bytes, temp_mant, temp_exp, + ((u64) temp_mant << (u64) temp_exp), bkt_value); + + hw->comm_bkt_limit_exp = temp_exp; + hw->comm_bkt_limit_man = (u8) temp_mant; /* mantissa is narrowed to 8 bits */ + hw->comm_bkt = bkt_value; + + /* + * compute Exceed Burst + */ + SSE2_QOS_DEBUG_INFO ("Compute exceed burst ..."); + + if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) + { + /* + * For 1R2C, hw uses 2R3C (RFC-4115). As such, the Exceed Bucket + * params are set to 0. 
Recommendation is to use EB_exp = max_exp (=15) + * and EB_mant = 0 + */ + hw->extd_bkt_limit_exp = (u8) SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_MAX; + hw->extd_bkt_limit_man = 0; + SSE2_QOS_DEBUG_INFO ("Excess burst, burst: 0x%llx mant: %u, " + "exp: %u, rnded: 0x%llx bytes", + cfg->rb.kbps.eb_bytes, hw->extd_bkt_limit_man, + hw->extd_bkt_limit_exp, + ((u64) hw->extd_bkt_limit_man << + (u64) hw->extd_bkt_limit_exp)); + SSE2_QOS_TR_INFO (SSE2_QOS_TP_INFO_20, (uint) cfg->rb.kbps.eb_bytes, + hw->extd_bkt_limit_man, hw->extd_bkt_limit_exp); + return (0); /* 1R2C stops here; hw->extd_bkt is not assigned on this path */ + } + + if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697) + { + eb_bytes = cfg->rb.kbps.cb_bytes + cfg->rb.kbps.eb_bytes; /* RFC 2697: extended limit is CB + EB */ + } + else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115) + { + eb_bytes = cfg->rb.kbps.eb_bytes - cfg->rb.kbps.cb_bytes; /* RFC 4115: extended limit is EB - CB. NOTE(review): u64 subtraction, wraps if eb_bytes < cb_bytes; presumably ruled out by config validation -- confirm */ + } + else + { + eb_bytes = cfg->rb.kbps.eb_bytes; /* all other types: EB is used as configured */ + } + + rate_hw = (hw->peak_rate_man) << (hw->rate_exp); /* peak rate: mantissa << exponent */ + sse2_pol_rnd_burst_byte_fmt (eb_bytes, + (u16) SSE2_QOS_POL_EXTD_BKT_LIMIT_EXP_MAX, + (u16) SSE2_QOS_POL_EXTD_BKT_LIMIT_MANT_MAX, + (u32) SSE2_QOS_POL_EXTD_BKT_MAX, + rate_hw, &temp_exp, &temp_mant, &bkt_value); + + SSE2_QOS_DEBUG_INFO ("Excess burst, burst_limit: 0x%llx mant: %u, " + "exp: %u, rnded: 0x%llx eb:%u bytes", + cfg->rb.kbps.eb_bytes, temp_mant, temp_exp, + ((u64) temp_mant << (u64) temp_exp), bkt_value); /* note: logs the configured eb_bytes, not the adjusted local eb_bytes */ + + hw->extd_bkt_limit_exp = (u8) temp_exp; + hw->extd_bkt_limit_man = (u8) temp_mant; /* mantissa is narrowed to 8 bits */ + hw->extd_bkt = bkt_value; + + return (0); +} + + +/* + * Input: configured parameter values in 'cfg'. + * Output: h/w programmable parameter values in 'hw'. + * Return: success or failure code. 
+ */ +static int +sse2_pol_convert_cfg_to_hw_params (sse2_qos_pol_cfg_params_st * cfg, + sse2_qos_pol_hw_params_st * hw) +{ + int rc = 0; + + /* + * clear the hw_params + */ + memset (hw, 0, sizeof (sse2_qos_pol_hw_params_st)); + + hw->allow_negative = SSE2_QOS_POL_ALLOW_NEGATIVE; + + if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) || + (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115)) + { + hw->rfc = IPE_RFC_RFC4115; /* 1R2C shares the RFC 4115 hardware mode */ + } + else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697) + { + hw->rfc = IPE_RFC_RFC2697; + } + else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698) + { + hw->rfc = IPE_RFC_RFC2698; + } + else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1) + { + hw->rfc = IPE_RFC_MEF5CF1; + } + else + { + SSE2_QOS_DEBUG_ERROR ("Invalid RFC type %d\n", cfg->rfc); + SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_61, cfg->rfc); + return (EINVAL); /* unknown cfg->rfc; 'hw' has already been zeroed */ + } + + rc = sse2_pol_convert_cfg_rates_to_hw (cfg, hw); /* rates first: the burst conversion reads the rate fields of 'hw' */ + if (rc != 0) + { + SSE2_QOS_DEBUG_ERROR ("Unable to convert config rates to hw. Error: %d", + rc); + // Error is traced + return (rc); + } + + rc = sse2_pol_convert_cfg_burst_to_hw (cfg, hw); + if (rc != 0) + { + SSE2_QOS_DEBUG_ERROR ("Unable to convert config burst to hw. 
Error: %d", + rc); + // Error is traced + return (rc); + } + + return 0; +} + + /* + * Convert a packet rate to kbps using SSE2_QOS_POLICER_FIXED_PKT_SIZE + * (presumably bytes per packet): rate_pps * size * 8 / 1000, rounded to the + * closest integer. The u64 result is truncated to u32. The status returned + * by sse2_qos_pol_round is discarded; rnd_value starts at 0. + */ +u32 +sse2_qos_convert_pps_to_kbps (u32 rate_pps) +{ + // sse2_qos_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT, + // SSE2_QOS_SHIP_CNT_POL_CONV_PPS_TO_KBPS); + + u64 numer, rnd_value = 0; + + numer = (u64) ((u64) rate_pps * + (u64) SSE2_QOS_POLICER_FIXED_PKT_SIZE * 8LL); + (void) sse2_qos_pol_round (numer, 1000LL, &rnd_value, + SSE2_QOS_ROUND_TO_CLOSEST); + + return ((u32) rnd_value); +} + /* + * Convert a burst given in milliseconds at 'rate_kbps' to bytes: + * burst_ms * rate_kbps / 8, rounded to the closest integer and truncated + * to u32. The status returned by sse2_qos_pol_round is discarded. + */ +u32 +sse2_qos_convert_burst_ms_to_bytes (u32 burst_ms, u32 rate_kbps) +{ + u64 numer, rnd_value = 0; + + //sse2_qos_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT, + // SSE2_QOS_SHIP_CNT_POL_CONV_BURST_MS_TO_BYTES); + + numer = (u64) ((u64) burst_ms * (u64) rate_kbps); + + (void) sse2_qos_pol_round (numer, 8LL, &rnd_value, + SSE2_QOS_ROUND_TO_CLOSEST); + + return ((u32) rnd_value); +} + + +/* + * Input: configured parameters in 'cfg'. + * Output: h/w parameters are returned in 'hw', + * Return: 0 on success; -1 if either pointer is NULL, if validation of 'cfg' + * fails or if the conversion to h/w values fails. + */ +int +sse2_pol_compute_hw_params (sse2_qos_pol_cfg_params_st * cfg, + sse2_qos_pol_hw_params_st * hw) +{ + int rc = 0; + + if (!cfg || !hw) + { + SSE2_QOS_DEBUG_ERROR ("Illegal parameters"); + return (-1); + } + + /* + * Validate the police config params being presented to RM + */ + rc = sse2_pol_validate_cfg_params (cfg); + if (rc != 0) + { + SSE2_QOS_DEBUG_ERROR ("Config parameter validation failed. Error: %d", + rc); + // Error is traced + return (-1); + } + + /* + * first round configured values to h/w supported values. This func + * also determines whether 'tick' or 'byte' format + */ + rc = sse2_pol_convert_cfg_to_hw_params (cfg, hw); + if (rc != 0) + { + SSE2_QOS_DEBUG_ERROR ("Unable to convert config params to hw params. 
" + "Error: %d", rc); + SSE2_QOS_TR_ERR (SSE2_QOS_TP_ERR_53, rc); + return (-1); /* the specific error code is logged above; callers only see -1 */ + } + + return 0; +} + + +#if defined (INTERNAL_SS) || defined (X86) + +// For initializing the x86 policer format + +/* + * Return the number of hardware TSC timer ticks per second for the dataplane. + * This is approximately, but not exactly, the clock speed. + * The f64 value from os_cpu_clock_frequency is truncated to u64. + */ +static u64 +get_tsc_hz (void) +{ + f64 cpu_freq; + + cpu_freq = os_cpu_clock_frequency (); + return (u64) cpu_freq; +} + +/* + * Convert rates into bytes_per_period and scale. + * On return *scale holds the left shift that was applied to *current_limit, + * *extended_limit and both per-period rates. + * Return 0 if ok or 1 if error. + */ +static int +compute_policer_params (u64 hz, // CPU speed in clocks per second + u64 cir_rate, // in bytes per second + u64 pir_rate, // in bytes per second + u32 * current_limit, // in bytes, output may scale the input + u32 * extended_limit, // in bytes, output may scale the input + u32 * cir_bytes_per_period, + u32 * pir_bytes_per_period, u32 * scale) +{ + double period; + double internal_cir_bytes_per_period; + double internal_pir_bytes_per_period; + u32 max; + u32 scale_shift; + u32 scale_amount; + u32 __attribute__ ((unused)) orig_current_limit = *current_limit; + + // Compute period. For 1Ghz-to-8Ghz CPUs, the period will be in + // the range of 16 to 116 usec. + period = ((double) hz) / ((double) POLICER_TICKS_PER_PERIOD); /* note: the value stored is periods per second; it is used as a divisor below */ + + // Determine bytes per period for each rate + internal_cir_bytes_per_period = (double) cir_rate / period; + internal_pir_bytes_per_period = (double) pir_rate / period; + + // Scale if possible. Scaling helps rate accuracy, but is constrained + // by the scaled rates and limits fitting in 32-bits. + // In addition, we need to ensure the scaled rate is no larger than + // 2^22 tokens per period. This allows the dataplane to ignore overflow + // in the tokens-per-period multiplication since it could only + // happen if the policer were idle for more than a year. + // This is not really a constraint because 100Gbps at 1Ghz is only + // 1.6M tokens per period. 
+#define MAX_RATE_SHIFT 10 + max = MAX (*current_limit, *extended_limit); + max = MAX (max, (u32) internal_cir_bytes_per_period << MAX_RATE_SHIFT); + max = MAX (max, (u32) internal_pir_bytes_per_period << MAX_RATE_SHIFT); + scale_shift = __builtin_clz (max); + + scale_amount = 1 << scale_shift; + *scale = scale_shift; + + // Scale the limits + *current_limit = *current_limit << scale_shift; + *extended_limit = *extended_limit << scale_shift; + + // Scale the rates + internal_cir_bytes_per_period = + internal_cir_bytes_per_period * ((double) scale_amount); + internal_pir_bytes_per_period = + internal_pir_bytes_per_period * ((double) scale_amount); + + // Make sure the new rates are reasonable + // Only needed for very low rates with large bursts + if (internal_cir_bytes_per_period < 1.0) + { + internal_cir_bytes_per_period = 1.0; + } + if (internal_pir_bytes_per_period < 1.0) + { + internal_pir_bytes_per_period = 1.0; + } + + *cir_bytes_per_period = (u32) internal_cir_bytes_per_period; + *pir_bytes_per_period = (u32) internal_pir_bytes_per_period; + +// #define PRINT_X86_POLICE_PARAMS +#ifdef PRINT_X86_POLICE_PARAMS + { + u64 effective_BPS; + + // This value actually slightly conservative because it doesn't take into account + // the partial period at the end of a second. This really matters only for very low + // rates. 
+ effective_BPS = + (((u64) (*cir_bytes_per_period * (u64) period)) >> *scale); + + printf ("hz=%llu, cir_rate=%llu, limit=%u => " + "periods-per-sec=%d usec-per-period=%d => " + "scale=%d cir_BPP=%u, scaled_limit=%u => " + "effective BPS=%llu, accuracy=%f\n", + // input values + (unsigned long long) hz, + (unsigned long long) cir_rate, orig_current_limit, + // computed values + (u32) (period), // periods per second + (u32) (1000.0 * 1000.0 / period), // in usec + *scale, *cir_bytes_per_period, *current_limit, + // accuracy + (unsigned long long) effective_BPS, + (double) cir_rate / (double) effective_BPS); + } +#endif + + return 0; // ok +} + + +/* + * Input: configured parameters in 'cfg' (the kbps/bytes members of cfg->rb are read). + * Output: x86 policer parameters are returned in 'hw'. + * Return: 0 on success; -1 on a NULL pointer, a failed parameter check or an + * unsupported cfg->rfc (only 1R2C, 1R3C RFC 2697, 2R3C RFC 2698 and + * 2R3C RFC 4115 are handled). + * A configuration whose cir_kbps, cb_bytes and eb_bytes are all zero is + * accepted whatever its rfc and returns 0 early. + */ +int +x86_pol_compute_hw_params (sse2_qos_pol_cfg_params_st * cfg, + policer_read_response_type_st * hw) +{ + const int BYTES_PER_KBIT = (1000 / 8); /* 125: turns kbps into bytes per second */ + u64 hz; + u32 cap; + + if (!cfg || !hw) + { + SSE2_QOS_DEBUG_ERROR ("Illegal parameters"); + return (-1); + } + + hz = get_tsc_hz (); + hw->last_update_time = 0; + + // Cap the bursts to 32-bits. This allows up to almost one second of + // burst on a 40GE interface, which should be fine for x86. + cap = + (cfg->rb.kbps.cb_bytes > 0xFFFFFFFF) ? 0xFFFFFFFF : cfg->rb.kbps.cb_bytes; + hw->current_limit = cap; + cap = + (cfg->rb.kbps.eb_bytes > 0xFFFFFFFF) ? 
0xFFFFFFFF : cfg->rb.kbps.eb_bytes; + hw->extended_limit = cap; + + if ((cfg->rb.kbps.cir_kbps == 0) && (cfg->rb.kbps.cb_bytes == 0) + && (cfg->rb.kbps.eb_bytes == 0)) + { + // This is an uninitialized, always-violate policer + hw->single_rate = 1; + hw->cir_tokens_per_period = 0; + return 0; /* the bucket fields and scale are not written on this path */ + } + + if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) || + (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697)) + { + // Single-rate policer + + hw->single_rate = 1; + + if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_1R2C) && cfg->rb.kbps.eb_bytes) + { + SSE2_QOS_DEBUG_ERROR + ("Policer parameter validation failed -- 1R2C."); + return (-1); /* 1R2C must not carry an excess burst */ + } + + if ((cfg->rb.kbps.cir_kbps == 0) || + (cfg->rb.kbps.eir_kbps != 0) || + ((cfg->rb.kbps.cb_bytes == 0) && (cfg->rb.kbps.eb_bytes == 0))) + { + SSE2_QOS_DEBUG_ERROR ("Policer parameter validation failed -- 1R."); + return (-1); /* needs a CIR, no EIR and at least one non-zero burst */ + } + + if (compute_policer_params (hz, + (u64) cfg->rb.kbps.cir_kbps * + BYTES_PER_KBIT, 0, &hw->current_limit, + &hw->extended_limit, + &hw->cir_tokens_per_period, + &hw->pir_tokens_per_period, &hw->scale)) + { + SSE2_QOS_DEBUG_ERROR ("Policer parameter computation failed."); + return (-1); + } + + } + else if ((cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698) || + (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115)) + { + // Two-rate policer + + if ((cfg->rb.kbps.cir_kbps == 0) || (cfg->rb.kbps.eir_kbps == 0) + || (cfg->rb.kbps.eir_kbps < cfg->rb.kbps.cir_kbps) + || (cfg->rb.kbps.cb_bytes == 0) || (cfg->rb.kbps.eb_bytes == 0)) + { + SSE2_QOS_DEBUG_ERROR ("Config parameter validation failed."); + return (-1); /* needs both rates and both bursts, with eir_kbps >= cir_kbps */ + } + + if (compute_policer_params (hz, + (u64) cfg->rb.kbps.cir_kbps * + BYTES_PER_KBIT, + (u64) cfg->rb.kbps.eir_kbps * + BYTES_PER_KBIT, &hw->current_limit, + &hw->extended_limit, + &hw->cir_tokens_per_period, + &hw->pir_tokens_per_period, &hw->scale)) + { + SSE2_QOS_DEBUG_ERROR ("Policer parameter computation failed."); + return (-1); + } + + } + else + { + SSE2_QOS_DEBUG_ERROR + ("Config parameter validation 
failed. RFC not supported"); + return (-1); + } + + hw->current_bucket = hw->current_limit; + hw->extended_bucket = hw->extended_limit; + + return 0; +} +#endif + + +/* + * Input: configured parameters in 'cfg'. + * Output: physical structure is returned in 'phys', + * Return: 0 on success; -1 if either pointer is NULL, if cfg->rate_type is + * not a known rate type or if the hardware parameters cannot be + * computed. + * 'phys' is zeroed before 'cfg' is checked, so it is cleared even when -1 is + * returned for a NULL 'cfg'. + */ +int +sse2_pol_logical_2_physical (sse2_qos_pol_cfg_params_st * cfg, + policer_read_response_type_st * phys) +{ + int rc; + sse2_qos_pol_hw_params_st pol_hw; + sse2_qos_pol_cfg_params_st kbps_cfg; + + /* 'phys' is written unconditionally below, so reject NULL before touching it */ + if (!phys) + { + SSE2_QOS_DEBUG_ERROR ("Illegal parameters"); + return (-1); + } + + memset (phys, 0, sizeof (policer_read_response_type_st)); + memset (&kbps_cfg, 0, sizeof (sse2_qos_pol_cfg_params_st)); + /* zeroed so the self-assignment used to silence the x86 warning reads a defined value */ + memset (&pol_hw, 0, sizeof (sse2_qos_pol_hw_params_st)); + + if (!cfg) + { + SSE2_QOS_DEBUG_ERROR ("Illegal parameters"); + return (-1); + } + + switch (cfg->rate_type) + { + case SSE2_QOS_RATE_KBPS: + /* copy all the data into kbps_cfg */ + kbps_cfg.rb.kbps.cir_kbps = cfg->rb.kbps.cir_kbps; + kbps_cfg.rb.kbps.eir_kbps = cfg->rb.kbps.eir_kbps; + kbps_cfg.rb.kbps.cb_bytes = cfg->rb.kbps.cb_bytes; + kbps_cfg.rb.kbps.eb_bytes = cfg->rb.kbps.eb_bytes; + break; + case SSE2_QOS_RATE_PPS: + /* pps rates become kbps; ms bursts become bytes at the converted rate */ + kbps_cfg.rb.kbps.cir_kbps = + sse2_qos_convert_pps_to_kbps (cfg->rb.pps.cir_pps); + kbps_cfg.rb.kbps.eir_kbps = + sse2_qos_convert_pps_to_kbps (cfg->rb.pps.eir_pps); + kbps_cfg.rb.kbps.cb_bytes = sse2_qos_convert_burst_ms_to_bytes ((u32) + cfg-> + rb.pps.cb_ms, + kbps_cfg.rb. 
+ kbps.cir_kbps); + kbps_cfg.rb.kbps.eb_bytes = + sse2_qos_convert_burst_ms_to_bytes ((u32) cfg->rb.pps.eb_ms, + kbps_cfg.rb.kbps.eir_kbps); + break; + default: + SSE2_QOS_DEBUG_ERROR ("Illegal rate type"); + return (-1); + } + + /* rate type is now converted to kbps */ + kbps_cfg.rate_type = SSE2_QOS_RATE_KBPS; + kbps_cfg.rnd_type = cfg->rnd_type; + kbps_cfg.rfc = cfg->rfc; + + /* actions and DSCP marks are copied straight through, one slot per result colour */ + phys->action[POLICE_CONFORM] = cfg->conform_action.action_type; + phys->mark_dscp[POLICE_CONFORM] = cfg->conform_action.dscp; + phys->action[POLICE_EXCEED] = cfg->exceed_action.action_type; + phys->mark_dscp[POLICE_EXCEED] = cfg->exceed_action.dscp; + phys->action[POLICE_VIOLATE] = cfg->violate_action.action_type; + phys->mark_dscp[POLICE_VIOLATE] = cfg->violate_action.dscp; + + phys->color_aware = cfg->color_aware; + +#if !defined (INTERNAL_SS) && !defined (X86) + // convert logical into hw params which involves qos calculations + rc = sse2_pol_compute_hw_params (&kbps_cfg, &pol_hw); + if (rc == -1) + { + SSE2_QOS_DEBUG_ERROR ("Unable to compute hw param. Error: %d", rc); + return (rc); + } + + // convert hw params into the physical + phys->rfc = pol_hw.rfc; + phys->an = pol_hw.allow_negative; + phys->rexp = pol_hw.rate_exp; + phys->arm = pol_hw.avg_rate_man; + phys->prm = pol_hw.peak_rate_man; + phys->cble = pol_hw.comm_bkt_limit_exp; + phys->cblm = pol_hw.comm_bkt_limit_man; + phys->eble = pol_hw.extd_bkt_limit_exp; + phys->eblm = pol_hw.extd_bkt_limit_man; + phys->cb = pol_hw.comm_bkt; + phys->eb = pol_hw.extd_bkt; + + /* for debugging purposes, the bucket token values can be overwritten */ + if (cfg->overwrite_bucket) + { + phys->cb = cfg->current_bucket; + phys->eb = cfg->extended_bucket; + } +#else + // convert logical into hw params which involves qos calculations + rc = x86_pol_compute_hw_params (&kbps_cfg, phys); + if (rc == -1) + { + SSE2_QOS_DEBUG_ERROR ("Unable to compute hw param. 
Error: %d", rc); + return (rc); + } + + /* for debugging purposes, the bucket token values can be overwritten */ + if (cfg->overwrite_bucket) + { + phys->current_bucket = cfg->current_bucket; + phys->extended_bucket = cfg->extended_bucket; + } + + // Touch to avoid compiler warning for X86 + pol_hw.allow_negative = pol_hw.allow_negative; + +#endif // if !defined (INTERNAL_SS) && !defined (X86) + + return 0; +} + + /* + * Copy the fields of the physical policer 'bkt' into the hw parameter + * format 'hw_fmt'. 'hw_fmt' is zeroed first; when INTERNAL_SS or X86 is + * defined nothing else is copied and 'hw_fmt' stays all zero. + */ +static void +sse2_qos_convert_pol_bucket_to_hw_fmt (policer_read_response_type_st * bkt, + sse2_qos_pol_hw_params_st * hw_fmt) +{ + memset (hw_fmt, 0, sizeof (sse2_qos_pol_hw_params_st)); +#if !defined (INTERNAL_SS) && !defined (X86) + hw_fmt->rfc = (u8) bkt->rfc; + hw_fmt->allow_negative = (u8) bkt->an; + hw_fmt->rate_exp = (u8) bkt->rexp; + hw_fmt->avg_rate_man = (u16) bkt->arm; + hw_fmt->peak_rate_man = (u16) bkt->prm; + hw_fmt->comm_bkt_limit_man = (u8) bkt->cblm; + hw_fmt->comm_bkt_limit_exp = (u8) bkt->cble; + hw_fmt->extd_bkt_limit_man = (u8) bkt->eblm; + hw_fmt->extd_bkt_limit_exp = (u8) bkt->eble; + hw_fmt->extd_bkt = bkt->eb; + hw_fmt->comm_bkt = bkt->cb; +#endif // if !defined (INTERNAL_SS) && !defined (X86) +} + +/* + * Input: h/w programmable parameter values in 'hw' + * Output: configured parameter values in 'cfg' + * Return: Status, success or failure code. 
+ */ +static int +sse2_pol_convert_hw_to_cfg_params (sse2_qos_pol_hw_params_st * hw, + sse2_qos_pol_cfg_params_st * cfg) +{ + u64 temp_rate; + + if ((hw == NULL) || (cfg == NULL)) + { + return EINVAL; + } + + if ((hw->rfc == IPE_RFC_RFC4115) && + !(hw->peak_rate_man << hw->rate_exp) && !(hw->extd_bkt_limit_man)) + { + /* + * For a 1R2C, we set EIR = 0, EB = 0 + */ + cfg->rfc = SSE2_QOS_POLICER_TYPE_1R2C; /* tested before the plain RFC 4115 branch below, which it would otherwise match */ + } + else if (hw->rfc == IPE_RFC_RFC2697) + { + cfg->rfc = SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697; + } + else if (hw->rfc == IPE_RFC_RFC2698) + { + cfg->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698; + } + else if (hw->rfc == IPE_RFC_RFC4115) + { + cfg->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115; + } + else if (hw->rfc == IPE_RFC_MEF5CF1) + { + cfg->rfc = SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1; + } + else + { + return EINVAL; /* unrecognised hw->rfc */ + } + + /* presumably bytes per tick -> kbps: x8 bits, x ticks per second, /1000 -- confirm units */ + temp_rate = (((u64) hw->avg_rate_man << hw->rate_exp) * 8LL * + SSE2_QOS_POL_TICKS_PER_SEC) / 1000; + cfg->rb.kbps.cir_kbps = (u32) temp_rate; /* u64 result narrowed to u32 */ + + temp_rate = (((u64) hw->peak_rate_man << hw->rate_exp) * 8LL * + SSE2_QOS_POL_TICKS_PER_SEC) / 1000; + cfg->rb.kbps.eir_kbps = (u32) temp_rate; /* u64 result narrowed to u32 */ + + /* bucket limits are stored as mantissa << exponent */ + cfg->rb.kbps.cb_bytes = ((u64) hw->comm_bkt_limit_man << + (u64) hw->comm_bkt_limit_exp); + cfg->rb.kbps.eb_bytes = ((u64) hw->extd_bkt_limit_man << + (u64) hw->extd_bkt_limit_exp); + + if (cfg->rfc == SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697) + { + /* + * For 1R3C in the hardware, EB = sum(CB, EB). Also, EIR = CIR. 
Restore + * values such that the configured params don't reflect this adjustment + */ + cfg->rb.kbps.eb_bytes = (cfg->rb.kbps.eb_bytes - cfg->rb.kbps.cb_bytes); + cfg->rb.kbps.eir_kbps = 0; + } + else if (cfg->rfc == SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115) + { + /* + * For 4115 in the hardware is excess rate and burst, but EA provides + * peak-rate, so adjust it to be eir + */ + cfg->rb.kbps.eir_kbps += cfg->rb.kbps.cir_kbps; + cfg->rb.kbps.eb_bytes += cfg->rb.kbps.cb_bytes; + } + /* h/w conversion to cfg is in kbps */ + cfg->rate_type = SSE2_QOS_RATE_KBPS; + cfg->overwrite_bucket = 0; + cfg->current_bucket = hw->comm_bkt; + cfg->extended_bucket = hw->extd_bkt; /* rnd_type, color_aware and the action fields of 'cfg' are not written by this function */ + + SSE2_QOS_DEBUG_INFO ("configured params, cir: %u kbps, eir: %u kbps, cb " + "burst: 0x%llx bytes, eb burst: 0x%llx bytes", + cfg->rb.kbps.cir_kbps, cfg->rb.kbps.eir_kbps, + cfg->rb.kbps.cb_bytes, cfg->rb.kbps.eb_bytes); + SSE2_QOS_TR_INFO (SSE2_QOS_TP_INFO_22, cfg->rb.kbps.cir_kbps, + cfg->rb.kbps.eir_kbps, + (uint) cfg->rb.kbps.cb_bytes, + (uint) cfg->rb.kbps.eb_bytes); + + return 0; +} + /* + * Convert kbps to a packet rate using SSE2_QOS_POLICER_FIXED_PKT_SIZE + * (presumably bytes per packet): rate_kbps * 1000 / (size * 8), rounded to + * the closest integer and truncated to u32. The status returned by + * sse2_qos_pol_round is discarded; rnd_value starts at 0. + */ +u32 +sse2_qos_convert_kbps_to_pps (u32 rate_kbps) +{ + u64 numer, denom, rnd_value = 0; + + // sse_qosrm_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT, + // SSE2_QOS_SHIP_CNT_POL_CONV_KBPS_TO_PPS); + + numer = (u64) ((u64) rate_kbps * 1000LL); + denom = (u64) ((u64) SSE2_QOS_POLICER_FIXED_PKT_SIZE * 8LL); + + (void) sse2_qos_pol_round (numer, denom, &rnd_value, + SSE2_QOS_ROUND_TO_CLOSEST); + + return ((u32) rnd_value); +} + /* + * Convert a burst in bytes at 'rate_kbps' to milliseconds: + * burst_bytes * 8 / rate_kbps, rounded to the closest integer and truncated + * to u32. NOTE(review): rate_kbps == 0 is passed on as a zero denominator; + * the outcome then depends on sse2_qos_pol_round, which is not visible + * here -- confirm it rejects it. + */ +u32 +sse2_qos_convert_burst_bytes_to_ms (u64 burst_bytes, u32 rate_kbps) +{ + u64 numer, denom, rnd_value = 0; + + //sse_qosrm_ship_inc_counter(SSE2_QOS_SHIP_COUNTER_TYPE_API_CNT, + // SSE2_QOS_SHIP_CNT_POL_CONV_BYTES_TO_BURST_MS); + + numer = burst_bytes * 8LL; + denom = (u64) rate_kbps; + + (void) sse2_qos_pol_round (numer, denom, &rnd_value, + SSE2_QOS_ROUND_TO_CLOSEST); + + return ((u32) rnd_value); +} + +/* + * Input: physical structure in 'phys', rate_type in cfg + * Output: configured 
parameters in 'cfg'. + * Return: Status, success or failure code. + */ +int +sse2_pol_physical_2_logical (policer_read_response_type_st * phys, + sse2_qos_pol_cfg_params_st * cfg) +{ + int rc; + sse2_qos_pol_hw_params_st pol_hw; + sse2_qos_pol_cfg_params_st kbps_cfg; + + memset (&pol_hw, 0, sizeof (sse2_qos_pol_hw_params_st)); + memset (&kbps_cfg, 0, sizeof (sse2_qos_pol_cfg_params_st)); + + if (!phys) + { + SSE2_QOS_DEBUG_ERROR ("Illegal parameters"); + return (-1); + } + + sse2_qos_convert_pol_bucket_to_hw_fmt (phys, &pol_hw); + + rc = sse2_pol_convert_hw_to_cfg_params (&pol_hw, &kbps_cfg); + if (rc != 0) + { + SSE2_QOS_DEBUG_ERROR ("Unable to convert hw params to config params. " + "Error: %d", rc); + return (-1); + } + + /* check what rate type is required */ + switch (cfg->rate_type) + { + case SSE2_QOS_RATE_KBPS: + /* copy all the data into kbps_cfg */ + cfg->rb.kbps.cir_kbps = kbps_cfg.rb.kbps.cir_kbps; + cfg->rb.kbps.eir_kbps = kbps_cfg.rb.kbps.eir_kbps; + cfg->rb.kbps.cb_bytes = kbps_cfg.rb.kbps.cb_bytes; + cfg->rb.kbps.eb_bytes = kbps_cfg.rb.kbps.eb_bytes; + break; + case SSE2_QOS_RATE_PPS: + cfg->rb.pps.cir_pps = + sse2_qos_convert_kbps_to_pps (kbps_cfg.rb.kbps.cir_kbps); + cfg->rb.pps.eir_pps = + sse2_qos_convert_kbps_to_pps (kbps_cfg.rb.kbps.eir_kbps); + cfg->rb.pps.cb_ms = + sse2_qos_convert_burst_bytes_to_ms (kbps_cfg.rb.kbps.cb_bytes, + kbps_cfg.rb.kbps.cir_kbps); + cfg->rb.pps.eb_ms = + sse2_qos_convert_burst_bytes_to_ms (kbps_cfg.rb.kbps.eb_bytes, + kbps_cfg.rb.kbps.eir_kbps); + break; + default: + SSE2_QOS_DEBUG_ERROR ("Illegal rate type"); + return (-1); + } + + /* cfg->rate_type remains what it was */ + cfg->rnd_type = kbps_cfg.rnd_type; + cfg->rfc = kbps_cfg.rfc; + cfg->overwrite_bucket = kbps_cfg.overwrite_bucket; + cfg->current_bucket = kbps_cfg.current_bucket; + cfg->extended_bucket = kbps_cfg.extended_bucket; + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: 
+ */ diff --git a/src/vnet/policer/xlate.h b/src/vnet/policer/xlate.h new file mode 100644 index 00000000000..16742f80a47 --- /dev/null +++ b/src/vnet/policer/xlate.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/*--------------------------------------------------------------------------- + * from gdp_logical_qos.h + *--------------------------------------------------------------------------- + */ + +#ifndef __included_xlate_h__ +#define __included_xlate_h__ + +#include + +/* + * edt: * enum sse2_qos_policer_type_en + * Defines type of policer to be allocated + */ +typedef enum sse2_qos_policer_type_en_ +{ + SSE2_QOS_POLICER_TYPE_1R2C = 0, /* single rate; programmed as RFC 4115 on the sse2 hardware path */ + SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697 = 1, /* single rate */ + SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698 = 2, /* two rate */ + SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115 = 3, /* two rate */ + SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1 = 4, /* rejected by x86_pol_compute_hw_params */ + SSE2_QOS_POLICER_TYPE_MAX +} sse2_qos_policer_type_en; + +/* + * edt: * enum + * Enum used to define type of rounding used when calculating policer values + */ +typedef enum +{ + SSE2_QOS_ROUND_TO_CLOSEST = 0, /* also the value of a zero-initialised rnd_type */ + SSE2_QOS_ROUND_TO_UP, + SSE2_QOS_ROUND_TO_DOWN, + SSE2_QOS_ROUND_INVALID +} sse2_qos_round_type_en; + +/* + * edt: * enum + * Enum used to define type of rate for configuration, either pps or kbps. + * If kbps, then burst is in bytes, if pps, then burst is in ms. + * + * Default of zero is kbps, which is inline with how it is programmed + * in actual hardware. 
However, the warning is that this is reverse logic + * of units_in_bits field in sse2_static_policer_parameters_st, which is + * inline with sse_punt_drop.h. + */ +typedef enum +{ + SSE2_QOS_RATE_KBPS = 0, /* rates in kbps, bursts in bytes */ + SSE2_QOS_RATE_PPS, /* rates in pps, bursts in ms */ + SSE2_QOS_RATE_INVALID +} sse2_qos_rate_type_en; + +/* + * edt: * enum + * Defines type of policer actions. + */ +typedef enum +{ + SSE2_QOS_ACTION_DROP = 0, + SSE2_QOS_ACTION_TRANSMIT, + SSE2_QOS_ACTION_MARK_AND_TRANSMIT +} sse2_qos_action_type_en; + +/* + * edt: * struct sse2_qos_pol_action_params_st + * This structure is used to hold user configured police action parameters. + * + * element: action_type + * Action type (see sse2_qos_action_type_en). + * element: dscp + * DSCP value to set when action is SSE2_QOS_ACTION_MARK_AND_TRANSMIT. + */ +typedef struct sse2_qos_pol_action_params_st_ +{ + u8 action_type; + u8 dscp; +} sse2_qos_pol_action_params_st; + +/* + * edt: * struct sse2_qos_pol_cfg_params_st + * + * Description: + * This structure is used to hold user configured policing parameters. + * + * element: cir_kbps + * CIR in kbps. + * element: eir_kbps + * EIR or PIR in kbps. + * element: cb_bytes + * Committed Burst in bytes. + * element: eb_bytes + * Excess or Peak Burst in bytes. + * element: cir_pps + * CIR in pps. + * element: eir_pps + * EIR or PIR in pps. + * element: cb_ms + * Committed Burst in milliseconds. + * element: eb_ms + * Excess or Peak Burst in milliseconds. + * element: rate_type + * Indicates whether the union holds kbps/bytes or pps/ms values. + * element: rfc + * Policer algorithm - 1R2C, 1R3C (2697), 2R3C (2698), 2R3C (4115) or + * 2R3C (MEF5CF1). See sse2_qos_policer_type_en + * element: rnd_type + * Rounding type (see sse2_qos_round_type_en). Needed when policer values + * need to be rounded. 
Caller can decide on type of rounding used + * element: color_aware + * Copied unchanged into the physical policer. + * element: overwrite_bucket + * When non-zero, current_bucket and extended_bucket replace the computed + * bucket values in the physical policer (debugging aid). + * element: current_bucket, extended_bucket + * Bucket values used when overwrite_bucket is set. + * element: conform_action, exceed_action, violate_action + * Action and DSCP copied into the physical policer's action[] and + * mark_dscp[] slots POLICE_CONFORM, POLICE_EXCEED and POLICE_VIOLATE. + */ +typedef struct sse2_qos_pol_cfg_params_st_ +{ + union + { + struct + { + u32 cir_kbps; + u32 eir_kbps; + u64 cb_bytes; + u64 eb_bytes; + } kbps; + struct + { + u32 cir_pps; + u32 eir_pps; + u64 cb_ms; + u64 eb_ms; + } pps; + } rb; /* rate burst config */ + u8 rate_type; /* sse2_qos_rate_type_en */ + u8 rnd_type; /* sse2_qos_round_type_en */ + u8 rfc; /* sse2_qos_policer_type_en */ + u8 color_aware; + u8 overwrite_bucket; /* for debugging purposes */ + u32 current_bucket; /* for debugging purposes */ + u32 extended_bucket; /* for debugging purposes */ + sse2_qos_pol_action_params_st conform_action; + sse2_qos_pol_action_params_st exceed_action; + sse2_qos_pol_action_params_st violate_action; +} sse2_qos_pol_cfg_params_st; + + /* + * Policer parameters in the hardware format. Rates are held as a mantissa + * shifted left by the shared rate_exp; bucket limits are held as a mantissa + * shifted left by their own exponent. + */ +typedef struct sse2_qos_pol_hw_params_st_ +{ + u8 rfc; + u8 allow_negative; + u8 rate_exp; /* shared exponent for both rate mantissas */ + u16 avg_rate_man; /* committed rate mantissa */ + u16 peak_rate_man; /* peak rate mantissa */ + u8 comm_bkt_limit_exp; + u8 comm_bkt_limit_man; + u8 extd_bkt_limit_exp; + u8 extd_bkt_limit_man; + u32 comm_bkt; /* committed bucket value */ + u32 extd_bkt; /* extended bucket value */ +} sse2_qos_pol_hw_params_st; + + /* + * Convert the logical configuration 'cfg' into the physical policer 'phys'. + * Returns 0 on success and -1 on failure. + */ +int +sse2_pol_logical_2_physical (sse2_qos_pol_cfg_params_st * cfg, + policer_read_response_type_st * phys); + + +#endif /* __included_xlate_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg