/*
* Copyright (c) 2020 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vnet/vnet.h>
#include <vnet/devices/devices.h>
#include <vnet/feature/feature.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/interface/tx_queue_funcs.h>
#include <vlib/unix/unix.h>
/* Log class used by this file: class "interface", subclass "runtime". */
VLIB_REGISTER_LOG_CLASS (if_rxq_log, static) = {
.class_name = "interface",
.subclass_name = "runtime",
};
/* Convenience wrappers that log through if_rxq_log at debug / error level.
 * Both expand __VA_ARGS__ unconditionally, so at least one argument must
 * follow the format string (e.g. log_debug ("%s", "text")). */
#define log_debug(fmt, ...) vlib_log_debug (if_rxq_log.class, fmt, __VA_ARGS__)
#define log_err(fmt, ...) vlib_log_err (if_rxq_log.class, fmt, __VA_ARGS__)
/* Printable names of the vlib node states, indexed by vlib_node_state_t.
 * Used when logging node state transitions.  The entries are string
 * literals, so both the pointers and the characters they point to are
 * declared const. */
static const char *const node_state_str[] = {
  [VLIB_NODE_STATE_DISABLED] = "disabled",
  [VLIB_NODE_STATE_POLLING] = "polling",
  [VLIB_NODE_STATE_INTERRUPT] = "interrupt",
};
/* Ordering callback for rx queue poll vector entries.
 *
 * Entries are ordered by dev_instance first and by queue_id second.
 * Returns 1 when *a1 sorts after *a2, -1 when it sorts before, and 0 when
 * both keys are equal. */
static int
poll_data_sort (void *a1, void *a2)
{
  vnet_hw_if_rxq_poll_vector_t *lhs = a1;
  vnet_hw_if_rxq_poll_vector_t *rhs = a2;

  /* primary key: device instance */
  if (lhs->dev_instance != rhs->dev_instance)
    return lhs->dev_instance > rhs->dev_instance ? 1 : -1;

  /* secondary key: queue id within the device */
  if (lhs->queue_id != rhs->queue_id)
    return lhs->queue_id > rhs->queue_id ? 1 : -1;

  return 0;
}
/*
 * Recompute the per-thread runtime data of the input node used by hardware
 * interface hw_if_index, and the per-thread tx runtime data of that
 * interface, and install them if they differ from what is currently active.
 *
 * RX side (covers every rx queue in the pool whose interface uses the same
 * input node as hw_if_index):
 *  - desired node state per thread: POLLING if at least one queue on that
 *    thread is in polling mode, otherwise INTERRUPT if at least one queue is
 *    in interrupt or adaptive mode, otherwise DISABLED; the adaptive flag is
 *    derived from adaptive-mode queues on non-polling threads;
 *  - per-thread poll vector: all matching queues placed on a thread whose
 *    desired state is POLLING, sorted by (dev_instance, queue_id);
 *  - size of the per-thread interrupt structure: highest rx queue pool index
 *    among interrupt/adaptive queues, plus one.
 *
 * TX side: for each tx queue of the interface, every thread set in the
 * queue's thread bitmap gets the queue id, shared_queue flag and number of
 * threads sharing the queue.
 *
 * New data is swapped in only when a difference is detected.  The swap is
 * done under the worker thread barrier, which is taken here unless the
 * caller already holds it.
 *
 * vnm          vnet main
 * hw_if_index  hardware interface whose input node / tx runtime is updated
 */
void
vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
{
  vlib_main_t *vm = vlib_get_main ();
  vnet_interface_main_t *im = &vnm->interface_main;
  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
  u32 node_index = hi->input_node_index;
  vnet_hw_if_rx_queue_t *rxq;
  /* d[t] is the candidate poll vector for thread t */
  vnet_hw_if_rxq_poll_vector_t *pv, **d = 0;
  vnet_hw_if_output_node_runtime_t *new_out_runtimes = 0;
  vlib_node_state_t *per_thread_node_state = 0;
  u32 n_threads = vlib_get_n_threads ();
  u16 *per_thread_node_adaptive = 0;
  int something_changed_on_rx = 0;
  int something_changed_on_tx = 0;
  /* rx queue indices whose interrupt was pending when the per-thread
   * interrupt structures were drained */
  clib_bitmap_t *pending_int = 0;
  int last_int = -1;

  log_debug ("update node '%U' triggered by interface %v",
             format_vlib_node_name, vm, node_index, hi->name);

  vec_validate (d, n_threads - 1);
  vec_validate_init_empty (per_thread_node_state, n_threads - 1,
                           VLIB_NODE_STATE_DISABLED);
  vec_validate_init_empty (per_thread_node_adaptive, n_threads - 1, 0);

  /* find out desired node state on each thread */
  pool_foreach (rxq, im->hw_if_rx_queues)
    {
      u32 ti = rxq->thread_index;
      vnet_hw_interface_t *rxq_hi;

      ASSERT (rxq->mode != VNET_HW_IF_RX_MODE_UNKNOWN);
      ASSERT (rxq->mode != VNET_HW_IF_RX_MODE_DEFAULT);

      rxq_hi = vnet_get_hw_interface (vnm, rxq->hw_if_index);

      /* only queues served by the same input node are relevant */
      if (rxq_hi->input_node_index != node_index)
        continue;

      if (rxq->mode == VNET_HW_IF_RX_MODE_POLLING)
        {
          per_thread_node_state[ti] = VLIB_NODE_STATE_POLLING;
          per_thread_node_adaptive[ti] = 0;
        }

      /* polling takes precedence over anything else on this thread */
      if (per_thread_node_state[ti] == VLIB_NODE_STATE_POLLING)
        continue;

      if (rxq->mode == VNET_HW_IF_RX_MODE_INTERRUPT ||
          rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)
        per_thread_node_state[ti] = VLIB_NODE_STATE_INTERRUPT;

      if (rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)
        per_thread_node_adaptive[ti] = 1;
    }

  /* construct per-thread polling vectors */
  pool_foreach (rxq, im->hw_if_rx_queues)
    {
      u32 ti = rxq->thread_index;
      vnet_hw_interface_t *rxq_hi;

      rxq_hi = vnet_get_hw_interface (vnm, rxq->hw_if_index);

      if (rxq_hi->input_node_index != node_index)
        continue;

      /* track the highest pool index of an interrupt-capable queue; it
       * determines the size of the per-thread interrupt structure */
      if (rxq->mode == VNET_HW_IF_RX_MODE_INTERRUPT ||
          rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)
        last_int = clib_max (last_int, rxq - im->hw_if_rx_queues);

      if (per_thread_node_state[ti] != VLIB_NODE_STATE_POLLING)
        continue;

      vec_add2_aligned (d[ti], pv, 1, CLIB_CACHE_LINE_BYTES);
      pv->dev_instance = rxq->dev_instance;
      pv->queue_id = rxq->queue_id;
    }

  /* sort poll vectors and compare them with active ones to avoid
   * unnecessary barrier */
  for (u32 i = 0; i < n_threads; i++)
    {
      vlib_main_t *ovm = vlib_get_main_by_index (i);
      vlib_node_state_t old_state;

      vec_sort_with_function (d[i], poll_data_sort);

      old_state = vlib_node_get_state (ovm, node_index);
      if (per_thread_node_state[i] != old_state)
        {
          something_changed_on_rx = 1;
          log_debug ("state changed for node %U on thread %u from %s to %s",
                     format_vlib_node_name, vm, node_index, i,
                     node_state_str[old_state],
                     node_state_str[per_thread_node_state[i]]);
        }

      /* check if something changed */
      if (something_changed_on_rx == 0)
        {
          vnet_hw_if_rx_node_runtime_t *rt;

          rt = vlib_node_get_runtime_data (ovm, node_index);
          if (vec_len (rt->rxq_poll_vector) != vec_len (d[i]))
            something_changed_on_rx = 1;
          /* lengths are equal here; skip memcmp for empty vectors, which
           * may both be NULL pointers and must not be handed to memcmp */
          else if (vec_len (d[i]) > 0 &&
                   memcmp (d[i], rt->rxq_poll_vector,
                           vec_len (d[i]) * sizeof (**d)))
            something_changed_on_rx = 1;
          if (clib_interrupt_get_n_int (rt->rxq_interrupts) != last_int + 1)
            something_changed_on_rx = 1;
        }
    }

  /* work on a copy of the tx runtimes so the active vector stays untouched
   * until the swap below */
  new_out_runtimes =
    vec_dup_aligned (hi->output_node_thread_runtimes, CLIB_CACHE_LINE_BYTES);
  vec_validate_aligned (new_out_runtimes, n_threads - 1,
                        CLIB_CACHE_LINE_BYTES);

  /* the copy had to grow to cover all threads */
  if (vec_len (hi->output_node_thread_runtimes) != vec_len (new_out_runtimes))
    something_changed_on_tx = 1;

  for (u32 i = 0; i < vec_len (hi->tx_queue_indices); i++)
    {
      u32 thread_index;
      u32 queue_index = hi->tx_queue_indices[i];
      vnet_hw_if_tx_queue_t *txq = vnet_hw_if_get_tx_queue (vnm, queue_index);
      /* number of threads sharing this tx queue; distinct name so it does
       * not shadow the function-level n_threads, and 32-bit wide to match
       * the plain %u conversion used when logging it */
      u32 n_txq_threads = clib_bitmap_count_set_bits (txq->threads);

      clib_bitmap_foreach (thread_index, txq->threads)
        {
          vnet_hw_if_output_node_runtime_t *rt;

          rt = vec_elt_at_index (new_out_runtimes, thread_index);
          if ((rt->frame.queue_id != txq->queue_id) ||
              (rt->n_threads != n_txq_threads))
            {
              log_debug ("tx queue data changed for interface %v, thread %u "
                         "(queue_id %u -> %u, n_threads %u -> %u)",
                         hi->name, thread_index, rt->frame.queue_id,
                         txq->queue_id, rt->n_threads, n_txq_threads);
              something_changed_on_tx = 1;
              rt->frame.queue_id = txq->queue_id;
              rt->frame.shared_queue = txq->shared_queue;
              rt->n_threads = n_txq_threads;
            }
        }
    }

  if (something_changed_on_rx || something_changed_on_tx)
    {
      int with_barrier;

      if (vlib_worker_thread_barrier_held ())
        {
          with_barrier = 0;
          log_debug ("%s", "already running under the barrier");
        }
      else
        with_barrier = 1;

      if (with_barrier)
        vlib_worker_thread_barrier_sync (vm);

      if (something_changed_on_rx)
        {
          for (u32 i = 0; i < n_threads; i++)
            {
              /* per-thread vlib main; named differently from the
               * function-level vm used for the barrier calls */
              vlib_main_t *tvm = vlib_get_main_by_index (i);
              vnet_hw_if_rx_node_runtime_t *rt;

              rt = vlib_node_get_runtime_data (tvm, node_index);

              /* swap in the new poll vector; the old one ends up in d[i]
               * and is freed at the end of the function */
              pv = rt->rxq_poll_vector;
              rt->rxq_poll_vector = d[i];
              d[i] = pv;

              if (rt->rxq_interrupts)
                {
                  void *in = rt->rxq_interrupts;
                  int int_num = -1;

                  /* drain pending interrupts from this thread and remember
                   * them so they can be raised again after the update */
                  while ((int_num = clib_interrupt_get_next (in, int_num)) !=
                         -1)
                    {
                      clib_interrupt_clear (in, int_num);
                      pending_int = clib_bitmap_set (pending_int, int_num, 1);
                      last_int = clib_max (last_int, int_num);
                    }
                }

              vlib_node_set_state (tvm, node_index, per_thread_node_state[i]);
              vlib_node_set_flag (tvm, node_index,
                                  VLIB_NODE_FLAG_ADAPTIVE_MODE,
                                  per_thread_node_adaptive[i]);

              if (last_int >= 0)
                clib_interrupt_resize (&rt->rxq_interrupts, last_int + 1);
              else
                clib_interrupt_free (&rt->rxq_interrupts);
            }
        }

      if (something_changed_on_tx)
        {
          vnet_hw_if_output_node_runtime_t *t;

          /* swap; the previously active vector is freed below */
          t = hi->output_node_thread_runtimes;
          hi->output_node_thread_runtimes = new_out_runtimes;
          new_out_runtimes = t;
        }

      if (with_barrier)
        vlib_worker_thread_barrier_release (vm);
    }
  else
    log_debug ("skipping update of node '%U', no changes detected",
               format_vlib_node_name, vm, node_index);

  /* raise again the interrupts drained above, now that the new runtime
   * data is installed */
  if (pending_int)
    {
      int i;

      clib_bitmap_foreach (i, pending_int)
        {
          vnet_hw_if_rx_queue_set_int_pending (vnm, i);
        }
      clib_bitmap_free (pending_int);
    }

  /* d[] and new_out_runtimes hold either the replaced (old) data or the
   * unused candidates, depending on whether a swap happened */
  for (u32 i = 0; i < n_threads; i++)
    vec_free (d[i]);

  vec_free (d);
  vec_free (per_thread_node_state);
  vec_free (per_thread_node_adaptive);
  vec_free (new_out_runtimes);
}