aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/bonding
diff options
context:
space:
mode:
authorSteven <sluong@cisco.com>2018-09-27 20:06:26 -0700
committerDamjan Marion <dmarion@me.com>2018-09-28 19:39:50 +0000
commitc4e99c5d6f19cf7c026b021266e309f29de1ac7f (patch)
tree760eb9f8c6cd365521edac14b731635063562ad5 /src/vnet/bonding
parent9be93c8f85d752930566a1d37e9f4841ca78861f (diff)
bond: tx performance enhancement part deux
- Reduce per packet cost by buffering the output packet buffer indexes
  in the queue and process the queue outside the packet processing loop.
- Move unnecessary variable initialization outside of the while loop.
- There is no need to save the old interface if tracing is not enabled.

Test result for 256 bytes packet comparison. Other packet size shows
similar improvement.

With the patch
--------------
BondEthernet0-output             active              52836        13526016               0          1.71e1          256.00
BondEthernet0-tx                 active              52836        13526016               0          2.68e1          256.00
TenGigabitEthernet6/0/0-output   active              52836         6762896               0          9.17e0          127.99
TenGigabitEthernet6/0/0-tx       active              52836         6762896               0          6.97e1          127.99
TenGigabitEthernet6/0/1-output   active              52836         6763120               0          9.40e0          128.00
TenGigabitEthernet6/0/1-tx       active              52836         6763120               0          7.00e1          128.00
bond-input                       active              52836        13526016               0          1.76e1          256.00

Without the patch
-----------------
BondEthernet0-output             active              60858        15579648               0          1.73e1          256.00
BondEthernet0-tx                 active              60858        15579648               0          2.94e1          256.00
TenGigabitEthernet6/0/0-output   active              60858         7789626               0          9.29e0          127.99
TenGigabitEthernet6/0/0-tx       active              60858         7789626               0          7.01e1          127.99
TenGigabitEthernet6/0/1-output   active              60858         7790022               0          9.31e0          128.00
TenGigabitEthernet6/0/1-tx       active              60858         7790022               0          7.10e1          128.00
bond-input                       active              60858        15579648               0          1.77e1          256.00

Change-Id: Ib6d73a63ceeaa2f1397ceaf4c5391c57fd865b04
Signed-off-by: Steven <sluong@cisco.com>
Diffstat (limited to 'src/vnet/bonding')
-rw-r--r--src/vnet/bonding/cli.c29
-rw-r--r--src/vnet/bonding/device.c186
-rw-r--r--src/vnet/bonding/node.h15
3 files changed, 106 insertions, 124 deletions
diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c
index ec34b47351e..522d13aadf7 100644
--- a/src/vnet/bonding/cli.c
+++ b/src/vnet/bonding/cli.c
@@ -198,7 +198,6 @@ bond_delete_if (vlib_main_t * vm, u32 sw_if_index)
slave_if_t *sif;
vnet_hw_interface_t *hw;
u32 *sif_sw_if_index;
- u32 thread_index;
u32 **s_list = 0;
u32 i;
@@ -232,12 +231,6 @@ bond_delete_if (vlib_main_t * vm, u32 sw_if_index)
clib_bitmap_free (bif->port_number_bitmap);
hash_unset (bm->bond_by_sw_if_index, bif->sw_if_index);
- for (thread_index = 0; thread_index < vlib_get_thread_main ()->n_vlib_mains;
- thread_index++)
- {
- vec_free (bif->per_thread_info[thread_index].frame);
- }
- vec_free (bif->per_thread_info);
memset (bif, 0, sizeof (*bif));
pool_put (bm->interfaces, bif);
@@ -310,9 +303,6 @@ bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args)
sw = vnet_get_hw_sw_interface (vnm, bif->hw_if_index);
bif->sw_if_index = sw->sw_if_index;
bif->group = bif->sw_if_index;
- vec_validate_aligned (bif->per_thread_info,
- vlib_get_thread_main ()->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
if (vlib_get_thread_main ()->n_vlib_mains > 1)
clib_spinlock_init (&bif->lockp);
@@ -431,6 +421,8 @@ bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args)
vnet_interface_main_t *im = &vnm->interface_main;
vnet_hw_interface_t *bif_hw, *sif_hw;
vnet_sw_interface_t *sw;
+ u32 thread_index;
+ u32 sif_if_index;
bif = bond_get_master_by_sw_if_index (args->group);
if (!bif)
@@ -527,6 +519,20 @@ bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args)
bond_enable_collecting_distributing (vm, sif);
}
+ vec_foreach_index (thread_index, bm->per_thread_data)
+ {
+ bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data,
+ thread_index);
+
+ vec_validate_aligned (ptd->per_port_queue, sif->sw_if_index,
+ CLIB_CACHE_LINE_BYTES);
+
+ vec_foreach_index (sif_if_index, ptd->per_port_queue)
+ {
+ ptd->per_port_queue[sif_if_index].n_buffers = 0;
+ }
+ }
+
args->rv = vnet_feature_enable_disable ("device-input", "bond-input",
sif_hw->hw_if_index, 1, 0, 0);
@@ -755,6 +761,9 @@ bond_cli_init (vlib_main_t * vm)
bm->vlib_main = vm;
bm->vnet_main = vnet_get_main ();
vec_validate_aligned (bm->slave_by_sw_if_index, 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (bm->per_thread_data,
+ vlib_get_thread_main ()->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
return 0;
}
diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c
index 7f9828407fb..79ca2faf164 100644
--- a/src/vnet/bonding/device.c
+++ b/src/vnet/bonding/device.c
@@ -136,29 +136,25 @@ bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node,
bond_if_t * bif, vlib_buffer_t * b0,
uword slave_count)
{
- vnet_main_t *vnm = vnet_get_main ();
+ bond_main_t *bm = &bond_main;
vlib_buffer_t *c0;
int port;
- u32 *to_next = 0;
u32 sw_if_index;
- vlib_frame_t *f;
u16 thread_index = vm->thread_index;
+ bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data,
+ thread_index);
for (port = 1; port < slave_count; port++)
{
sw_if_index = *vec_elt_at_index (bif->active_slaves, port);
- if (bif->per_thread_info[thread_index].frame[port] == 0)
- bif->per_thread_info[thread_index].frame[port] =
- vnet_get_frame_to_sw_interface (vnm, sw_if_index);
- f = bif->per_thread_info[thread_index].frame[port];
- to_next = vlib_frame_vector_args (f);
- to_next += f->n_vectors;
c0 = vlib_buffer_copy (vm, b0);
if (PREDICT_TRUE (c0 != 0))
{
vnet_buffer (c0)->sw_if_index[VLIB_TX] = sw_if_index;
- to_next[0] = vlib_get_buffer_index (vm, c0);
- f->n_vectors++;
+ ptd->per_port_queue[sw_if_index].buffers[ptd->per_port_queue
+ [sw_if_index].n_buffers] =
+ vlib_get_buffer_index (vm, c0);
+ ptd->per_port_queue[sw_if_index].n_buffers++;
}
}
@@ -399,16 +395,18 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u32 *from = vlib_frame_vector_args (frame);
ethernet_header_t *eth;
- u32 port, n_left;
- u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3;
+ u32 n_left;
+ u32 sw_if_index;
bond_packet_trace_t *t0;
uword n_trace = vlib_get_trace_count (vm, node);
u16 thread_index = vm->thread_index;
vnet_main_t *vnm = vnet_get_main ();
u32 *to_next;
- u32 sif_if_index, sif_if_index1, sif_if_index2, sif_if_index3;
vlib_frame_t *f;
uword slave_count;
+ u32 port0 = 0, port1 = 0, port2 = 0, port3 = 0;
+ bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data,
+ thread_index);
if (PREDICT_FALSE (bif->admin_up == 0))
{
@@ -438,14 +436,10 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
return frame->n_vectors;
}
- vec_validate_aligned (bif->per_thread_info[thread_index].frame, slave_count,
- CLIB_CACHE_LINE_BYTES);
-
b = bufs;
while (n_left >= 4)
{
- u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0;
- u32 port0 = 0, port1 = 0, port2 = 0, port3 = 0;
+ u32 sif_if_index0, sif_if_index1, sif_if_index2, sif_if_index3;
// Prefetch next iteration
if (n_left >= 8)
@@ -468,11 +462,6 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]);
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]);
- sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
- sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
- sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
- sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_TX];
-
if (PREDICT_TRUE (slave_count > 1))
{
port0 =
@@ -493,69 +482,22 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
slave_count);
}
- sif_if_index = *vec_elt_at_index (bif->active_slaves, port0);
+ sif_if_index0 = *vec_elt_at_index (bif->active_slaves, port0);
sif_if_index1 = *vec_elt_at_index (bif->active_slaves, port1);
sif_if_index2 = *vec_elt_at_index (bif->active_slaves, port2);
sif_if_index3 = *vec_elt_at_index (bif->active_slaves, port3);
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sif_if_index;
- vnet_buffer (b[1])->sw_if_index[VLIB_TX] = sif_if_index1;
- vnet_buffer (b[2])->sw_if_index[VLIB_TX] = sif_if_index2;
- vnet_buffer (b[3])->sw_if_index[VLIB_TX] = sif_if_index3;
-
- if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port0]
- == 0)))
- bif->per_thread_info[thread_index].frame[port0] =
- vnet_get_frame_to_sw_interface (vnm, sif_if_index);
-
- if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port1]
- == 0)))
- bif->per_thread_info[thread_index].frame[port1] =
- vnet_get_frame_to_sw_interface (vnm, sif_if_index1);
-
- if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port2]
- == 0)))
- bif->per_thread_info[thread_index].frame[port2] =
- vnet_get_frame_to_sw_interface (vnm, sif_if_index2);
-
- if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port3]
- == 0)))
- bif->per_thread_info[thread_index].frame[port3] =
- vnet_get_frame_to_sw_interface (vnm, sif_if_index3);
-
- f = bif->per_thread_info[thread_index].frame[port0];
- to_next = vlib_frame_vector_args (f);
- to_next += f->n_vectors;
- to_next[0] = vlib_get_buffer_index (vm, b[0]);
- f->n_vectors++;
-
- f = bif->per_thread_info[thread_index].frame[port1];
- to_next = vlib_frame_vector_args (f);
- to_next += f->n_vectors;
- to_next[0] = vlib_get_buffer_index (vm, b[1]);
- f->n_vectors++;
-
- f = bif->per_thread_info[thread_index].frame[port2];
- to_next = vlib_frame_vector_args (f);
- to_next += f->n_vectors;
- to_next[0] = vlib_get_buffer_index (vm, b[2]);
- f->n_vectors++;
-
- f = bif->per_thread_info[thread_index].frame[port3];
- to_next = vlib_frame_vector_args (f);
- to_next += f->n_vectors;
- to_next[0] = vlib_get_buffer_index (vm, b[3]);
- f->n_vectors++;
-
+ /* Do the tracing before the interface is overwritten */
if (PREDICT_FALSE (n_trace > 0))
{
+ u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0;
vlib_trace_buffer (vm, node, next0, b[0], 0 /* follow_chain */ );
vlib_set_trace_count (vm, node, --n_trace);
t0 = vlib_add_trace (vm, node, b[0], sizeof (*t0));
eth = (ethernet_header_t *) vlib_buffer_get_current (b[0]);
t0->ethernet = *eth;
- t0->sw_if_index = sw_if_index;
- t0->bond_sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+ t0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+ t0->bond_sw_if_index = sif_if_index0;
if (PREDICT_TRUE (n_trace > 0))
{
@@ -565,8 +507,8 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
t0 = vlib_add_trace (vm, node, b[1], sizeof (*t0));
eth = (ethernet_header_t *) vlib_buffer_get_current (b[1]);
t0->ethernet = *eth;
- t0->sw_if_index = sw_if_index1;
- t0->bond_sw_if_index = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
+ t0->sw_if_index = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
+ t0->bond_sw_if_index = sif_if_index1;
if (PREDICT_TRUE (n_trace > 0))
{
@@ -576,9 +518,8 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
t0 = vlib_add_trace (vm, node, b[2], sizeof (*t0));
eth = (ethernet_header_t *) vlib_buffer_get_current (b[2]);
t0->ethernet = *eth;
- t0->sw_if_index = sw_if_index2;
- t0->bond_sw_if_index =
- vnet_buffer (b[2])->sw_if_index[VLIB_TX];
+ t0->sw_if_index = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
+ t0->bond_sw_if_index = sif_if_index2;
if (PREDICT_TRUE (n_trace > 0))
{
@@ -589,68 +530,93 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
eth =
(ethernet_header_t *) vlib_buffer_get_current (b[3]);
t0->ethernet = *eth;
- t0->sw_if_index = sw_if_index3;
- t0->bond_sw_if_index =
+ t0->sw_if_index =
vnet_buffer (b[3])->sw_if_index[VLIB_TX];
+ t0->bond_sw_if_index = sif_if_index3;
}
}
}
}
- from += 4;
+
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sif_if_index0;
+ vnet_buffer (b[1])->sw_if_index[VLIB_TX] = sif_if_index1;
+ vnet_buffer (b[2])->sw_if_index[VLIB_TX] = sif_if_index2;
+ vnet_buffer (b[3])->sw_if_index[VLIB_TX] = sif_if_index3;
+
+ ptd->per_port_queue[sif_if_index0].buffers[ptd->per_port_queue
+ [sif_if_index0].n_buffers] =
+ vlib_get_buffer_index (vm, b[0]);
+ ptd->per_port_queue[sif_if_index0].n_buffers++;
+
+ ptd->per_port_queue[sif_if_index1].buffers[ptd->per_port_queue
+ [sif_if_index1].n_buffers] =
+ vlib_get_buffer_index (vm, b[1]);
+ ptd->per_port_queue[sif_if_index1].n_buffers++;
+
+ ptd->per_port_queue[sif_if_index2].buffers[ptd->per_port_queue
+ [sif_if_index2].n_buffers] =
+ vlib_get_buffer_index (vm, b[2]);
+ ptd->per_port_queue[sif_if_index2].n_buffers++;
+
+ ptd->per_port_queue[sif_if_index3].buffers[ptd->per_port_queue
+ [sif_if_index3].n_buffers] =
+ vlib_get_buffer_index (vm, b[3]);
+ ptd->per_port_queue[sif_if_index3].n_buffers++;
+
n_left -= 4;
b += 4;
}
while (n_left > 0)
{
- u32 next0 = 0;
- u32 port0 = 0;
+ u32 sif_if_index0;
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
- sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
-
if (PREDICT_TRUE (slave_count > 1))
port0 =
(bond_load_balance_table[bif->lb]).load_balance (vm, node, bif,
b[0], slave_count);
- sif_if_index = *vec_elt_at_index (bif->active_slaves, port0);
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sif_if_index;
- if (PREDICT_FALSE
- ((bif->per_thread_info[thread_index].frame[port0] == 0)))
- bif->per_thread_info[thread_index].frame[port0] =
- vnet_get_frame_to_sw_interface (vnm, sif_if_index);
- f = bif->per_thread_info[thread_index].frame[port0];
- to_next = vlib_frame_vector_args (f);
- to_next += f->n_vectors;
- to_next[0] = vlib_get_buffer_index (vm, b[0]);
- f->n_vectors++;
+ sif_if_index0 = *vec_elt_at_index (bif->active_slaves, port0);
+ /* Do the tracing before the old interface is overwritten */
if (PREDICT_FALSE (n_trace > 0))
{
+ u32 next0 = 0;
+
vlib_trace_buffer (vm, node, next0, b[0], 0 /* follow_chain */ );
vlib_set_trace_count (vm, node, --n_trace);
t0 = vlib_add_trace (vm, node, b[0], sizeof (*t0));
eth = (ethernet_header_t *) vlib_buffer_get_current (b[0]);
t0->ethernet = *eth;
- t0->sw_if_index = sw_if_index;
- t0->bond_sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+ t0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+ t0->bond_sw_if_index = sif_if_index0;
}
- from += 1;
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sif_if_index0;
+
+ ptd->per_port_queue[sif_if_index0].buffers[ptd->per_port_queue
+ [sif_if_index0].n_buffers] =
+ vlib_get_buffer_index (vm, b[0]);
+ ptd->per_port_queue[sif_if_index0].n_buffers++;
+
n_left -= 1;
b += 1;
}
- for (port = 0; port < slave_count; port++)
+ for (port0 = 0; port0 < slave_count; port0++)
{
- f = bif->per_thread_info[thread_index].frame[port];
- if (f == 0)
- continue;
-
- sw_if_index = *vec_elt_at_index (bif->active_slaves, port);
- vnet_put_frame_to_sw_interface (vnm, sw_if_index, f);
- bif->per_thread_info[thread_index].frame[port] = 0;
+ sw_if_index = *vec_elt_at_index (bif->active_slaves, port0);
+ if (PREDICT_TRUE (ptd->per_port_queue[sw_if_index].n_buffers))
+ {
+ f = vnet_get_frame_to_sw_interface (vnm, sw_if_index);
+ f->n_vectors = ptd->per_port_queue[sw_if_index].n_buffers;
+ to_next = vlib_frame_vector_args (f);
+ clib_memcpy (to_next, ptd->per_port_queue[sw_if_index].buffers,
+ f->n_vectors << 2);
+ vnet_put_frame_to_sw_interface (vnm, sw_if_index, f);
+ ptd->per_port_queue[sw_if_index].n_buffers = 0;
+ }
}
vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters
diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h
index 104b7b15700..6b13a46a355 100644
--- a/src/vnet/bonding/node.h
+++ b/src/vnet/bonding/node.h
@@ -139,9 +139,15 @@ typedef CLIB_PACKED (struct
typedef struct
{
- vlib_frame_t **frame;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 buffers[VLIB_FRAME_SIZE];
+ u32 n_buffers;
+} bond_per_port_queue_t;
-} bond_if_per_thread_t;
+typedef struct
+{
+ bond_per_port_queue_t *per_port_queue;
+} bond_per_thread_data_t;
typedef struct
{
@@ -175,7 +181,6 @@ typedef struct
u8 hw_address[6];
clib_spinlock_t lockp;
- bond_if_per_thread_t *per_thread_info;
} bond_if_t;
typedef struct
@@ -292,7 +297,7 @@ typedef struct
/* pool of bonding interfaces */
bond_if_t *interfaces;
- /* pool of lacp neighbors */
+ /* pool of slave interfaces */
slave_if_t *neighbors;
/* rapidly find a bond by vlib software interface index */
@@ -308,6 +313,8 @@ typedef struct
lacp_enable_disable_func lacp_enable_disable;
uword *slave_by_sw_if_index;
+
+ bond_per_thread_data_t *per_thread_data;
} bond_main_t;
/* bond packet trace capture */