src/vnet/util/refcount.h
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * vlib provides lock-free counters, but those:
 * - Have 16-bit per-CPU counters, which may overflow.
 * - Can only be incremented.
 *
 * This counter is very similar to the vlib counters, but may be used to
 * count references. Such a counter includes an arbitrary number of
 * counters, each identified by its index. Counters are aggregated in
 * per-CPU memory to avoid cache-line contention between threads.
 *
 * Warning:
 *   This reference counter is lock-free but is not race-condition free.
 *   The counting result is approximate, so another mechanism must be
 *   used in order to ensure that an object may safely be freed.
 *
 */

#include <vnet/vnet.h>
#include <vppinfra/lock.h>

/*
 * Reference counting
 * A dedicated reference counter is used. The design is quite
 * similar to vlib counters, but:
 *   - The value can be decreased.
 *   - Summing does not zero the per-thread counters.
 *   - Only the owning thread may reallocate its counters vector
 *     (to avoid concurrency issues).
 */
typedef struct {
  u32 *counters;                 /* vector of per-object counters */
  clib_spinlock_t counter_lock;  /* held while reading or resizing */
  CLIB_CACHE_LINE_ALIGN_MARK(o); /* pad to a full cache line */
} vlib_refcount_per_cpu_t;

typedef struct {
  vlib_refcount_per_cpu_t *per_cpu;
} vlib_refcount_t;

static_always_inline
void vlib_refcount_lock (clib_spinlock_t counter_lock)
{
  clib_spinlock_lock (&counter_lock);
}

static_always_inline
void vlib_refcount_unlock (clib_spinlock_t counter_lock)
{
  clib_spinlock_unlock (&counter_lock);
}

/* Grows the given per-thread counters vector to at least 'size' entries;
 * implemented out of line, taking the per-thread lock for the swap. */
void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size);

/* Adds 'v' (which may be negative) to counter 'counter_index' on the
 * calling thread's private vector. Lock-free on the fast path. */
static_always_inline
void vlib_refcount_add(vlib_refcount_t *r, u32 thread_index, u32 counter_index, i32 v)
{
  vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[thread_index];
  if (PREDICT_FALSE(counter_index >= vec_len(per_cpu->counters)))
    /* Grow geometrically, with some headroom past the requested index. */
    __vlib_refcount_resize(per_cpu, clib_max(counter_index + 16, vec_len(per_cpu->counters) * 2));

  per_cpu->counters[counter_index] += v;
}

/* Returns the sum of all per-thread counters for 'index'. Takes each
 * per-thread lock while reading, so the result is only approximate
 * while other threads keep counting concurrently. */
u64 vlib_refcount_get(vlib_refcount_t *r, u32 index);

/* Initializes the refcount: one cache-line-aligned per-thread entry per
 * vlib main, each protected by its own spinlock. */
static_always_inline
void vlib_refcount_init(vlib_refcount_t *r)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 thread_index;
  r->per_cpu = 0;
  vec_validate (r->per_cpu, tm->n_vlib_mains - 1);

  for (thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++)
    {
      clib_spinlock_init (&r->per_cpu[thread_index].counter_lock);
    }
}
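
/*
 * Usage sketch (illustrative only, not part of this header): a
 * hypothetical consumer counting how many sessions reference each
 * object index. The names session_refcount and example_* are made up
 * for this sketch; vlib_get_thread_index() is assumed as the usual way
 * a worker learns its own thread index.
 */
#include <vnet/util/refcount.h>

static vlib_refcount_t session_refcount;

static void
example_setup (void)
{
  vlib_refcount_init (&session_refcount);
}

static_always_inline void
example_take (u32 obj_index)
{
  /* Lock-free: only touches the calling thread's counter vector. */
  vlib_refcount_add (&session_refcount, vlib_get_thread_index (),
                     obj_index, 1);
}

static_always_inline void
example_release (u32 obj_index)
{
  vlib_refcount_add (&session_refcount, vlib_get_thread_index (),
                     obj_index, -1);
}

static int
example_may_free (u32 obj_index)
{
  /* The sum is only approximate while other threads keep counting;
   * per the warning above, pair this check with another mechanism
   * (e.g. a worker barrier) before actually freeing the object. */
  return vlib_refcount_get (&session_refcount, obj_index) == 0;
}
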
.. _tested_physical_topologies:

Physical Testbeds
=================

All :abbr:`FD.io (Fast Data Input/Output)` :abbr:`CSIT (Continuous System
Integration and Testing)` performance tests listed in this report are
executed on physical testbeds built with bare-metal servers hosted by
the :abbr:`LF (Linux Foundation)` FD.io project. Two testbed topologies
are used:

- **3-Node Topology**: Consists of two servers acting as SUTs (Systems
  Under Test) and one server acting as TG (Traffic Generator), all
  connected in a ring topology. Used for executing all of the data
  plane tests, including overlay tunnel and IPSec tests.
- **2-Node Topology**: Consists of one server acting as SUT (System
  Under Test) and one server acting as TG (Traffic Generator), both
  connected in a ring topology. Used for executing tests without any
  overlay tunnel encapsulations. Added in CSIT rls18.07.

Current FD.io production testbeds are built with servers based on two
processor generations of Intel Xeons: Haswell-SP (E5-2699v3) and Skylake
(Platinum 8180). Testbeds built with servers based on Arm processors are
in the process of being added to FD.io production.

SUT and DUT server performance depends on the server and processor
type; hence results for testbeds based on different servers must be
reported separately, and compared only where appropriate.

Complete technical specifications of compute servers used in CSIT
physical testbeds are maintained in the FD.io CSIT repository:
`FD.io CSIT testbeds - Xeon Skylake, Arm, Atom`_ and
`FD.io CSIT Testbeds - Xeon Haswell`_.

The following sections describe the existing production testbed types.

3-Node Xeon Haswell (3n-hsw)
----------------------------

The 3n-hsw testbed is based on three Cisco UCS-c240m3 servers, each
equipped with two Intel Xeon Haswell-SP E5-2699v3 2.3 GHz 18-core
processors. The physical testbed topology is depicted in the figure
below.

.. only:: latex

    .. raw:: latex

        \begin{figure}[H]
            \centering
                \graphicspath{{../_tmp/src/introduction/}}
                \includegraphics[width=0.90\textwidth]{testbed-3n-hsw}
                \label{fig:testbed-3n-hsw}
        \end{figure}

.. only:: html

    .. figure:: testbed-3n-hsw.svg
        :alt: testbed-3n-hsw
        :align: center

SUT1 and SUT2 servers are populated with the following NIC models:

#. NIC-1: VIC 1385 2p40GE Cisco.
#. NIC-2: x520 2p10GE Intel.
#. NIC-3: empty.
#. NIC-4: xl710-QDA2 2p40GE Intel.
#. NIC-5: x710-DA2 2p10GE Intel.
#. NIC-6: QAT 8950 50G (Walnut Hill) Intel.

TG servers run the T-Rex application and are populated with the
following NIC models:

#. NIC-1: xl710-QDA2 2p40GE Intel.
#. NIC-2: x710-DA2 2p10GE Intel.
#. NIC-3: empty.
#. NIC-4: xl710-QDA2 2p40GE Intel.
#. NIC-5: x710-DA2 2p10GE Intel.
#. NIC-6: NIC x710-DA2 2p10GE Intel. (For self-tests.)

All Intel Xeon Haswell servers run with Intel Hyper-Threading disabled,
so the number of logical cores exposed to Linux matches the number of
physical cores: 18 per processor socket.

A total of three 3n-hsw testbeds are in operation in FD.io labs.

3-Node Xeon Skylake (3n-skx)
----------------------------

The 3n-skx testbed is based on three SuperMicro SYS-7049GP-TRT servers,
each equipped with two Intel Xeon Skylake Platinum 8180 2.5 GHz 28-core
processors. The physical testbed topology is depicted in the figure
below.

.. only:: latex

    .. raw:: latex

        \begin{figure}[H]
            \centering
                \graphicspath{{../_tmp/src/introduction/}}
                \includegraphics[width=0.90\textwidth]{testbed-3n-skx}
                \label{fig:testbed-3n-skx}
        \end{figure}

.. only:: html

    .. figure:: testbed-3n-skx.svg
        :alt: testbed-3n-skx
        :align: center

SUT1 and SUT2 servers are populated with the following NIC models:

#. NIC-1: x710-DA4 4p10GE Intel.
#. NIC-2: xxv710-DA2 2p25GE Intel.
#. NIC-3: empty, future expansion.
#. NIC-4: empty, future expansion.
#. NIC-5: empty, future expansion.
#. NIC-6: empty, future expansion.

TG servers run the T-Rex application and are populated with the
following NIC models:

#. NIC-1: x710-DA4 4p10GE Intel.
#. NIC-2: xxv710-DA2 2p25GE Intel.
#. NIC-3: empty, future expansion.
#. NIC-4: empty, future expansion.
#. NIC-5: empty, future expansion.
#. NIC-6: x710-DA4 4p10GE Intel. (For self-tests.)

All Intel Xeon Skylake servers run with Intel Hyper-Threading enabled,
doubling the number of logical cores exposed to Linux: each 28-core
processor socket presents 56 logical cores.

A total of two 3n-skx testbeds are in operation in FD.io labs.

2-Node Xeon Skylake (2n-skx)
----------------------------

The 2n-skx testbed is based on two SuperMicro SYS-7049GP-TRT servers,
each equipped with two Intel Xeon Skylake Platinum 8180 2.5 GHz 28-core
processors. The physical testbed topology is depicted in the figure
below.

.. only:: latex

    .. raw:: latex

        \begin{figure}[H]
            \centering
                \graphicspath{{../_tmp/src/introduction/}}
                \includegraphics[width=0.90\textwidth]{testbed-2n-skx}
                \label{fig:testbed-2n-skx}
        \end{figure}

.. only:: html

    .. figure:: testbed-2n-skx.svg
        :alt: testbed-2n-skx
        :align: center

SUT servers are populated with the following NIC models:

#. NIC-1: x710-DA4 4p10GE Intel.
#. NIC-2: xxv710-DA2 2p25GE Intel.
#. NIC-3: mcx556a-edat ConnectX5 2p100GE Mellanox. (Not used yet.)
#. NIC-4: empty, future expansion.
#. NIC-5: empty, future expansion.
#. NIC-6: empty, future expansion.

TG servers run the T-Rex application and are populated with the
following NIC models:

#. NIC-1: x710-DA4 4p10GE Intel.
#. NIC-2: xxv710-DA2 2p25GE Intel.
#. NIC-3: mcx556a-edat ConnectX5 2p100GE Mellanox. (Not used yet.)
#. NIC-4: empty, future expansion.
#. NIC-5: empty, future expansion.
#. NIC-6: x710-DA4 4p10GE Intel. (For self-tests.)

All Intel Xeon Skylake servers run with Intel Hyper-Threading enabled,
doubling the number of logical cores exposed to Linux: each 28-core
processor socket presents 56 logical cores.

A total of four 2n-skx testbeds are in operation in FD.io labs.

2-Node Atom Denverton (2n-dnv)
------------------------------

The 2n-dnv testbed is based on one Intel S2600WFT server equipped with
two Intel Xeon Skylake Platinum 8180 2.5 GHz 28-core processors, and
one SuperMicro SYS-E300-9A server equipped with one Intel Atom C3858
2.00 GHz 12-core processor. The physical testbed topology is depicted
in the figure below.

.. only:: latex

    .. raw:: latex

        \begin{figure}[H]
            \centering
                \graphicspath{{../_tmp/src/introduction/}}
                \includegraphics[width=0.90\textwidth]{testbed-2n-dnv}
                \label{fig:testbed-2n-dnv}
        \end{figure}

.. only:: html

    .. figure:: testbed-2n-dnv.svg
        :alt: testbed-2n-dnv
        :align: center

The SUT server has four internal 10GE NIC ports:

#. P-1: x553 copper port.
#. P-2: x553 copper port.
#. P-3: x553 fiber port.
#. P-4: x553 fiber port.

The TG server runs the T-Rex software traffic generator and is
populated with the following NIC models:

#. NIC-1: x550-T2 2p10GE Intel.
#. NIC-2: x550-T2 2p10GE Intel.
#. NIC-3: x520-DA2 2p10GE Intel.
#. NIC-4: x520-DA2 2p10GE Intel.

The 2n-dnv testbed is in operation in Intel SH labs.