/*
 * Copyright (c) 2019 Arm Limited.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <vppinfra/mem.h>
#include <vppinfra/cache.h>
#include <vppinfra/lock.h>
#include <pthread.h>
#include <vppinfra/format.h>
#include <vppinfra/error.h>
#include <vppinfra/time.h>
#include <sched.h>
#include <vppinfra/atomics.h>

static u32 all_threads_online = 0;
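/* Start gate: spawned reader and writer threads spin-wait on this flag and
 * only begin their measured loops once the main test thread sets it, so all
 * threads start at roughly the same time. */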

typedef struct
{
  uword threads_per_core;
  uword cpu_mask_read;
  uword read_cores;
  uword cpu_mask_write;
  uword write_cores;
  uword increment_per_thread;
  clib_rwlock_t rwlock;
  uword shared_count;
  uword iterations;
} rwlock_test_main_t;
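/* shared_count is the counter exercised by the benchmark: writer threads
 * increment it under the writer lock, and reader threads poll it under the
 * reader lock until it reaches
 * threads_per_core * write_cores * increment_per_thread. */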

void *
write_shared_counter (void *arg)
{
  f64 *time = vec_new (f64, 1);
  *time = 0;
  rwlock_test_main_t *rtm = arg;

  /* Wait for all threads to be created */
  while (!clib_atomic_load_acq_n (&all_threads_online));

  f64 start = clib_cpu_time_now ();
  for (uword i = 0; i < rtm->increment_per_thread; i++)
    {
      clib_rwlock_writer_lock (&rtm->rwlock);
      rtm->shared_count++;
      clib_rwlock_writer_unlock (&rtm->rwlock);
    }
  *time = clib_cpu_time_now () - start;
  return time;
}

void *
read_shared_counter (void *arg)
{
  f64 *time = vec_new (f64, 1);
  *time = 0;
  rwlock_test_main_t *rtm = arg;
  uword cnt_cpy = 0, exp = rtm->increment_per_thread * rtm->write_cores *
    rtm->threads_per_core;

  /* Wait for all threads to be created */
  while (!clib_atomic_load_acq_n (&all_threads_online));

  f64 start = clib_cpu_time_now ();
  while (cnt_cpy < exp)
    {
      clib_rwlock_reader_lock (&rtm->rwlock);
      cnt_cpy = rtm->shared_count;
      clib_rwlock_reader_unlock (&rtm->rwlock);
    }
  *time = clib_cpu_time_now () - start;
  return time;
}

unsigned
test_rwlock (rwlock_test_main_t * rtm, f64 * elapse_time)
{
  int error = 0, total_threads = (rtm->read_cores + rtm->write_cores)
    * rtm->threads_per_core;
  pthread_t pthread[total_threads];

  cpu_set_t cpuset;
  unsigned cores_set = 0, cpu_id = 0;

  /* Spawn reader (consumer) threads */
  for (unsigned cpu_mask = rtm->cpu_mask_read; cpu_mask; cpu_mask >>= 1)
    {
      if (!(cpu_mask & 1))
	{
	  cpu_id++;
	  continue;
	}

      CPU_ZERO (&cpuset);
      CPU_SET (cpu_id, &cpuset);
      for (uword t_num = 0; t_num < rtm->threads_per_core; t_num++)
	{
	  uword t_index = cores_set * rtm->threads_per_core + t_num;
	  if ((error = pthread_create (&pthread[t_index], NULL,
				       &read_shared_counter, rtm)))
	    clib_unix_warning ("pthread_create failed with %d", error);

	  if ((error = pthread_setaffinity_np (pthread[t_index],
					       sizeof (cpu_set_t), &cpuset)))
	    clib_unix_warning ("pthread_set_affinity_np failed with %d",
			       error);
	}
      cores_set++;
      cpu_id++;
    }

  /* Spawn writer (producer) threads */
  cpu_id = 0;
  for (unsigned cpu_mask = rtm->cpu_mask_write; cpu_mask; cpu_mask >>= 1)
    {
      if (!(cpu_mask & 1))
	{
	  cpu_id++;
	  continue;
	}

      CPU_ZERO (&cpuset);
      CPU_SET (cpu_id, &cpuset);
      for (uword t_num = 0; t_num < rtm->threads_per_core; t_num++)
	{
	  uword t_index = cores_set * rtm->threads_per_core + t_num;
	  if ((error = pthread_create (&pthread[t_index], NULL,
				       &write_shared_counter, rtm)))
	    clib_unix_warning ("pthread_create failed with %d", error);

	  if ((error = pthread_setaffinity_np (pthread[t_index],
					       sizeof (cpu_set_t), &cpuset)))
	    clib_unix_warning ("pthread_set_affinity_np failed with %d",
			       error);
	}
      cores_set++;
      cpu_id++;
    }

  /* Launch all threads */
  clib_atomic_store_rel_n (&all_threads_online, 1);

  for (uword thread_num = 0; thread_num < total_threads; thread_num++)
    {
      f64 *time;
      if ((error = pthread_join (pthread[thread_num], (void *) &time)))
	clib_unix_warning ("pthread_join failed with %d", error);
      *elapse_time += *time;
      vec_free (time);
    }

  fformat (stdout, "Time elapsed: %.4e\n", *elapse_time);
  return rtm->shared_count;
}

uword
num_cores_in_cpu_mask (uword mask)
{
  uword num_cores = 0;
  for (uword cpu_mask = mask; cpu_mask; cpu_mask >>= 1)
    num_cores += (cpu_mask & 1);
  return num_cores;
}

int
test_rwlock_main (unformat_input_t * i)
{
  rwlock_test_main_t _rtm, *rtm = &_rtm;
  clib_memset (rtm, 0, sizeof (rwlock_test_main_t));

  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
    {
      if (0 == unformat (i, "threads/core %d", &rtm->threads_per_core)
	  && 0 == unformat (i, "cpu_mask_read %x", &rtm->cpu_mask_read)
	  && 0 == unformat (i, "cpu_mask_write %x", &rtm->cpu_mask_write)
	  && 0 == unformat (i, "increment %d", &rtm->increment_per_thread)
	  && 0 == unformat (i, "iterations %d", &rtm->iterations))
	{
	  clib_unix_warning ("unknown input '%U'", format_unformat_error, i);
	  return 1;
	}
    }

  rtm->read_cores = num_cores_in_cpu_mask (rtm->cpu_mask_read);
  rtm->write_cores = num_cores_in_cpu_mask (rtm->cpu_mask_write);

  uword total_increment = rtm->threads_per_core * rtm->write_cores *
    rtm->increment_per_thread;

  clib_rwlock_init (&rtm->rwlock);

  f64 average_time = 0;
  for (uword trial = 0; trial < rtm->iterations; trial++)
    {
      rtm->shared_count = 0;
      f64 elapse_time = 0;
      if (test_rwlock (rtm, &elapse_time) != total_increment)
	{
	  clib_rwlock_free (&rtm->rwlock);
	  fformat (stdout, "FAILED: expected count: %d, actual count: %d\n",
		   total_increment, rtm->shared_count);
	  return 1;
	}
      fformat (stdout, "Trial %d SUCCESS: %d = %d\n",
	       trial, rtm->shared_count, total_increment);
      average_time = (average_time * trial + elapse_time) / (trial + 1);
      fformat (stdout, "Average lock/unlock cycles: %.4e\n", average_time);
    }
  clib_rwlock_free (&rtm->rwlock);
  return 0;
}

#ifdef CLIB_UNIX
/** Launches a number of writer threads that simultaneously increment a global
    counter and a number of reader threads that continuously poll the counter,
    and records per-thread elapsed CPU cycles for rwlock performance
    benchmarking.

    @param "threads/core [# threads/core]" - number of threads per core
    @param "cpu_mask_read [cpu_mask]" - reader thread cpu mask in hex, e.g.
            ff sets cpus 0 - 7
    @param "cpu_mask_write [cpu_mask]" - writer thread cpu mask in hex
    @param "increment [# increments]" - number of increments per writer thread
    @param "iterations [# iterations]" - number of iterations
    @returns exit code
*/
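/* Illustrative invocation (the test binary name and how it is built are
 * assumptions here, not taken from this file):
 *
 *   test_rwlock threads/core 2 cpu_mask_read f cpu_mask_write f0 \
 *               increment 100000 iterations 5
 *
 * With that input the test runs two threads per core, pins reader threads to
 * cpus 0-3 and writer threads to cpus 4-7, has each writer perform 100000
 * increments, and repeats the whole measurement five times. */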
int
main (int argc, char *argv[])
{
  unformat_input_t i;
  i32 ret;
  clib_time_t time;

  clib_mem_init (0, 3ULL << 30);
  clib_time_init (&time);

  unformat_init_command_line (&i, argv);
  ret = test_rwlock_main (&i);
  unformat_free (&i);

  return ret;
}
#endif /* CLIB_UNIX */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */