aboutsummaryrefslogtreecommitdiffstats
path: root/cicn-plugin/cicn/cicn_rte_mbuf_inlines.h
blob: caf700438331f9699ae7431335b78120a0d3f03f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Part of cicn plugin's dpdk/rte shim layer for using dpdk/rte mechanisms
 * directly while hiding that fact from the bulk of the cicn plugin code.
 * - cicn plugin should not be looking at dpdk headers and should not need
 *    to. (As of v17.01, however, buffer cloning to support 0-copy on
 *    - content message replication
 *    - content message transmission based on CS hits
 *      is only available with dpdk, hence those mechanisms are used
 *      by cicn plugin.)
 * - when vlib_buffer cloning support is provided, this shim layer
 *   can be deprecated/deleted, and cicn plugin will be simpler and will
 *   be able to run with a vpp that does not include dpdk.
 * This file contains the code to use dpdk "struct rte_mbuf *" buffer
 * headers for 0-copy cloning of content messages that are in CS, while
 * hiding these references from the cicn plugin main code.
 */
#ifndef _CICN_RTE_MBUF_INLINES_H_
#define _CICN_RTE_MBUF_INLINES_H_ 1

#if !CICN_VPP_PLUGIN
#error "cicn-internal file included externally"
#endif

#include <cicn/cicn_rte_mbuf.h>
#include <vlib/vlib.h>

/*
 * Allocate a single vlib buffer and hand back its header pointer instead of
 * its buffer index.
 *
 * Returns the buffer header on success, or 0 (null) when vlib_buffer_alloc()
 * does not supply exactly one buffer. The fl, socket_id and outface
 * arguments are accepted but not used by this implementation.
 */
static inline vlib_buffer_t *
cicn_infra_vlib_buffer_alloc (vlib_main_t * vm, vlib_buffer_free_list_t * fl,
			      unsigned socket_id,
			      cicn_face_db_entry_t * outface)
{
  u32 buf_index;

  /* Exactly one buffer is requested; any other count is a failure. */
  if (vlib_buffer_alloc (vm, &buf_index, 1) == 1)
    {
      return vlib_get_buffer (vm, buf_index);
    }

  return 0;
}

/*
 * Release one vlib buffer identified by its header pointer: the pointer is
 * converted back to a buffer index, and that index is freed.
 * The outface argument is accepted but not used here.
 */
static inline void
cicn_infra_vlib_buffer_free (vlib_buffer_t * b0, vlib_main_t * vm,
			     cicn_face_db_entry_t * outface)
{
  vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b0));
}

#if CICN_FEATURE_VPP_VLIB_CLONING	// to cut over, need API from vpp gerrit 4872
/*
 * Long-term, vlib_buffer_clone() API will be supported and
 * the cicn_rte_mbuf*.h files and all references to rte_mbuf can be removed from
 * cicn plugin, which will then perform better and be linkable with vpp-lite.
 *
 * For a brief interim, can leave this file but
 * with #define CICN_FEATURE_VPP_VLIB_CLONING 1
 * Some code below (e.g. cicn_infra_vlib_buffer_clone_attach_finalize()
 * contents) must be moved to node.c.
 *
 * See comments on alternate definition under !CICN_FEATURE_VPP_VLIB_CLONING
 */

/*
 * Socket id for the vlib-cloning build: always reports 0.
 * (The value is not used if rte is not used.)
 */
static inline unsigned
cicn_infra_rte_socket_id (void)
{
  const unsigned socket_id = 0;

  return socket_id;
}

/*
 * CS-prep finalization hook for the vlib-cloning build: intentionally empty.
 */
static inline void
cicn_infra_vlib_buffer_cs_prep_finalize (vlib_main_t * vm,
					 vlib_buffer_t * cs_b0)
{
  /* Nothing to do; both arguments are deliberately unused. */
  (void) vm;
  (void) cs_b0;
}

/*
 * Clone wrapper for the vlib-cloning build: forwards src_b0 to
 * vlib_buffer_clone() and returns whatever that call yields.
 * The vm, fl, socket_id and outface arguments are accepted but not used.
 */
static inline vlib_buffer_t *
cicn_infra_vlib_buffer_clone (vlib_buffer_t * src_b0, vlib_main_t * vm,
			      vlib_buffer_free_list_t * fl,
			      unsigned socket_id,
			      cicn_face_db_entry_t * outface)
{
  vlib_buffer_t *clone_b0 = vlib_buffer_clone (src_b0);

  return clone_b0;
}

/*
 * Clone-attach finalization hook for the vlib-cloning build: intentionally
 * empty (there is no rte_mbuf chain to fix up in this configuration).
 *
 * NOTE(review): the !CICN_FEATURE_VPP_VLIB_CLONING definition of this
 * function in this file takes a third "outface" parameter; confirm the
 * callers' argument list before enabling this build.
 */
static inline void
cicn_infra_vlib_buffer_clone_attach_finalize (vlib_buffer_t * hdr_b0,
					      vlib_buffer_t * clone_b0)
{
  /* Nothing to do; both arguments are deliberately unused. */
  (void) hdr_b0;
  (void) clone_b0;
}
#else // !CICN_FEATURE_VPP_VLIB_CLONING

/*
 * Replacement for rte_mempool_get_bulk():
 * - rte_mempool_get_bulk() does not coexist with vlib_buffer_free(): vpp
 *   runs out of buffers (even when only 1 buffer is being allocated per call).
 * - this replacement instead calls vlib_buffer_alloc(), which does coexist
 *   with vlib_buffer_free().
 *
 * vm:        vlib main used for the allocation
 * rmp:       unused; kept so the call shape mirrors rte_mempool_get_bulk()
 * rte_mbufs: output array, receives new_bufs rte_mbuf pointers on success
 * new_bufs:  number of buffers wanted; must not exceed the size of the
 *            local index array (5)
 *
 * Returns 0 on success and -ENOENT on failure. The operation is
 * all-or-nothing: on failure no buffers remain allocated and rte_mbufs is
 * left untouched.
 */
static inline int
cicn_infra_pvt_rte_mempool_get_bulk (vlib_main_t * vm,
				     struct rte_mempool *rmp,
				     void **rte_mbufs, u32 new_bufs)
{
  u32 bi_bufs[5];
  u32 n_alloc;
  u32 i;

  /* ASSERT may be compiled out, so also refuse oversized requests at
   * runtime rather than overrun bi_bufs[]. */
  ASSERT (new_bufs <= ARRAY_LEN (bi_bufs));
  if (new_bufs > ARRAY_LEN (bi_bufs))
    {
      return -ENOENT;
    }

  n_alloc = vlib_buffer_alloc (vm, bi_bufs, new_bufs);
  if (n_alloc != new_bufs)
    {
      /* vlib_buffer_alloc() may hand back fewer buffers than requested;
       * give those back so a short allocation does not leak them. */
      if (n_alloc > 0)
	{
	  vlib_buffer_free (vm, bi_bufs, n_alloc);
	}
      return -ENOENT;
    }

  for (i = 0; i < new_bufs; i++)
    {
      vlib_buffer_t *b0 = vlib_get_buffer (vm, bi_bufs[i]);
      rte_mbufs[i] = rte_mbuf_from_vlib_buffer (b0);
    }
  return (0);
}

// #include <vnet/dpdk_replication.h> // copied/modified below

/*
 * Modified copy of .../vpp/vnet/vnet/dpdk_replication.h:
 * - maintain foreign indentation for easier comparison
 * - call cicn_infra_pvt_rte_mempool_get_bulk() in place of calling
 *   rte_mempool_get_bulk(), because buffers obtained from the latter do not
 *   work right with vlib_buffer_free().
 *
 * Makes a deep copy of buffer b (and, if b is chained, of every buffer in
 * its chain) into freshly allocated buffers.
 *
 * vm: vlib main used for allocation and index/pointer translation
 * b:  first buffer of the packet to copy; it is only read
 *
 * Returns the first buffer of the copy, or 0 when the chain needs more
 * buffers than the local array holds (5) or when allocation fails.
 *
 * For each copied buffer the payload bytes, current_data and current_length
 * are duplicated. The first copy also receives the RX/TX sw_if_index values
 * and the l2 opaque data. On the chained path each buffer's flags are copied
 * as well, total_length_not_including_first_buffer is copied to the first
 * buffer, and the copies are linked to each other through next_buffer.
 */
static inline vlib_buffer_t *
cicn_infra_pvt_vlib_dpdk_copy_buffer (vlib_main_t * vm, vlib_buffer_t * b)
{
  u32 new_buffers_needed = 1;
  unsigned socket_id = rte_socket_id ();
  struct rte_mempool *rmp = vm->buffer_main->pktmbuf_pools[socket_id];
  struct rte_mbuf *rte_mbufs[5];
  vlib_buffer_free_list_t *fl;
  vlib_buffer_t *rv;
  vlib_buffer_t *src_buf, *dst_buf;

  fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);

  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
    {
      vlib_buffer_t *tmp = b;
      u32 i;

      /* Count the buffers in the source chain. */
      while (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
	{
	  new_buffers_needed++;
	  tmp = vlib_get_buffer (vm, tmp->next_buffer);
	}

      /* Should never happen... */
      if (PREDICT_FALSE (new_buffers_needed > ARRAY_LEN (rte_mbufs)))
	{
	  clib_warning ("need %d buffers", new_buffers_needed);
	  return 0;
	}

      if (cicn_infra_pvt_rte_mempool_get_bulk (vm, rmp, (void **) rte_mbufs,
					       new_buffers_needed) < 0)
	return 0;

      src_buf = b;
      rv = dst_buf = vlib_buffer_from_rte_mbuf (rte_mbufs[0]);
      vlib_buffer_init_for_free_list (dst_buf, fl);

      for (i = 0; i < new_buffers_needed; i++)
	{
	  /* Copy the payload source -> destination, at the same offset the
	   * source uses, so the copied current_data below stays valid for
	   * every buffer in the chain. */
	  clib_memcpy (dst_buf->data + src_buf->current_data,
		       src_buf->data + src_buf->current_data,
		       src_buf->current_length);
	  dst_buf->current_data = src_buf->current_data;
	  dst_buf->current_length = src_buf->current_length;
	  dst_buf->flags = src_buf->flags;

	  if (i == 0)
	    {
	      dst_buf->total_length_not_including_first_buffer =
		src_buf->total_length_not_including_first_buffer;
	      vnet_buffer (dst_buf)->sw_if_index[VLIB_RX] =
		vnet_buffer (src_buf)->sw_if_index[VLIB_RX];
	      vnet_buffer (dst_buf)->sw_if_index[VLIB_TX] =
		vnet_buffer (src_buf)->sw_if_index[VLIB_TX];
	      vnet_buffer (dst_buf)->l2 = vnet_buffer (b)->l2;
	    }

	  if (i < new_buffers_needed - 1)
	    {
	      vlib_buffer_t *next_dst =
		vlib_buffer_from_rte_mbuf (rte_mbufs[i + 1]);

	      vlib_buffer_init_for_free_list (next_dst, fl);
	      /* The copied flags carry VLIB_BUFFER_NEXT_PRESENT, so the
	       * copy must point at the next copied buffer. */
	      dst_buf->next_buffer = vlib_get_buffer_index (vm, next_dst);

	      src_buf = vlib_get_buffer (vm, src_buf->next_buffer);
	      dst_buf = next_dst;
	    }
	}
      return rv;
    }

  /* Common case: a single, unchained buffer. */
  if (cicn_infra_pvt_rte_mempool_get_bulk (vm, rmp, (void **) rte_mbufs, 1) <
      0)
    return 0;

  rv = vlib_buffer_from_rte_mbuf (rte_mbufs[0]);
  vlib_buffer_init_for_free_list (rv, fl);

  clib_memcpy (rv->data + b->current_data, b->data + b->current_data,
	       b->current_length);
  rv->current_data = b->current_data;
  rv->current_length = b->current_length;
  vnet_buffer (rv)->sw_if_index[VLIB_RX] =
    vnet_buffer (b)->sw_if_index[VLIB_RX];
  vnet_buffer (rv)->sw_if_index[VLIB_TX] =
    vnet_buffer (b)->sw_if_index[VLIB_TX];
  vnet_buffer (rv)->l2 = vnet_buffer (b)->l2;

  return (rv);
}

/*
 * Thin export of rte_socket_id() so callers can fetch the value once and
 * cache it; the cost of calling rte_socket_id() at every use site is not
 * known.
 */
static inline unsigned
cicn_infra_rte_socket_id (void)
{
  unsigned socket_id = rte_socket_id ();

  return socket_id;
}

/*
 * For cs_prep, update rte_mbuf fields to correspond to vlib_buffer fields.
 * (Probably could be skipped for non-dpdk drivers that must use copying.)
 *
 * vm:    vlib main, passed to vlib_buffer_length_in_chain()
 * cs_b0: buffer being prepared; the rte_mbuf obtained from it via
 *        rte_mbuf_from_vlib_buffer() is the header that gets modified
 */
static inline void
cicn_infra_vlib_buffer_cs_prep_finalize (vlib_main_t * vm,
					 vlib_buffer_t * cs_b0)
{
  /* Adjust the dpdk buffer header, so we can use this copy for
   * future cache hits.
   * - if dpdk buffer header invalid (e.g. content msg arrived on veth intfc),
   *   initialize it.
   * - effectively, advance the mbuf past the incoming IP and UDP headers,
   *   so that the buffer points to the start of the ICN payload that is
   *   to be replicated.
   */
  struct rte_mbuf *cs_mb0;
  i16 delta;

  cs_mb0 = rte_mbuf_from_vlib_buffer (cs_b0);
  /* Flag clear: the rte_mbuf header is not marked valid, so reset it. */
  if ((cs_b0->flags & VNET_BUFFER_RTE_MBUF_VALID) == 0)
    {
      rte_pktmbuf_reset (cs_mb0);
    }

  /* Difference between the vlib chain length and the mbuf packet length.
   * NOTE(review): pkt_len is cast to i16 and the result is stored in an
   * i16, so this presumably relies on lengths fitting in 16 bits - confirm.
   */
  delta = vlib_buffer_length_in_chain (vm, cs_b0) - (i16) (cs_mb0->pkt_len);

  /* Apply the same adjustment to the segment and packet lengths, then
   * point data_off at the vlib buffer's current data position. */
  cs_mb0->data_len += delta;
  cs_mb0->pkt_len += delta;
  cs_mb0->data_off = (RTE_PKTMBUF_HEADROOM + cs_b0->current_data);
}

/*
 * Wrapper for buffer "cloning" that uses
 * - rte_mbuf buffer cloning for dpdk drivers that support cloning
 * - vlib buffer copying for non-dpdk drivers that must use copying.
 *
 * CICN multicast support from vpp is currently problematic.
 * Three mechanisms on offer, CICN currently uses [1] for physical
 * output faces and [3] for virtual output faces:
 * 1. rte_mbuf's rte_pktmbuf_clone()
 *    - advantages
 *      - PIT deaggregation (multicast) case
 *        - high-performance creation of clone chains (relying on
 *          reference-counting mechanism)
 *        - avoids copying
 *        - allows parallel transmission
 *      - CS hit case
 *        - allows modular handling of sending content and deleting CS entries
 *          (relying on reference counting mechanism)
 *    - disadvantages
 *      - requires allocating indirect buffers, which has a cost even
 *        without copying (but Content messages are generally large)
 *      - rte_pktmbufs are a DPDK mechanism
 *        - not supported by non-DPDK (i.e. virtual) drivers
 *        - not supported by vpp-lite, which is used for unit test
 * 2. recycling-based replication (recirculation)
 *    - advantages
 *      - avoids copying
 *      - currently approved by vpp team
 *    - disadvantages
 *      - increased latency since need to transmit copies serially since
 *        only one buffer
 *      - mechanism not quite yet fully supported: notification that
 *        transmission <n> has occurred and recycle for transmission <n+1>
 *        may start does not occur on transmission completion, but on next
 *        transmission on that interface
 * 3. cicn_infra_pvt_vlib_dpdk_copy_buffer (was vlib_dpdk_clone_buffer())
 *    - advantages
 *      - works in both cases, for all drivers
 *    - disadvantages
 *      - slow, due to copying
 */
/*
 * src_b0:    buffer to replicate
 * vm:        vlib main
 * fl:        free list used to initialize the vlib header of an rte clone
 * socket_id: selects the pktmbuf pool used by rte_pktmbuf_clone()
 * outface:   output face; its swif_cloning_supported field picks between
 *            rte_mbuf cloning and the copying fallback
 *
 * Returns the new buffer, or 0 when cloning/copying fails.
 */
static inline vlib_buffer_t *
cicn_infra_vlib_buffer_clone (vlib_buffer_t * src_b0, vlib_main_t * vm,
			      vlib_buffer_free_list_t * fl,
			      unsigned socket_id,
			      cicn_face_db_entry_t * outface)
{
  vlib_buffer_t *new_b0;

  if (!outface->swif_cloning_supported)
    {
      /* Driver cannot take clones: fall back to a full copy. */
      new_b0 = cicn_infra_pvt_vlib_dpdk_copy_buffer (vm, src_b0);
      if (new_b0 == 0)
	{
	  return 0;
	}
    }
  else
    {
      struct rte_mbuf *orig_mb = rte_mbuf_from_vlib_buffer (src_b0);
      struct rte_mbuf *new_mb =
	rte_pktmbuf_clone (orig_mb,
			   vm->buffer_main->pktmbuf_pools[socket_id]);

      if (new_mb == 0)
	{
	  return 0;
	}

      /* rte_pktmbuf_clone() fills in only the rte_mbuf (dpdk) header, so
       * carry the relevant values over to the vlib_buffer_t header. */
      new_b0 = vlib_buffer_from_rte_mbuf (new_mb);
      vlib_buffer_init_for_free_list (new_b0, fl);
      ASSERT (new_b0->current_data == 0);
      new_b0->current_data = src_b0->current_data;
      new_b0->current_length = new_mb->data_len;
    }

  /* TODO: af_packet device.c chain walker ignores VLIB_BUFFER_NEXT_PRESENT;
   *       clear next_buffer to maintain buffer sanity. */
  ASSERT ((new_b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
  if ((new_b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)
    {
      new_b0->next_buffer = 0;
    }
  ASSERT ((new_b0->flags & VNET_BUFFER_RTE_MBUF_VALID) == 0);

  return new_b0;
}

/*
 * Attaching a clone changes the vlib_buffer chain, which leaves any
 * existing rte_mbuf chain stale. Rebuild the rte_mbuf view of the chain so
 * that it matches: hdr_b0's mbuf becomes the head and clone_b0's mbuf
 * follows it.
 *
 * hdr_b0:   header buffer placed in front of the clone
 * clone_b0: cloned payload buffer; must not yet be marked RTE_MBUF_VALID
 * outface:  output face; the dpdk-only metadata is filled in only when its
 *           swif_is_dpdk_driver field is set
 */
static inline void
cicn_infra_vlib_buffer_clone_attach_finalize (vlib_buffer_t * hdr_b0,
					      vlib_buffer_t * clone_b0,
					      cicn_face_db_entry_t * outface)
{
  struct rte_mbuf *head_mb = rte_mbuf_from_vlib_buffer (hdr_b0);
  struct rte_mbuf *tail_mb = rte_mbuf_from_vlib_buffer (clone_b0);

  ASSERT ((clone_b0->flags & VNET_BUFFER_RTE_MBUF_VALID) == 0);

  /* The main rte_mbuf fields are updated even for non-dpdk output
   * interfaces; start from a clean header if it was never set up. */
  if ((hdr_b0->flags & VNET_BUFFER_RTE_MBUF_VALID) == 0)
    {
      rte_pktmbuf_reset (head_mb);
    }
  head_mb->data_len = hdr_b0->current_length;
  head_mb->pkt_len =
    hdr_b0->current_length + hdr_b0->total_length_not_including_first_buffer;
  head_mb->next = tail_mb;
  head_mb->nb_segs = tail_mb->nb_segs + 1;

  /* Everything below is needed only when a dpdk driver will transmit. */
  if (!outface->swif_is_dpdk_driver)
    {
      return;
    }

  hdr_b0->flags |= VNET_BUFFER_RTE_MBUF_VALID;
  clone_b0->flags |= VNET_BUFFER_RTE_MBUF_VALID;

  /* Copy metadata from the source packet (see sr_replicate.c). */
  head_mb->port = tail_mb->port;
  head_mb->vlan_tci = tail_mb->vlan_tci;
  head_mb->vlan_tci_outer = tail_mb->vlan_tci_outer;
  head_mb->tx_offload = tail_mb->tx_offload;
  head_mb->hash = tail_mb->hash;

  head_mb->ol_flags = tail_mb->ol_flags & ~(IND_ATTACHED_MBUF);

  __rte_mbuf_sanity_check (head_mb, 1);
}
#endif // !CICN_FEATURE_VPP_VLIB_CLONING

#endif // CICN_RTE_MBUF_INLINES_H_