path: root/src/plugins/vmxnet3/output.c
/*
 *------------------------------------------------------------------
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/ip/ip4_packet.h>

#include <vmxnet3/vmxnet3.h>

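/*
 * Advance the TX completion ring to the next entry, wrapping around and
 * toggling the expected generation bit at the end of the ring.
 */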
static_always_inline void
vmxnet3_tx_comp_ring_advance_next (vmxnet3_txq_t * txq)
{
  vmxnet3_tx_comp_ring *comp_ring = &txq->tx_comp_ring;

  comp_ring->next++;
  if (PREDICT_FALSE (comp_ring->next == txq->size))
    {
      comp_ring->next = 0;
      comp_ring->gen ^= VMXNET3_TXCF_GEN;
    }
}

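/*
 * Advance the TX ring producer index, wrapping around and toggling the
 * ring generation bit at the end of the ring.
 */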
static_always_inline void
vmxnet3_tx_ring_advance_produce (vmxnet3_txq_t * txq)
{
  txq->tx_ring.produce++;
  if (PREDICT_FALSE (txq->tx_ring.produce == txq->size))
    {
      txq->tx_ring.produce = 0;
      txq->tx_ring.gen ^= VMXNET3_TXF_GEN;
    }
}

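/* Advance the TX ring consumer index; the ring size is a power of 2 */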
static_always_inline void
vmxnet3_tx_ring_advance_consume (vmxnet3_txq_t * txq)
{
  txq->tx_ring.consume++;
  txq->tx_ring.consume &= txq->size - 1;
}

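/*
 * Process TX completions: for every completion entry owned by the driver,
 * free the buffer chain of the completed packet and advance the consumer
 * index past all of its descriptors.
 */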
static_always_inline void
vmxnet3_txq_release (vlib_main_t * vm, vmxnet3_device_t * vd,
		     vmxnet3_txq_t * txq)
{
  vmxnet3_tx_comp *tx_comp;
  vmxnet3_tx_comp_ring *comp_ring;

  comp_ring = &txq->tx_comp_ring;
  tx_comp = &txq->tx_comp[comp_ring->next];

  while ((tx_comp->flags & VMXNET3_TXCF_GEN) == comp_ring->gen)
    {
      u16 eop_idx = tx_comp->index & VMXNET3_TXC_INDEX;
      u32 bi0 = txq->tx_ring.bufs[txq->tx_ring.consume];

      vlib_buffer_free_one (vm, bi0);
      while (txq->tx_ring.consume != eop_idx)
	{
	  vmxnet3_tx_ring_advance_consume (txq);
	}
      vmxnet3_tx_ring_advance_consume (txq);

      vmxnet3_tx_comp_ring_advance_next (txq);
      tx_comp = &txq->tx_comp[comp_ring->next];
    }
}

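/*
 * Number of free TX descriptors; one slot is kept unused so that a full
 * ring can be distinguished from an empty one.
 */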
static_always_inline u16
vmxnet3_tx_ring_space_left (vmxnet3_txq_t * txq)
{
  u16 count;

  count = (txq->tx_ring.consume - txq->tx_ring.produce - 1);
  /* Wrapped? */
  if (txq->tx_ring.produce >= txq->tx_ring.consume)
    count += txq->size;
  return count;
}

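/*
 * Device output (TX) function: map each (possibly chained) buffer onto one
 * TX descriptor per segment, set up TSO on the SOP descriptor when a GSO
 * packet is seen, and ring the producer doorbell once per frame.
 */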
VNET_DEVICE_CLASS_TX_FN (vmxnet3_device_class) (vlib_main_t * vm,
						vlib_node_runtime_t * node,
						vlib_frame_t * frame)
{
  vmxnet3_main_t *vmxm = &vmxnet3_main;
  vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
  vmxnet3_device_t *vd = pool_elt_at_index (vmxm->devices, rd->dev_instance);
  u32 *buffers = vlib_frame_vector_args (frame);
  u32 bi0;
  vlib_buffer_t *b0;
  vmxnet3_tx_desc *txd = 0;
  u32 desc_idx, generation, first_idx;
  u16 space_left;
  u16 n_left = frame->n_vectors;
  vmxnet3_txq_t *txq;
  vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
  u16 qid = tf->queue_id, produce;

  if (PREDICT_FALSE (!(vd->flags & VMXNET3_DEVICE_F_LINK_UP)))
    {
      vlib_buffer_free (vm, buffers, n_left);
      vlib_error_count (vm, node->node_index, VMXNET3_TX_ERROR_LINK_DOWN,
			n_left);
      return (0);
    }

  txq = vec_elt_at_index (vd->txqs, qid);
  if (tf->shared_queue)
    clib_spinlock_lock (&txq->lock);

  vmxnet3_txq_release (vm, vd, txq);

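  /*
   * Remember the producer index so the producer register (doorbell) is
   * only written if descriptors were actually enqueued
   */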
  produce = txq->tx_ring.produce;
  while (PREDICT_TRUE (n_left))
    {
      u16 space_needed = 1, i;
      u32 gso_size = 0;
      u32 l4_hdr_sz;
      vlib_buffer_t *b;
      u32 hdr_len = 0;

      bi0 = buffers[0];
      b0 = vlib_get_buffer (vm, bi0);
      b = b0;

      space_left = vmxnet3_tx_ring_space_left (txq);
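      /* A chained buffer needs one TX descriptor per segment */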
      while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
	{
	  u32 next_buffer = b->next_buffer;

	  b = vlib_get_buffer (vm, next_buffer);
	  space_needed++;
	}
      if (PREDICT_FALSE (space_left < space_needed))
	{
	  vmxnet3_txq_release (vm, vd, txq);
	  space_left = vmxnet3_tx_ring_space_left (txq);

	  if (PREDICT_FALSE (space_left < space_needed))
	    {
	      vlib_buffer_free_one (vm, bi0);
	      vlib_error_count (vm, node->node_index,
				VMXNET3_TX_ERROR_NO_FREE_SLOTS, 1);
	      buffers++;
	      n_left--;
	      /*
	       * Drop this packet, but there may still be enough room for
	       * the next one
	       */
	      continue;
	    }
	}

      /*
       * Give the SOP descriptor an inverted generation bit for now so the
       * device does not start reading an incomplete packet
       */
      generation = txq->tx_ring.gen ^ VMXNET3_TXF_GEN;
      first_idx = txq->tx_ring.produce;
      for (i = 0; i < space_needed; i++)
	{
	  b0 = vlib_get_buffer (vm, bi0);

	  desc_idx = txq->tx_ring.produce;

	  vmxnet3_tx_ring_advance_produce (txq);
	  txq->tx_ring.bufs[desc_idx] = bi0;

	  txd = &txq->tx_desc[desc_idx];

	  txd->address = vlib_buffer_get_current_pa (vm, b0);

	  txd->flags[0] = generation | b0->current_length;
	  txd->flags[1] = 0;
	  if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_GSO))
	    {
	      /*
	       * We should not be getting GSO traffic on output unless
	       * GSO/LRO is enabled on the device
	       */
	      ASSERT (vd->gso_enable == 1);
	      gso_size = vnet_buffer2 (b0)->gso_size;
	      l4_hdr_sz = vnet_buffer2 (b0)->gso_l4_hdr_sz;
	      if (b0->flags & VNET_BUFFER_F_IS_IP6)
		hdr_len = sizeof (ethernet_header_t) + sizeof (ip6_header_t) +
		  l4_hdr_sz;
	      else
		hdr_len = sizeof (ethernet_header_t) + sizeof (ip4_header_t) +
		  l4_hdr_sz;
	    }

	  generation = txq->tx_ring.gen;
	  bi0 = b0->next_buffer;
	}
      if (PREDICT_FALSE (gso_size != 0))
	{
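	  /* TSO: the SOP descriptor carries the header length and the MSS */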
	  txq->tx_desc[first_idx].flags[1] = hdr_len;
	  txq->tx_desc[first_idx].flags[1] |= VMXNET3_TXF_OM (VMXNET3_OM_TSO);
	  txq->tx_desc[first_idx].flags[0] |= VMXNET3_TXF_MSSCOF (gso_size);
	}
      txd->flags[1] |= VMXNET3_TXF_CQ | VMXNET3_TXF_EOP;
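      /*
       * Compiler barrier: make sure all descriptor writes above are emitted
       * before the SOP generation bit is flipped below
       */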
      asm volatile ("":::"memory");
      /*
       * Now flip the generation bit on the SOP descriptor back to the
       * correct value; the device can start reading the packet
       */
      txq->tx_desc[first_idx].flags[0] ^= VMXNET3_TXF_GEN;

      buffers++;
      n_left--;
    }

  if (PREDICT_TRUE (produce != txq->tx_ring.produce))
    vmxnet3_reg_write_inline (vd, 0, txq->reg_txprod, txq->tx_ring.produce);

  if (tf->shared_queue)
    clib_spinlock_unlock (&txq->lock);

  return (frame->n_vectors - n_left);
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */