path: root/src/svm/ssvm.c
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "ssvm.h"

int
ssvm_master_init (ssvm_private_t * ssvm, u32 master_index)
{
  int ssvm_fd;
  u8 *ssvm_filename;
  u8 junk = 0;
  int flags;
  ssvm_shared_header_t *sh;
  u64 ticks = clib_cpu_time_now ();
  u64 randomize_baseva;
  void *oldheap;

  if (ssvm->ssvm_size == 0)
    return SSVM_API_ERROR_NO_SIZE;

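  /* Remove any stale backing file left behind by a previous run */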
  ssvm_filename = format (0, "/dev/shm/%s%c", ssvm->name, 0);

  unlink ((char *) ssvm_filename);

  vec_free (ssvm_filename);

  ssvm_fd = shm_open ((char *) ssvm->name, O_RDWR | O_CREAT | O_EXCL, 0777);

  if (ssvm_fd < 0)
    {
      clib_unix_warning ("create segment '%s'", ssvm->name);
      return SSVM_API_ERROR_CREATE_FAILURE;
    }

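  /* Set the segment size by seeking past the end and writing one byte */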
  if (lseek (ssvm_fd, ssvm->ssvm_size, SEEK_SET) < 0)
    {
      clib_unix_warning ("lseek");
      close (ssvm_fd);
      return SSVM_API_ERROR_SET_SIZE;
    }

  if (write (ssvm_fd, &junk, 1) != 1)
    {
      clib_unix_warning ("set ssvm size");
      close (ssvm_fd);
      return SSVM_API_ERROR_SET_SIZE;
    }

  flags = MAP_SHARED;
  if (ssvm->requested_va)
    flags |= MAP_FIXED;

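  /* Add a random 0..15 page offset to the requested base address */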
  randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;

  if (ssvm->requested_va)
    ssvm->requested_va += randomize_baseva;

  sh = ssvm->sh =
    (ssvm_shared_header_t *) mmap ((void *) ssvm->requested_va,
				   ssvm->ssvm_size, PROT_READ | PROT_WRITE,
				   flags, ssvm_fd, 0);

  if (ssvm->sh == MAP_FAILED)
    {
      clib_unix_warning ("mmap");
      close (ssvm_fd);
      return SSVM_API_ERROR_MMAP;
    }

  close (ssvm_fd);

  ssvm->my_pid = getpid ();
  sh->master_pid = ssvm->my_pid;
  sh->ssvm_size = ssvm->ssvm_size;
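  /* Carve everything past the first page into a shared mheap */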
  sh->heap = mheap_alloc_with_flags
    (((u8 *) sh) + MMAP_PAGESIZE, ssvm->ssvm_size - MMAP_PAGESIZE,
     MHEAP_FLAG_DISABLE_VM | MHEAP_FLAG_THREAD_SAFE);

  sh->ssvm_va = pointer_to_uword (sh);
  sh->master_index = master_index;

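  /* Stash the segment name in the shared heap so both sides can see it */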
  oldheap = ssvm_push_heap (sh);
  sh->name = format (0, "%s%c", ssvm->name, 0);
  ssvm_pop_heap (oldheap);

  ssvm->i_am_master = 1;

  /* The application has to set sh->ready... */
  return 0;
}
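
/*
 * Usage sketch (illustrative only, not part of the original file; the
 * segment name and size are hypothetical).  A master creates the
 * segment, builds its state in the shared heap, and then publishes the
 * segment by setting sh->ready so waiting slaves can proceed.
 */
#if 0
static int
example_master_setup (void)
{
  ssvm_private_t ssvm;

  memset (&ssvm, 0, sizeof (ssvm));
  ssvm.name = format (0, "example-seg%c", 0);
  ssvm.ssvm_size = 1 << 20;	/* 1 MB segment */

  if (ssvm_master_init (&ssvm, 0 /* master_index */))
    return -1;

  /* ... initialize application state in the shared heap ... */

  ssvm.sh->ready = 1;		/* slaves may now remap and attach */
  return 0;
}
#endif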

int
ssvm_slave_init (ssvm_private_t * ssvm, int timeout_in_seconds)
{
  struct stat st;
  int ssvm_fd = -1;
  ssvm_shared_header_t *sh;

  ssvm->i_am_master = 0;

  while (timeout_in_seconds-- > 0)
    {
      if (ssvm_fd < 0)
	ssvm_fd = shm_open ((char *) ssvm->name, O_RDWR, 0777);
      if (ssvm_fd < 0)
	{
	  sleep (1);
	  continue;
	}
      if (fstat (ssvm_fd, &st) < 0)
	{
	  sleep (1);
	  continue;
	}

      if (st.st_size > 0)
	goto map_it;
    }
  clib_warning ("slave timeout");
  return SSVM_API_ERROR_SLAVE_TIMEOUT;

map_it:
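  /* Map just the first page to read the master's shared header */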
  sh = (void *) mmap (0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
		      ssvm_fd, 0);
  if (sh == MAP_FAILED)
    {
      clib_unix_warning ("slave research mmap");
      close (ssvm_fd);
      return SSVM_API_ERROR_MMAP;
    }

  /* Wait (up to the remaining timeout) for the master to set sh->ready;
     sleep so the countdown tracks seconds instead of spinning through
     the timeout instantly */
  while (timeout_in_seconds-- > 0)
    {
      if (sh->ready)
	goto re_map_it;
      sleep (1);
    }
  close (ssvm_fd);
  munmap (sh, MMAP_PAGESIZE);
  clib_warning ("slave timeout 2");
  return SSVM_API_ERROR_SLAVE_TIMEOUT;

re_map_it:
  ssvm->requested_va = (u64) sh->ssvm_va;
  ssvm->ssvm_size = sh->ssvm_size;
  munmap (sh, MMAP_PAGESIZE);

  sh = ssvm->sh = (void *) mmap ((void *) ssvm->requested_va, ssvm->ssvm_size,
				 PROT_READ | PROT_WRITE,
				 MAP_SHARED | MAP_FIXED, ssvm_fd, 0);

  if (sh == MAP_FAILED)
    {
      clib_unix_warning ("slave final mmap");
      close (ssvm_fd);
      return SSVM_API_ERROR_MMAP;
    }
  /* The descriptor is no longer needed once the final mapping exists */
  close (ssvm_fd);
  sh->slave_pid = getpid ();
  return 0;
}
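
/*
 * Usage sketch (illustrative only; the segment name is hypothetical).
 * A slave attaches by name and waits for the master to set sh->ready,
 * after which ssvm.sh points at the master's shared header and heap.
 */
#if 0
static int
example_slave_attach (void)
{
  ssvm_private_t ssvm;

  memset (&ssvm, 0, sizeof (ssvm));
  ssvm.name = format (0, "example-seg%c", 0);

  if (ssvm_slave_init (&ssvm, 5 /* timeout_in_seconds */))
    return -1;

  /* Shared data is now reachable through ssvm.sh and the shared heap */
  return 0;
}
#endif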

void
ssvm_delete (ssvm_private_t * ssvm)
{
  u8 *fn;

  fn = format (0, "/dev/shm/%s%c", ssvm->name, 0);

  /* Throw away the backing file */
  if (unlink ((char *) fn) < 0)
    clib_unix_warning ("unlink segment '%s'", ssvm->name);

  /* Unmap at the address actually mapped; requested_va may be zero if
     the kernel chose the base address */
  munmap ((void *) ssvm->sh, ssvm->ssvm_size);
  vec_free (fn);
}


/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */