diff options
Diffstat (limited to 'src/plugins/dev_octeon/tx_node.c')
-rw-r--r-- | src/plugins/dev_octeon/tx_node.c | 154 |
1 files changed, 121 insertions, 33 deletions
diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c index 0dbf8759d35..f42f18d989b 100644 --- a/src/plugins/dev_octeon/tx_node.c +++ b/src/plugins/dev_octeon/tx_node.c @@ -22,13 +22,54 @@ typedef struct u32 n_tx_bytes; u32 n_drop; vlib_buffer_t *drop[VLIB_FRAME_SIZE]; + u32 n_exd_mtu; + vlib_buffer_t *exd_mtu[VLIB_FRAME_SIZE]; u32 batch_alloc_not_ready; u32 batch_alloc_issue_fail; + int max_pkt_len; u16 lmt_id; u64 lmt_ioaddr; lmt_line_t *lmt_lines; } oct_tx_ctx_t; +#ifdef PLATFORM_OCTEON9 +static_always_inline u32 +oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) +{ + oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + u16 off = ctq->hdr_off; + u64 ah = ctq->aura_handle; + u32 n_freed = 0, n; + + ah = ctq->aura_handle; + + if ((n = roc_npa_aura_op_available (ah)) >= 32) + { + u64 buffers[n]; + u32 bi[n]; + + n_freed = roc_npa_aura_op_bulk_alloc (ah, buffers, n, 0, 1); + vlib_get_buffer_indices_with_offset (vm, (void **) &buffers, bi, n_freed, + off); + vlib_buffer_free_no_next (vm, bi, n_freed); + } + + return n_freed; +} + +static_always_inline void +oct_lmt_copy (void *lmt_addr, u64 io_addr, void *desc, u64 dwords) +{ + u64 lmt_status; + + do + { + roc_lmt_mov_seg (lmt_addr, desc, dwords); + lmt_status = roc_lmt_submit_ldeor (io_addr); + } + while (lmt_status == 0); +} +#else static_always_inline u32 oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) { @@ -130,10 +171,12 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) return n_freed; } +#endif static_always_inline u8 oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, - lmt_line_t *line, u32 flags, int simple, int trace) + lmt_line_t *line, u32 flags, int simple, int trace, u32 *n, + u8 *dpl) { u8 n_dwords = 2; u32 total_len = 0; @@ -148,6 +191,17 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, }, }; + if (PREDICT_FALSE (vlib_buffer_length_in_chain (vm, b) > ctx->max_pkt_len)) + { + ctx->exd_mtu[ctx->n_exd_mtu++] = b; + return 0; + } + +#ifdef PLATFORM_OCTEON9 + /* Override line for Octeon9 */ + line = ctx->lmt_lines; +#endif + if (!simple && flags & VLIB_BUFFER_NEXT_PRESENT) { u8 n_tail_segs = 0; @@ -159,7 +213,7 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, tail_segs[n_tail_segs++] = t; if (n_tail_segs > 5) { - ctx->drop[ctx->n_drop++] = t; + ctx->drop[ctx->n_drop++] = b; return 0; } } @@ -201,19 +255,18 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) { d.hdr_w1.ol3type = NIX_SENDL3TYPE_IP4_CKSUM; - d.hdr_w1.ol3ptr = vnet_buffer (b)->l3_hdr_offset; - d.hdr_w1.ol4ptr = - vnet_buffer (b)->l3_hdr_offset + sizeof (ip4_header_t); + d.hdr_w1.ol3ptr = vnet_buffer (b)->l3_hdr_offset - b->current_data; + d.hdr_w1.ol4ptr = d.hdr_w1.ol3ptr + sizeof (ip4_header_t); } if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM) { d.hdr_w1.ol4type = NIX_SENDL4TYPE_UDP_CKSUM; - d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset; + d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset - b->current_data; } else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM) { d.hdr_w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM; - d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset; + d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset - b->current_data; } } @@ -228,8 +281,15 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, t->sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX]; } +#ifdef PLATFORM_OCTEON9 + oct_lmt_copy (line, ctx->lmt_ioaddr, &d, n_dwords); +#else for (u32 i = 0; i < n_dwords; i++) line->dwords[i] = d.as_u128[i]; +#endif + + *dpl = n_dwords; + *n = *n + 1; return n_dwords; } @@ -239,8 +299,9 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, vlib_buffer_t **b, u32 n_pkts, int trace) { u8 dwords_per_line[16], *dpl = dwords_per_line; - u64 lmt_arg, ioaddr, n_lines; - u32 n_left, or_flags_16 = 0; + u64 __attribute__ ((unused)) lmt_arg, ioaddr, n_lines; + u32 __attribute__ ((unused)) or_flags_16 = 0; + u32 n_left, n = 0; const u32 not_simple_flags = VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD; lmt_line_t *l = ctx->lmt_lines; @@ -248,7 +309,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, /* Data Store Memory Barrier - outer shareable domain */ asm volatile("dmb oshst" ::: "memory"); - for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8, l += 8) + for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8) { u32 f0, f1, f2, f3, f4, f5, f6, f7, or_f = 0; vlib_prefetch_buffer_header (b[8], LOAD); @@ -269,49 +330,56 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, if ((or_f & not_simple_flags) == 0) { int simple = 1; - oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace); - oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[13], LOAD); - oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace); - oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace); + oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[14], LOAD); - oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace); - oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace); + oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[15], LOAD); - oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace); - oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace); - dpl[0] = dpl[1] = dpl[2] = dpl[3] = 2; - dpl[4] = dpl[5] = dpl[6] = dpl[7] = 2; + oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]); } else { int simple = 0; - dpl[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace); - dpl[1] = oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[13], LOAD); - dpl[2] = oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace); - dpl[3] = oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace); + oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[14], LOAD); - dpl[4] = oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace); - dpl[5] = oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace); + oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[15], LOAD); - dpl[6] = oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace); - dpl[7] = oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace); + oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]); } - dpl += 8; + dpl += n; + l += n; + n = 0; } - for (; n_left > 0; n_left -= 1, b += 1, l += 1) + for (; n_left > 0; n_left -= 1, b += 1) { u32 f0 = b[0]->flags; - dpl++[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace, &n, &dpl[n]); or_flags_16 |= f0; + dpl += n; + l += n; + n = 0; } lmt_arg = ctx->lmt_id; ioaddr = ctx->lmt_ioaddr; - n_lines = n_pkts; + n_lines = dpl - dwords_per_line; + + if (PREDICT_FALSE (!n_lines)) + return n_pkts; +#ifndef PLATFORM_OCTEON9 if (PREDICT_FALSE (or_flags_16 & VLIB_BUFFER_NEXT_PRESENT)) { dpl = dwords_per_line; @@ -340,6 +408,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, } roc_lmt_submit_steorl (lmt_arg, ioaddr); +#endif return n_pkts; } @@ -350,11 +419,17 @@ VNET_DEV_NODE_FN (oct_tx_node) vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node); vnet_dev_tx_queue_t *txq = rt->tx_queue; oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_t *dev = txq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); u32 node_index = node->node_index; u32 *from = vlib_frame_vector_args (frame); u32 n, n_enq, n_left, n_pkts = frame->n_vectors; vlib_buffer_t *buffers[VLIB_FRAME_SIZE + 8], **b = buffers; +#ifdef PLATFORM_OCTEON9 + u64 lmt_id = 0; +#else u64 lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2; +#endif oct_tx_ctx_t ctx = { .node = node, @@ -363,6 +438,7 @@ VNET_DEV_NODE_FN (oct_tx_node) .sq = ctq->sq.qid, .sizem1 = 1, }, + .max_pkt_len = roc_nix_max_pkt_len (cd->nix), .lmt_id = lmt_id, .lmt_ioaddr = ctq->io_addr, .lmt_lines = ctq->lmt_addr + (lmt_id << ROC_LMT_LINE_SIZE_LOG2), @@ -396,7 +472,7 @@ VNET_DEV_NODE_FN (oct_tx_node) n += oct_tx_enq16 (vm, &ctx, txq, b, n_left, /* trace */ 0); } - ctq->n_enq = n_enq + n; + ctq->n_enq = n_enq + n - ctx.n_drop - ctx.n_exd_mtu; if (n < n_pkts) { @@ -411,6 +487,10 @@ VNET_DEV_NODE_FN (oct_tx_node) vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_CHAIN_TOO_LONG, ctx.n_drop); + if (PREDICT_FALSE (ctx.n_exd_mtu)) + vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_MTU_EXCEEDED, + ctx.n_exd_mtu); + if (ctx.batch_alloc_not_ready) vlib_error_count (vm, node_index, OCT_TX_NODE_CTR_AURA_BATCH_ALLOC_NOT_READY, @@ -431,5 +511,13 @@ VNET_DEV_NODE_FN (oct_tx_node) n_pkts -= ctx.n_drop; } + if (PREDICT_FALSE (ctx.n_exd_mtu)) + { + u32 bi[VLIB_FRAME_SIZE]; + vlib_get_buffer_indices (vm, ctx.exd_mtu, bi, ctx.n_exd_mtu); + vlib_buffer_free (vm, bi, ctx.n_exd_mtu); + n_pkts -= ctx.n_exd_mtu; + } + return n_pkts; } |