/*
 *   BSD LICENSE
 *
 *   Copyright (C) Cavium, Inc 2017.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Cavium, Inc nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "test_perf_common.h"

/**
 * Report the outcome recorded in the test's private data.
 *
 * @param test  Test instance; its private area is a struct test_perf.
 * @param opt   Unused.
 * @return The value of the private data's 'result' field.
 */
int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	return t->result;
}

/**
 * Producer lcore entry point.
 *
 * Takes elements from the test mempool, stamps each with the current timer
 * cycle count and injects it as a single RTE_EVENT_OP_NEW event on this
 * producer's port/queue. Stops after t->nb_pkts events have been enqueued
 * or as soon as t->done is observed set.
 *
 * @param arg  Pointer to this producer's struct prod_data.
 * @return Always 0.
 */
static inline int
perf_producer(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	struct evt_options *opt = t->opt;
	const uint8_t dev_id = p->dev_id;
	const uint8_t port = p->port_id;
	struct rte_mempool *pool = t->pool;
	const uint64_t nb_pkts = t->nb_pkts;
	const uint32_t nb_flows = t->nb_flows;
	uint32_t flow_counter = 0;
	uint64_t count = 0;
	struct perf_elt *m;
	struct rte_event ev;

	if (opt->verbose_level > 1)
		printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
				rte_lcore_id(), dev_id, port, p->queue_id);

	/* Fields that stay constant for every event this producer injects. */
	ev.event = 0;
	ev.op = RTE_EVENT_OP_NEW;
	ev.queue_id = p->queue_id;
	ev.sched_type = t->opt->sched_type_list[0];
	ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	ev.event_type = RTE_EVENT_TYPE_CPU;
	ev.sub_event_type = 0; /* stage 0 */

	while (count < nb_pkts && t->done == false) {
		/* Pool temporarily empty: re-check the exit conditions, retry. */
		if (rte_mempool_get(pool, (void **)&m) < 0)
			continue;

		ev.flow_id = flow_counter++ % nb_flows;
		ev.event_ptr = m;
		m->timestamp = rte_get_timer_cycles();
		while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
			if (t->done) {
				/*
				 * The event was never accepted, so this
				 * element is still owned here: hand it back
				 * instead of leaking it, and do not count it.
				 */
				rte_mempool_put(pool, m);
				return 0;
			}
			rte_pause();
			/* Refresh so the stamp reflects the actual enqueue. */
			m->timestamp = rte_get_timer_cycles();
		}
		count++;
	}

	return 0;
}

/**
 * Sum the processed_pkts counters of all workers.
 *
 * A read barrier is issued before the counters are read.
 *
 * @param t  Perf test private data.
 * @return Sum of worker[i].processed_pkts for i in [0, nb_workers).
 */
static inline uint64_t
processed_pkts(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;

	return total;
}

/**
 * Sum the latency counters of all workers.
 *
 * A read barrier is issued before the counters are read.
 *
 * @param t  Perf test private data.
 * @return Sum of worker[i].latency for i in [0, nb_workers).
 */
static inline uint64_t
total_latency(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].latency;

	return total;
}

/**
 * Launch worker and producer lcores, then monitor progress from the
 * calling lcore until the test is done.
 *
 * Roughly once per timer second the throughput (and, when
 * opt->fwd_latency is set, the average forward latency) is printed. When
 * no packets remain outstanding the test is marked EVT_TEST_SUCCESS. If
 * the outstanding count does not change between two checks taken about
 * five timer seconds apart, the event device is dumped, an error is
 * logged and the test is stopped without touching t->result.
 *
 * @param test    Test instance.
 * @param opt     Test options (lcore masks, packet count, flags).
 * @param worker  Worker entry point; receives a struct worker_data pointer.
 * @return The rte_eal_remote_launch() error if a launch fails, else 0.
 */
int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *))
{
	int ret, lcore_id;
	struct test_perf *t = evt_test_priv(test);

	/*
	 * One running index for both loops: workers take the first slots and
	 * producers continue from there, matching the port numbering used by
	 * perf_event_dev_port_setup().
	 */
	int port_idx = 0;

	/* launch workers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->wlcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(worker,
				 &t->worker[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch worker %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	/* launch producers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->plcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(perf_producer, &t->prod[port_idx],
					 lcore_id);
		if (ret) {
			evt_err("failed to launch perf_producer %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	const uint64_t total_pkts = opt->nb_pkts *
			evt_nr_active_lcores(opt->plcores);

	/* Deadlock watchdog state, sampled every 5 timer seconds. */
	uint64_t dead_lock_cycles = rte_get_timer_cycles();
	int64_t dead_lock_remaining = total_pkts;
	const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

	/* Throughput reporting state, sampled every timer second. */
	uint64_t perf_cycles = rte_get_timer_cycles();
	int64_t perf_remaining = total_pkts;
	const uint64_t perf_sample = rte_get_timer_hz();

	/*
	 * Running-average accumulators. Kept as ordinary locals so that each
	 * invocation starts its average from zero.
	 */
	float total_mpps = 0.0f;
	uint64_t samples = 0;

	const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
	int64_t remaining = t->outstand_pkts - processed_pkts(t);

	while (t->done == false) {
		const uint64_t new_cycles = rte_get_timer_cycles();

		if ((new_cycles - perf_cycles) > perf_sample) {
			const uint64_t latency = total_latency(t);
			const uint64_t pkts = processed_pkts(t);

			remaining = t->outstand_pkts - pkts;
			float mpps = (float)(perf_remaining-remaining)/1000000;

			perf_remaining = remaining;
			perf_cycles = new_cycles;
			total_mpps += mpps;
			++samples;
			if (opt->fwd_latency && pkts > 0) {
				/*
				 * Divide in floating point: an integer
				 * latency/pkts would drop the fractional
				 * cycle count before the conversion to us.
				 */
				printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
					mpps, total_mpps/samples,
					(double)latency / (double)pkts /
						(double)freq_mhz);
			} else {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
					mpps, total_mpps/samples);
			}
			fflush(stdout);

			if (remaining <= 0) {
				t->done = true;
				t->result = EVT_TEST_SUCCESS;
				rte_smp_wmb();
				break;
			}
		}

		if (new_cycles - dead_lock_cycles > dead_lock_sample) {
			remaining = t->outstand_pkts - processed_pkts(t);
			/* No change since the previous sample: no progress. */
			if (dead_lock_remaining == remaining) {
				rte_event_dev_dump(opt->dev_id, stdout);
				evt_err("No schedules for seconds, deadlock");
				t->done = true;
				rte_smp_wmb();
				break;
			}
			dead_lock_remaining = remaining;
			dead_lock_cycles = new_cycles;
		}
	}
	printf("\n");
	return 0;
}

/**
 * Configure the event ports: one per worker lcore, linked to all queues,
 * followed by one per remaining port for the producers, with no links.
 *
 * Also fills in the matching worker_data / prod_data entries. Producer
 * data is stored at t->prod[port], i.e. indexed by port id, not by
 * producer ordinal.
 *
 * @param test       Test instance.
 * @param opt        Test options.
 * @param stride     Queue-id distance between consecutive producers;
 *                   producer k enqueues to queue k * stride.
 * @param nb_queues  Number of queues every worker port must end up
 *                   linked to.
 * @return 0 once the last producer port is set up, the
 *         rte_event_port_setup() error on failure, or -EINVAL when a
 *         worker port is linked to a number of queues other than
 *         nb_queues.
 */
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues)
{
	struct test_perf *t = evt_test_priv(test);
	uint8_t port, prod;
	int ret = -1;

	/* port configuration */
	const struct rte_event_port_conf wkr_p_conf = {
			.dequeue_depth = opt->wkr_deq_dep,
			.enqueue_depth = 64,
			.new_event_threshold = 4096,
	};

	/* setup one port per worker, linking to all queues */
	for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
				port++) {
		struct worker_data *w = &t->worker[port];

		w->dev_id = opt->dev_id;
		w->port_id = port;
		w->t = t;
		w->processed_pkts = 0;
		w->latency = 0;

		ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}

		/* NULL queue list: request a link to every queue. */
		ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
		if (ret != nb_queues) {
			evt_err("failed to link all queues to port %d", port);
			return -EINVAL;
		}
	}

	/* port for producers, no links */
	const struct rte_event_port_conf prod_conf = {
			.dequeue_depth = 8,
			.enqueue_depth = 32,
			.new_event_threshold = 1200,
	};

	prod = 0;
	for ( ; port < perf_nb_event_ports(opt); port++) {
		struct prod_data *p = &t->prod[port];

		p->dev_id = opt->dev_id;
		p->port_id = port;
		p->queue_id = prod * stride;
		p->t = t;

		ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}
		prod++;
	}

	return ret;
}

/**
 * Validate the options for a perf test and apply fixups.
 *
 * Checks the lcore count, the worker and producer lcore masks, the stage
 * and schedule-type configuration, and the queue/port limits. Fixups:
 * fwd_latency is disabled for a single stage, queue priority is enabled
 * when fwd_latency is on, and an nb_pkts of 0 is replaced by
 * INT64_MAX divided by the number of producers.
 *
 * @param opt        Options to validate; may be modified by the fixups.
 * @param nb_queues  Number of event queues the test will use.
 * @return 0 when the options are usable, -1 otherwise.
 */
int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
	unsigned int lcores;

	/* N producer + N worker + 1 master */
	lcores = 3;

	if (rte_lcore_count() < lcores) {
		evt_err("test need minimum %d lcores", lcores);
		return -1;
	}

	/* Validate worker lcores */
	if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
		evt_err("worker lcores overlaps with master lcore");
		return -1;
	}
	if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
		evt_err("worker lcores overlaps producer lcores");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->wlcores)) {
		evt_err("one or more workers lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->wlcores)) {
		evt_err("minimum one worker is required");
		return -1;
	}

	/* Validate producer lcores */
	if (evt_lcores_has_overlap(opt->plcores, rte_get_master_lcore())) {
		evt_err("producer lcores overlaps with master lcore");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->plcores)) {
		evt_err("one or more producer lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->plcores)) {
		evt_err("minimum one producer is required");
		return -1;
	}

	if (evt_has_invalid_stage(opt))
		return -1;

	if (evt_has_invalid_sched_type(opt))
		return -1;

	if (nb_queues > EVT_MAX_QUEUES) {
		evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
		return -1;
	}
	if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
		evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
		return -1;
	}

	/* Fixups */
	if (opt->nb_stages == 1 && opt->fwd_latency) {
		evt_info("fwd_latency is valid when nb_stages > 1, disabling");
		opt->fwd_latency = 0;
	}
	if (opt->fwd_latency && !opt->q_priority) {
		evt_info("enabled queue priority for latency measurement");
		opt->q_priority = 1;
	}
	/*
	 * At least one producer is guaranteed by the checks above, so the
	 * divisor is non-zero here.
	 */
	if (opt->nb_pkts == 0)
		opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

	return 0;
}

/**
 * Dump the perf-test related options.
 *
 * @param opt        Options to dump.
 * @param nb_queues  Number of event queues, printed as nb_evdev_queues.
 */
void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
	evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
	evt_dump_producer_lcores(opt);
	evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
	evt_dump_worker_lcores(opt);
	evt_dump_nb_stages(opt);
	evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
	evt_dump("nb_evdev_queues", "%d", nb_queues);
	evt_dump_queue_priority(opt);
	evt_dump_sched_type_list(opt);
}

/**
 * Stop and close the event device named by opt->dev_id.
 *
 * @param test  Unused.
 * @param opt   Options carrying the event device id.
 */
void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(test);

	rte_event_dev_stop(opt->dev_id);
	rte_event_dev_close(opt->dev_id);
}

/**
 * Mempool object constructor: zero the whole element.
 *
 * @param mp   Mempool the object belongs to; supplies the element size.
 * @param arg  Unused.
 * @param obj  Object to initialise.
 * @param i    Unused.
 */
static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
	    void *obj, unsigned i __rte_unused)
{
	memset(obj, 0, mp->elt_size);
}

/**
 * Create the mempool of struct perf_elt objects used by the producers.
 *
 * @param test  Test instance; its name is used as the mempool name and
 *              the pool pointer is stored in its private data.
 * @param opt   Options supplying the pool size and socket id.
 * @return 0 on success, -ENOMEM if the mempool cannot be created.
 */
int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
	struct test_perf *t = evt_test_priv(test);

	t->pool = rte_mempool_create(test->name, /* mempool name */
				opt->pool_sz, /* number of elements*/
				sizeof(struct perf_elt), /* element size*/
				512, /* cache size*/
				0, NULL, NULL,
				perf_elt_init, /* obj constructor */
				NULL, opt->socket_id, 0); /* flags */
	if (t->pool == NULL) {
		evt_err("failed to create mempool");
		return -ENOMEM;
	}

	return 0;
}

/**
 * Free the mempool stored in the test's private data.
 *
 * @param test  Test instance.
 * @param opt   Unused.
 */
void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	rte_mempool_free(t->pool);
}

/**
 * Allocate and initialise the perf test's private data.
 *
 * The structure is zero-allocated on opt->socket_id, attached to
 * test->test_priv and seeded from the options. The result starts as
 * EVT_TEST_FAILED.
 *
 * @param test  Test instance to attach the private data to.
 * @param opt   Test options; the pointer itself is retained in t->opt.
 * @return 0 on success, -ENOMEM if the allocation fails.
 */
int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
	void *test_perf;

	test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
				RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_perf == NULL) {
		evt_err("failed to allocate test_perf memory");
		return -ENOMEM;
	}
	test->test_priv = test_perf;

	struct test_perf *t = evt_test_priv(test);

	t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
	t->nb_workers = evt_nr_active_lcores(opt->wlcores);
	t->done = false;
	t->nb_pkts = opt->nb_pkts;
	t->nb_flows = opt->nb_flows;
	t->result = EVT_TEST_FAILED;
	t->opt = opt;
	memcpy(t->sched_type_list, opt->sched_type_list,
			sizeof(opt->sched_type_list));

	return 0;
}

/**
 * Release the private data allocated by perf_test_setup().
 *
 * @param test  Test instance whose test_priv is freed.
 * @param opt   Unused.
 */
void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);

	rte_free(test->test_priv);
}